From c0e388f09bfde68764f3ca757c2607f3ac2b144d Mon Sep 17 00:00:00 2001
From: Benjamin Renard
Date: Sun, 8 Oct 2023 23:19:07 +0200
Subject: [PATCH] doc: clean all-in-one HTML file

---
 doc/Makefile                      |  4 +++-
 doc/clean-all-in-one-html-file.py | 55 +++++++++++++++++++++++++++++++
 doc/requirements.txt              |  1 +
 3 files changed, 59 insertions(+), 1 deletion(-)
 create mode 100644 doc/clean-all-in-one-html-file.py

diff --git a/doc/Makefile b/doc/Makefile
index 61567e8c..2b03d305 100644
--- a/doc/Makefile
+++ b/doc/Makefile
@@ -4,7 +4,9 @@ public_html: venv
 	venv/bin/mkdocs build -s
 
 LdapSaisie.html: venv public_html
-	venv/bin/htmlark public_html/print_page/index.html -o LdapSaisie.html
+	venv/bin/htmlark public_html/print_page/index.html -o LdapSaisie.tmp.html
+	venv/bin/python clean-all-in-one-html-file.py LdapSaisie.tmp.html LdapSaisie.html
+	rm -f LdapSaisie.tmp.html
 
 LdapSaisie.pdf: LdapSaisie.html
 	docker run -v $(CURDIR):/workspace pink33n/html-to-pdf --url file:///workspace/LdapSaisie.html --pdf LdapSaisie.pdf
diff --git a/doc/clean-all-in-one-html-file.py b/doc/clean-all-in-one-html-file.py
new file mode 100644
index 00000000..22dd3155
--- /dev/null
+++ b/doc/clean-all-in-one-html-file.py
@@ -0,0 +1,55 @@
+"""Clean the all-in-one HTML documentation file.
+
+Usage: clean-all-in-one-html-file.py /path/to/input.html [/path/to/output.html]
+
+Remove page elements that are useless in a standalone document and turn the
+logo link into a scroll-to-top action. When no output path is given, the
+input file is overwritten with the result.
+"""
+import sys
+import os.path
+
+from bs4 import BeautifulSoup
+
+# Check & handle arguments
+if len(sys.argv) < 2:
+    print(
+        f'Usage: {sys.argv[0]} /path/to/input.html [/path/to/output.html]',
+        file=sys.stderr,
+    )
+    sys.exit(1)
+
+input_path = sys.argv[1]
+
+if not os.path.exists(input_path):
+    print(f'{input_path} not found', file=sys.stderr)
+    sys.exit(1)
+
+output_path = sys.argv[2] if len(sys.argv) > 2 else input_path
+
+# Open & parse HTML input file (explicit encoding: do not depend on the locale;
+# assumes the generated HTML is UTF-8)
+with open(input_path, 'r', encoding='utf-8') as fp:
+    soup = BeautifulSoup(fp, 'html.parser')
+
+# Delete some useless elements
+to_delete = [
+    (['div'], {'class': 'md-sidebar'}),
+    (['div'], {'class': 'md-search'}),
+    (['label'], {'for': '__search'}),
+    (['div'], {'id': 'print-site-banner'}),
+    (['div'], {'class': 'md-header__source'}),
+]
+for args, kwargs in to_delete:
+    for el in soup.find_all(*args, **kwargs):
+        el.decompose()
+
+# Change LdapSaisie logo header link to JS scroll top action
+# (find() returns None when the page has no logo link: skip instead of crashing)
+logo = soup.find('a', attrs={'class': 'md-logo'})
+if logo is not None:
+    logo['href'] = 'javascript:window.scrollTo(0,0)'
+
+# Store resulting HTML document in output file
+with open(output_path, 'w', encoding='utf-8') as fp:
+    fp.write(str(soup))
diff --git a/doc/requirements.txt b/doc/requirements.txt
index 00364688..5da1695e 100644
--- a/doc/requirements.txt
+++ b/doc/requirements.txt
@@ -8,3 +8,4 @@ requests
 htmlark
 mike
 git+https://github.com/jimporter/mike.git@master
+beautifulsoup4