doc: clean all-in-one HTML file

This commit is contained in:
Benjamin Renard 2023-10-08 23:19:07 +02:00
parent cfa2ff1e6d
commit c0e388f09b
Signed by: bn8
GPG key ID: 3E2E1CE1907115BC
3 changed files with 44 additions and 1 deletions

View file

@ -4,7 +4,9 @@ public_html: venv
venv/bin/mkdocs build -s
LdapSaisie.html: venv public_html
venv/bin/htmlark public_html/print_page/index.html -o LdapSaisie.html
venv/bin/htmlark public_html/print_page/index.html -o LdapSaisie.tmp.html
venv/bin/python clean-all-in-one-html-file.py LdapSaisie.tmp.html LdapSaisie.html
rm -f LdapSaisie.tmp.html
LdapSaisie.pdf: LdapSaisie.html
docker run -v $(CURDIR):/workspace pink33n/html-to-pdf --url file:///workspace/LdapSaisie.html --pdf LdapSaisie.pdf

View file

@ -0,0 +1,40 @@
import sys
import os.path
from bs4 import BeautifulSoup
# Clean an all-in-one HTML documentation file: strip the page elements listed
# in `to_delete` below and make the header logo link scroll back to the top of
# the page instead of navigating away.
#
# Usage: clean-all-in-one-html-file.py /path/to/input.html [/path/to/output.html]
# When no output path is given, the input file is overwritten in place.

# Check & handle arguments
if len(sys.argv) < 2:
    # The input path is mandatory; only the output path is optional.
    print(
        f'Usage: {sys.argv[0]} /path/to/input.html [/path/to/output.html]',
        file=sys.stderr,
    )
    sys.exit(1)

input_path = sys.argv[1]
if not os.path.exists(input_path):
    print(f'{input_path} not found', file=sys.stderr)
    sys.exit(1)

# Default to rewriting the input file when no output path is provided. This is
# safe because the input is fully parsed and closed before the output is opened.
output_path = sys.argv[2] if len(sys.argv) > 2 else input_path

# Open & parse HTML input file.
# The encoding is explicit so the result does not depend on the locale of the
# machine running the build (assumes the generated HTML is UTF-8 — TODO confirm).
with open(input_path, 'r', encoding='utf-8') as fp:
    soup = BeautifulSoup(fp, 'html.parser')

# Delete some useless elements.
# Each entry is (positional args, attribute filters) passed to soup.find_all().
to_delete = [
    (['div'], {'class': 'md-sidebar'}),
    (['div'], {'class': 'md-search'}),
    (['label'], {'for': '__search'}),
    (['div'], {'id': 'print-site-banner'}),
    (['div'], {'class': 'md-header__source'}),
]
for args, kwargs in to_delete:
    for el in soup.find_all(*args, **kwargs):
        el.decompose()

# Change LdapSaisie logo header link to JS scroll top action.
# find() returns None when nothing matches: warn and carry on rather than
# crashing with a TypeError before the output file is written.
logo_link = soup.find('a', attrs={'class': 'md-logo'})
if logo_link is None:
    print('Warning: logo link (a.md-logo) not found, left untouched', file=sys.stderr)
else:
    logo_link['href'] = 'javascript:window.scrollTo(0,0)'

# Store resulting HTML document in output file (same explicit encoding as input)
with open(output_path, 'w', encoding='utf-8') as fp:
    fp.write(str(soup))

View file

@ -8,3 +8,4 @@ requests
htmlark
mike
git+https://github.com/jimporter/mike.git@master
beautifulsoup4