"""Strip navigation/search chrome from a generated HTML page.

Usage: script.py /path/to/input.html [/path/to/output.html]

Reads the input HTML file, removes a fixed set of elements (sidebars,
search box and its label, print-site banner, header source link), turns the
logo link into a "scroll to top" JavaScript action, and writes the result
to the output path. When no output path is given the input file is
overwritten in place.

Exits with status 1 when the input path is missing from the command line
or does not exist on disk.
"""
import sys
import os.path

from bs4 import BeautifulSoup

# Check & handle arguments
if len(sys.argv) < 2:
    print(f'Usage: {sys.argv[0]} [/path/to/input.html] [/path/to/output.html]')
    sys.exit(1)

input_path = sys.argv[1]
if not os.path.exists(input_path):
    print(f'{input_path} not found')
    sys.exit(1)

# Without an explicit output path the input file is rewritten in place.
output_path = sys.argv[2] if len(sys.argv) > 2 else input_path

# Open & parse HTML input file.
# The encoding is pinned so the result does not depend on the platform
# locale (assumes the input page is UTF-8 -- TODO confirm for other inputs).
with open(input_path, 'r', encoding='utf-8') as fp:
    soup = BeautifulSoup(fp, 'html.parser')

# Delete some useless elements.
# Each entry is (positional args, attribute filters) for soup.find_all().
to_delete = [
    (['div'], {'class': 'md-sidebar'}),
    (['div'], {'class': 'md-search'}),
    (['label'], {'for': '__search'}),
    (['div'], {'id': 'print-site-banner'}),
    (['div'], {'class': 'md-header__source'}),
]
for args, kwargs in to_delete:
    # attrs= is used instead of **kwargs because 'class' and 'for' are
    # Python keywords; passing the dict directly is the equivalent filter.
    for el in soup.find_all(*args, attrs=kwargs):
        el.decompose()

# Change LdapSaisie logo header link to JS scroll top action.
# find() returns None when the page has no logo link; skip the rewrite in
# that case instead of failing on None['href'], mirroring how the deletions
# above tolerate absent elements.
logo_link = soup.find('a', attrs={'class': 'md-logo'})
if logo_link is not None:
    logo_link['href'] = 'javascript:window.scrollTo(0,0)'

# Store resulting HTML document in output file (same pinned encoding as the
# read so non-ASCII text round-trips unchanged).
with open(output_path, 'w', encoding='utf-8') as fp:
    fp.write(str(soup))