diff options
author | B Stack <bgstack15@gmail.com> | 2020-06-09 16:55:15 -0400 |
---|---|---|
committer | B Stack <bgstack15@gmail.com> | 2020-06-09 16:55:15 -0400 |
commit | 0c80c29d0fde63d9617d5769038963375e698628 (patch) | |
tree | da4141376f5f0437ab5e659ebb4f8bdf69a9e0de /remove-useless.py | |
download | glip-0c80c29d0fde63d9617d5769038963375e698628.tar.gz glip-0c80c29d0fde63d9617d5769038963375e698628.tar.bz2 glip-0c80c29d0fde63d9617d5769038963375e698628.zip |
initial commit
Diffstat (limited to 'remove-useless.py')
-rwxr-xr-x | remove-useless.py | 84 |
1 files changed, 84 insertions, 0 deletions
#!/usr/bin/env python3
# Startdate: 2020-05-30 19:30
# Purpose: remove key, useless html elements from slurped pages
from bs4 import BeautifulSoup
import sys

# Classes for which only the FIRST matching element is stripped.
FIRST_MATCH_CLASSES = (
    "nav-sidebar",
    "navbar-gitlab",
    "issuable-context-form",
    "js-issuable-sidebar",
    "js-issuable-actions",
    "js-noteable-awards",
    "disabled-comment",
    "notes-form",
    "btn-edit",
    "js-issuable-edit",
)

# Classes for which EVERY matching element is stripped.
ALL_MATCH_CLASSES = (
    "note-actions",
    "emoji-block",
)

# Text file naming the pages to clean, one path per line.
FILE_LIST = "output/files-for-timestamps.txt"


def remove_useless(contents):
    """Parse *contents* as HTML and strip the unwanted page elements.

    The first element carrying each class in FIRST_MATCH_CLASSES and every
    element carrying a class in ALL_MATCH_CLASSES is replaced with an empty
    string. Classes that do not occur in the document are skipped.

    Args:
        contents: HTML markup, as accepted by BeautifulSoup's "html.parser".

    Returns:
        The modified BeautifulSoup object. It is always returned, including
        when nothing had to be removed.
    """
    soup = BeautifulSoup(contents, "html.parser")

    for class_name in FIRST_MATCH_CLASSES:
        element = soup.find(class_=class_name)
        # find() yields None when the class is absent; that is the expected
        # "nothing to do" case, not an error worth a blanket except.
        if element is not None:
            element.replace_with("")

    for class_name in ALL_MATCH_CLASSES:
        for element in soup.find_all(class_=class_name):
            element.replace_with("")

    # Must be unconditional: the caller invokes .prettify() on the result.
    return soup


# this works, for the single file called
#with open(sys.argv[1],"r") as infile:
#   lines = infile.read()

def main():
    """Clean, in place, every HTML file named in FILE_LIST."""
    with open(FILE_LIST) as f:
        lines = [line.rstrip() for line in f]

    for thisfile in lines:
        if not thisfile:
            # A blank line in the list would otherwise reach open("").
            continue
        print("Removing useless html in file", thisfile)
        # Read with the same encoding used for the rewrite below, so the
        # round trip does not depend on the locale's default encoding.
        with open(thisfile, encoding="utf-8") as tf:
            output = remove_useless(tf.read())
        with open(thisfile, "w", encoding="utf-8") as tf:
            tf.write(output.prettify())


if __name__ == "__main__":
    main()