summaryrefslogtreecommitdiff
path: root/remove-useless.py
diff options
context:
space:
mode:
authorB Stack <bgstack15@gmail.com>2020-06-09 16:55:15 -0400
committerB Stack <bgstack15@gmail.com>2020-06-09 16:55:15 -0400
commit0c80c29d0fde63d9617d5769038963375e698628 (patch)
treeda4141376f5f0437ab5e659ebb4f8bdf69a9e0de /remove-useless.py
downloadglip-0c80c29d0fde63d9617d5769038963375e698628.tar.gz
glip-0c80c29d0fde63d9617d5769038963375e698628.tar.bz2
glip-0c80c29d0fde63d9617d5769038963375e698628.zip
initial commit
Diffstat (limited to 'remove-useless.py')
-rwxr-xr-xremove-useless.py84
1 files changed, 84 insertions, 0 deletions
diff --git a/remove-useless.py b/remove-useless.py
new file mode 100755
index 0000000..e68f458
--- /dev/null
+++ b/remove-useless.py
@@ -0,0 +1,84 @@
+#!/usr/bin/env python3
+# Startdate: 2020-05-30 19:30
+# Purpose: remove key, useless html elements from slurped pages
+from bs4 import BeautifulSoup
+import sys
+
+def remove_useless(contents):
+ soup = BeautifulSoup(contents,"html.parser")
+ try:
+ sidebar = soup.find(class_="nav-sidebar")
+ sidebar.replace_with("")
+ except:
+ pass
+ try:
+ navbar = soup.find(class_="navbar-gitlab")
+ navbar.replace_with("")
+ except:
+ pass
+ try:
+ rightbar = soup.find(class_="issuable-context-form")
+ rightbar.replace_with("")
+ except:
+ pass
+ try:
+ rightbar = soup.find(class_="js-issuable-sidebar")
+ rightbar.replace_with("")
+ except:
+ pass
+ try:
+ rightbar = soup.find(class_="js-issuable-actions")
+ rightbar.replace_with("")
+ except:
+ pass
+ try:
+ rightbar = soup.find(class_="js-noteable-awards")
+ rightbar.replace_with("")
+ except:
+ pass
+ try:
+ rightbar = soup.find(class_="disabled-comment")
+ rightbar.replace_with("")
+ except:
+ pass
+ try:
+ rightbar = soup.find(class_="notes-form")
+ rightbar.replace_with("")
+ except:
+ pass
+ try:
+ rightbar = soup.find(class_="btn-edit")
+ rightbar.replace_with("")
+ except:
+ pass
+ try:
+ rightbar = soup.find(class_="js-issuable-edit")
+ rightbar.replace_with("")
+ except:
+ pass
+ try:
+ mylist = soup.find_all(class_="note-actions")
+ for i in mylist:
+ i.replace_with("")
+ except:
+ pass
+ try:
+ mylist = soup.find_all(class_="emoji-block")
+ for i in mylist:
+ i.replace_with("")
+ except:
+ return soup
+
+# this works for the single file named on the command line
+#with open(sys.argv[1],"r") as infile:
+# lines = infile.read()
+
+with open("output/files-for-timestamps.txt") as f:
+ lines = [line.rstrip() for line in f]
+
+for thisfile in lines:
+ print("Removing useless html in file",thisfile)
+ with open(thisfile) as tf:
+ output=remove_useless(tf.read())
+ with open(thisfile,"w",encoding='utf-8') as tf:
+ tf.write(str(output.prettify()))
bgstack15