summaryrefslogtreecommitdiff
path: root/remove-useless.py
blob: a5dd17e5e887a6fc6e27483ae4023536cbac1f25 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
#!/usr/bin/env python3
# Startdate: 2020-05-30 19:30
# Purpose: remove key, useless html elements from slurped pages
from bs4 import BeautifulSoup
import sys

def remove_useless(contents):
   """Blank out unwanted page elements in an HTML document.

   The markup is parsed with BeautifulSoup's "html.parser" and elements
   are selected by CSS class (the class names, e.g. "navbar-gitlab",
   suggest the input is a saved GitLab page). Each selected element is
   replaced with an empty string, which drops it and all of its children
   from the tree.

   Two groups of classes are handled, in this order:

   * classes for which only the FIRST matching element is blanked;
   * classes for which EVERY matching element is blanked.

   A class that matches nothing is silently skipped; any other error is
   allowed to propagate instead of being swallowed.

   Parameters:
      contents: the HTML markup to clean, as accepted by BeautifulSoup.

   Returns:
      The modified BeautifulSoup object (not a string).
   """
   # Only the first element carrying each of these classes is blanked.
   # The order is significant: an earlier removal can take a later
   # class's first match out of the tree, so the next one is found.
   first_match_classes = (
      "nav-sidebar",
      "navbar-gitlab",
      "issuable-context-form",
      "js-issuable-sidebar",
      "js-issuable-actions",
      "js-noteable-awards",
      "disabled-comment",
      "notes-form",
      "btn-edit",
      "js-issuable-edit",
   )
   # Every element carrying one of these classes is blanked.
   every_match_classes = (
      "note-actions",
      "emoji-block",
      "broadcast-message",
   )

   soup = BeautifulSoup(contents, "html.parser")

   for css_class in first_match_classes:
      element = soup.find(class_=css_class)
      # find() returns None when nothing matches; that is the expected
      # "nothing to remove" case and is checked explicitly rather than
      # hidden behind a bare except.
      if element is not None:
         element.replace_with("")

   for css_class in every_match_classes:
      # find_all() returns an empty list when nothing matches.
      for element in soup.find_all(class_=css_class):
         element.replace_with("")

   return soup

# this works, for the single file called
#with open(sys.argv[1],"r") as infile:
#   lines = infile.read()

# The list file names one HTML file per line; each is cleaned in place.
with open("output/files-for-timestamps.txt") as f:
   lines = [line.rstrip() for line in f]

for thisfile in lines:
   # A blank line in the list would make open("") fail; skip it.
   if not thisfile:
      continue
   print("Removing useless html in file",thisfile)
   # Read with the same encoding that is used for writing below, so the
   # round trip does not depend on the platform's default encoding.
   with open(thisfile,encoding='utf-8') as tf:
      output=remove_useless(tf.read())
   # prettify() already returns a str, so it is written directly.
   with open(thisfile,"w",encoding='utf-8') as tf:
      tf.write(output.prettify())
bgstack15