blob: e68f45824b8e14e89af029bd50fef84993a78742 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
|
#!/usr/bin/env python3
# Startdate: 2020-05-30 19:30
# Purpose: remove key, useless html elements from slurped pages
from bs4 import BeautifulSoup
import sys
def remove_useless(contents):
    """Parse HTML and strip page elements that are not wanted in the output.

    The targeted CSS classes (sidebars, nav bar, comment form, edit buttons,
    emoji blocks, ...) look like GitLab issue-page chrome, judging by the
    class names -- confirm against the pages actually being processed.

    Args:
        contents: HTML markup handed straight to ``BeautifulSoup`` with the
            ``html.parser`` backend.

    Returns:
        The parsed ``BeautifulSoup`` tree with the matching elements
        replaced by empty strings. The tree is always returned, whether or
        not any of the classes were present.
    """
    # Classes for which only the FIRST matching element is removed.
    # Order is kept from the original code: each lookup runs against the
    # tree as left by the previous removals.
    first_match_classes = (
        "nav-sidebar",
        "navbar-gitlab",
        "issuable-context-form",
        "js-issuable-sidebar",
        "js-issuable-actions",
        "js-noteable-awards",
        "disabled-comment",
        "notes-form",
        "btn-edit",
        "js-issuable-edit",
    )
    # Classes for which EVERY matching element is removed.
    all_match_classes = (
        "note-actions",
        "emoji-block",
    )

    soup = BeautifulSoup(contents, "html.parser")

    for css_class in first_match_classes:
        element = soup.find(class_=css_class)
        # find() returns None when nothing matches; test for that explicitly
        # instead of swallowing every exception with a bare except.
        if element is not None:
            element.replace_with("")

    for css_class in all_match_classes:
        # find_all() returns an empty result when nothing matches, so the
        # loop simply does not run.
        for element in soup.find_all(class_=css_class):
            element.replace_with("")

    return soup
# This variant works for a single file, given as the first command-line argument:
#with open(sys.argv[1],"r") as infile:
# lines = infile.read()
# Driver: the list file names one HTML file per line; each listed file is
# cleaned with remove_useless() and overwritten in place.
with open("output/files-for-timestamps.txt") as f:
    lines = [line.rstrip() for line in f]
for thisfile in lines:
    if not thisfile:
        # A blank line in the list would turn into open("") and crash.
        continue
    print("Removing useless html in file", thisfile)
    # Read with the same encoding that is used for writing below, so the
    # round trip does not depend on the locale's default encoding.
    with open(thisfile, encoding="utf-8") as tf:
        output = remove_useless(tf.read())
    with open(thisfile, "w", encoding="utf-8") as tf:
        # prettify() already returns a str when no encoding is passed.
        tf.write(output.prettify())
|