blob: a5642579527c1b335563bf34cbd60447d93c76ed (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
|
#!/usr/bin/env python3
# Startdate: 2020-05-29 20:40
# Purpose: convert timestamps on gitlab issue web page into UTC
# History:
# 2020-05-30 09:24 add loop through files listed in output/files-for-timestamps.txt
# Usage:
# ls -1 /mnt/public/www/issues/output*.html > output/files-for-timestamps.txt
# ./fix-timestamps.py
# References:
# https://www.crummy.com/software/BeautifulSoup/bs4/doc/#pretty-printing
# https://gitlab.com/bgstack15/vooblystats/-/blob/master/vooblystats.py
# https://bgstack15.wordpress.com/2020/02/16/python3-convert-relative-date-to-utc-timestamp/
# Improve:
# this is hardcoded to work when the pages are shown in EDT.
from bs4 import BeautifulSoup
from datetime import timedelta
from parsedatetime import Calendar
from pytz import timezone
def fix_timestamps(page_text):
    """Rewrite the text of every <time> element in an HTML page as UTC.

    For each <time> tag, the human-readable date in its
    "data-original-title" attribute is parsed, converted to UTC, and stored
    as the tag's visible text in the form YYYY-MM-DDTHH:MMZ.

    Titles ending in "EDT" or "EST" are interpreted as US/Eastern wall-clock
    time; any other title is interpreted as already being UTC.

    Tags that have no "data-original-title" attribute, or whose title cannot
    be parsed as a date, are left unchanged.

    Args:
        page_text: HTML source of the page, as a string.

    Returns:
        The modified BeautifulSoup document.
    """
    soup = BeautifulSoup(page_text, "html.parser")
    cal = Calendar()
    # Build the timezone objects once instead of once per <time> tag.
    utc = timezone("UTC")
    eastern = timezone("US/Eastern")
    for tag in soup.find_all(name='time'):
        title = tag.attrs.get("data-original-title")
        if not title:
            # No tooltip text to convert; leave this tag alone.
            continue
        # Pick the zone the title is written in, and actually hand it to the
        # parser (previously the choice was computed and then ignored).
        source_tz = eastern if title.endswith(("EDT", "EST")) else utc
        dto, parsed = cal.parseDT(datetimeString=title, tzinfo=source_tz)
        if not parsed:
            # A zero flag means nothing was recognised; parseDT would then
            # hand back the current time, which must not be written out.
            continue
        # Let the datetime do the offset arithmetic rather than slicing the
        # "+HH:MM" suffix out of its string form.
        # second precision %S is not needed for this use case.
        tag.string = dto.astimezone(utc).strftime('%Y-%m-%dT%H:%MZ')
    return soup
# Script body: rewrite in place every HTML file listed, one path per line, in
# output/files-for-timestamps.txt.
with open("output/files-for-timestamps.txt") as f:
    # Skip blank lines (e.g. a stray empty line at the end of the list) so
    # they are not treated as file paths.
    lines = [line.rstrip() for line in f if line.strip()]

for thisfile in lines:
    print("Fixing timestamps in file",thisfile)
    # Read with the same encoding that is used for writing below, so the
    # round trip does not depend on the locale's default encoding.
    with open(thisfile,encoding='utf-8') as tf:
        output=fix_timestamps(tf.read())
    with open(thisfile,"w",encoding='utf-8') as tf:
        # prettify() already returns a str.
        tf.write(output.prettify())
|