diff options
Diffstat (limited to 'fix-timestamps.py')
-rwxr-xr-x | fix-timestamps.py | 46 |
1 files changed, 46 insertions, 0 deletions
#!/usr/bin/env python3
# Startdate: 2020-05-29 20:40
# Purpose: convert timestamps on gitlab issue web page into UTC
# History:
#    2020-05-30 09:24 add loop through files listed in output/files-for-timestamps.txt
# Usage:
#    ls -1 /mnt/public/www/issues/output*.html > output/files-for-timestamps.txt
#    ./fix-timestamps.py
# References:
#    https://www.crummy.com/software/BeautifulSoup/bs4/doc/#pretty-printing
#    https://gitlab.com/bgstack15/vooblystats/-/blob/master/vooblystats.py
#    https://bgstack15.wordpress.com/2020/02/16/python3-convert-relative-date-to-utc-timestamp/
# Improve:
#    the source timezone defaults to US/Eastern (pages shown in EDT/EST);
#    pass source_tz to fix_timestamps() for pages rendered in another zone.
from bs4 import BeautifulSoup
from datetime import timedelta
from parsedatetime import Calendar
from pytz import timezone

# File holding one HTML path per line, as produced by the "Usage" command above.
FILE_LIST = "output/files-for-timestamps.txt"

# Second precision (%S) is not needed for this use case.
UTC_FORMAT = '%Y-%m-%dT%H:%MZ'


def fix_timestamps(page_text, source_tz="US/Eastern"):
    """Rewrite the visible text of every <time> element as a UTC timestamp.

    Each <time> element's "data-original-title" attribute is parsed as a
    wall-clock time in ``source_tz``, converted to UTC, and written back as
    the element's text in ``YYYY-MM-DDTHH:MMZ`` form.

    Args:
        page_text: HTML document as a string.
        source_tz: timezone the page's timestamps were rendered in, either a
            pytz zone name or a tzinfo object with a ``localize`` method.
            Defaults to "US/Eastern", the zone the original script assumed.

    Returns:
        The BeautifulSoup object for the modified document.
    """
    soup = BeautifulSoup(page_text, "html.parser")
    cal = Calendar()
    utc = timezone("UTC")
    local_tz = timezone(source_tz) if isinstance(source_tz, str) else source_tz

    for time_tag in soup.find_all(name='time'):
        title = time_tag.attrs.get("data-original-title")
        if not title:
            # Nothing to convert; leave elements without the attribute alone
            # instead of raising KeyError and aborting the whole page.
            continue

        parsed, status = cal.parseDT(datetimeString=title, tzinfo=local_tz)
        if not status:
            # A zero status flag means nothing was parsed and `parsed` is just
            # the current time; keep the original text rather than writing a
            # bogus timestamp.
            continue

        # astimezone() applies the full offset (hours and minutes), replacing
        # the earlier approach of slicing the hour offset out of str(parsed).
        time_tag.string = parsed.astimezone(utc).strftime(UTC_FORMAT)

    return soup


def main(list_path=FILE_LIST):
    """Fix timestamps in place for every HTML file named in ``list_path``."""
    with open(list_path) as f:
        # Skip blank lines so a trailing newline does not become open("").
        paths = [p for p in (line.rstrip() for line in f) if p]

    for thisfile in paths:
        print("Fixing timestamps in file", thisfile)
        # Read with the same encoding used for writing so the round trip does
        # not depend on the locale's default encoding.
        with open(thisfile, encoding='utf-8') as tf:
            output = fix_timestamps(tf.read())
        with open(thisfile, "w", encoding='utf-8') as tf:
            tf.write(output.prettify())


if __name__ == "__main__":
    main()