#!/usr/bin/env python3
# Startdate: 2020-05-29 20:40
# Purpose: convert timestamps on gitlab issue web page into UTC
# History:
#    2020-05-30 09:24 add loop through files listed in output/files-for-timestamps.txt
# Usage:
#    ls -1 /mnt/public/www/gitlab-issues/*.html > output/files-for-timestamps.txt
#    ./fix-timestamps.py
# References:
#    https://www.crummy.com/software/BeautifulSoup/bs4/doc/#pretty-printing
#    https://gitlab.com/bgstack15/vooblystats/-/blob/master/vooblystats.py
#    https://bgstack15.wordpress.com/2020/02/16/python3-convert-relative-date-to-utc-timestamp/
# Improve:
#    only the EDT and EST suffixes are recognized as US/Eastern; a timestamp
#    with any other suffix is assumed to already be in UTC.

from bs4 import BeautifulSoup
from datetime import timedelta
from parsedatetime import Calendar
from pytz import timezone


def fix_timestamps(page_text):
    """Rewrite the text of every <time> element as a UTC timestamp.

    Each <time> element's "data-original-title" attribute is parsed as a
    date/time. Titles ending in "EDT" or "EST" are interpreted as US/Eastern
    wall time; all others are interpreted as UTC. The element's text is then
    replaced with the same instant formatted as 'YYYY-MM-DDTHH:MMZ'.

    Elements that lack the attribute, or whose attribute cannot be parsed as
    a date/time, are left unchanged.

    Args:
        page_text: HTML source of the page, as a string.

    Returns:
        The parsed BeautifulSoup document with the <time> elements updated.
    """
    soup = BeautifulSoup(page_text, "html.parser")
    cal = Calendar()
    eastern = timezone("US/Eastern")
    utc = timezone("UTC")

    for time_tag in soup.find_all(name='time'):
        title = time_tag.attrs.get("data-original-title")
        if not title:
            # Nothing to convert; keep the element as it is.
            continue

        # Pick the zone the title is expressed in from its suffix.
        source_tz = eastern if title.endswith(("EDT", "EST")) else utc

        dto, parse_status = cal.parseDT(datetimeString=title, tzinfo=source_tz)
        if not parse_status:
            # A status of 0 means nothing was recognized and dto is just the
            # current time; writing that into the page would be wrong.
            continue

        # second precision %S is not needed for this use case.
        time_tag.string = dto.astimezone(utc).strftime('%Y-%m-%dT%H:%MZ')
    return soup


# One file path per line; blank lines are ignored.
with open("output/files-for-timestamps.txt") as f:
    lines = [name for name in (line.rstrip() for line in f) if name]

for thisfile in lines:
    print("Fixing timestamps in file", thisfile)
    # Read with the same encoding that is used for writing below.
    with open(thisfile, encoding='utf-8') as tf:
        output = fix_timestamps(tf.read())
    with open(thisfile, "w", encoding='utf-8') as tf:
        tf.write(output.prettify())