#!/usr/bin/env python3
# Startdate: 2020-06-15
# Purpose: fix some images that somehow have a src="data:" that is rendered
#    wrong, but the data-src attribute has the local, valid image uri!

import re
import sys

from bs4 import BeautifulSoup

# Text file listing, one path per line, the HTML files to rewrite in place.
FILE_LIST = "output/files-for-timestamps.txt"

# Matches a src value that has "data:" anywhere before its first newline
# ("." does not match a newline). Compiled once instead of per image.
_DATA_SRC_RE = re.compile(r".*data:.*")


def fiximgs(contents):
    """Parse *contents* as HTML and repair <img> tags whose src holds "data:".

    For every <img> whose ``src`` attribute matches ``.*data:.*`` and which
    has a non-empty ``data-src`` attribute, ``src`` is overwritten with the
    ``data-src`` value. Images without ``src``, or whose ``src`` does not
    contain "data:", are left untouched.

    Args:
        contents: HTML markup handed to ``BeautifulSoup(..., "html.parser")``.

    Returns:
        The parsed ``BeautifulSoup`` document, modified in place. It is
        returned even when processing the images failed part-way.
    """
    soup = BeautifulSoup(contents, "html.parser")
    try:
        for image in soup.find_all("img"):
            # Tag.get returns None for a missing attribute, so an <img>
            # without src is skipped quietly instead of raising KeyError.
            src = image.get("src")
            if not isinstance(src, str) or not _DATA_SRC_RE.match(src):
                continue
            data_src = image.get("data-src")
            if data_src is None:
                # src really does contain "data:", but there is nothing to
                # replace it with.
                print("Was not able to transfer data-src to src where src contained 'data:'")
            elif data_src:
                image["src"] = data_src
    except Exception as err:
        # Best effort: report the problem and still return what was parsed.
        # Exception (not a bare except) lets Ctrl-C / SystemExit propagate.
        print("Error of some sort.", err)
    return soup


def main():
    """Rewrite, in place, every HTML file listed in ``FILE_LIST``."""
    with open(FILE_LIST) as f:
        # Drop blank lines so a stray empty line does not become open("").
        filenames = [name for name in (line.rstrip() for line in f) if name]

    for thisfile in filenames:
        print("Fixing images with src=\"data:\" tag", thisfile)
        with open(thisfile) as tf:
            output = fiximgs(tf.read())
        # NOTE: both the read above and this write use the platform default
        # encoding; pass encoding="utf-8" to both if the files are known to
        # be UTF-8 on a system whose locale is not.
        with open(thisfile, "w") as tf:
            # prettify() already returns str and re-indents the whole document.
            tf.write(output.prettify())


if __name__ == "__main__":
    main()