summaryrefslogtreecommitdiff
path: root/use-datasrc-instead-src.py
blob: 90fca9fee51b8be0e5041b3c03e833aef00a3ded (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
#!/usr/bin/env python3
# Startdate: 2020-06-15
# Purpose: fix some images that somehow have a src="data:" value that is rendered wrong, while the data-src attribute holds the local, valid image URI.
from bs4 import BeautifulSoup
import re
import sys

def fiximgs(contents):
   """Point <img> tags with an inline "data:" src at their data-src value.

   Parses contents with the "html.parser" backend. For every <img> whose
   src attribute contains "data:", a non-empty data-src attribute is copied
   into src. Images with no src, or whose src does not contain "data:",
   are left untouched and produce no output.

   Args:
      contents: HTML markup to parse.

   Returns:
      The parsed BeautifulSoup document, with the affected <img> tags
      modified in place.
   """
   soup = BeautifulSoup(contents,"html.parser")
   for image in soup.find_all("img"):
      # Tag.get returns None for a missing attribute instead of raising,
      # so an <img> without src is simply skipped rather than reported.
      src = image.get("src")
      if not src or "data:" not in src:
         continue
      datasrc = image.get("data-src")
      if datasrc:
         image["src"] = datasrc
      else:
         # Only a real failure is reported: src needs replacing but there
         # is no usable (present and non-empty) data-src to replace it with.
         print("Was not able to transfer data-src to src where src contained 'data:'")
   return soup

# The list file names one HTML file per line; each one is rewritten in place.
with open("output/files-for-timestamps.txt") as f:
   lines = [line.rstrip() for line in f]

for thisfile in lines:
   # A blank line would otherwise become open("") and abort the whole run
   # with FileNotFoundError before the remaining files are processed.
   if not thisfile:
      continue
   print("Fixing images with src=\"data:\" tag",thisfile)
   with open(thisfile) as tf:
      output=fiximgs(tf.read())
   #with open(thisfile,"w",encoding='utf-8') as tf:
   with open(thisfile,"w") as tf:
      # prettify() already returns a str when called without an encoding.
      tf.write(output.prettify())
bgstack15