about summary refs log tree commit diff
path: root/savewebfonts_lib.py
diff options
context:
space:
mode:
Diffstat (limited to 'savewebfonts_lib.py')
-rwxr-xr-x savewebfonts_lib.py 202
1 files changed, 135 insertions, 67 deletions
diff --git a/savewebfonts_lib.py b/savewebfonts_lib.py
index 19790df..0c5943e 100755
--- a/savewebfonts_lib.py
+++ b/savewebfonts_lib.py
@@ -10,62 +10,90 @@
# Reference:
# https://github.com/fonttools/fonttools/issues/1694
# Improve:
-# accept a list of filetypes to save, or exclude? Such as, ['ttf','woff2']
-# Convert woff2 fonts?
# Handle using tinycss old?
# Dependencies:
# req-fedora: python3-beautifulsoup4, python3-tinycss2
-# rec-fedora: python3-fonttools
-import requests, os, json, tempfile
+# rec-fedora: python3-fonttools, libeot-tools
+# req-devuan: python3-bs4, python3-tinycss2
+# rec-devuan: python3-fonttools, eot2ttf
+
+import requests, os, json, tempfile, subprocess
from sys import stderr
from bs4 import BeautifulSoup as bs # python3-beautifulsoup4
from urllib.parse import urljoin, urlparse
import tinycss2 # python3-tinycss2
# defaults for library
-debuglevel = 8
-MAX_STRING_PRINT_LENGTH = 180
+class swf_config:
+ def __init__(
+ self
+ , debuglevel = 8
+ , session = None
+ , MAX_STRING_PRINT_LENGTH = 180
+ , eot2ttf_binary = "eot2ttf"
+ , dryrun = False
+ , convert = False
+ ):
+ self.debuglevel = debuglevel
+ self.MAX_STRING_PRINT_LENGTH = MAX_STRING_PRINT_LENGTH
+ self.eot2ttf_binary = eot2ttf_binary
+ self.dryrun = dryrun
+ if session is None:
+ self.session = get_session()
+ else:
+ self.session = session
+ self.convert = convert
+
+ def __repr__(self):
+ response = "<swf_config"
+ for i in self.__dict__:
+ # omit printing session
+ if "session" not in i:
+ response = response + " " + (str(i)) + "=\"" + str(self.__dict__[i]) + "\","
+ response = response.rstrip(",") + ">"
+ return response
# Functions
def eprint(*args, **kwargs):
print(*args, file=stderr, **kwargs)
def ttfify_filename(filename):
- return filename.rstrip(".woff").rstrip(".woff2").rstrip(".svg").rstrip(".eot") + ".ttf"
-
+ response = filename
+ for end in [".woff2",".woff",".eot",".svg"]:
+ if response.endswith(end):
+ response = response[:-len(end)]
+ # For python 3.9 and higher only:
+ #response = response.removesuffix(end)
+ return response + ".ttf"
def get_session():
session = requests.Session()
session.headers["User-Agent"] = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36"
return session
-def list_all_css_for_page(url, session = None, debuglevel = debuglevel, dryrun=False):
+def list_all_css_for_page(url, config):
"""
Return all css links from a given page
"""
# Reference: https://www.thepythoncode.com/article/extract-web-page-script-and-css-files-in-python
css_files = []
- if not session:
- session = get_session()
- html = session.get(url).content
+ html = config.session.get(url).content
soup = bs(html, "html.parser")
for css in soup.find_all("link"):
if ".css" in css.attrs.get("href"):
# if the link tag has the 'href' attribute
css_url = urljoin(url, css.attrs.get("href"))
- if debuglevel >= 8:
+ if config.debuglevel >= 8:
eprint(f"Found css: {css_url}")
css_files.append(css_url)
return css_files
-def get_webfonts_for_one_css(url, session = None, debuglevel = debuglevel, dryrun=False):
+def get_webfonts_for_one_css(url, config):
"""
Return a list of urls of all webfonts specified in this css file
"""
#theseFonts = []
- if not session:
- session = get_session()
- css = session.get(url).content
+ css = config.session.get(url).content
a = tinycss2.parse_stylesheet_bytes(css)
a = a[0]
b = []
@@ -76,7 +104,7 @@ def get_webfonts_for_one_css(url, session = None, debuglevel = debuglevel, dryru
try:
if "at-rule" in i.type and "font-face" in i.at_keyword:
b.append(i)
- if debuglevel >= 10:
+ if config.debuglevel >= 10:
eprint(str(x) + " " + str(i))
except:
pass
@@ -91,13 +119,13 @@ def get_webfonts_for_one_css(url, session = None, debuglevel = debuglevel, dryru
# make absolute from relative
thisurl = urljoin(url,j.value)
if thisurl not in c:
- if debuglevel >= 5:
+ if config.debuglevel >= 5:
eprint(f"get_webfonts_for_one_css: Found font url {thisurl}")
c.append(thisurl)
# c is a flat list of all font files, many of which are duplicates
return c
-def save_font(url,destdir,session=None, debuglevel = debuglevel, dryrun=False, convert=False):
+def save_font(url,destdir,config):
"""
Given a url, and destination dir, and optionally an existing http session, download the url and save to a file. If convert, save any woff/woff2 to ttf.
"""
@@ -108,8 +136,8 @@ def save_font(url,destdir,session=None, debuglevel = debuglevel, dryrun=False, c
filename=""
filename=os.path.basename(urlparse(url).path)
ext = os.path.splitext(filename)[-1]
- # Do not try to convert .eot
- if convert and not filename.endswith(".ttf") and ext not in [".eot"]:
+ # Do not try to convert .svg
+ if config.convert and not filename.endswith(".ttf") and ext not in [".svg"]:
need_convert = True
orig_filename = filename # in case we cannot load library later
filename = ttfify_filename(filename)
@@ -119,14 +147,11 @@ def save_font(url,destdir,session=None, debuglevel = debuglevel, dryrun=False, c
if url.startswith("data:"):
# not supported!
# WORKHERE: support saving to a tempfile this datastream, probably a base64encoded woff file. Then just convert.
- eprint(f"Warning: Url {url[:MAX_STRING_PRINT_LENGTH]} is unsupported.")
+ eprint(f"Warning: Url {url[:config.MAX_STRING_PRINT_LENGTH]} is unsupported.")
else:
- if not dryrun:
+ if not config.dryrun:
# Download content
- if session:
- response = session.get(url)
- else:
- response = requests.get(url)
+ response = config.session.get(url)
if 'Content-Disposition' in response.headers:
filename=response.headers['Content-Disposition']
@@ -136,47 +161,53 @@ def save_font(url,destdir,session=None, debuglevel = debuglevel, dryrun=False, c
filename = ttfify_filename(filename)
filepath = os.path.join(destdir, filename)
- # Future: logic for woff2 to ttf conversion goes here, approximately
-
try:
- if debuglevel >= 1:
- sstring = "Saving" if not dryrun else "Save"
+ if config.debuglevel >= 1:
+ sstring = "Saving" if not config.dryrun else "Save"
eprint(f"{sstring} {url} to file {filepath}")
- if not dryrun:
+ if not config.dryrun:
if not need_convert:
with open(filepath,'wb') as thisfile:
thisfile.write(response.content)
else:
# need_convert is true, and not dryrun, so call function
- try:
- from fontTools import ttLib
- except Exception as e:
- raise e
- convert_font(url,filepath,session=session,debuglevel=debuglevel,dryrun=dryrun)
+ if ext in [".woff",".woff2"]:
+ try:
+ from fontTools import ttLib
+ except Exception as e:
+ raise e
+ convert_woffwoff2_ttf(url,filepath,config=config)
+ elif ext in [".eot"]:
+ convert_eot_ttf(url,filepath,config=config)
+ else:
+ # no plan for conversion!
+ eprint(f"Warning: no conversion plan for ext {ext} of {url}. Saving as-is.")
+ with open(filepath,'wb') as thisfile:
+ thisfile.write(response.content)
return 0
except Exception as E:
eprint(f"Error when downloading {url}, {E}")
return -1
else: # filepath does exist
- if debuglevel >= 2:
+ if config.debuglevel >= 2:
eprint(f"File {filepath} exists for {url}. Skipping.")
return 0
-def get_all_fonts_from_csslist(all_css, session=None, debuglevel=debuglevel, dryrun=False):
+def get_all_fonts_from_csslist(all_css, config):
all_fonts = []
for this_css in all_css:
- webfonts = get_webfonts_for_one_css(this_css, session, debuglevel=debuglevel, dryrun=dryrun)
+ webfonts = get_webfonts_for_one_css(this_css, config)
for webfont in webfonts:
# filter accepted extensions here. Technically fontconfig only uses ttf.
# Always exclude svg, because those are really big, and not usable files for fontconfig.
- # WORKHERE: allow svg, if convert_font works on svg.
+ # WORKHERE: allow svg, if convert_woffwoff2_ttf works on svg.
if webfont not in all_fonts and '.svg' not in webfont:
- if debuglevel >= 2:
+ if config.debuglevel >= 2:
eprint(f"Found font {webfont}")
all_fonts.append(webfont)
return all_fonts
-def save_all_fonts(all_fonts, destdir, session=None, debuglevel=debuglevel, dryrun=False, convert=False):
+def save_all_fonts(all_fonts, destdir, config):
"""
Given a list of font urls, and the destdir, save all these fonts
"""
@@ -189,7 +220,7 @@ def save_all_fonts(all_fonts, destdir, session=None, debuglevel=debuglevel, dryr
raise NotADirectoryError(20,destdir,"Please clean up this non-directory file and try again")
return -1
try:
- if not dryrun:
+ if not config.dryrun:
os.mkdir(destdir)
except FileExistsError:
pass # it already exists
@@ -198,48 +229,44 @@ def save_all_fonts(all_fonts, destdir, session=None, debuglevel=debuglevel, dryr
# Loop through all webfont files and save them
for font in all_fonts:
- save_font(font, destdir, session=session, debuglevel=debuglevel, dryrun=dryrun, convert=convert)
+ save_font(font, destdir, config)
return 0
-def whitelist_page(url, fontdir, session=None, debuglevel=debuglevel, dryrun=False, convert = False):
+def whitelist_page(url, fontdir, config):
"""
For the given URL, Save all listed webfonts to a directory named
after the domain, underneath the given fontdir. If convert, then
convert all woff, woff2 files to ttf using woffTools
"""
all_fonts = []
- if not session:
- session = get_session()
# List all webfonts called by the given page
- all_css = list_all_css_for_page(url, session, debuglevel=debuglevel, dryrun=dryrun)
- all_fonts = get_all_fonts_from_csslist(all_css, session, debuglevel=debuglevel, dryrun=dryrun)
+ all_css = list_all_css_for_page(url, config)
+ all_fonts = get_all_fonts_from_csslist(all_css, config)
# Prepare destination dir
destdir = os.path.join(fontdir,urlparse(url).netloc)
# Save all fonts to that dir
- return save_all_fonts(all_fonts, destdir, session, debuglevel=debuglevel, dryrun=dryrun, convert=convert)
+ return save_all_fonts(all_fonts, destdir, config)
-def whitelist_harfile(harfile, fontdir, session=None, debuglevel=debuglevel, dryrun=False, convert=False):
+def whitelist_harfile(harfile, fontdir, config):
"""
Given the harfile, save all fonts listed in the discovered css files
"""
all_fonts = []
- if not session:
- session = get_session()
# List all css in the har file
- all_css = extract_css_urls_from_harfile(harfile)
- all_fonts = get_all_fonts_from_csslist(all_css, session, debuglevel=debuglevel, dryrun=dryrun)
+ all_css = extract_css_urls_from_harfile(harfile, config)
+ all_fonts = get_all_fonts_from_csslist(all_css, config)
# Prepare destination dir
destdir = os.path.join(fontdir,"harfiles")
# Save all fonts to that dir
- return save_all_fonts(all_fonts, destdir, session, debuglevel=debuglevel, dryrun=dryrun, convert=convert)
+ return save_all_fonts(all_fonts, destdir, config)
-def extract_css_urls_from_harfile(harfile):
+def extract_css_urls_from_harfile(harfile, config):
"""
Extract all urls that match string "css" from a har file
"""
@@ -254,21 +281,18 @@ def extract_css_urls_from_harfile(harfile):
for d in c:
e = c[x]['request']['url']
if "css" in e and e not in css_files:
- if debuglevel >= 5:
+ if config.debuglevel >= 5:
eprint(e)
css_files.append(e)
x = x + 1
return css_files
-def convert_font(url, filename, session=None, debuglevel=debuglevel, dryrun=False):
+def convert_woffwoff2_ttf(url, filename, config):
"""
Save the given url to filename, with filetype ttf
"""
# This will only be called from save_font when dryrun=False, so the dryrun flag here is useful only if called from some other usage.
- if session:
- response = session.get(url)
- else:
- response = requests.get(url)
+ response = config.session.get(url)
try:
from fontTools import ttLib
except ModuleNotFoundError:
@@ -280,10 +304,54 @@ def convert_font(url, filename, session=None, debuglevel=debuglevel, dryrun=Fals
with tempfile.TemporaryFile() as tf:
tf.write(response.content)
font = ttLib.TTFont(tf)
- if debuglevel >= 3:
- eprint(f"Converting {url[:MAX_STRING_PRINT_LENGTH]} from {font.flavor} to ttf as file {filename}")
+ if config.debuglevel >= 3:
+ eprint(f"Converting {url[:config.MAX_STRING_PRINT_LENGTH]} from {font.flavor} to ttf as file {filename}")
font.flavor = None # restores default value, for non-compressed OpenType
font.save(filename)
return 0
+
+def convert_eot_ttf(uri, filename, config):
+ """
+ Convert the eot font at the given uri to ttf by running the configured eot2ttf binary, writing the result to filename
+ """
+ # save_font only calls this when config.dryrun is False. This function does not check config.dryrun itself, so any other caller must do so.
+ tf = None
+ if "http://" in uri or "https://" in uri or "ftp://" in uri:
+ response = config.session.get(uri)
+ content = response.content
+ tf = tempfile.NamedTemporaryFile()
+ tf.write(content)
+ infile = tf.name # point eot2ttf at the temp file holding the download
+ else:
+ # local file, or some uri scheme not planned yet
+ infile = uri
+ #with open(uri,'rb') as o:
+ # content = o.read()
+ try:
+ r = subprocess.run(["which",config.eot2ttf_binary], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
+ if r.returncode != 0:
+ #raise FileNotFoundError(18,"eot2ttf","cannot find")
+ eprint(f"Warning: Cannot convert {uri} because cannot find eot2ttf. Please set --eotbin.")
+ return -1
+ # so proceed
+ except Exception as e:
+ try: # clean up temp file
+ if tf:
+ tf.close()
+ except:
+ pass
+ raise e
+
+ r=subprocess.run([config.eot2ttf_binary,infile,filename])
+ if r.returncode != 0:
+ eprint(f"Warning: eot2ttf failed on {uri}")
+
+ # exit convert_eot_ttf
+ try: # clean up temp file
+ if tf:
+ tf.close()
+ except:
+ pass
+ return 0
bgstack15