aboutsummaryrefslogtreecommitdiff
path: root/savewebfonts_lib.py
diff options
context:
space:
mode:
Diffstat (limited to 'savewebfonts_lib.py')
-rwxr-xr-xsavewebfonts_lib.py85
1 files changed, 71 insertions, 14 deletions
diff --git a/savewebfonts_lib.py b/savewebfonts_lib.py
index a8bcb11..19790df 100755
--- a/savewebfonts_lib.py
+++ b/savewebfonts_lib.py
@@ -8,25 +8,32 @@
# Purpose: library for whitelisting a page's webfonts by downloading them for current user
# Usage: See save-webfonts (1)
# Reference:
+# https://github.com/fonttools/fonttools/issues/1694
# Improve:
# accept a list of filetypes to save, or exclude? Such as, ['ttf','woff2']
# Convert woff2 fonts?
# Handle using tinycss old?
# Dependencies:
# req-fedora: python3-beautifulsoup4, python3-tinycss2
-import requests, os, json
+# rec-fedora: python3-fonttools
+import requests, os, json, tempfile
from sys import stderr
from bs4 import BeautifulSoup as bs # python3-beautifulsoup4
from urllib.parse import urljoin, urlparse
import tinycss2 # python3-tinycss2
-# default for library
+# defaults for library
debuglevel = 8
+MAX_STRING_PRINT_LENGTH = 180
# Functions
def eprint(*args, **kwargs):
    """
    Print the given arguments to stderr. Takes the same positional and
    keyword arguments as print, except for file, which is always stderr.
    """
    return print(*args, file=stderr, **kwargs)
def ttfify_filename(filename):
    """
    Return filename with a known webfont extension replaced by ".ttf".

    A trailing ".woff2", ".woff", ".svg" or ".eot" is removed before ".ttf"
    is appended. Any other name simply gets ".ttf" appended.
    """
    # str.rstrip() is deliberately not used here: it strips a set of
    # characters, not a suffix, so "foo.woff" would be reduced to "".
    for ext in (".woff2", ".woff", ".svg", ".eot"):
        if filename.endswith(ext):
            return filename[:-len(ext)] + ".ttf"
    return filename + ".ttf"
+
+
def get_session():
session = requests.Session()
session.headers["User-Agent"] = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36"
@@ -90,20 +97,29 @@ def get_webfonts_for_one_css(url, session = None, debuglevel = debuglevel, dryru
# c is a flat list of all font files, many of which are duplicates
return c
-def save_font(url,destdir,session=None, debuglevel = debuglevel, dryrun=False):
+def save_font(url,destdir,session=None, debuglevel = debuglevel, dryrun=False, convert=False):
"""
- Given a url, and destination dir, and optionally an existing http session, download the url and save to a file
+ Given a url, a destination dir, and optionally an existing http session, download the url and save it to a file. If convert, save any font that is not already .ttf (and is not .eot) as ttf.
"""
+ need_convert = False
+
# Derive filename
filename=""
filename=os.path.basename(urlparse(url).path)
+ ext = os.path.splitext(filename)[-1]
+ # Do not try to convert .eot
+ if convert and not filename.endswith(".ttf") and ext not in [".eot"]:
+ need_convert = True
+ orig_filename = filename # in case we cannot load library later
+ filename = ttfify_filename(filename)
filepath = os.path.join(destdir, filename)
if not os.path.exists(filepath):
if url.startswith("data:"):
# not supported!
- eprint(f"Warning: Url {url[:120]} is unsupported.")
+ # WORKHERE: support saving this data stream (probably a base64-encoded woff file) to a tempfile, then convert it.
+ eprint(f"Warning: Url {url[:MAX_STRING_PRINT_LENGTH]} is unsupported.")
else:
if not dryrun:
# Download content
@@ -115,6 +131,9 @@ def save_font(url,destdir,session=None, debuglevel = debuglevel, dryrun=False):
if 'Content-Disposition' in response.headers:
filename=response.headers['Content-Disposition']
eprint(f"Using content-disposition value of {response.headers['Content-Disposition']}")
+ if need_convert and not filename.endswith(".ttf"):
+ orig_filename = filename # in case we cannot load library later
+ filename = ttfify_filename(filename)
filepath = os.path.join(destdir, filename)
# Future: logic for woff2 to ttf conversion goes here, approximately
@@ -124,8 +143,16 @@ def save_font(url,destdir,session=None, debuglevel = debuglevel, dryrun=False):
sstring = "Saving" if not dryrun else "Save"
eprint(f"{sstring} {url} to file {filepath}")
if not dryrun:
- with open(filepath,'wb') as thisfile:
- thisfile.write(response.content)
+ if not need_convert:
+ with open(filepath,'wb') as thisfile:
+ thisfile.write(response.content)
+ else:
+ # need_convert is true, and not dryrun, so call function
+ try:
+ from fontTools import ttLib
+ except Exception as e:
+ raise e
+ convert_font(url,filepath,session=session,debuglevel=debuglevel,dryrun=dryrun)
return 0
except Exception as E:
eprint(f"Error when downloading {url}, {E}")
@@ -142,13 +169,14 @@ def get_all_fonts_from_csslist(all_css, session=None, debuglevel=debuglevel, dry
for webfont in webfonts:
# filter accepted extensions here. Technically fontconfig only uses ttf.
# Always exclude svg, because those are really big, and not usable files for fontconfig.
+ # WORKHERE: allow svg, if convert_font works on svg.
if webfont not in all_fonts and '.svg' not in webfont:
if debuglevel >= 2:
eprint(f"Found font {webfont}")
all_fonts.append(webfont)
return all_fonts
-def save_all_fonts(all_fonts, destdir, session=None, debuglevel=debuglevel, dryrun=False):
+def save_all_fonts(all_fonts, destdir, session=None, debuglevel=debuglevel, dryrun=False, convert=False):
"""
Given a list of font urls, and the destdir, save all these fonts
"""
@@ -170,13 +198,14 @@ def save_all_fonts(all_fonts, destdir, session=None, debuglevel=debuglevel, dryr
# Loop through all webfont files and save them
for font in all_fonts:
- save_font(font, destdir, session=session, debuglevel=debuglevel, dryrun=dryrun)
+ save_font(font, destdir, session=session, debuglevel=debuglevel, dryrun=dryrun, convert=convert)
return 0
-def whitelist_page(url, fontdir, session=None, debuglevel=debuglevel, dryrun=False):
+def whitelist_page(url, fontdir, session=None, debuglevel=debuglevel, dryrun=False, convert = False):
"""
For the given URL, Save all listed webfonts to a directory named
- after the domain, underneath the given fontdir.
+ after the domain, underneath the given fontdir. If convert, then
+ convert all woff, woff2 files to ttf using fontTools
"""
all_fonts = []
if not session:
@@ -190,9 +219,9 @@ def whitelist_page(url, fontdir, session=None, debuglevel=debuglevel, dryrun=Fal
destdir = os.path.join(fontdir,urlparse(url).netloc)
# Save all fonts to that dir
- return save_all_fonts(all_fonts, destdir, session, debuglevel=debuglevel, dryrun=dryrun)
+ return save_all_fonts(all_fonts, destdir, session, debuglevel=debuglevel, dryrun=dryrun, convert=convert)
-def whitelist_harfile(harfile, fontdir, session=None, debuglevel=debuglevel, dryrun=False):
+def whitelist_harfile(harfile, fontdir, session=None, debuglevel=debuglevel, dryrun=False, convert=False):
"""
Given the harfile, save all fonts listed in the discovered css files
"""
@@ -208,7 +237,7 @@ def whitelist_harfile(harfile, fontdir, session=None, debuglevel=debuglevel, dry
destdir = os.path.join(fontdir,"harfiles")
# Save all fonts to that dir
- return save_all_fonts(all_fonts, destdir, session, debuglevel=debuglevel, dryrun=dryrun)
+ return save_all_fonts(all_fonts, destdir, session, debuglevel=debuglevel, dryrun=dryrun, convert=convert)
def extract_css_urls_from_harfile(harfile):
"""
@@ -230,3 +259,31 @@ def extract_css_urls_from_harfile(harfile):
css_files.append(e)
x = x + 1
return css_files
+
def convert_font(url, filename, session=None, debuglevel=debuglevel, dryrun=False):
    """
    Download the font at url and save it to filename as an uncompressed
    (flavor-less) font file.

    The request is made with session if one is given, otherwise with a plain
    requests.get. Returns 0 on success, or on dryrun, in which case nothing
    is downloaded or written. Returns -1 if the fontTools library cannot be
    imported. HTTP errors and font parsing errors propagate as exceptions.
    """
    # Check for the optional library first, so that nothing is downloaded
    # when the conversion cannot happen anyway.
    try:
        from fontTools import ttLib
    except ModuleNotFoundError:
        eprint("Warning: cannot load fontTools. Try installing python3-fonttools")
        return -1

    # save_font only calls this when dryrun=False; honor the flag for any
    # other caller by skipping both the download and the write.
    if dryrun:
        if debuglevel >= 3:
            eprint(f"Convert {url[:MAX_STRING_PRINT_LENGTH]} to ttf as file {filename}")
        return 0

    response = session.get(url) if session else requests.get(url)
    # Do not hand an HTTP error page to the font parser.
    response.raise_for_status()

    with tempfile.TemporaryFile() as tf:
        tf.write(response.content)
        # Rewind explicitly so parsing starts at the beginning of the data.
        tf.seek(0)
        font = ttLib.TTFont(tf)
        if debuglevel >= 3:
            eprint(f"Converting {url[:MAX_STRING_PRINT_LENGTH]} from {font.flavor} to ttf as file {filename}")

        font.flavor = None # restores default value, for non-compressed OpenType
        # Save while the temporary file is still open, in case the library
        # reads table data from it on demand.
        font.save(filename)

    return 0
bgstack15