diff options
Diffstat (limited to 'savewebfonts_lib.py')
-rwxr-xr-x | savewebfonts_lib.py | 85 |
1 files changed, 71 insertions, 14 deletions
diff --git a/savewebfonts_lib.py b/savewebfonts_lib.py index a8bcb11..19790df 100755 --- a/savewebfonts_lib.py +++ b/savewebfonts_lib.py @@ -8,25 +8,32 @@ # Purpose: library for whitelisting a page's webfonts by downloading them for current user # Usage: See save-webfonts (1) # Reference: +# https://github.com/fonttools/fonttools/issues/1694 # Improve: # accept a list of filetypes to save, or exclude? Such as, ['ttf','woff2'] # Convert woff2 fonts? # Handle using tinycss old? # Dependencies: # req-fedora: python3-beautifulsoup4, python3-tinycss2 -import requests, os, json +# rec-fedora: python3-fonttools +import requests, os, json, tempfile from sys import stderr from bs4 import BeautifulSoup as bs # python3-beautifulsoup4 from urllib.parse import urljoin, urlparse import tinycss2 # python3-tinycss2 -# default for library +# defaults for library debuglevel = 8 +MAX_STRING_PRINT_LENGTH = 180 # Functions def eprint(*args, **kwargs): print(*args, file=stderr, **kwargs) +def ttfify_filename(filename): + return filename.rstrip(".woff").rstrip(".woff2").rstrip(".svg").rstrip(".eot") + ".ttf" + + def get_session(): session = requests.Session() session.headers["User-Agent"] = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36" @@ -90,20 +97,29 @@ def get_webfonts_for_one_css(url, session = None, debuglevel = debuglevel, dryru # c is a flat list of all font files, many of which are duplicates return c -def save_font(url,destdir,session=None, debuglevel = debuglevel, dryrun=False): +def save_font(url,destdir,session=None, debuglevel = debuglevel, dryrun=False, convert=False): """ - Given a url, and destination dir, and optionally an existing http session, download the url and save to a file + Given a url, and destination dir, and optionally an existing http session, download the url and save to a file. If convert, save any woff/woff2 to ttf. 
""" + need_convert = False + # Derive filename filename="" filename=os.path.basename(urlparse(url).path) + ext = os.path.splitext(filename)[-1] + # Do not try to convert .eot + if convert and not filename.endswith(".ttf") and ext not in [".eot"]: + need_convert = True + orig_filename = filename # in case we cannot load library later + filename = ttfify_filename(filename) filepath = os.path.join(destdir, filename) if not os.path.exists(filepath): if url.startswith("data:"): # not supported! - eprint(f"Warning: Url {url[:120]} is unsupported.") + # WORKHERE: support saving to a tempfile this datastream, probably a base64encoded woff file. Then just convert. + eprint(f"Warning: Url {url[:MAX_STRING_PRINT_LENGTH]} is unsupported.") else: if not dryrun: # Download content @@ -115,6 +131,9 @@ def save_font(url,destdir,session=None, debuglevel = debuglevel, dryrun=False): if 'Content-Disposition' in response.headers: filename=response.headers['Content-Disposition'] eprint(f"Using content-disposition value of {response.headers['Content-Disposition']}") + if need_convert and not filename.endswith(".ttf"): + orig_filename = filename # in case we cannot load library later + filename = ttfify_filename(filename) filepath = os.path.join(destdir, filename) # Future: logic for woff2 to ttf conversion goes here, approximately @@ -124,8 +143,16 @@ def save_font(url,destdir,session=None, debuglevel = debuglevel, dryrun=False): sstring = "Saving" if not dryrun else "Save" eprint(f"{sstring} {url} to file {filepath}") if not dryrun: - with open(filepath,'wb') as thisfile: - thisfile.write(response.content) + if not need_convert: + with open(filepath,'wb') as thisfile: + thisfile.write(response.content) + else: + # need_convert is true, and not dryrun, so call function + try: + from fontTools import ttLib + except Exception as e: + raise e + convert_font(url,filepath,session=session,debuglevel=debuglevel,dryrun=dryrun) return 0 except Exception as E: eprint(f"Error when downloading 
{url}, {E}") @@ -142,13 +169,14 @@ def get_all_fonts_from_csslist(all_css, session=None, debuglevel=debuglevel, dry for webfont in webfonts: # filter accepted extensions here. Technically fontconfig only uses ttf. # Always exclude svg, because those are really big, and not usable files for fontconfig. + # WORKHERE: allow svg, if convert_font works on svg. if webfont not in all_fonts and '.svg' not in webfont: if debuglevel >= 2: eprint(f"Found font {webfont}") all_fonts.append(webfont) return all_fonts -def save_all_fonts(all_fonts, destdir, session=None, debuglevel=debuglevel, dryrun=False): +def save_all_fonts(all_fonts, destdir, session=None, debuglevel=debuglevel, dryrun=False, convert=False): """ Given a list of font urls, and the destdir, save all these fonts """ @@ -170,13 +198,14 @@ def save_all_fonts(all_fonts, destdir, session=None, debuglevel=debuglevel, dryr # Loop through all webfont files and save them for font in all_fonts: - save_font(font, destdir, session=session, debuglevel=debuglevel, dryrun=dryrun) + save_font(font, destdir, session=session, debuglevel=debuglevel, dryrun=dryrun, convert=convert) return 0 -def whitelist_page(url, fontdir, session=None, debuglevel=debuglevel, dryrun=False): +def whitelist_page(url, fontdir, session=None, debuglevel=debuglevel, dryrun=False, convert = False): """ For the given URL, Save all listed webfonts to a directory named - after the domain, underneath the given fontdir. + after the domain, underneath the given fontdir. 
If convert, then + convert all woff, woff2 files to ttf using fontTools """ all_fonts = [] if not session: @@ -190,9 +219,9 @@ def whitelist_page(url, fontdir, session=None, debuglevel=debuglevel, dryrun=Fal destdir = os.path.join(fontdir,urlparse(url).netloc) # Save all fonts to that dir - return save_all_fonts(all_fonts, destdir, session, debuglevel=debuglevel, dryrun=dryrun) + return save_all_fonts(all_fonts, destdir, session, debuglevel=debuglevel, dryrun=dryrun, convert=convert) -def whitelist_harfile(harfile, fontdir, session=None, debuglevel=debuglevel, dryrun=False): +def whitelist_harfile(harfile, fontdir, session=None, debuglevel=debuglevel, dryrun=False, convert=False): """ Given the harfile, save all fonts listed in the discovered css files """ @@ -208,7 +237,7 @@ def whitelist_harfile(harfile, fontdir, session=None, debuglevel=debuglevel, dry destdir = os.path.join(fontdir,"harfiles") # Save all fonts to that dir - return save_all_fonts(all_fonts, destdir, session, debuglevel=debuglevel, dryrun=dryrun) + return save_all_fonts(all_fonts, destdir, session, debuglevel=debuglevel, dryrun=dryrun, convert=convert) def extract_css_urls_from_harfile(harfile): """ @@ -230,3 +259,31 @@ def extract_css_urls_from_harfile(harfile): css_files.append(e) x = x + 1 return css_files + +def convert_font(url, filename, session=None, debuglevel=debuglevel, dryrun=False): + """ + Save the given url to filename, with filetype ttf + """ + # This will only be called from save_font when dryrun=False, so the dryrun flag here is useful only if called from some other usage. + if session: + response = session.get(url) + else: + response = requests.get(url) + try: + from fontTools import ttLib + except ModuleNotFoundError: + eprint("Warning: cannot load fontTools. 
Try installing python3-fonttools") + return -1 + except Exception as e: + raise e + + with tempfile.TemporaryFile() as tf: + tf.write(response.content) + font = ttLib.TTFont(tf) + if debuglevel >= 3: + eprint(f"Converting {url[:MAX_STRING_PRINT_LENGTH]} from {font.flavor} to ttf as file {filename}") + + font.flavor = None # restores default value, for non-compressed OpenType + font.save(filename) + + return 0 |