#!/usr/bin/env python3
# File ffbm_lib.py
# License: CC-BY-SA 4.0
# Author: bgstack15@gmail.com
# Startdate: 2020-12-12
# Title: Export Firefox Bookmarks to Html
# Purpose: Add favicons to bookmarks in exported html file
# History:
#    2022-03-05 fix even more UTF-8 crap
# Usage:
#    main command is export_bookmarks_to_html.
# Reference:
#    pragma table_info(moz_bookmarks)
#    https://stackoverflow.com/questions/464516/firefox-bookmarks-sqlite-structure?rq=1
#    https://stackoverflow.com/questions/40408607/attach-database-in-sqlite3-with-python
#    https://2.python-requests.org/en/latest/
#    https://stackoverflow.com/questions/43446353/converting-png-images-to-base64
#    future: https://stackoverflow.com/questions/5119041/how-can-i-get-a-web-sites-favicon
#    https://rosettacode.org/wiki/Strip_control_codes_and_extended_characters_from_a_string#Python
#    conn.text_factory to fix UTF-8 crap https://stackoverflow.com/questions/22751363/sqlite3-operationalerror-could-not-decode-to-utf-8-column/58891189#58891189
# Improve:
#    add to pretty_print the option to select what format the output is in, like "title | url | icon" or similar.
# Documentation:
# Dependencies:
#    devuan: python3
import base64
import contextlib
import html
import os
import sqlite3
import sys
from urllib.parse import urlparse

try:
    import requests
except ImportError:
    # Downloading favicons is a best-effort extra; everything else in this
    # library works without the requests package.
    requests = None

try:
    if "PYTHONIOENCODING" not in os.environ:
        print("You should really run this with env var PYTHONIOENCODING=UTF-8\n", file=sys.stderr)
except Exception:
    # Purely advisory message; never let it prevent the import.
    pass


def _fetch_dicts(conn, query):
    """Run *query* on *conn* and return every row as a dict keyed by column name."""
    cursor = conn.execute(query)
    names = [column[0] for column in cursor.description]
    return [dict(zip(names, row)) for row in cursor]


def _detect_browser(conn):
    """Return "firefox" or "palemoon" for an open places database, or None.

    Detection relies on the syncStatus column of moz_bookmarks: when sqlite
    reports it as missing the database is treated as Pale Moon.
    """
    try:
        conn.execute("SELECT syncStatus FROM moz_bookmarks limit 1")
    except sqlite3.OperationalError as e:
        if str(e).startswith("no such column"):
            return "palemoon"
        print("Other error,", e, file=sys.stderr)
        return None
    return "firefox"


def _download_favicon(domain, iconsize):
    """Return a base64 data URI with the favicon of *domain*, or "" on failure."""
    if requests is None:
        return ""
    try:
        # parameter sz is undocumented but works
        r = requests.get(
            "http://www.google.com/s2/favicons?sz={0}&domain={1}".format(iconsize, domain),
            timeout=10,  # without a timeout one dead connection stalls the whole export
        )
    except requests.RequestException as err:
        print("unable to fetch icon for", domain, " because of:", file=sys.stderr)
        print(err, file=sys.stderr)
        return ""
    if r.status_code == 200:
        # r.content is the useful stuff
        return "data:image/png;base64," + base64.b64encode(r.content).decode('utf-8')
    print("unable to fetch icon for", domain, " because of:", file=sys.stderr)
    print(r, file=sys.stderr)
    return ""


class Bookmark:
    """A bookmark or folder that also acts like the list of its children.

    Pass just the string "root" as the first argument to build the root
    bookmark that gets populated with the contents of a places.sqlite:

        books = Bookmark("root")
    """

    def __init__(self, bid=None, btype=0, url="", title="", dateAdded=0, lastModified=0,
                 bparent=0, position=0, folder_type=0, favicon="", current_bparent=0):
        self.children = []
        if bid == "root":
            self.bid = 0
            self.btype = 0
            self.url = "file:///self"
            self.bparent = 0
            self.position = 0
            self.folder_type = 0
            self.title = "root"
            self.dateAdded = 0
            self.lastModified = 0
            self.favicon = ""
            self.current_bparent = 0
        else:
            self.bid = bid
            self.btype = btype
            self.url = url
            self.bparent = bparent  # int
            self.position = position  # int
            self.folder_type = folder_type
            if title is not None:
                # drop control characters and code points from U+2000 upwards
                self.title = "".join(ch for ch in title if 31 < ord(ch) < 1024 * 8)
            else:
                self.title = ""
            self.dateAdded = dateAdded  # int
            self.lastModified = lastModified  # int
            self.favicon = favicon
            self.current_bparent = current_bparent

    # make this bookmark object basically act like the list of its children
    def count(self):
        """Return the number of direct children."""
        return len(self.children)

    def append(self, newitem):
        """Add *newitem* as the last direct child."""
        self.children.append(newitem)

    def remove(self, olditem):
        """Remove the first direct child that compares equal to *olditem*."""
        self.children.remove(olditem)

    def __iter__(self):
        return iter(self.children)

    def __len__(self):
        return len(self.children)

    def __eq__(self, other):
        # Two bookmarks are the same when they share a bid.
        if not isinstance(other, Bookmark):
            return NotImplemented
        return self.bid == other.bid

    # Defining __eq__ already makes instances unhashable; spell it out.
    __hash__ = None

    def pretty_print(self, i_count=0):
        """Print this bookmark and its descendants as an indented tree."""
        print(i_count * ' |', '+', self.title)
        for child in self.children:
            child.pretty_print(i_count + 1)

    def to_html(self, i_count=0, file=None, iconsize=32):
        """Write this bookmark and all its descendants as html.

        i_count is the nesting level; level 0 also emits the page header and
        footer. file may be a path (only honored at level 0, written as
        UTF-8), an object with a write() method, or None for the current
        sys.stdout. iconsize is the icon edge length in pixels used by the
        page stylesheet.
        """
        top_level = i_count == 0
        if hasattr(file, "write"):
            self._emit(file, i_count, iconsize, top_level)
        elif file is None or not top_level:
            self._emit(sys.stdout, i_count, iconsize, top_level)
        else:
            print("Saving to file {0}".format(file), file=sys.stderr)
            with open(file, "w", encoding="utf-8") as handle:
                self._emit(handle, i_count, iconsize, top_level)

    def _emit(self, out, depth, iconsize, with_page):
        """Write the subtree to *out*, wrapped in a full page when *with_page*."""
        # NOTE(review): the literal markup of this writer was blank in the
        # reviewed copy of this file; the tags in _emit and _write_node are a
        # minimal reconstruction -- confirm against the intended page format.
        if with_page:
            print("<html>", file=out)
            print("<head>", file=out)
            print('<meta charset="utf-8">', file=out)
            print("<title>{0}</title>".format("Bookmarks"), file=out)
            print("<style>img {{ height: {0}px; width: {0}px; }}</style>".format(iconsize), file=out)
            print("</head>", file=out)
            print("<body>", file=out)
        self._write_node(out, depth)
        if with_page:
            print("</body>", file=out)
            print("</html>", file=out)

    def _write_node(self, out, depth):
        """Write this node at nesting level *depth*, then its children one level deeper."""
        # btype == 2 is a folder
        if self.btype == 2:
            li_start = ""
            li_stop = ""
            if depth >= 1:
                li_start = "<li>"
                li_stop = "</li>"
            level = min(depth + 1, 6)  # html only defines h1 through h6
            print("{1}<h{0}>{2}</h{0}>{3}".format(level, li_start, html.escape(self.title), li_stop), file=out)
        else:
            title = self.title
            if title == "":
                title = "Untitled"
            url = self.url or ""
            # specifically exclude a few internal items like "recently bookmarked"
            if not url.startswith(("place:", "about:logopage")):
                icon_html = ""
                if self.favicon:
                    icon_html = '<img src="{0}" class="here" alt=""> '.format(
                        html.escape(str(self.favicon), quote=True))
                # titles and urls come straight from the browser database, so escape them
                print('<li><a href="{0}" add_date="{1}" last_modified="{2}">{3}{4}</a></li>'.format(
                    html.escape(url, quote=True),
                    self.dateAdded,
                    self.lastModified,
                    icon_html,
                    html.escape(title),
                ), file=out)
        # handle children
        if self.count() > 0:
            print("<ul>", file=out)
            for child in self.children:
                child._write_node(out, depth + 1)
            print("</ul>", file=out)

    def sort_children(self):
        """Recursively order children by position (a missing position sorts as 0)."""
        self.children = sorted(self.children, key=lambda r: r.position if r.position is not None else 0)
        for child in self.children:
            child.sort_children()

    def find(self, searchfield, value, i_count=0, debuglev=0):
        """Return the bookmark in this subtree whose id equals *value*, or None.

        searchfield must be "id"; "title" is accepted but not implemented.
        """
        if searchfield != "id" and searchfield != "title":
            print("Error: Bookmark object can only search on fields [\"id\",\"title\"]", file=sys.stderr)
            return None
        if searchfield == "title":
            print("Error: Gotta say unh! Bookmark search for title is not yet implemented.", file=sys.stderr)
            return None
        if self.bid == value:
            return self
        if debuglev >= 5:
            print(i_count, "My id", self.bid, self.title, "is not searched value", value)
        for child in self.children:
            found = child.find(searchfield, value, i_count + 1, debuglev)
            # Compare against None: a Bookmark without children has len() 0
            # and is therefore falsy, which would hide a successful match.
            if found is not None:
                return found
        # did not find it
        return None

    def all_children(self):
        """Return a flat, depth-first list of every bookmark underneath this one."""
        self.all_children_list = []
        for child in self.children:
            self.all_children_list.append(child)
            self.all_children_list.extend(child.all_children())
        return self.all_children_list

    def prune(self, debuglev=0):
        """Drop, recursively, every child whose bparent is not its holder's bid.

        Returns the number of removed entries.
        """
        prune_count = 0
        kept = []
        for child in self.children:
            prune_count += child.prune(debuglev=debuglev)
            if child.bparent != self.bid:
                if debuglev >= 5:
                    print("Removing", child.bid, child.title, "from parent", self.bid, self.title)
                prune_count += 1
            else:
                kept.append(child)
        # Rebuilding the list avoids list.remove(), which goes through the
        # bid-based __eq__ and could delete a different entry with equal bid.
        self.children = kept
        return prune_count

    def unflatten(self, debuglev=0, i_count=0, root=None, safety_limit=2000):
        """
        Main task of moving bookmarks to their intended parent bookmark objects.

        Returns the only child when exactly one is left afterwards, otherwise
        self. At most safety_limit children of each bookmark are processed.
        """
        if debuglev >= 7:
            print("{0} {1} has {2} children.".format(self.bid, self.title, self.count()))
        if i_count == 0 or root is None:
            root = self
        # iterate over a snapshot because moving items appends to child lists
        for seen, child in enumerate(list(self.children), start=1):
            if seen > safety_limit:
                if debuglev >= 1:
                    print("{0} {1} has to stop after {2} children".format(self.bid, self.title, safety_limit), file=sys.stderr)
                break
            self._reparent(child, root, debuglev)
            child.unflatten(debuglev=debuglev, i_count=i_count + 1, root=root, safety_limit=safety_limit)
        self.prune(debuglev=debuglev)
        self.sort_children()
        if self.count() == 1:
            return self.children[0]
        return self

    def _reparent(self, child, root, debuglev):
        """Append *child* to the bookmark in *root*'s tree whose bid is child.bparent."""
        if debuglev >= 7:
            print("{0} {1} is looking for parent {2}".format(child.bid, child.title, child.bparent))
        try:
            has_parent = child.bparent > 0
        except TypeError:
            if debuglev >= 4:
                print("Unable to list bparent for child", child)
            return
        if not has_parent:
            return
        thisparent = root.find('id', child.bparent)
        if thisparent is None:
            if debuglev >= 4:
                print("Unable to find parent item which should be bid", child.bparent)
            return
        if thisparent is child:
            # a bookmark listed as its own parent would create a cycle
            if debuglev >= 4:
                print("FAIL: move", child.bid, child, "to parent", child.bparent, thisparent)
            return
        if self.bid == child.bparent:
            if debuglev >= 6:
                print("info: {0} {1} is already underneath parent {2} {3}".format(child.bid, child.title, thisparent.bid, thisparent.title))
            return
        thisparent.append(child)
        # the old child will still exist until we run prune() which will remove
        # any child whose bparent is not the same as its parent bid.
        if debuglev >= 5:
            print("SUCCESS: move", child.bid, child, "to parent", child.bparent, thisparent)

    def __str__(self):
        try:
            return self.title.encode('utf-8', 'replace').decode('utf-8', 'replace')
        except (AttributeError, UnicodeError):
            return "Encoding error!"

    def load_from_places(self, places_file, icons_file=None, limit=50, fetch_icons=True,
                         debuglev=0, iconsize=32, browser="autodetect"):
        """Append one flat Bookmark per row of moz_bookmarks in *places_file*.

        icons_file defaults to places_file with "places." replaced by
        "favicons.". At most limit bookmarks are loaded. With fetch_icons,
        icons come from the icons database (not for palemoon) and otherwise
        from a remote lookup by domain. browser is "firefox", "palemoon" or
        "autodetect". Nothing is loaded when the browser cannot be determined.
        """
        if limit >= 8000:
            print("WARNING! Limit is really big, but we will proceed.", file=sys.stderr)
        elif limit < 1:
            limit = 1
        if icons_file is None:
            icons_file = places_file.replace("places.", "favicons.")

        # retrieve contents of sqlite
        if debuglev > 4:
            print("Using places file {0} and icons_file {1}".format(places_file, icons_file))
        # sqlite3's own context manager only commits, so close explicitly
        with contextlib.closing(sqlite3.connect(places_file)) as conn:
            conn.text_factory = lambda b: b.decode(errors='ignore')
            # detect browser automatically
            if browser == 'autodetect':
                browser = _detect_browser(conn)
                if browser is None:
                    return
            if debuglev > 3:
                print("Using browser value {0}".format(browser))
            # now conduct regular operations
            if browser == 'firefox':
                bookmark_rows = _fetch_dicts(conn, "SELECT * FROM moz_bookmarks")
                place_rows = _fetch_dicts(conn, "SELECT * FROM moz_places")
            elif browser == 'palemoon':
                bookmark_rows = _fetch_dicts(conn, "SELECT id,type,fk,parent,position,title,keyword_id,folder_type,dateAdded,lastModified,guid FROM moz_bookmarks")
                place_rows = _fetch_dicts(conn, "SELECT id,url,title,rev_host,visit_count,hidden,typed,favicon_id,frecency,last_visit_date,guid,foreign_count,url_hash FROM moz_places")
            else:
                print("Error: unsupported browser value {0}".format(browser), file=sys.stderr)
                return

        icon_rows = []
        if fetch_icons and browser != "palemoon":
            # check first: sqlite3.connect would silently create a missing file
            if os.path.isfile(icons_file):
                try:
                    with contextlib.closing(sqlite3.connect(icons_file)) as icon_conn:
                        icon_rows = _fetch_dicts(icon_conn, "SELECT * FROM moz_icons")
                except sqlite3.Error as err:
                    print("Unable to read icons from {0}: {1}".format(icons_file, err), file=sys.stderr)
            else:
                print("Icons file {0} does not exist; continuing without stored icons.".format(icons_file), file=sys.stderr)

        # Rows are mapped by column name rather than by position so that
        # schemas with extra or missing columns still load.
        db_bookmarks = [
            db_bookmark(rec.get("id"), rec.get("type"), rec.get("fk"), rec.get("parent"),
                        rec.get("position"), rec.get("title"), rec.get("keyword_id"),
                        rec.get("folder_type"), rec.get("dateAdded"), rec.get("lastModified"),
                        rec.get("guid"), rec.get("syncStatus", ""), rec.get("syncChangeCounter", ""))
            for rec in bookmark_rows
        ]
        db_places = [
            db_place(rec.get("id"), rec.get("url"), rec.get("title"), rec.get("rev_host"),
                     rec.get("visit_count"), rec.get("hidden"), rec.get("typed"),
                     rec.get("favicon_id", 0),  # LibreWolf, observed 2021-12-02 has no column for favicon_id.
                     rec.get("frecency"), rec.get("last_visit_date"), rec.get("guid"),
                     rec.get("foreign_count"), rec.get("url_hash"), rec.get("description", ""),
                     rec.get("preview_image_url", ""), rec.get("origin_id", 0))
            for rec in place_rows
        ]
        db_icons = [
            db_icon(rec.get("id"), rec.get("icon_url"),
                    rec.get("fixed_icon_url_hash", rec.get("fixed_icon_url")),
                    rec.get("width"), rec.get("root"), rec.get("color"),
                    rec.get("expire_ms"), rec.get("data"))
            for rec in icon_rows
        ]

        if debuglev >= 1:
            print("Found this many bookmarks, places, and icons:")
            print(len(db_bookmarks))
            print(len(db_places))
            if fetch_icons:
                print(len(db_icons))

        # now all items are loaded.
        # flow:
        # 1. make new list with Bookmark objects with desired info from all 3 old lists
        # 2. move bookmarks with "parent" value to be a child of that parent (see unflatten)
        places_by_id = {place.pid: place for place in db_places}
        icons_by_id = {icon.iid: icon for icon in db_icons}
        downloaded = {}  # domain -> favicon, so every domain is requested once

        for count, item in enumerate(db_bookmarks, start=1):
            if count > limit:
                break
            # print this, so I have something to catch so I can interrupt this process like with | head -n40
            if debuglev >= 1:
                print("{0}: {1}".format(count, item))

            place = places_by_id.get(item.fk)
            url = ""
            if place is not None and place.url is not None:
                url = place.url

            favicon = ""
            if fetch_icons:
                icon = None
                if place is not None and place.favicon_id is not None:
                    icon = icons_by_id.get(place.favicon_id)
                if icon is not None and icon.icon_url:
                    favicon = icon.icon_url
                # use google service to get a favicon for this domain
                if favicon == "" and url != "":
                    try:
                        domain = urlparse(url).netloc
                    except ValueError:
                        domain = ""
                    if domain != "":
                        if domain not in downloaded:
                            downloaded[domain] = _download_favicon(domain, iconsize)
                        favicon = downloaded[domain]

            self.append(
                Bookmark(
                    item.bid,
                    item.btype,
                    url,
                    item.title,
                    item.dateAdded,
                    item.lastModified,
                    item.bparent,
                    item.position,
                    item.folder_type,
                    favicon
                )
            )


class db_bookmark:
    """Plain record holding one row of the moz_bookmarks table."""

    def __init__(self, bid, btype, fk, bparent, position, title, keyword_id, folder_type,
                 dateAdded, lastModified, guid, syncStatus="", syncChangeCounter=""):
        self.bid = bid
        self.btype = btype
        self.bparent = bparent
        self.fk = fk
        self.position = position
        self.title = title
        self.keyword_id = keyword_id
        self.folder_type = folder_type
        self.dateAdded = dateAdded
        self.lastModified = lastModified
        self.guid = guid
        self.syncStatus = syncStatus
        self.syncChangeCounter = syncChangeCounter

    def __str__(self):
        if self.title is None:
            return "(untitled)"
        try:
            return self.title.encode('utf-8', 'replace').decode('utf-8', 'replace')
        except (AttributeError, UnicodeError):
            return "Encoding error on bookmark " + str(self.bid)


class db_place:
    """Plain record holding one row of the moz_places table."""

    def __init__(self, pid, url, title, rev_host, visit_count, hidden, typed, favicon_id,
                 frecency, last_visit_date, guid, foreign_count, url_hash, description,
                 preview_image_url, origin_id):
        self.pid = pid
        self.url = url
        self.title = title
        self.rev_host = rev_host
        self.visit_count = visit_count
        self.hidden = hidden
        self.typed = typed
        self.favicon_id = favicon_id
        self.frecency = frecency
        self.last_visit_date = last_visit_date
        self.guid = guid
        self.foreign_count = foreign_count
        self.url_hash = url_hash
        self.description = description
        self.preview_image_url = preview_image_url
        self.origin_id = origin_id


class db_icon:
    """Plain record holding one row of the moz_icons table."""

    def __init__(self, iid, icon_url, fixed_icon_url, width, root, color, expire_ms, data):
        self.iid = iid
        self.icon_url = icon_url
        self.fixed_icon_url = fixed_icon_url
        self.width = width
        self.root = root
        self.color = color
        self.expire_ms = expire_ms
        self.data = data


def export_bookmarks_to_html(places_file, output_file, limit=2000, fetch_icons=True, iconsize=32, debuglev=0):
    """
    Export to output_file the html representation of the bookmarks in the input places_file.
    This is the main interface from the library.

    places_file may also be a profile directory that contains places.sqlite.
    Returns 1 when no places.sqlite can be found, otherwise None.
    """
    # prepare the places_file in case we were only handed a profile directory name
    orig_places_file = places_file
    if "places.sqlite" not in places_file:
        places_file = os.path.join(places_file, "places.sqlite")
    if not os.path.isfile(places_file):
        print("Provided path {0} does not contain a places.sqlite file. Cannot export these bookmarks to html!".format(orig_places_file), file=sys.stderr)
        return 1
    bm = Bookmark("root")
    bm.load_from_places(places_file, limit=limit, fetch_icons=fetch_icons, iconsize=iconsize, debuglev=debuglev)
    b2 = bm.unflatten(debuglev=debuglev)
    b2.to_html(file=output_file, iconsize=iconsize)


def find_available_places(basedir=os.path.expanduser("~")):
    """Return the directories below basedir that contain a places.sqlite file."""
    import pathlib
    return [str(path.parent) for path in pathlib.Path(basedir).rglob('places.sqlite')]