#!/usr/bin/env python3
# File ffbm_lib.py
# License: CC-BY-SA 4.0
# Author: bgstack15@gmail.com
# Startdate: 2020-12-12
# Title: Export Firefox Bookmarks to Html
# Purpose: Add favicons to bookmarks in exported html file
# History:
# 2022-03-05 fix even more UTF-8 crap
# 2023-04-01 add bare 17-row Firefox moz_places support. Still to do: probably follow origin_id to moz_origins, then into the icons file and on to moz_icons?
# 2023-04-02 removing newmoon support to focus on FF icon support
# Usage:
# main command is export_bookmarks_to_html.
# Reference:
# pragma table_info(moz_bookmarks)
# https://stackoverflow.com/questions/464516/firefox-bookmarks-sqlite-structure?rq=1
# https://stackoverflow.com/questions/40408607/attach-database-in-sqlite3-with-python
# https://2.python-requests.org/en/latest/
# https://stackoverflow.com/questions/43446353/converting-png-images-to-base64
# future: https://stackoverflow.com/questions/5119041/how-can-i-get-a-web-sites-favicon
# https://rosettacode.org/wiki/Strip_control_codes_and_extended_characters_from_a_string#Python
# conn.text_factory to fix UTF-8 crap https://stackoverflow.com/questions/22751363/sqlite3-operationalerror-could-not-decode-to-utf-8-column/58891189#58891189
# Improve:
# add to pretty_print the option to select what format the output is in, like "title | url | icon" or similar.
# Documentation:
# Dependencies:
# devuan: python3
import sqlite3, sys, os
from urllib.parse import urlparse
import requests, base64
# Best-effort warning at import time: bookmark titles routinely contain
# non-ASCII characters, so output encoding should be forced to UTF-8.
try:
    if "PYTHONIOENCODING" not in os.environ:
        # file= is required here; passing sys.stderr positionally would print
        # the stream's repr to stdout instead of writing the warning to stderr.
        print("You should really run this with env var PYTHONIOENCODING=UTF-8\n", file=sys.stderr)
except (OSError, ValueError):
    # A closed or broken stderr must never prevent the module from importing.
    pass
class Bookmark:
    """
    A node in the bookmark tree that also behaves like the list of its children.

    Every node stores the bookmark fields handed to the constructor plus a
    ``children`` list.  ``len()``, iteration, ``next()``, ``append()`` and
    ``remove()`` all operate on that list.  Two nodes compare equal when
    their ``bid`` values are equal.

    Note: because ``__len__`` is defined, a node without children is falsy.
    Test search results with ``is not None`` rather than by truthiness.
    """

    # Title characters are kept only when MIN < ord(char) < MAX, which drops
    # control codes and every code point from U+2000 upward.
    _TITLE_ORD_MIN = 31
    _TITLE_ORD_MAX = 1024 * 8

    # __eq__ is customised below, so instances are unhashable; make that explicit.
    __hash__ = None

    def __init__(self,
        bid=None, btype=0, url="", title="", dateAdded=0, lastModified=0, bparent=0, position=0, folder_type=0, favicon="", current_bparent=0
    ):
        """
        Build a bookmark node.

        Passing the single string ``"root"`` as ``bid`` creates the root node
        used for populating with the contents of a places.sqlite, e.g.
        ``books = Bookmark("root")``.  In that case every other argument is
        ignored and fixed root values are used instead.

        For any other ``bid`` the arguments are stored as given, except that
        ``title`` is filtered through the character whitelist above and a
        ``None`` title becomes the empty string.
        """
        if bid == "root":
            # Python cannot overload __init__, so the root node is selected by
            # this sentinel value and gets its own fixed field values.
            bid, btype, url, title = 0, 0, "file:///self", "root"
            dateAdded = lastModified = 0
            bparent = position = folder_type = current_bparent = 0
            favicon = ""
        self.bid = bid
        self.btype = btype
        self.url = url
        self.bparent = bparent  # int
        self.position = position  # int
        self.folder_type = folder_type
        self.title = "" if title is None else self._strip_title(title)
        self.dateAdded = dateAdded  # int
        self.lastModified = lastModified  # int
        self.favicon = favicon
        self.children = []
        self.current_bparent = current_bparent
        # Cursor used only by __next__; created lazily on first use.
        self._child_cursor = None

    def _strip_title(self, title):
        """Return title without control codes and high code points."""
        low, high = self._TITLE_ORD_MIN, self._TITLE_ORD_MAX
        return "".join(ch for ch in title if low < ord(ch) < high)

    # make this bookmark object basically act like the list of its children
    def count(self):
        """Return the number of direct children."""
        return len(self.children)

    def append(self, newitem):
        """Add newitem to the end of the children list."""
        self.children.append(newitem)

    def remove(self, olditem):
        """Remove the first child that compares equal to olditem."""
        self.children.remove(olditem)

    def __iter__(self):
        """Return a fresh iterator over the direct children."""
        return iter(self.children)

    def __next__(self):
        """
        Return the next direct child from an internal cursor.

        Lists have no ``__next__`` method, so delegating to the children list
        cannot work.  A cursor is kept on the instance instead; it is dropped
        once exhausted so a later ``next()`` starts over.
        """
        cursor = getattr(self, "_child_cursor", None)
        if cursor is None:
            cursor = self._child_cursor = iter(self.children)
        try:
            return next(cursor)
        except StopIteration:
            self._child_cursor = None
            raise

    def __len__(self):
        """Return the number of direct children."""
        return len(self.children)

    def __eq__(self, other):
        """Compare by ``bid``; objects without a ``bid`` are simply unequal."""
        try:
            return self.bid == other.bid
        except AttributeError:
            # Let Python fall back to its default comparison instead of crashing
            # on e.g. ``bookmark == None``.
            return NotImplemented
def pretty_print(self,i_count=0):
    """
    Print this node's title and, recursively, those of its descendants.

    i_count is the nesting depth; each level contributes one ' |' marker in
    front of the '+' that precedes the title.
    """
    depth_marker = ' |' * i_count
    print(depth_marker, '+', self.title)
    next_depth = i_count + 1
    for kid in self.children:
        kid.pretty_print(next_depth)
def to_html(self,i_count=0,file=sys.stdout,iconsize=32):
# Print this node and, recursively, its children using print() calls.
# Parameters:
#   i_count  - nesting level; 0 marks the outermost call, which prints the
#              leading lines and (after the increment below) the closing line.
#   file     - sys.stdout, or a path that is opened for writing; while the
#              file is open sys.stdout is pointed at it so that every print()
#              in the recursion lands in the file.
#   iconsize - only used in the .format() call of one leading line.
# NOTE(review): several string literals below are empty, split across two
# lines, or formatted without placeholders. Presumably markup was stripped
# from this copy of the file; restore the literals from upstream before
# relying on this method. The code is left untouched here.
# i_count is used to determine nested level
if i_count == 0:
f = None
if file != sys.stdout:
print("Saving to file {0}".format(file),file=sys.stderr)
# No encoding is passed to open(), so the platform default applies.
f = open(file,'w')
old_stdout = sys.stdout
sys.stdout = f
print("")
print("
{0}".format("Bookmarks"))
print("".format(iconsize))
print("")
print("")
print("")
# print self
# btype == 2 is a folder
if self.btype == 2:
#print("{0}
".format(self.title))
li_start = ""
li_stop = ""
if i_count >= 1:
li_start = ""
li_stop = ""
# Index {0}, the nesting level capped at 8, is passed but not referenced by
# the format string as it stands.
print("{1}{2}{3}".format(min(i_count,8),li_start,self.title,li_stop))
else:
#print("{0} {1} {2}".format(self.bid,self.btype,self.title))
title = self.title
if title == "":
title = "Untitled"
# specifically exclude a few internal items like "recently bookmarked"
if not self.url.startswith("place:") and not self.url.startswith("about:logopage"):
# Six values are passed; the format string as it stands references only {5}.
print(
'{5}'.format(
self.url,
self.dateAdded,
self.lastModified,
self.favicon,
'class="here" ' if self.favicon is not None else ' ',
title,
)
)
# handle children
i_count+=1
if self.count() > 0:
print("")
for child in self.children:
# file and iconsize are not forwarded; children rely on the redirected
# sys.stdout set up by the outermost call.
child.to_html(i_count)
if self.count() > 0:
print("
")
# close whole thing
if i_count == 1: # main body is now in i_count == 1
print("")
# close file if it is open
# f and old_stdout are only assigned on the i_count == 0 path above, so in
# recursive calls the lookup of f fails and the bare except swallows it.
try:
if f:
f.close()
sys.stdout = old_stdout
except:
pass
def sort_children(self):
    """
    Order the children by their ``position`` value, recursively.

    The children list is replaced by a new, sorted list.  The scratch
    attribute ``new_children`` ends up set to None, as it always has.
    """
    by_position = sorted(self.children, key=lambda node: node.position)
    self.children = by_position
    self.new_children = None
    for node in by_position:
        node.sort_children()
def find(self, searchfield, value, i_count=0, debuglev=0):
    """
    Depth-first search of this node and its descendants.

    Parameters:
      searchfield - "id" (compares ``bid``) or "title" (compares ``title``).
      value       - the value to look for; the first exact match wins.
      i_count     - current depth, only used in debug output.
      debuglev    - at 5 or above each non-matching node is reported.

    Returns the matching node, or None when nothing matches or when
    searchfield is not supported (an error is then printed to stderr).
    """
    if searchfield not in ("id", "title"):
        print("Error: Bookmark object can only search on fields [\"id\",\"title\"]",file=sys.stderr)
        return None
    own_value = self.bid if searchfield == "id" else self.title
    if own_value == value:
        return self
    if debuglev >= 5:
        print(i_count,"My id",self.bid,self.title,"is not searched value",value)
    for child in self.children:
        match = child.find(searchfield, value, i_count + 1, debuglev)
        # Compare against None explicitly: nodes define __len__, so a match
        # that has no children of its own is falsy and would be thrown away.
        if match is not None:
            return match
    # did not find it
    return None
def all_children(self):
    """
    Return a flat list of every node underneath this one.

    Each child is listed first, immediately followed by its own descendants.
    The same list object is also stored on the instance as
    ``all_children_list``.
    """
    flat = []
    self.all_children_list = flat
    for kid in self.children:
        flat.append(kid)
        flat.extend(kid.all_children())
    return flat
def prune(self, debuglev=0):
    """
    Drop every child whose ``bparent`` does not match this node's ``bid``.

    Each child's own subtree is pruned first, whether or not the child itself
    survives.  The children list is updated in place.

    Parameters:
      debuglev - at 5 or above each removal is printed.

    Returns the total number of nodes removed here and further down.
    """
    prune_count = 0
    survivors = []
    for child in self.children:
        prune_count += child.prune(debuglev=debuglev)
        if child.bparent == self.bid:
            survivors.append(child)
            continue
        if debuglev >= 5:
            print("Removing",child.bid,child.title,"from parent",self.bid,self.title)
        prune_count += 1
    # Rebuild by identity rather than calling list.remove(): remove() matches
    # through __eq__ (bid equality) and could delete a different node that
    # merely shares the bid; it also rescans the list for every removal.
    self.children[:] = survivors
    return prune_count
def unflatten(self, debuglev=0, i_count=0, root=None):
    """
    Main task of moving bookmarks to their intended parent bookmark objects.

    Every child with a positive ``bparent`` that is not already underneath
    its parent is appended to the node found via ``root.find('id', bparent)``.
    The stale copy stays in this node's list until ``prune()`` runs at the
    end, which removes any child whose ``bparent`` differs from its holder's
    ``bid``.  Finally the children are sorted by position.

    Parameters:
      debuglev - verbosity; higher values print more progress details.
      i_count  - recursion depth; 0 marks the outermost call.
      root     - node used for parent lookups; the outermost call (or a call
                 without a root) uses this node itself.

    Returns the only child when exactly one child remains, otherwise this node.
    """
    if debuglev >= 7:
        print("{0} {1} has {2} children.".format(self.bid,self.title,self.count()))
    safety_limit = 2000
    if i_count == 0 or root is None:
        root = self
    # Walk a snapshot so that the list being iterated is never the one mutated.
    for seen, child in enumerate(list(self.children), start=1):
        if seen >= safety_limit:
            if debuglev >= 1:
                print("{0} {1} has to stop after {2} children".format(self.bid,self.title,safety_limit),file=sys.stderr)
            break
        if debuglev >= 7:
            print("{0} {1} is looking for parent {2}".format(child.bid,child.title,child.bparent))
        try:
            wants_parent = child.bparent > 0
        except TypeError:
            # bparent is not comparable to an int (for example None).
            wants_parent = False
            if debuglev >= 4:
                print("Unable to list bparent for child",child)
        if wants_parent:
            if self.bid == child.bparent:
                if debuglev >= 6:
                    print("info: {0} {1} is already underneath parent {2} {3}".format(child.bid,child.title,self.bid,self.title))
            else:
                thisparent = root.find('id',child.bparent)
                if thisparent is None:
                    if debuglev >= 4:
                        print("FAIL: move",child.bid,child,"to parent",child.bparent,thisparent)
                else:
                    thisparent.append(child)
                    if debuglev >= 5:
                        print("SUCCESS: move",child.bid,child,"to parent",child.bparent,thisparent)
        # still within the loop over the children
        child.unflatten(debuglev=debuglev, i_count=i_count+1, root=root)
    self.prune(debuglev=debuglev)
    self.sort_children()
    if self.count() == 1:
        return self.children[0]
    return self
def __str__(self):
    """
    Return the title in a form that is always encodable as UTF-8.

    Characters that cannot be encoded are replaced by the codec.  When the
    title is not a string at all, the text "Encoding error!" is returned.
    """
    try:
        return self.title.encode('utf-8','replace').decode('utf-8','replace')
    except (AttributeError, UnicodeError):
        # AttributeError: title is missing or has no encode() (e.g. None, bytes).
        return "Encoding error!"
def load_from_places(self, places_file, icons_file=None, limit=50, fetch_icons=True, debuglev=0, iconsize=32, browser="autodetect"):
    """
    Fill this node with one flat child per row of moz_bookmarks.

    Parameters:
      places_file - path of the places.sqlite database.
      icons_file  - path of the favicons database; by default derived from
                    places_file by replacing "places." with "favicons.".
      limit       - loading stops once the running row count reaches this
                    value, so at most limit-1 bookmarks are loaded.  Values
                    below 1 are raised to 1.
      fetch_icons - when true, attach the icons database and look up a
                    favicon for every bookmark that has a URL.
      debuglev    - verbosity of progress output.
      iconsize    - size requested from the remote favicon service.
      browser     - "firefox", "palemoon" or "autodetect".

    Returns -1 when the database is a Pale Moon one (no longer supported),
    otherwise None.  Raises ValueError for an unknown browser value and
    re-raises unexpected sqlite3.OperationalError from autodetection.
    """
    # sqlite3's own context manager only commits/rolls back; closing() is what
    # actually releases the database file.
    from contextlib import closing

    db_bookmarks = []
    db_places = []
    db_icons = []
    if limit >= 8000:
        print("WARNING! Limit is really big, but we will proceed.",file=sys.stderr)
    elif limit < 1:
        limit = 1
    if icons_file is None:
        icons_file = places_file.replace("places.","favicons.")
    # retrieve contents of sqlite
    if debuglev > 4:
        print("Using places file {0} and icons_file {1}".format(places_file,icons_file))
    with closing(sqlite3.connect(places_file)) as conn:
        # Undecodable bytes in TEXT columns are dropped instead of raising.
        conn.text_factory = lambda b: b.decode(errors = 'ignore')
        # detect browser automatically
        if browser == 'autodetect':
            try:
                conn.execute("SELECT syncStatus FROM moz_bookmarks limit 1")
                browser = "firefox"
            except sqlite3.OperationalError as e:
                if str(e).startswith("no such column"):
                    browser = "palemoon"
                else:
                    print("Other error,",e)
                    raise
        if debuglev > 3:
            print("Using browser value {0}".format(browser))
        if browser == 'palemoon':
            print("Fatal! Palemoon no longer supported as of version 20230402a.",file=sys.stderr)
            return -1
        if browser != 'firefox':
            raise ValueError("Unsupported browser value {0!r}".format(browser))
        # now conduct regular operations
        raw_bookmarks = conn.execute("SELECT id, type, fk, parent, position, title, keyword_id, folder_type, dateAdded, lastModified FROM moz_bookmarks")
        for row in raw_bookmarks:
            bid, btype, fk, parent, position, title, keyword_id, folder_type, dateAdded, lastModified = row
            db_bookmarks.append(db_bookmark(bid, btype, parent, fk, position, title, keyword_id, folder_type, dateAdded, lastModified))
        if fetch_icons:
            # Bind the file name instead of interpolating it into the SQL text.
            conn.execute("ATTACH DATABASE ? AS i", (icons_file,))
            raw_places = conn.execute("SELECT p.id, p.url, p.title, i.id, i.data FROM moz_places p LEFT join moz_pages_w_icons w on p.url = w.page_url LEFT JOIN moz_icons_to_pages ic ON ic.page_id = w.id LEFT JOIN moz_icons i on i.id = ic.icon_id")
        else:
            raw_places = conn.execute("SELECT id, url, title FROM moz_places p")
        for row in raw_places:
            if fetch_icons:
                pid, url, title, iid, icon_data = row
            else:
                pid, url, title = row
                iid = None
                icon_data = None
            db_places.append(db_place(pid, url, title))
            db_icons.append(db_icon(iid, url, icon_data))
    if debuglev >= 1:
        print("Found this many bookmarks, places, and icons:")
        print(len(db_bookmarks))
        print(len(db_places))
        if fetch_icons:
            print(len(db_icons))
    # now all items are loaded.
    # flow:
    # 1. make new list with Bookmark objects with desired info from all 3 old lists
    # 2. move bookmarks with "parent" value to be a child of that parent
    # Index by id once; later rows overwrite earlier ones, which matches the
    # "last match wins" result of scanning the whole list for every bookmark.
    place_by_id = {place.pid: place for place in db_places}
    icon_by_id = {icon.iid: icon for icon in db_icons}

    def fetch_remote_favicon(domain):
        """Return a data: URI from the remote favicon service, or "" on failure."""
        try:
            # parameter sz is undocumented but works
            r = requests.get("http://www.google.com/s2/favicons?sz={0}&domain={1}".format(iconsize,domain), timeout=10)
        except requests.RequestException as e:
            print("unable to fetch icon for",domain," because of:",file=sys.stderr)
            print(e,file=sys.stderr)
            return ""
        if r.status_code != 200:
            print("unable to fetch icon for",domain," because of:",file=sys.stderr)
            print(r,file=sys.stderr)
            return ""
        # r.content is the useful stuff
        return "data:image/png;base64," + base64.b64encode(r.content).decode('utf-8')

    for count, item in enumerate(db_bookmarks, start=1):
        if count >= limit:
            break
        # print this, so I have something to catch so I can interrupt this process like with | head -n40
        if debuglev >= 1:
            print("{0}: {1}".format(count,item))
        place = place_by_id.get(item.fk)
        url = (place.url or "") if place is not None else ""
        favicon = ""
        if fetch_icons:
            # NOTE(review): db_place objects built above carry no favicon_id,
            # so this database lookup currently never matches and every icon
            # comes from the remote service.  Kept for when that link exists.
            favicon_id = getattr(place, "favicon_id", None)
            if favicon_id is not None:
                icon = icon_by_id.get(favicon_id)
                if icon is not None:
                    favicon = icon.icon_url
            if favicon == "" and url != "":
                try:
                    domain = urlparse(url).netloc
                except ValueError:
                    # Malformed URL; there is no domain to ask about.
                    domain = ""
                if domain != "":
                    favicon = fetch_remote_favicon(domain)
        self.append(
            Bookmark(
                item.bid, item.btype, url, item.title, item.dateAdded, item.lastModified, item.bparent, item.position, item.folder_type, favicon
            )
        )
class db_bookmark:
    """
    Plain record holding the values passed in for one bookmark row.

    Note the constructor order: ``bparent`` comes before ``fk``.
    """

    def __init__(self,bid,btype,bparent,fk,position,title,keyword_id,folder_type,dateAdded,lastModified):
        self.bid = bid
        self.btype = btype
        self.bparent = bparent
        self.fk = fk
        self.position = position
        self.title = title
        self.keyword_id = keyword_id
        self.folder_type = folder_type
        self.dateAdded = dateAdded
        self.lastModified = lastModified

    def __str__(self):
        """
        Return the title in a form that is always encodable as UTF-8.

        A missing title yields "(untitled)"; a title that is not a string
        yields "Encoding error on bookmark <bid>".
        """
        if self.title is None:
            return "(untitled)"
        try:
            return self.title.encode('utf-8','replace').decode('utf-8','replace')
        except (AttributeError, UnicodeError):
            return "Encoding error on bookmark " + str(self.bid)
class db_place:
    """Plain record holding the id, url and title passed in for one place."""

    def __init__(self,pid,url,title):
        self.pid, self.url, self.title = pid, url, title
class db_icon:
    """Plain record holding the id, icon_url and data passed in for one icon."""

    def __init__(self,iid,icon_url,data):
        self.iid, self.icon_url, self.data = iid, icon_url, data
def export_bookmarks_to_html(places_file, output_file, limit=2000, fetch_icons=True, iconsize=32, debuglev=0):
    """
    Export to output_file the html representation of the bookmarks in the input places_file.
    This is the main interface from the library.

    Parameters:
      places_file - path of a places.sqlite file, or of the profile directory
                    that contains one.
      output_file - passed to Bookmark.to_html as its ``file`` argument.
      limit, fetch_icons, iconsize, debuglev - forwarded to the loader.

    Returns 1 when no places.sqlite is found in the given directory or the
    loader reports failure (-1); otherwise None.
    """
    # prepare the places_file in case we were only handed a profile directory name
    if "places.sqlite" not in places_file:
        orig_places_file = places_file
        places_file = os.path.join(places_file, "places.sqlite")
        if not os.path.isfile(places_file):
            print("Provided path {0} does not contain a places.sqlite file. Cannot export these bookmarks to html!".format(orig_places_file),file=sys.stderr)
            return 1
    bm = Bookmark("root")
    status = bm.load_from_places(places_file, limit=limit, fetch_icons=fetch_icons, iconsize=iconsize, debuglev=debuglev)
    if status == -1:
        # Nothing was loaded; do not write an empty document.
        return 1
    b2 = bm.unflatten(debuglev=debuglev)
    b2.to_html(file=output_file,iconsize=iconsize)
def find_available_places(basedir=os.path.expanduser("~")):
    """
    Return the directories below basedir that contain a places.sqlite file.

    basedir is searched recursively; each hit contributes the path of its
    containing directory as a string.
    """
    from pathlib import Path
    return [str(hit.parent) for hit in Path(basedir).rglob('places.sqlite')]