aboutsummaryrefslogtreecommitdiff
path: root/mbbmlib.py
diff options
context:
space:
mode:
authorB Stack <bgstack15@gmail.com>2020-12-13 22:23:47 -0500
committerB Stack <bgstack15@gmail.com>2020-12-13 22:26:15 -0500
commitd349db57519597a7d528c96d7db2d74116dae737 (patch)
treee9f6280dcf5cf7b24fec8f726bf68c441e96b9b2 /mbbmlib.py
downloadmbbmlib-d349db57519597a7d528c96d7db2d74116dae737.tar.gz
mbbmlib-d349db57519597a7d528c96d7db2d74116dae737.tar.bz2
mbbmlib-d349db57519597a7d528c96d7db2d74116dae737.zip
initial commit
Diffstat (limited to 'mbbmlib.py')
-rw-r--r--mbbmlib.py519
1 files changed, 519 insertions, 0 deletions
diff --git a/mbbmlib.py b/mbbmlib.py
new file mode 100644
index 0000000..68cbca2
--- /dev/null
+++ b/mbbmlib.py
@@ -0,0 +1,519 @@
+#!/usr/bin/env python3
+# File mbbmlib.py
+# License: CC-BY-SA 4.0
+# Author: bgstack15@gmail.com
+# Startdate: 2020-12-12
+# Title: Export Firefox Bookmarks to Html
+# Purpose: Add favicons to bookmarks in exported html file
+# History:
+# Usage:
+# main command is export_bookmarks_to_html.
+# Reference:
+# pragma table_info(moz_bookmarks)
+# https://stackoverflow.com/questions/464516/firefox-bookmarks-sqlite-structure?rq=1
+# https://stackoverflow.com/questions/40408607/attach-database-in-sqlite3-with-python
+# https://2.python-requests.org/en/latest/
+# https://stackoverflow.com/questions/43446353/converting-png-images-to-base64
+# future: https://stackoverflow.com/questions/5119041/how-can-i-get-a-web-sites-favicon
+# https://rosettacode.org/wiki/Strip_control_codes_and_extended_characters_from_a_string#Python
+# Improve:
+# add to pretty_print the option to select what format the output is in, like "title | url | icon" or similar.
+# Documentation:
+# Dependencies:
+# devuan: python3
+import sqlite3, sys, os
+from urllib.parse import urlparse
+import requests, base64
+
# Warn when the interpreter's I/O encoding has not been pinned through the
# PYTHONIOENCODING environment variable.  The warning goes to stderr so it can
# never end up inside HTML that is being written to stdout.
try:
    if "PYTHONIOENCODING" not in os.environ:
        print("You should really run this with env var PYTHONIOENCODING=UTF-8\n", file=sys.stderr)
except (OSError, ValueError):
    # A closed or broken stderr must not prevent the module from importing.
    pass
+
class Bookmark:
    """
    One node of a bookmark tree that also behaves like the list of its children.

    A node is either a folder (``btype == 2``), any other bookmark row, or the
    synthetic root created with ``Bookmark("root")``.  Typical flow:
    ``load_from_places`` fills a root with a flat list of nodes, ``unflatten``
    moves every node below the node whose ``bid`` equals its ``bparent``, and
    ``to_html`` writes the resulting tree.

    Note: ``len(node)`` is the number of children, so a node without children
    is falsy.  Always compare against ``None`` when testing "was a node found".
    Because ``__eq__`` is defined without ``__hash__``, instances are
    unhashable.
    """

    def __init__(self,
            bid=None, btype=0, url="", title="", dateAdded=0, lastModified=0, bparent=0, position=0, folder_type=0, favicon="", current_bparent=0
        ):
        """
        Build a node from explicit values, or a synthetic root.

        Passing the single string ``"root"`` as *bid* creates the container
        that ``load_from_places`` fills; every other argument is then ignored:

            books = Bookmark("root")

        A *title* of ``None`` becomes ``""``; otherwise control characters and
        code points of 8192 and above are stripped from it.
        """
        if bid == "root":
            bid, btype, url, title = 0, 0, "file:///self", "root"
            dateAdded = lastModified = 0
            bparent = position = folder_type = current_bparent = 0
            favicon = ""
        self.bid = bid
        self.btype = btype
        self.url = url
        self.bparent = bparent  # int
        self.position = position  # int
        self.folder_type = folder_type
        self.title = self._clean_title(title)
        self.dateAdded = dateAdded  # int
        self.lastModified = lastModified  # int
        self.favicon = favicon
        self.children = []
        self.current_bparent = current_bparent

    @staticmethod
    def _clean_title(title):
        """Return *title* without control/extended characters ("" for None)."""
        if title is None:
            return ""
        return "".join(ch for ch in title if 31 < ord(ch) < 1024*8)

    # make this bookmark object basically act like the list of its children
    def count(self):
        """Return the number of direct children."""
        return len(self.children)

    def append(self, newitem):
        """Add *newitem* as the last direct child."""
        self.children.append(newitem)

    def remove(self, olditem):
        """Remove the first direct child that compares equal to *olditem*."""
        self.children.remove(olditem)

    def __iter__(self):
        return iter(self.children)

    def __len__(self):
        return len(self.children)

    def __eq__(self, other):
        """Two bookmarks are equal when their ``bid`` values are equal."""
        if not isinstance(other, Bookmark):
            return NotImplemented
        return self.bid == other.bid

    def __str__(self):
        """Return the title; unencodable characters are replaced."""
        try:
            return self.title.encode('utf-8', 'replace').decode('utf-8', 'replace')
        except (AttributeError, UnicodeError):
            return "Encoding error!"

    def pretty_print(self, i_count=0):
        """Print this node and its descendants as an indented text tree."""
        print(i_count*' |', '+', self.title)
        for child in self.children:
            child.pretty_print(i_count + 1)

    def to_html(self, i_count=0, file=sys.stdout, iconsize=32):
        """
        Write this subtree as HTML.

        i_count  -- nesting level.  0 (the default) writes a complete document;
                    any other value writes only the fragment for this subtree.
        file     -- a path to create (written as UTF-8), or any object with a
                    ``write`` method.  ``sys.stdout`` / ``None`` mean "whatever
                    sys.stdout is at call time".  As before, a path is only
                    honoured for the top-level call (i_count == 0).
        iconsize -- width and height in pixels for favicons.
        """
        if file is None or file is sys.stdout or file is sys.__stdout__:
            stream = sys.stdout
        elif hasattr(file, "write"):
            stream = file
        else:
            stream = None  # *file* is a path

        if i_count != 0:
            self._write_html(stream if stream is not None else sys.stdout, i_count)
            return

        if stream is not None:
            self._write_document(stream, iconsize)
            return

        print("Saving to file {0}".format(file), file=sys.stderr)
        # The context manager closes the file even when rendering fails, and
        # sys.stdout is never swapped out from under the rest of the program.
        with open(file, 'w', encoding='utf-8') as handle:
            self._write_document(handle, iconsize)

    def _write_document(self, out, iconsize):
        """Write a complete HTML page for this subtree to *out*."""
        print("<html>", file=out)
        print("<head><title>{0}</title>".format("Bookmarks"), file=out)
        # Images are hidden unless they carry class "here", which is only
        # emitted for bookmarks that actually have a favicon.
        print("<style>img { width: 0px; height: 0px; }</style>", file=out)
        print("<style>img.here {{ width: {0}px; height: {0}px; }}</style>".format(iconsize), file=out)
        print("</head>", file=out)
        print("<body>", file=out)
        self._write_html(out, 0)
        print("</body></html>", file=out)

    def _write_html(self, out, depth):
        """Write this node, then its children as a nested <ul>, to *out*."""
        from html import escape

        # btype == 2 is a folder
        if self.btype == 2:
            li_start, li_stop = ("<li>", "</li>") if depth >= 1 else ("", "")
            level = max(1, min(depth, 6))  # HTML only defines h1..h6
            print("<h{0}>{1}{2}{3}</h{0}>".format(level, li_start, escape(self.title), li_stop), file=out)
        else:
            url = self.url or ""
            favicon = self.favicon or ""
            # specifically exclude a few internal items like "recently bookmarked"
            if not url.startswith(("place:", "about:logopage")):
                print(
                    '<li><a href="{0}" add_date="{1}" last_modified="{2}" ICON="{3}"><img {4}src="{3}"/>{5}</a></li>'.format(
                        escape(url, quote=True),
                        self.dateAdded,
                        self.lastModified,
                        escape(favicon, quote=True),
                        'class="here" ' if favicon else '',
                        escape(self.title or "Untitled"),
                    ),
                    file=out,
                )

        if self.children:
            print("<ul>", file=out)
            for child in self.children:
                child._write_html(out, depth + 1)
            print("</ul>", file=out)

    def sort_children(self):
        """Recursively order children by their ``position`` attribute."""
        self.children.sort(key=lambda node: node.position)
        for child in self.children:
            child.sort_children()

    def find(self, searchfield, value, i_count=0, debuglev=0):
        """
        Return the first node (this one or a descendant, depth first) whose
        id or title equals *value*, or None.

        searchfield -- "id" (compares ``bid``) or "title".  Anything else
                       prints an error to stderr and returns None.
        i_count     -- nesting level, only used in debug output.
        """
        attribute = {"id": "bid", "title": "title"}.get(searchfield)
        if attribute is None:
            print("Error: Bookmark object can only search on fields [\"id\",\"title\"]", file=sys.stderr)
            return None
        if getattr(self, attribute) == value:
            return self
        if debuglev >= 5:
            print(i_count, "My", searchfield, self.bid, self.title, "is not searched value", value)
        for child in self.children:
            match = child.find(searchfield, value, i_count + 1, debuglev)
            # Compare with None: a matching node without children is falsy
            # because of __len__, and must still be reported as found.
            if match is not None:
                return match
        return None

    def all_children(self):
        """Return a flat, depth-first list of every node below this one."""
        flat = []
        for child in self.children:
            flat.append(child)
            flat.extend(child.all_children())
        # kept as an attribute because earlier versions exposed it
        self.all_children_list = flat
        return flat

    def prune(self, debuglev=0):
        """
        Recursively drop every child whose ``bparent`` is not its container's
        ``bid``.  Returns the number of removed entries, including those
        removed inside subtrees that are themselves dropped.
        """
        prune_count = 0
        kept = []
        for child in self.children:
            prune_count += child.prune(debuglev=debuglev)
            if child.bparent == self.bid:
                kept.append(child)
                continue
            if debuglev >= 5:
                print("Removing", child.bid, child.title, "from parent", self.bid, self.title)
            prune_count += 1
        # Filter by position instead of list.remove(): remove() matches by bid
        # equality and could delete a different entry.  Slice assignment keeps
        # the same list object.
        self.children[:] = kept
        return prune_count

    def unflatten(self, debuglev=0, i_count=0, root=None):
        """
        Main task of moving bookmarks to their intended parent bookmark objects.

        Every child is appended to the node (searched from *root*) whose bid
        equals the child's bparent; stale entries are then pruned and children
        are sorted by position.  Call it on the loaded root without arguments.
        At most 1999 children per node are processed.

        Returns the single remaining child if exactly one is left, else self.
        """
        if debuglev >= 7:
            print("{0} {1} has {2} children.".format(self.bid, self.title, self.count()))
        safety_limit = 2000
        if i_count == 0 or root is None:
            root = self

        # Iterate the live list (not a copy): children appended to this node
        # while the loop runs are visited as well.
        for seen, child in enumerate(self.children, start=1):
            if seen >= safety_limit:
                if debuglev >= 1:
                    print("{0} {1} has to stop after {2} children".format(self.bid, self.title, safety_limit), file=sys.stderr)
                break
            self._move_to_parent(child, root, debuglev)
            child.unflatten(debuglev=debuglev, i_count=i_count + 1, root=root)

        self.prune(debuglev=debuglev)
        self.sort_children()
        if self.count() == 1:
            return self.children[0]
        return self

    def _move_to_parent(self, child, root, debuglev):
        """Append *child* to the node its bparent names, unless that is self."""
        if debuglev >= 7:
            print("{0} {1} is looking for parent {2}".format(child.bid, child.title, child.bparent))
        try:
            has_parent = child.bparent > 0
        except TypeError:
            if debuglev >= 4:
                print("Unable to list bparent for child", child)
            return
        if not has_parent:
            return
        if self.bid == child.bparent:
            if debuglev >= 6:
                print("info: {0} {1} is already underneath parent {2} {3}".format(child.bid, child.title, self.bid, self.title))
            return
        thisparent = root.find('id', child.bparent)
        if thisparent is None:
            if debuglev >= 4:
                print("Unable to find parent item which should be bid", child.bparent)
            return
        if thisparent is child:
            # a row that names itself as parent would create a cycle
            if debuglev >= 4:
                print("FAIL: move", child.bid, child, "to parent", child.bparent, thisparent)
            return
        thisparent.append(child)
        # the old child will still exist until we run prune() which will remove
        # any child whose bparent is not the same as its parent bid.
        if debuglev >= 5:
            print("SUCCESS: move", child.bid, child, "to parent", child.bparent, thisparent)

    def load_from_places(self, places_file, icons_file = None, limit = 50, fetch_icons=True, debuglev = 0, iconsize = 32, browser = "autodetect"):
        """
        Append one flat child per bookmark row found in *places_file*.

        places_file -- path of the places sqlite database.
        icons_file  -- path of the favicons database; defaults to places_file
                       with "places." replaced by "favicons.".
        limit       -- maximum number of bookmark rows to load (minimum 1).
        fetch_icons -- when true, look icons up in icons_file and, failing
                       that, download one per host over the network.
        iconsize    -- pixel size requested for downloaded icons.
        browser     -- "autodetect", "firefox" or "palemoon".  Columns are read
                       by name, so the value only affects debug output.

        Raises sqlite3.Error when a database cannot be read.
        """
        if limit >= 8000:
            print("WARNING! Limit is really big, but we will proceed.", file=sys.stderr)
        elif limit < 1:
            limit = 1

        if icons_file is None:
            icons_file = places_file.replace("places.", "favicons.")

        if debuglev > 4:
            print("Using places file {0} and icons_file {1}".format(places_file, icons_file))

        # "with connection" only wraps a transaction; close explicitly.
        conn = sqlite3.connect(places_file)
        try:
            conn.row_factory = sqlite3.Row
            if browser == 'autodetect':
                browser = self._detect_browser(conn)
            if debuglev > 3:
                print("Using browser value {0}".format(browser))
            bookmark_rows = conn.execute("SELECT * FROM moz_bookmarks").fetchall()
            place_rows = conn.execute("SELECT * FROM moz_places").fetchall()
        finally:
            conn.close()

        icon_rows = []
        if fetch_icons:
            if os.path.isfile(icons_file):
                conn = sqlite3.connect(icons_file)
                try:
                    icon_rows = conn.execute("SELECT * FROM moz_icons").fetchall()
                finally:
                    conn.close()
            else:
                # sqlite3.connect() would silently create an empty database here
                print("Icons file {0} not found, using downloaded icons only".format(icons_file), file=sys.stderr)

        col = self._column
        db_bookmarks = [
            db_bookmark(
                row["id"], row["type"], row["fk"], row["parent"], row["position"], row["title"],
                col(row, "keyword_id"), col(row, "folder_type"), row["dateAdded"], row["lastModified"],
                col(row, "guid"), col(row, "syncStatus", ""), col(row, "syncChangeCounter", ""),
            )
            for row in bookmark_rows
        ]
        db_places = [
            db_place(
                row["id"], row["url"], col(row, "title"), col(row, "rev_host"), col(row, "visit_count"),
                col(row, "hidden"), col(row, "typed"), col(row, "favicon_id"), col(row, "frecency"),
                col(row, "last_visit_date"), col(row, "guid"), col(row, "foreign_count"), col(row, "url_hash"),
                col(row, "description", ""), col(row, "preview_image_url", ""), col(row, "origin_id", 0),
            )
            for row in place_rows
        ]
        db_icons = []
        for row in icon_rows:
            # positional like the original unpacking, but tolerant of extra or
            # missing trailing columns
            values = tuple(row)[:8]
            values += (None,) * (8 - len(values))
            db_icons.append(db_icon(*values))

        if debuglev >= 1:
            print("Found this many bookmarks, places, and icons:")
            print(len(db_bookmarks))
            print(len(db_places))
            if fetch_icons:
                print(len(db_icons))

        # One dictionary lookup per bookmark instead of scanning every row.
        places_by_id = {place.pid: place for place in db_places}
        icons_by_id = {icon.iid: icon for icon in db_icons}
        downloaded = {}  # host -> data URI ("" when the download failed)

        for count, item in enumerate(db_bookmarks[:limit], start=1):
            # print this, so I have something to catch so I can interrupt this process like with | head -n40
            if debuglev >= 1:
                print("{0}: {1}".format(count, item))

            place = places_by_id.get(item.fk)
            url = (place.url or "") if place is not None else ""
            favicon = ""

            if fetch_icons:
                if place is not None and place.favicon_id is not None:
                    icon = icons_by_id.get(place.favicon_id)
                    if icon is not None and icon.icon_url:
                        favicon = icon.icon_url
                if favicon == "" and url != "":
                    favicon = self._download_favicon(url, iconsize, downloaded)

            self.append(
                Bookmark(
                    item.bid, item.btype, url, item.title, item.dateAdded, item.lastModified, item.bparent, item.position, item.folder_type, favicon
                )
            )

    @staticmethod
    def _column(row, name, default=None):
        """Return row[name], or *default* when the table has no such column."""
        try:
            return row[name]
        except (IndexError, KeyError):
            return default

    @staticmethod
    def _detect_browser(conn):
        """Return "firefox" if moz_bookmarks has a syncStatus column, else "palemoon"."""
        try:
            conn.execute("SELECT syncStatus FROM moz_bookmarks limit 1")
        except sqlite3.OperationalError as e:
            if str(e).startswith("no such column"):
                return "palemoon"
            print("Other error,", e, file=sys.stderr)
            raise
        return "firefox"

    @staticmethod
    def _download_favicon(url, iconsize, cache):
        """
        Return a base64 data URI with a favicon for the host of *url*, or "".

        Results (including failures) are remembered in *cache*, keyed by host,
        so each host is requested at most once per load.
        """
        try:
            # hostname, not netloc: never send user:password or port numbers
            host = urlparse(url).hostname
        except ValueError:
            return ""
        if not host:
            return ""
        if host in cache:
            return cache[host]

        favicon = ""
        try:
            # fetch it from google
            # parameter sz is undocumented but works
            r = requests.get("http://www.google.com/s2/favicons?sz={0}&domain={1}".format(iconsize, host), timeout=10)
        except requests.RequestException as err:
            print("unable to fetch icon for", host, " because of:", file=sys.stderr)
            print(err, file=sys.stderr)
        else:
            if r.status_code == 200:
                # r.content is the useful stuff
                favicon = "data:image/png;base64," + base64.b64encode(r.content).decode('utf-8')
            else:
                print("unable to fetch icon for", host, " because of:", file=sys.stderr)
                print(r, file=sys.stderr)
        cache[host] = favicon
        return favicon
+
class db_bookmark:
    """
    Plain record holding the values of one moz_bookmarks row.

    Note the parameter order: the place foreign key (fk) comes before the
    parent id (bparent).  syncStatus and syncChangeCounter default to "" for
    databases that do not provide them.
    """

    def __init__(self,bid,btype,fk,bparent,position,title,keyword_id,folder_type,dateAdded,lastModified,guid,syncStatus="",syncChangeCounter=""):
        self.bid = bid
        self.btype = btype
        self.bparent = bparent
        self.fk = fk
        self.position = position
        self.title = title
        self.keyword_id = keyword_id
        self.folder_type = folder_type
        self.dateAdded = dateAdded
        self.lastModified = lastModified
        self.guid = guid
        self.syncStatus = syncStatus
        self.syncChangeCounter = syncChangeCounter

    def __str__(self):
        """
        Return the title with unencodable characters replaced, "(untitled)"
        when there is no title, or an error text when the title is not a str.
        """
        fallback = "Encoding error on bookmark " + str(self.bid)
        if self.title is None:
            return "(untitled)"
        try:
            # the round trip replaces characters UTF-8 cannot represent
            return self.title.encode('utf-8', 'replace').decode('utf-8', 'replace')
        except (AttributeError, UnicodeError):
            return fallback
+
class db_place:
    """Plain record holding the values of one moz_places row."""

    def __init__(self,pid,url,title,rev_host,visit_count,hidden,typed,favicon_id,frecency,last_visit_date,guid,foreign_count,url_hash,description,preview_image_url,origin_id):
        # identity of the place
        self.pid, self.url, self.title, self.rev_host = pid, url, title, rev_host
        # usage flags and counters
        self.visit_count, self.hidden, self.typed = visit_count, hidden, typed
        # id used to look up an icon for this place
        self.favicon_id = favicon_id
        self.frecency, self.last_visit_date = frecency, last_visit_date
        self.guid, self.foreign_count, self.url_hash = guid, foreign_count, url_hash
        # values the caller fills with placeholders when the database lacks them
        self.description, self.preview_image_url, self.origin_id = description, preview_image_url, origin_id
+
class db_icon:
    """Plain record holding the values of one moz_icons row."""

    def __init__(self,iid,icon_url,fixed_icon_url,width,root,color,expire_ms,data):
        # id and location of the icon
        self.iid, self.icon_url, self.fixed_icon_url = iid, icon_url, fixed_icon_url
        # remaining columns, stored as delivered
        self.width, self.root, self.color = width, root, color
        self.expire_ms, self.data = expire_ms, data
+
def export_bookmarks_to_html(places_file, output_file, limit=2000, fetch_icons=True, iconsize=32, debuglev=0):
    """
    Export to output_file the html representation of the bookmarks in the input places_file.
    This is the main interface from the library.

    places_file may be the places.sqlite file itself or the profile directory
    that contains it (str or path-like).  The other arguments are passed on to
    Bookmark.load_from_places / Bookmark.to_html.

    Returns 1 (after printing an error to stderr) when no places.sqlite file
    can be found, 0 after a successful export.
    """
    orig_places_file = places_file
    places_file = os.fspath(places_file)

    # prepare the places_file in case we were only handed a profile directory name
    if os.path.isdir(places_file) or "places.sqlite" not in places_file:
        places_file = os.path.join(places_file, "places.sqlite")
    # Checked for every input: opening a missing path with sqlite3 would
    # create an empty database instead of failing.
    if not os.path.isfile(places_file):
        print("Provided path {0} does not contain a places.sqlite file. Cannot export these bookmarks to html!".format(orig_places_file),file=sys.stderr)
        return 1

    # Derive the icon database from the file name only, so a directory whose
    # name happens to contain "places." is left untouched.
    folder, filename = os.path.split(places_file)
    favicons_file = os.path.join(folder, filename.replace("places.", "favicons."))

    bm = Bookmark("root")
    bm.load_from_places(places_file, icons_file=favicons_file, limit=limit, fetch_icons=fetch_icons, iconsize=iconsize, debuglev=debuglev)
    b2 = bm.unflatten(debuglev=debuglev)
    b2.to_html(file=output_file,iconsize=iconsize)
    return 0
+
def find_available_places(basedir=os.path.expanduser("~")):
    """
    Return, as strings, the directories below basedir (searched recursively)
    that contain a file named places.sqlite.
    """
    from pathlib import Path
    return [str(match.parent) for match in Path(basedir).rglob('places.sqlite')]
bgstack15