diff options
-rw-r--r-- | .gitignore | 6 | ||||
-rw-r--r-- | README.md | 24 | ||||
-rwxr-xr-x | ffbookmarkexporter.py | 13 | ||||
-rw-r--r-- | mbbmlib.py | 519 |
4 files changed, 562 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..6fe2e69 --- /dev/null +++ b/.gitignore @@ -0,0 +1,6 @@ +*.sqlite +__pycache__* +foo* +*.html +old +.*.swp diff --git a/README.md b/README.md new file mode 100644 index 0000000..39f4700 --- /dev/null +++ b/README.md @@ -0,0 +1,24 @@ +# Readme for mbbmlib +This project exists to programmatically export my bookmarks from the profiles of Mozilla-based web browsers to html, without having to run the browser. Also, I wanted to have icons displayed in the outputted html. + +## `mbbmlib` upstream +This project is the upstream: [https://gitlab.com/bgstack15/mbbmlib.git](https://gitlab.com/bgstack15/mbbmlib.git) + +## Alternatives +Just use the built-in export bookmarks feature. + +## Dependencies +* python3 + +## How to use mbbmlib +The most basic way to use this library is to run `export_bookmarks_to_html`. + + mbbmlib.export_bookmarks_to_html("/home/bgstack15/.mozilla/firefox/12345678.default", "/mnt/public/Public/bookmarks.html", debuglev=8, fetch_icons=True, iconsize=32) + +See [ffbookmarkexporter.py](ffbookmarkexporter.py) for more examples. + +## References +connman-gtk-xdg-autostart/README.md + +## Differences from upstream +None diff --git a/ffbookmarkexporter.py b/ffbookmarkexporter.py new file mode 100755 index 0000000..5881638 --- /dev/null +++ b/ffbookmarkexporter.py @@ -0,0 +1,13 @@ +#!/usr/bin/env python3 +# This is more of a demo script than a useful thing on its own. + +import mbbmlib + +# Export a mozilla profile's bookmarks to html with favicons +mbbmlib.export_bookmarks_to_html("/home/bgstack15/.mozilla/firefox/12345678.default", "/mnt/public/Public/bookmarks.html", debuglev=8, fetch_icons=True) + +# Get a nested list of bookmarks from that profile. The only additional use it has right now is to pretty-print which is worse than the html. 
+bm = mbbmlib.Bookmark("root") # bootstrap bookmark object +bm.load_from_places("places.sqlite", limit=2000, fetch_icons=False, iconsize=32, debuglev=8) +bm = bm.unflatten() +bm.pretty_print() diff --git a/mbbmlib.py b/mbbmlib.py new file mode 100644 index 0000000..68cbca2 --- /dev/null +++ b/mbbmlib.py @@ -0,0 +1,519 @@ +#!/usr/bin/env python3 +# File ffbm_lib.py +# License: CC-BY-SA 4.0 +# Author: bgstack15@gmail.com +# Startdate: 2020-12-12 +# Title: Export Firefox Bookmarks to Html +# Purpose: Add favicons to bookmarks in exported html file +# History: +# Usage: +# main command is export_bookmarks_to_html. +# Reference: +# pragma table_info(moz_bookmarks) +# https://stackoverflow.com/questions/464516/firefox-bookmarks-sqlite-structure?rq=1 +# https://stackoverflow.com/questions/40408607/attach-database-in-sqlite3-with-python +# https://2.python-requests.org/en/latest/ +# https://stackoverflow.com/questions/43446353/converting-png-images-to-base64 +# future: https://stackoverflow.com/questions/5119041/how-can-i-get-a-web-sites-favicon +# https://rosettacode.org/wiki/Strip_control_codes_and_extended_characters_from_a_string#Python +# Improve: +# add to pretty_print the option to select what format the output is in, like "title | url | icon" or similar. 
# Documentation:
#    export_bookmarks_to_html() is the main entry point. The Bookmark class can
#    also be driven by hand: load_from_places() -> unflatten() -> to_html().
# Dependencies:
#    devuan: python3
#    optional: python3-requests, only needed to download favicons that the
#    profile itself does not provide
import base64
import contextlib
import html
import os
import pathlib
import sqlite3
import sys
from urllib.parse import urlparse

try:
    import requests
except ImportError:
    # Keep the module importable without requests; only the online favicon
    # lookup is disabled in that case.
    requests = None

if "PYTHONIOENCODING" not in os.environ:
    print("You should really run this with env var PYTHONIOENCODING=UTF-8\n", file=sys.stderr)

# Undocumented Google endpoint used as a fallback source of favicons.
_GOOGLE_FAVICON_URL = "https://www.google.com/s2/favicons"
# Seconds to wait for one favicon download before giving up on it.
_FAVICON_TIMEOUT = 10
# Values accepted by the "browser" parameter of Bookmark.load_from_places().
_KNOWN_BROWSERS = ("autodetect", "firefox", "palemoon")


def _strip_control_chars(text):
    """Return text without control characters and without code points >= 8192."""
    return "".join(ch for ch in text if 31 < ord(ch) < 1024 * 8)


def _sibling_icons_file(places_file):
    """Return the favicons database path that sits next to places_file."""
    folder, name = os.path.split(places_file)
    # Only touch the file name, so a directory containing "places." is left alone.
    return os.path.join(folder, name.replace("places.", "favicons."))


def _read_tables(db_file, tables):
    """
    Read whole tables from an sqlite file.

    Returns a dict mapping each table name to a (column_names, rows) tuple,
    where every row is a dict keyed by column name. Reading by column name
    keeps the loader working when a browser release adds or drops columns.

    Raises FileNotFoundError when db_file does not exist (sqlite3.connect would
    otherwise silently create an empty database) and sqlite3.Error when a
    table cannot be read.
    """
    if not os.path.isfile(db_file):
        raise FileNotFoundError("No such sqlite file: {0}".format(db_file))
    contents = {}
    # sqlite3's own context manager only wraps a transaction; closing() is what
    # actually releases the connection.
    with contextlib.closing(sqlite3.connect(db_file)) as conn:
        for table in tables:
            cursor = conn.execute("SELECT * FROM {0}".format(table))
            columns = [col[0] for col in cursor.description]
            contents[table] = (columns, [dict(zip(columns, row)) for row in cursor])
    return contents


def _fetch_google_favicon(domain, iconsize):
    """Download the favicon of domain as a data: URI, or return "" on any failure."""
    if requests is None:
        return ""
    try:
        # parameter sz is undocumented but works
        reply = requests.get(
            _GOOGLE_FAVICON_URL,
            params={"sz": iconsize, "domain": domain},
            timeout=_FAVICON_TIMEOUT,
        )
    except requests.RequestException as err:
        print("unable to fetch icon for", domain, " because of:", file=sys.stderr)
        print(err, file=sys.stderr)
        return ""
    if reply.status_code != 200:
        print("unable to fetch icon for", domain, " because of:", file=sys.stderr)
        print(reply, file=sys.stderr)
        return ""
    return "data:image/png;base64," + base64.b64encode(reply.content).decode("ascii")


def _lookup_favicon(place, url, icons_by_id, cache, iconsize):
    """
    Pick a favicon for one bookmark.

    The icon referenced by place.favicon_id wins; otherwise the icon of the
    url's domain is downloaded. cache maps domain -> data URI so that each
    domain is downloaded at most once per load.
    """
    icon = None
    if place is not None and place.favicon_id is not None:
        icon = icons_by_id.get(place.favicon_id)
    if icon is not None and icon.icon_url:
        return icon.icon_url
    if not url:
        return ""
    try:
        domain = urlparse(url).netloc
    except ValueError:
        # urlparse rejects some malformed urls (e.g. broken IPv6 literals)
        return ""
    if not domain:
        return ""
    if domain not in cache:
        cache[domain] = _fetch_google_favicon(domain, iconsize)
    return cache[domain]


class Bookmark:
    """
    One bookmark or bookmark folder, plus the list of its children.

    The object behaves like the list of its children for iteration and len().
    Note that this makes a Bookmark without children falsy, so test results
    against None explicitly.
    """

    # Bookmarks are mutable and compare by bid, so they stay unhashable.
    __hash__ = None

    def __init__(self,
                 bid=None, btype=0, url="", title="", dateAdded=0, lastModified=0,
                 bparent=0, position=0, folder_type=0, favicon="", current_bparent=0):
        """
        Build a bookmark from its fields.

        Pass just the single string "root" as bid to get an empty root bookmark
        that can be populated with the contents of a places.sqlite:
           books = Bookmark("root")
        """
        self.children = []
        if bid == "root":
            self.bid = 0
            self.btype = 0
            self.url = "file:///self"
            self.bparent = 0
            self.position = 0
            self.folder_type = 0
            self.title = "root"
            self.dateAdded = 0
            self.lastModified = 0
            self.favicon = ""
            self.current_bparent = 0
            return
        self.bid = bid
        self.btype = btype
        self.url = url
        self.bparent = bparent  # int
        self.position = position  # int
        self.folder_type = folder_type
        self.title = _strip_control_chars(title) if title is not None else ""
        self.dateAdded = dateAdded  # int
        self.lastModified = lastModified  # int
        self.favicon = favicon
        self.current_bparent = current_bparent

    # make this bookmark object basically act like the list of its children
    def count(self):
        """Return the number of direct children."""
        return len(self.children)

    def append(self, newitem):
        """Add newitem as the last direct child."""
        self.children.append(newitem)

    def remove(self, olditem):
        """Remove the first direct child that equals olditem (same bid)."""
        self.children.remove(olditem)

    def __iter__(self):
        return iter(self.children)

    def __len__(self):
        return len(self.children)

    def __eq__(self, other):
        """Two bookmarks are equal when their bid is equal."""
        if not isinstance(other, Bookmark):
            return NotImplemented
        return self.bid == other.bid

    def pretty_print(self, i_count=0):
        """Print this bookmark and everything below it as an indented tree."""
        print(i_count*' |', '+', self.title)
        for child in self.children:
            child.pretty_print(i_count + 1)

    def to_html(self, i_count=0, file=None, iconsize=32):
        """
        Write this bookmark and everything below it as html.

        i_count is the nesting level; the surrounding html document is only
        written for level 0.
        file is where to write: None means the current sys.stdout, an object
        with a write() method is written to directly, anything else is treated
        as the path of a file to create (utf-8).
        iconsize is the width and height, in pixels, given to favicons.
        """
        if file is None:
            self._write_document(sys.stdout, i_count, iconsize)
        elif hasattr(file, "write"):
            self._write_document(file, i_count, iconsize)
        else:
            print("Saving to file {0}".format(file), file=sys.stderr)
            with open(file, "w", encoding="utf-8") as handle:
                self._write_document(handle, i_count, iconsize)

    def _write_document(self, out, depth, iconsize):
        """Write the node tree to out, wrapped in a html page when depth is 0."""
        if depth == 0:
            out.write("<!DOCTYPE html>\n<html>\n")
            out.write('<head><meta charset="utf-8"><title>{0}</title>\n'.format("Bookmarks"))
            # img.here outranks the plain img rule, so only favicons get a size
            out.write(
                "<style>img {{ width: 0px; height: 0px; }} "
                "img.here {{ width: {0}px; height: {0}px; }}</style>\n".format(iconsize)
            )
            out.write("</head>\n<body>\n")
        self._write_node(out, depth)
        if depth == 0:
            out.write("</body></html>\n")

    def _write_node(self, out, depth):
        """Write this node and, nested inside it, the list of its children."""
        label = self._html_label(depth)
        if not label and not self.children:
            # hidden internal entry without anything underneath
            return
        # list items are only valid inside the <ul> of a parent
        tag = "li" if depth >= 1 else "div"
        out.write("<{0}>{1}".format(tag, label))
        if self.children:
            out.write("\n<ul>\n")
            for child in self.children:
                child._write_node(out, depth + 1)
            out.write("</ul>\n")
        out.write("</{0}>\n".format(tag))

    def _html_label(self, depth):
        """Return the heading (folder) or link (bookmark) markup; "" when hidden."""
        # btype == 2 is a folder
        if self.btype == 2:
            level = min(depth + 1, 6)  # html only defines h1..h6
            return "<h{0}>{1}</h{0}>".format(level, html.escape(str(self.title)))
        url = self.url or ""
        # specifically exclude a few internal items like "recently bookmarked"
        if url.startswith(("place:", "about:logopage")):
            return ""
        title = self.title or "Untitled"
        attributes = 'href="{0}" add_date="{1}" last_modified="{2}"'.format(
            html.escape(url), html.escape(str(self.dateAdded)), html.escape(str(self.lastModified))
        )
        image = ""
        if self.favicon:
            icon = html.escape(str(self.favicon))
            attributes += ' ICON="{0}"'.format(icon)
            image = '<img class="here" src="{0}"/>'.format(icon)
        return "<a {0}>{1}{2}</a>".format(attributes, image, html.escape(str(title)))

    def sort_children(self):
        """Sort the children by position, recursively for the whole subtree."""
        self.children.sort(key=lambda node: node.position if node.position is not None else 0)
        for child in self.children:
            child.sort_children()

    def find(self, searchfield, value, i_count=0, debuglev=0):
        """
        Return the first bookmark, searching depth-first from this one, whose
        searchfield equals value, or None when there is none.

        searchfield is "id" (compares bid) or "title". Any other field prints
        an error and returns None.
        """
        if searchfield not in ("id", "title"):
            print("Error: Bookmark object can only search on fields [\"id\",\"title\"]", file=sys.stderr)
            return None
        attribute = "bid" if searchfield == "id" else "title"
        return self._find_by(attribute, value, i_count, debuglev)

    def _find_by(self, attribute, value, depth, debuglev):
        """Depth-first search for the node whose attribute equals value."""
        if getattr(self, attribute) == value:
            return self
        if debuglev >= 5:
            print(depth, "My id", self.bid, self.title, "is not searched value", value)
        for child in self.children:
            hit = child._find_by(attribute, value, depth + 1, debuglev)
            # compare against None: a hit without children is falsy
            if hit is not None:
                return hit
        return None

    def all_children(self):
        """Return a flat list of every bookmark underneath this one, depth-first."""
        found = []
        for child in self.children:
            found.append(child)
            found.extend(child.all_children())
        return found

    def prune(self, debuglev=0):
        """
        Remove, in the whole subtree, every child whose bparent is not the bid
        of the bookmark that holds it. Returns how many were removed.
        """
        prune_count = 0
        kept = []
        for child in self.children:
            prune_count += child.prune(debuglev=debuglev)
            if child.bparent != self.bid:
                if debuglev >= 5:
                    print("Removing", child.bid, child.title, "from parent", self.bid, self.title)
                prune_count += 1
            else:
                kept.append(child)
        self.children = kept
        return prune_count

    def unflatten(self, debuglev=0, i_count=0, root=None, safety_limit=2000):
        """
        Main task of moving bookmarks to their intended parent bookmark objects.

        Every child is attached to the bookmark whose bid is the child's
        bparent, then misplaced copies are pruned and children are sorted by
        position. Children whose parent cannot be found are dropped by the
        pruning step.

        i_count and root are used by the recursion; call it without them.
        safety_limit stops the processing of one bookmark once that many
        children have been seen.

        Returns the only child when exactly one is left, otherwise self.
        """
        if debuglev >= 7:
            print("{0} {1} has {2} children.".format(self.bid, self.title, self.count()))
        if i_count == 0 or root is None:
            root = self

        # self.children may grow while nested calls attach children to it, so
        # iterate over the live list
        for seen, child in enumerate(self.children, start=1):
            if seen >= safety_limit:
                if debuglev >= 1:
                    print("{0} {1} has to stop after {2} children".format(self.bid, self.title, safety_limit), file=sys.stderr)
                break
            self._move_to_parent(child, root, debuglev)
            child.unflatten(debuglev=debuglev, i_count=i_count + 1, root=root, safety_limit=safety_limit)

        self.prune(debuglev=debuglev)
        self.sort_children()
        if self.count() == 1:
            return self.children[0]
        return self

    def _move_to_parent(self, child, root, debuglev):
        """Attach child to the bookmark it names as parent, searched from root."""
        if debuglev >= 7:
            print("{0} {1} is looking for parent {2}".format(child.bid, child.title, child.bparent))
        try:
            wants_parent = child.bparent > 0
        except TypeError:
            if debuglev >= 4:
                print("Unable to list bparent for child", child)
            return
        if not wants_parent:
            return
        if child.bparent == self.bid:
            if debuglev >= 6:
                print("info: {0} {1} is already underneath parent {2} {3}".format(child.bid, child.title, self.bid, self.title))
            return
        thisparent = root.find('id', child.bparent)
        if thisparent is None:
            if debuglev >= 4:
                print("Unable to find parent item which should be bid", child.bparent)
            return
        # the old child will still exist until we run prune() which will remove
        # any child whose bparent is not the same as its parent bid.
        if not any(existing is child for existing in thisparent.children):
            thisparent.append(child)
        if debuglev >= 5:
            print("SUCCESS: move", child.bid, child, "to parent", child.bparent, thisparent)

    def __str__(self):
        """Return the title, with unencodable characters replaced."""
        try:
            return self.title.encode('utf-8', 'replace').decode('utf-8', 'replace')
        except (AttributeError, UnicodeError):
            return "Encoding error!"

    def load_from_places(self, places_file, icons_file=None, limit=50, fetch_icons=True, debuglev=0, iconsize=32, browser="autodetect"):
        """
        Append the bookmarks stored in a places.sqlite as direct children.

        The result is flat; call unflatten() afterwards to build the tree.

        places_file: path of the places.sqlite to read.
        icons_file: path of the favicons database; defaults to the file next
           to places_file with "places." replaced by "favicons.".
        limit: highest number of bookmarks to load (at least 1).
        fetch_icons: when true, look favicons up in icons_file and download
           the missing ones, one request per domain.
        iconsize: pixel size requested for downloaded favicons.
        browser: "autodetect", "firefox" or "palemoon". Columns are read by
           name, so this only affects the debug output.

        Raises ValueError for an unknown browser, FileNotFoundError when
        places_file does not exist and sqlite3.Error when it cannot be read.
        Problems with icons_file are reported on stderr and are not fatal.
        """
        if limit >= 8000:
            print("WARNING! Limit is really big, but we will proceed.", file=sys.stderr)
        elif limit < 1:
            limit = 1
        if browser not in _KNOWN_BROWSERS:
            raise ValueError("browser must be one of {0}, not {1!r}".format(_KNOWN_BROWSERS, browser))

        if icons_file is None:
            icons_file = _sibling_icons_file(places_file)

        # retrieve contents of sqlite
        if debuglev > 4:
            print("Using places file {0} and icons_file {1}".format(places_file, icons_file))
        tables = _read_tables(places_file, ("moz_bookmarks", "moz_places"))
        bookmark_columns, bookmark_rows = tables["moz_bookmarks"]
        place_rows = tables["moz_places"][1]

        # detect browser automatically: only firefox has the sync columns
        if browser == "autodetect":
            browser = "firefox" if "syncStatus" in bookmark_columns else "palemoon"
        if debuglev > 3:
            print("Using browser value {0}".format(browser))

        db_bookmarks = [
            db_bookmark(
                row.get("id"), row.get("type"), row.get("fk"), row.get("parent"),
                row.get("position"), row.get("title"), row.get("keyword_id"),
                row.get("folder_type"), row.get("dateAdded"), row.get("lastModified"),
                row.get("guid"), row.get("syncStatus", ""), row.get("syncChangeCounter", ""),
            )
            for row in bookmark_rows
        ]
        db_places = [
            db_place(
                row.get("id"), row.get("url"), row.get("title"), row.get("rev_host"),
                row.get("visit_count"), row.get("hidden"), row.get("typed"),
                row.get("favicon_id"), row.get("frecency"), row.get("last_visit_date"),
                row.get("guid"), row.get("foreign_count"), row.get("url_hash"),
                row.get("description", ""), row.get("preview_image_url", ""),
                row.get("origin_id", 0),
            )
            for row in place_rows
        ]

        db_icons = []
        if fetch_icons:
            try:
                icon_rows = _read_tables(icons_file, ("moz_icons",))["moz_icons"][1]
            except (OSError, sqlite3.Error) as err:
                print("Unable to read icons from {0}: {1}".format(icons_file, err), file=sys.stderr)
                icon_rows = []
            db_icons = [
                db_icon(
                    row.get("id"), row.get("icon_url"),
                    row.get("fixed_icon_url_hash", row.get("fixed_icon_url")),
                    row.get("width"), row.get("root"), row.get("color"),
                    row.get("expire_ms"), row.get("data"),
                )
                for row in icon_rows
            ]
            if requests is None:
                print("python requests is not installed, missing favicons will not be downloaded", file=sys.stderr)

        if debuglev >= 1:
            print("Found this many bookmarks, places, and icons:")
            print(len(db_bookmarks))
            print(len(db_places))
            if fetch_icons:
                print(len(db_icons))

        # now all items are loaded.
        # make new list with Bookmark objects with desired info from all 3 old lists
        places_by_id = {place.pid: place for place in db_places}
        icons_by_id = {icon.iid: icon for icon in db_icons}
        downloaded = {}
        for count, item in enumerate(db_bookmarks[:limit], start=1):
            # print this, so I have something to catch so I can interrupt this process like with | head -n40
            if debuglev >= 1:
                print("{0}: {1}".format(count, item))

            place = places_by_id.get(item.fk)
            url = ""
            if place is not None and place.url is not None:
                url = place.url
            favicon = ""
            if fetch_icons:
                favicon = _lookup_favicon(place, url, icons_by_id, downloaded, iconsize)

            self.append(
                Bookmark(
                    item.bid, item.btype, url, item.title, item.dateAdded, item.lastModified,
                    item.bparent, item.position, item.folder_type, favicon
                )
            )


class db_bookmark:
    """One row of the moz_bookmarks table."""

    def __init__(self, bid, btype, fk, bparent, position, title, keyword_id, folder_type, dateAdded, lastModified, guid, syncStatus="", syncChangeCounter=""):
        self.bid = bid
        self.btype = btype
        self.bparent = bparent
        self.fk = fk  # id of the matching moz_places row
        self.position = position
        self.title = title
        self.keyword_id = keyword_id
        self.folder_type = folder_type
        self.dateAdded = dateAdded
        self.lastModified = lastModified
        self.guid = guid
        self.syncStatus = syncStatus
        self.syncChangeCounter = syncChangeCounter

    def __str__(self):
        """Return the title, "(untitled)" when there is none."""
        if self.title is None:
            return "(untitled)"
        try:
            return self.title.encode('utf-8', 'replace').decode('utf-8', 'replace')
        except (AttributeError, UnicodeError):
            return "Encoding error on bookmark " + str(self.bid)


class db_place:
    """One row of the moz_places table."""

    def __init__(self, pid, url, title, rev_host, visit_count, hidden, typed, favicon_id, frecency, last_visit_date, guid, foreign_count, url_hash, description, preview_image_url, origin_id):
        self.pid = pid
        self.url = url
        self.title = title
        self.rev_host = rev_host
        self.visit_count = visit_count
        self.hidden = hidden
        self.typed = typed
        self.favicon_id = favicon_id
        self.frecency = frecency
        self.last_visit_date = last_visit_date
        self.guid = guid
        self.foreign_count = foreign_count
        self.url_hash = url_hash
        self.description = description
        self.preview_image_url = preview_image_url
        self.origin_id = origin_id


class db_icon:
    """One row of the moz_icons table."""

    def __init__(self, iid, icon_url, fixed_icon_url, width, root, color, expire_ms, data):
        self.iid = iid
        self.icon_url = icon_url
        self.fixed_icon_url = fixed_icon_url
        self.width = width
        self.root = root
        self.color = color
        self.expire_ms = expire_ms
        self.data = data


def export_bookmarks_to_html(places_file, output_file, limit=2000, fetch_icons=True, iconsize=32, debuglev=0):
    """
    Export to output_file the html representation of the bookmarks in the input places_file.
    This is the main interface from the library.

    places_file may be the places.sqlite itself or the profile directory that
    contains it. Returns 1 when no places.sqlite can be found there, and None
    after a successful export.
    """
    # prepare the places_file in case we were only handed a profile directory name
    orig_places_file = places_file
    if os.path.isdir(places_file):
        places_file = os.path.join(places_file, "places.sqlite")
    if not os.path.isfile(places_file):
        print("Provided path {0} does not contain a places.sqlite file. Cannot export these bookmarks to html!".format(orig_places_file), file=sys.stderr)
        return 1

    bm = Bookmark("root")
    bm.load_from_places(
        places_file,
        icons_file=_sibling_icons_file(places_file),
        limit=limit,
        fetch_icons=fetch_icons,
        iconsize=iconsize,
        debuglev=debuglev,
    )
    b2 = bm.unflatten(debuglev=debuglev)
    b2.to_html(file=output_file, iconsize=iconsize)
    return None


def find_available_places(basedir=os.path.expanduser("~")):
    """Return the directories underneath basedir that contain a places.sqlite."""
    return [str(path.parent) for path in pathlib.Path(basedir).rglob('places.sqlite')]