#!/usr/bin/env python3
# File ffbm_lib.py
# License: CC-BY-SA 4.0
# Author: bgstack15@gmail.com
# Startdate: 2020-12-12
# Title: Export Firefox Bookmarks to Html
# Purpose: Add favicons to bookmarks in exported html file
# History:
#    2022-03-05 fix even more UTF-8 crap
#    2023-04-01 add bare 17-row Firefox moz_places support. Still need to follow probably origin_id to moz_origins to the icons file to moz_icons?
#    2023-04-02 removing newmoon support to focus on FF icon support
# Usage:
#    main command is export_bookmarks_to_html.
# Reference:
#    pragma table_info(moz_bookmarks)
#    https://stackoverflow.com/questions/464516/firefox-bookmarks-sqlite-structure?rq=1
#    https://stackoverflow.com/questions/40408607/attach-database-in-sqlite3-with-python
#    https://2.python-requests.org/en/latest/
#    https://stackoverflow.com/questions/43446353/converting-png-images-to-base64
#    future: https://stackoverflow.com/questions/5119041/how-can-i-get-a-web-sites-favicon
#    https://rosettacode.org/wiki/Strip_control_codes_and_extended_characters_from_a_string#Python
#    conn.text_factory to fix UTF-8 crap https://stackoverflow.com/questions/22751363/sqlite3-operationalerror-could-not-decode-to-utf-8-column/58891189#58891189
# Improve:
#    add to pretty_print the option to select what format the output is in, like "title | url | icon" or similar.
# Documentation:
# Dependencies:
#    devuan: python3
import sqlite3, sys, os
from urllib.parse import urlparse
import requests, base64

# Import-time hint: suggest setting PYTHONIOENCODING when it is absent from the environment.
# The hint goes to stderr so it never mixes with HTML written to stdout.
try:
   if "PYTHONIOENCODING" not in os.environ:
      print("You should really run this with env var PYTHONIOENCODING=UTF-8\n",file=sys.stderr)
except (OSError, ValueError):
   # best effort only: a closed or broken stderr must not prevent the import
   pass

class Bookmark:
   """
   One node of a bookmark tree: the fields of a single bookmark row plus a list of child nodes.

   The object acts like the list of its children: iterating it, len() and count()
   all operate on self.children. Because __len__ is defined, a Bookmark without
   children is falsy, so test results against None with "is" / "is not" rather
   than by truthiness. Two Bookmarks compare equal when their bid values match.

   Typical use:
      books = Bookmark("root")
      books.load_from_places("/path/to/profile/places.sqlite")
      tree = books.unflatten()
      tree.to_html(file="bookmarks.html")
   """

   def __init__(self,
         bid=None, btype=0, url="", title="", dateAdded=0, lastModified=0, bparent=0, position=0, folder_type=0, favicon="", current_bparent=0
   ):
      """
      Create a bookmark node without children.

      Passing the single string "root" as bid builds the container used for
      populating with the contents of a places.sqlite; in that case every
      other argument is ignored:
         books = Bookmark("root")

      bid             id of this bookmark
      btype           type code; 2 is treated as a folder by to_html
      url             target address, "" when there is none
      title           display title; None becomes ""
      dateAdded       stored and exported as given
      lastModified    stored and exported as given
      bparent         bid of the bookmark this one belongs under
      position        sort key among siblings
      folder_type     stored as given
      favicon         value used for the ICON and img src attributes in to_html
      current_bparent stored as given
      """
      self.children = []
      if bid == "root":
         self.bid = 0
         self.btype = 0
         self.url = "file:///self"
         self.bparent = 0
         self.position = 0
         self.folder_type = 0
         self.title = "root"
         self.dateAdded = 0
         self.lastModified = 0
         self.favicon = ""
         self.current_bparent = 0
         return

      self.bid = bid
      self.btype = btype
      self.url = url
      self.bparent = bparent # int
      self.position = position # int
      self.folder_type = folder_type
      if title is None:
         self.title = ""
      else:
         # Keep only code points 32..8191; control characters and everything
         # from U+2000 upward are dropped from the title.
         self.title = "".join(ch for ch in title if 31 < ord(ch) < 1024*8)
      self.dateAdded = dateAdded # int
      self.lastModified = lastModified # int
      self.favicon = favicon
      self.current_bparent = current_bparent

   # make this bookmark object basically act like the list of its children
   def count(self):
      """Return the number of direct children."""
      return len(self.children)

   def append(self, newitem):
      """Add newitem as the last direct child."""
      self.children.append(newitem)

   def remove(self, olditem):
      """Remove the first direct child that compares equal to olditem."""
      self.children.remove(olditem)

   def __iter__(self):
      return iter(self.children)

   def __len__(self):
      return len(self.children)

   def __eq__(self, other):
      # Anything that is not a Bookmark is "not equal" rather than an AttributeError.
      if not isinstance(other, Bookmark):
         return NotImplemented
      return self.bid == other.bid

   # Equality is by bid, which can be reassigned, so instances stay unhashable.
   __hash__ = None

   def pretty_print(self,i_count=0):
      """Print this title and, indented one level deeper each time, all descendants."""
      print(i_count*' |','+',self.title)
      for child in self.children:
         child.pretty_print(i_count+1)

   def to_html(self,i_count=0,file=sys.stdout,iconsize=32):
      """
      Write this bookmark and everything below it as html.

      i_count   nesting level of this node; level 0 also writes the surrounding
                <html>/<head>/<body> markup.
      file      either an object with a write() method, or the path of a file
                to create. Files are written as UTF-8.
      iconsize  width and height, in pixels, given to icons in the style sheet.
      """
      if hasattr(file, "write"):
         self._write_html(file, i_count, iconsize)
         return
      print("Saving to file {0}".format(file),file=sys.stderr)
      # the with-block closes the file even when rendering fails part way
      with open(file, 'w', encoding='utf-8') as out:
         self._write_html(out, i_count, iconsize)

   def _write_html(self, out, depth, iconsize):
      """Render this node and its descendants to the stream out; depth 0 adds the page wrapper."""
      from html import escape

      if depth == 0:
         # declare the encoding the stream really uses; streams without one get utf-8
         charset = getattr(out, "encoding", None) or "utf-8"
         print("<html>",file=out)
         print('<head><meta charset="{0}"><title>{1}</title>'.format(escape(charset),"Bookmarks"),file=out)
         # images are hidden unless they carry class "here", which is set only when an icon exists
         print("<style>img.here {{ width: {0}px; height: {0}px; }}</style>".format(iconsize),file=out)
         print("<style>img { width: 0px; height: 0px; }</style>",file=out)
         print("</head>",file=out)
         print("<body>",file=out)

      # btype == 2 is a folder
      if self.btype == 2:
         li_start = ""
         li_stop = ""
         if depth >= 1:
            li_start = "<li>"
            li_stop = "</li>"
         # html only defines h1 through h6
         level = max(1, min(depth, 6))
         print("<h{0}>{1}{2}{3}</h{0}>".format(level,li_start,escape(self.title or ""),li_stop),file=out)
      else:
         url = self.url or ""
         # specifically exclude a few internal items like "recently bookmarked"
         if not url.startswith(("place:","about:logopage")):
            favicon = self.favicon or ""
            print(
               '<li><a href="{0}" add_date="{1}" last_modified="{2}" ICON="{3}"><img {4}src="{3}"/>{5}</a></li>'.format(
                  escape(url),
                  self.dateAdded,
                  self.lastModified,
                  escape(favicon),
                  'class="here" ' if favicon else '',
                  escape(self.title or "Untitled"),
               ),
               file=out,
            )

      # handle children
      if self.children:
         print("<ul>",file=out)
         for child in self.children:
            child._write_html(out, depth+1, iconsize)
         print("</ul>",file=out)

      # close whole thing
      if depth == 0:
         print("</body></html>",file=out)

   def sort_children(self):
      """Order the children by position, then do the same for every descendant."""
      self.children = sorted(self.children, key=lambda node: node.position)
      for child in self.children:
         child.sort_children()

   def find(self,searchfield,value,i_count=0, debuglev = 0):
      """
      Search this node and its descendants, depth first, for a bookmark.

      searchfield  "id" compares value with bid. "title" is accepted but not
                   implemented. Any other name is an error.
      value        the value to look for
      i_count      nesting level, used only in debug output
      debuglev     5 or higher prints every node that did not match

      Returns the first matching Bookmark, or None when nothing matches or the
      search is not possible. Errors are reported on stderr.
      """
      if searchfield != "id" and searchfield != "title":
         print("Error: Bookmark object can only search on fields [\"id\",\"title\"]",file=sys.stderr)
         return None
      if searchfield == "title":
         print("Error: Gotta say unh! Bookmark search for title is not yet implemented.",file=sys.stderr)
         return None

      if self.bid == value:
         return self
      if debuglev >= 5:
         print(i_count,"My id",self.bid,self.title,"is not searched value",value)
      for child in self.children:
         found = child.find(searchfield,value,i_count+1,debuglev)
         # compare with None: a match without children is falsy because of __len__
         if found is not None:
            return found
      # did not find it
      return None

   def all_children(self):
      """
      Return a flat list of every descendant, each child followed directly by its own descendants.
      The list is also stored in self.all_children_list.
      """
      flat = []
      for child in self.children:
         flat.append(child)
         flat.extend(child.all_children())
      self.all_children_list = flat
      return flat

   def prune(self, debuglev = 0):
      """
      Remove, at every level below this node, each child whose bparent is not the bid of the node holding it.

      debuglev  5 or higher prints every removal
      Returns the number of bookmarks removed.
      """
      prune_count = 0
      kept = []
      for child in self.children:
         prune_count += child.prune(debuglev=debuglev)
         if child.bparent != self.bid:
            if debuglev >= 5:
               print("Removing",child.bid,child.title,"from parent",self.bid,self.title)
            prune_count += 1
         else:
            kept.append(child)
      # replace the contents in place so the list object itself stays the same
      self.children[:] = kept
      return prune_count

   def unflatten(self,debuglev=0,i_count=0, root = None):
      """
      Main task of moving bookmarks to their intended parent bookmark objects.

      Every child with a bparent above 0 that differs from this node's bid is
      appended to the bookmark found for that id beneath root. prune() then
      removes each child from any node that is not its parent, which also
      drops children whose parent could not be found, and sort_children()
      orders what is left.

      debuglev  verbosity of the progress messages; 0 is silent
      i_count   nesting level; 0 makes this node the root of the search
      root      node whose tree is searched for parents (set by the recursion)

      Returns the only child when exactly one child remains, otherwise this node.
      """
      if debuglev >= 7:
         print("{0} {1} has {2} children.".format(self.bid,self.title,self.count()))
      safety_limit = 2000
      if i_count == 0 or root is None:
         root = self

      for seen, child in enumerate(self.children, start=1):
         if seen > safety_limit:
            if debuglev >= 1:
               print("{0} {1} has to stop after {2} children".format(self.bid,self.title,safety_limit),file=sys.stderr)
            break

         if debuglev >= 7:
            print("{0} {1} is looking for parent {2}".format(child.bid,child.title,child.bparent))
         try:
            wants_parent = child.bparent > 0
         except TypeError:
            # bparent is not a number, e.g. None
            wants_parent = False
            if debuglev >= 4:
               print("Unable to list bparent for child",child)

         if wants_parent:
            if self.bid == child.bparent:
               if debuglev >= 6:
                  print("info: {0} {1} is already underneath parent {2} {3}".format(child.bid,child.title,self.bid,self.title))
            else:
               thisparent = root.find('id',child.bparent)
               if thisparent is None:
                  if debuglev >= 4:
                     print("Unable to find parent item which should be bid",child.bparent)
               else:
                  thisparent.append(child)
                  # the old child will still exist until we run prune() which will remove
                  # any child whose bparent is not the same as its parent bid.
                  if debuglev >= 5:
                     print("SUCCESS: move",child.bid,child,"to parent",child.bparent,thisparent)

         # still within the loop over the children
         child.unflatten(debuglev=debuglev, i_count=i_count+1, root=root)

      self.prune(debuglev=debuglev)
      self.sort_children()
      if self.count() == 1:
         return self.children[0]
      return self

   def __str__(self):
      """Return the title with anything that cannot be encoded as UTF-8 replaced."""
      try:
         return self.title.encode('utf-8','replace').decode('utf-8','replace')
      except (AttributeError, TypeError, UnicodeError):
         return "Encoding error!"

   @staticmethod
   def _icon_data_uri(data):
      """
      Return a base64 data: URI for an image blob, or "" when data is empty,
      not bytes-like, or of an unrecognized format.
      """
      if not isinstance(data, (bytes, bytearray, memoryview)):
         return ""
      raw = bytes(data)
      if not raw:
         return ""
      # the image type is taken from the leading bytes of the blob
      if raw.startswith(b"\x89PNG\r\n\x1a\n"):
         mime = "image/png"
      elif raw.startswith(b"\xff\xd8\xff"):
         mime = "image/jpeg"
      elif raw.startswith((b"GIF87a", b"GIF89a")):
         mime = "image/gif"
      elif raw.startswith(b"\x00\x00\x01\x00"):
         mime = "image/x-icon"
      elif raw[:4] == b"RIFF" and raw[8:12] == b"WEBP":
         mime = "image/webp"
      elif b"<svg" in raw[:512].lower():
         mime = "image/svg+xml"
      else:
         return ""
      return "data:{0};base64,{1}".format(mime, base64.b64encode(raw).decode('ascii'))

   @staticmethod
   def _fetch_web_icon(url, iconsize, cache):
      """
      Ask the google favicon service for the icon of the host in url.

      cache maps host names to earlier results, including failures, so each
      host is requested at most once. Returns a data: URI, or "" when url has
      no host or the icon could not be fetched.
      """
      try:
         # hostname leaves out any user:password@ and :port part of the address
         domain = urlparse(url).hostname
      except ValueError:
         return ""
      if not domain:
         return ""
      if domain in cache:
         return cache[domain]

      favicon = ""
      try:
         # parameter sz is undocumented but works
         r = requests.get(
            "https://www.google.com/s2/favicons",
            params={"sz": iconsize, "domain": domain},
            timeout=10,
         )
      except requests.RequestException as err:
         print("unable to fetch icon for",domain," because of:",file=sys.stderr)
         print(err,file=sys.stderr)
      else:
         if r.status_code == 200:
            # r.content is the useful stuff
            favicon = "data:image/png;base64," + base64.b64encode(r.content).decode('ascii')
         else:
            print("unable to fetch icon for",domain," because of:",file=sys.stderr)
            print(r,file=sys.stderr)
      cache[domain] = favicon
      return favicon

   def load_from_places(self, places_file, icons_file = None, limit = 50, fetch_icons=True, debuglev = 0, iconsize = 32, browser = "autodetect"):
      """
      Read the bookmarks of a Firefox places.sqlite and append them, flat, to this object.
      Run unflatten() afterwards to arrange them as a tree.

      places_file  path of the places.sqlite file
      icons_file   path of the icon database; by default the places_file path
                   with "places." replaced by "favicons."
      limit        highest number of bookmarks to load; values below 1 count as 1
      fetch_icons  when true every bookmark gets an icon: the one stored in
                   icons_file if there is one, otherwise one requested from
                   the google favicon service for the host of the url
      debuglev     verbosity of the progress messages; 0 is silent
      iconsize     pixel size asked of the favicon service
      browser      "firefox", or "autodetect" to probe the database

      Returns None on success and -1 when the browser is not supported.
      Errors raised by sqlite3 are passed on to the caller.
      """
      if limit >= 8000:
         print("WARNING! Limit is really big, but we will proceed.",file=sys.stderr)
      elif limit < 1:
         limit = 1

      if icons_file is None:
         icons_file = places_file.replace("places.","favicons.")

      if debuglev > 4:
         print("Using places file {0} and icons_file {1}".format(places_file,icons_file))

      # SQLite treats a missing file in ATTACH as a new empty database, so
      # only attach an icons file that is really there.
      use_icon_db = fetch_icons and os.path.isfile(icons_file)
      if fetch_icons and not use_icon_db:
         print("Icons file {0} not found, so stored icons will not be used.".format(icons_file),file=sys.stderr)

      # retrieve contents of sqlite
      conn = sqlite3.connect(places_file)
      try:
         conn.text_factory = lambda b: b.decode(errors = 'ignore')

         # detect browser automatically
         if browser == 'autodetect':
            try:
               conn.execute("SELECT syncStatus FROM moz_bookmarks limit 1")
               browser = "firefox"
            except sqlite3.OperationalError as e:
               if str(e).startswith("no such column"):
                  browser = "palemoon"
               else:
                  print("Other error,",e,file=sys.stderr)
                  raise

         if debuglev > 3:
            print("Using browser value {0}".format(browser))

         if browser == 'palemoon':
            print("Fatal! Palemoon no longer supported as of version 20230402a.",file=sys.stderr)
            return -1
         if browser != 'firefox':
            print("Fatal! Unsupported browser value {0}.".format(browser),file=sys.stderr)
            return -1

         # now conduct regular operations; rows are fetched while the connection is open
         bookmark_rows = conn.execute("SELECT id, type, fk, parent, position, title, keyword_id, folder_type, dateAdded, lastModified FROM moz_bookmarks").fetchall()
         if use_icon_db:
            conn.execute("ATTACH DATABASE ? AS i", (icons_file,))
            place_rows = conn.execute("SELECT p.id, p.url, p.title, i.id, i.data FROM moz_places p LEFT join moz_pages_w_icons w on p.url = w.page_url LEFT JOIN moz_icons_to_pages ic ON ic.page_id = w.id LEFT JOIN moz_icons i on i.id = ic.icon_id").fetchall()
         else:
            place_rows = [
               (pid, url, title, None, None)
               for pid, url, title in conn.execute("SELECT id, url, title FROM moz_places p")
            ]
      finally:
         conn.close()

      db_bookmarks = [
         db_bookmark(bid, btype, parent, fk, position, title, keyword_id, folder_type, dateAdded, lastModified)
         for bid, btype, fk, parent, position, title, keyword_id, folder_type, dateAdded, lastModified in bookmark_rows
      ]

      # Index places and icons by place id. The icon join can return several
      # rows for one place; the first row carrying icon data is the one used.
      # NOTE(review): i.data is assumed to hold the bytes of an image file - confirm against the favicons schema.
      places_by_id = {}
      icons_by_place = {}
      for pid, url, title, iid, icon_data in place_rows:
         places_by_id[pid] = db_place(pid, url, title)
         if icon_data and pid not in icons_by_place:
            icons_by_place[pid] = db_icon(iid, url, icon_data)

      if debuglev >= 1:
         print("Found this many bookmarks, places, and icons:")
         print(len(db_bookmarks))
         print(len(places_by_id))
         if fetch_icons:
            print(len(icons_by_place))

      # now all items are loaded.
      # flow:
      # 1. make new list with Bookmark objects with desired info from all 3 old lists
      # 2. move bookmarks with "parent" value to be a child of that parent (done by unflatten)
      web_icons = {}
      for count, item in enumerate(db_bookmarks, start=1):
         if count > limit:
            break

         # print this, so I have something to catch so I can interrupt this process like with | head -n40
         if debuglev >= 1:
            print("{0}: {1}".format(count,item))

         # attempt to find matching place
         place = places_by_id.get(item.fk)
         url = ""
         if place is not None and place.url is not None:
            url = place.url

         # attempt to find icon for that place
         favicon = ""
         if fetch_icons:
            icon = icons_by_place.get(item.fk)
            if icon is not None:
               favicon = self._icon_data_uri(icon.data)
            # use google service to get a favicon for this domain
            if favicon == "" and url != "":
               favicon = self._fetch_web_icon(url, iconsize, web_icons)

         self.append(
            Bookmark(
               item.bid, item.btype, url, item.title, item.dateAdded, item.lastModified, item.bparent, item.position, item.folder_type, favicon
            )
         )

class db_bookmark:
   """Plain record holding the columns of one moz_bookmarks row as selected by the loader."""

   def __init__(self,bid,btype,bparent,fk,position,title,keyword_id,folder_type,dateAdded,lastModified):
      """Store every column unchanged. Note that bparent comes before fk in the argument order."""
      self.bid = bid
      self.btype = btype
      self.bparent = bparent
      self.fk = fk
      self.position = position
      self.title = title
      self.keyword_id = keyword_id
      self.folder_type = folder_type
      self.dateAdded = dateAdded
      self.lastModified = lastModified

   def __str__(self):
      """
      Return the title with anything that cannot be encoded as UTF-8 replaced.
      A title of None gives "(untitled)"; a title that is not a string gives
      an error text naming the bookmark id.
      """
      fallback = "Encoding error on bookmark " + str(self.bid)
      if self.title is None:
         return "(untitled)"
      try:
         return self.title.encode('utf-8','replace').decode('utf-8','replace')
      except (AttributeError, TypeError, UnicodeError):
         return fallback

class db_place:
   """Plain record for one place row: its id, its url and its title."""

   def __init__(self,pid,url,title):
      # keep the three values exactly as they were handed in
      self.pid, self.url, self.title = pid, url, title

class db_icon:
   """Plain record for one icon: its id, the url stored with it and its data."""

   def __init__(self,iid,icon_url,data):
      # keep the three values exactly as they were handed in
      self.iid, self.icon_url, self.data = iid, icon_url, data

def export_bookmarks_to_html(places_file, output_file, limit=2000, fetch_icons=True, iconsize=32, debuglev=0):
   """
   Export to output_file the html representation of the bookmarks in the input places_file.
   This is the main interface from the library.

   places_file  path of a places.sqlite file, or of the profile directory holding one
   output_file  where to write the html; passed to Bookmark.to_html as file
   limit        highest number of bookmarks to load
   fetch_icons  whether to add an icon to every bookmark
   iconsize     icon size in pixels
   debuglev     verbosity of the progress messages; 0 is silent

   Returns None on success. Returns 1, after a message on stderr, when there
   is no places.sqlite at the given path or the bookmarks could not be loaded.
   """

   # prepare the places_file in case we were only handed a profile directory name
   if "places.sqlite" not in places_file:
      orig_places_file = places_file
      places_file = os.path.join(orig_places_file, "places.sqlite")
      if not os.path.isfile(places_file):
         print("Provided path {0} does not contain a places.sqlite file. Cannot export these bookmarks to html!".format(orig_places_file),file=sys.stderr)
         return 1
   elif not os.path.isfile(places_file):
      # stop here: opening a missing file with sqlite would create an empty one
      print("Provided file {0} does not exist. Cannot export these bookmarks to html!".format(places_file),file=sys.stderr)
      return 1

   bm = Bookmark("root")
   # load_from_places reports an unsupported database with -1
   if bm.load_from_places(places_file, limit=limit, fetch_icons=fetch_icons, iconsize=iconsize, debuglev=debuglev) == -1:
      return 1
   b2 = bm.unflatten(debuglev=debuglev)
   b2.to_html(file=output_file,iconsize=iconsize)

def find_available_places(basedir=os.path.expanduser("~")):
   """
   Return, as strings, the directories at or below basedir that contain a
   file named places.sqlite. The default basedir is the home directory as
   expanded when this function was defined.
   """
   import pathlib
   matches = pathlib.Path(basedir).rglob('places.sqlite')
   return [str(hit.parent) for hit in matches]