about | summary | refs | log | tree | commit | diff
path: root/libraries
diff options
context:
space:
mode:
author: B. Stack <bgstack15@gmail.com> 2024-07-09 15:11:26 -0400
committer: B. Stack <bgstack15@gmail.com> 2024-07-09 15:11:26 -0400
commit: 516263ea51350514571deafc70b14f9f19d760d8 (patch)
tree: 9da665d2e3eee383335c5f2ecc82dd649824ed66 /libraries
download: library-info-516263ea51350514571deafc70b14f9f19d760d8.tar.gz
library-info-516263ea51350514571deafc70b14f9f19d760d8.tar.bz2
library-info-516263ea51350514571deafc70b14f9f19d760d8.zip
initial commit
Diffstat (limited to 'libraries')
-rw-r--r-- libraries/__init__.py 6
-rw-r--r-- libraries/aspen.py 125
-rw-r--r-- libraries/base.py 50
-rw-r--r-- libraries/polaris.py 164
4 files changed, 345 insertions, 0 deletions
diff --git a/libraries/__init__.py b/libraries/__init__.py
new file mode 100644
index 0000000..db032cd
--- /dev/null
+++ b/libraries/__init__.py
@@ -0,0 +1,6 @@
+# File: libraries/__init__.py
+# Project: library_info
+import os.path as _path
+import glob as _glob
+modules = _glob.glob(_path.join(_path.dirname(__file__), "*.py"))
+__all__ = [ _path.basename(f)[:-3] for f in modules if _path.isfile(f) and not _path.basename(f).startswith("_")]
diff --git a/libraries/aspen.py b/libraries/aspen.py
new file mode 100644
index 0000000..1d8de39
--- /dev/null
+++ b/libraries/aspen.py
@@ -0,0 +1,125 @@
+#!/usr/bin/env python3
+# File: libraries/aspen.py
+# Author: bgstack15
+# Startdate: 2024-07-06-7 08:06
+# SPDX-License-Identifier: GPL-3.0-only
+# Title: Library Plugin for Aspen
+# Project: library_info
+# Purpose: plugin for aspen-based library websites
+# History:
+# Usage:
+# Reference:
+# Improve:
+# Dependencies:
+# dep-devuan: python3-bs4
+
+from .base import *
+import requests, json, dateutil, base64, os, sys
+from bs4 import BeautifulSoup
+
+class Library(BaseLibrary):
+
+ def __init__(self, config_obj = None, alias = None, username = None, password = None, baseurl = None, session = None):
+ if config_obj and "username" in config_obj:
+ self.username = config_obj["username"]
+ else:
+ self.username = username
+ if config_obj and "password" in config_obj:
+ self.password = config_obj["password"]
+ else:
+ self.password = password
+ if config_obj and "baseurl" in config_obj:
+ self.baseurl = config_obj["baseurl"]
+ else:
+ self.baseurl = baseurl if baseurl else "https://aspen.example.org"
+ self.baseurl = self.baseurl.rstrip("/")
+ if session and type(session) == requests.sessions.Session:
+ self.session = session
+ else:
+ self.session = requests.Session()
+ if config_obj and "alias" in config_obj:
+ self.alias = config_obj["alias"]
+ else:
+ self.alias = alias if alias else "Aspen-based library"
+ # log in now. Why would we not?
+ self.login()
+
+ def get_checkouts(self, verbose = False):
+ # WORKHERE: no example of possible/completed renewals at this time
+ checked_out_objects = []
+ b = self.baseurl
+ s = self.session
+ # step 1: visit the "checked out" web page, so it doesn't freak out that I am taking shortcuts
+ headers = {
+ "Content-Type": "application/x-www-form-urlencoded",
+ "Referer": f"{b}/MyAccount/Home",
+ "Priority": "u=1"
+ }
+ s.get(f"{b}/MyAccount/CheckedOut", headers = headers)
+ # step 2: visit the checkout list which is a cruddy html-inside-json garbage
+ headers = {
+ "Referer": f"{b}/MyAccount/CheckedOut?source=all"
+ }
+ params = {
+ "method": "getCheckouts",
+ "source": "all"
+ }
+ output = s.get(f"{b}/MyAccount/AJAX",params=params,headers=headers)
+ output = json.loads(output.content)["checkouts"].replace("\xa0"," ")
+ soup = BeautifulSoup(output, "html.parser")
+ # goals: get title, format, picture, barcode, due date, possible renewal date, times_renewed, when_checked_out
+ results = soup.find_all("div", class_ = "result row")
+ #results = soup.find_all("span",class_="result-index")
+ #results = [i.parent.parent.parent for i in results]
+ for i in results:
+ title = i.find(class_ = "result-title").contents[0]
+ labels = [j.contents[0] for j in i.find_all("div", class_ = "result-label")]
+ values = [j.contents[0] for j in i.find_all("div", class_ = "result-value")]
+ values_dict = dict(map(lambda i,j:(i,j),labels,values))
+ if verbose:
+ print(f"DEBUG: Values_dict: {values_dict}",file=sys.stderr)
+ # contains Call number, Format, Barcode, Due
+ img_href = i.find("img", class_="listResultImage")["src"]
+ img_response = s.get(img_href)
+ img_b64 = base64.b64encode(img_response.content).decode()
+ img_type = img_response.headers["Content-Type"]
+ # normalize format
+ item_format = ""
+ item_format = "book" if "book" in values_dict["Format"].lower() else ""
+ if not item_format:
+ item_format = values_dict["Format"]
+ obj = {
+ "patron": self.alias,
+ "title": title,
+ "format": item_format,
+ "barcode": values_dict["Barcode"],
+ "due": dateutil.parser.parse(values_dict["Due"]),
+ "img_href": img_href,
+ "img50": img_b64[:50],
+ "img": img_b64,
+ "img_type": img_type,
+ }
+ checked_out_objects.append(obj)
+ return checked_out_objects
+
+ def get_class_name(self):
+ return os.path.basename(__file__).replace(".py","")
+
+ def login(self):
+ b = self.baseurl
+ s = self.session
+ # step 1: visit login page
+ s.get(f"{b}/MyAccount/Home")
+ # step 2: log in
+ # curl 'https://aspen.example.org/MyAccount/Home' -X POST -H 'Content-Type: application/x-www-form-urlencoded' -H 'Referer: https://aspen.example.org/MyAccount/Home' -H 'Priority: u=1' --data-raw 'username=987213497234&password=1234&submit=Login'
+ data = {
+ "username": self.username,
+ "password": self.password,
+ "submit": "Login"
+ }
+ headers = {
+ "Content-Type": "application/x-www-form-urlencoded",
+ "Referer": f"{b}/MyAccount/Home",
+ "Priority": "u=1"
+ }
+ s.post(f"{b}/MyAccount/Home", headers = headers, data = data)
diff --git a/libraries/base.py b/libraries/base.py
new file mode 100644
index 0000000..74394be
--- /dev/null
+++ b/libraries/base.py
@@ -0,0 +1,50 @@
+#!/usr/bin/env python3
+# File: libraries/base.py
+# Author: bgstack15
+# Startdate: 2024-07-06-7 08:08
+# SPDX-License-Identifier: GPL-3.0-only
+# Title: Library Plugin example
+# Project: library_info
+# Purpose: base class for library plugins
+# History:
+# Usage:
+# Reference:
+# Improve:
+# Dependencies:
+# dep-devuan: python3-bs4
+
+# For a real library you will need this entry too:
+#from .base import *
+import requests, json, dateutil, base64, os
+from bs4 import BeautifulSoup
+
+class BaseLibrary:
+
+ def __init__(self, username = None, password = None, baseurl = None):
+ self.username = username
+ self.password = password
+ self.baseurl = baseurl
+ # will need cookies or session manager here.
+
+ def get_checkouts(self):
+ """ STUB """
+ sample = {
+ "title": "sample book 1",
+ "format": "book",
+ "picture": "DUMMYIMAGEprobablybase64ed",
+ "barcode": 912738490172349,
+ "duedate": "2024-07-12",
+ "possible_renewal_date": "2024-07-11",
+ "times_renewed": 0,
+ "checkout_date": "2024-07-02"
+ }
+ return [sample]
+
+ def get_class_name(self):
+ """ Leave this function as is. It will return the filename. """
+ return os.path.basename(__file__).replace(".py","")
+
+ def login(self):
+ """
+ This is where the login interaction should happen.
+ """
diff --git a/libraries/polaris.py b/libraries/polaris.py
new file mode 100644
index 0000000..8b3037e
--- /dev/null
+++ b/libraries/polaris.py
@@ -0,0 +1,164 @@
+#!/usr/bin/env python3
+# File: libraries/polaris.py
+# Author: bgstack15
+# Startdate: 2024-07-06-7 19:11
+# SPDX-License-Identifier: GPL-3.0-only
+# Title: Library Plugin for Polaris
+# Purpose: plugin for polaris-based library websites
+# History:
+# Usage:
+# Reference:
+# Improve:
+# Dependencies:
+# dep-devuan: python3-bs4
+
+from .base import *
+import requests, dateutil, base64, os, urllib, sys
+from bs4 import BeautifulSoup
+
+class Library(BaseLibrary):
+
+ def __init__(self, config_obj = None, alias = None, username = None, password = None, baseurl = None, session = None):
+ if config_obj and "username" in config_obj:
+ self.username = config_obj["username"]
+ else:
+ self.username = username
+ if config_obj and "password" in config_obj:
+ self.password = config_obj["password"]
+ else:
+ self.password = password
+ if config_obj and "baseurl" in config_obj:
+ self.baseurl = config_obj["baseurl"]
+ else:
+ self.baseurl = baseurl if baseurl else "https://catalog.example.org/polaris"
+ #/logon.aspx?src=http%3a%2f%2fcatalog.example.org%2fpolaris%2fpatronaccount%2fdefault.aspx%3fctx%3d1.1033.0.0.3&ctx=1.1033.0.0.3"
+ self.baseurl = self.baseurl.rstrip("/")
+ if session and type(session) == requests.sessions.Session:
+ self.session = session
+ else:
+ self.session = requests.Session()
+ if config_obj and "alias" in config_obj:
+ self.alias = config_obj["alias"]
+ else:
+ self.alias = alias if alias else "Polaris-based library"
+ # asp/M$ components
+ self.language_code = "1.1033.0.0.3"
+ self.baseurl_http = self.baseurl.replace("https://","http://")
+ self.src = f"{self.baseurl_http}/patronaccount/default.aspx?ctx={self.language_code}&ctx={self.language_code}"
+ self.src2 = urllib.parse.unquote_plus(self.src)
+ # log in now. Why would we not?
+ self.login()
+
+ def get_checkouts(self, verbose=False):
+ checked_out_objects = []
+ b = self.baseurl
+ s = self.session
+ # step 1: visit the checked out webpage
+ # curl 'https://catalog.example.org/polaris/patronaccount/itemsout.aspx' -H 'Referer: https://catalog.example.org/polaris/patronaccount/default.aspx?ctx=1.1033.0.0.3' -H 'Cookie: ASP.NET_SessionId=f4gn4iqzma4ftv3i3x3qo4k3; OrgID=1'
+ headers = {
+ "Referer": self.src2,
+ "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
+ "Priority": "u=1",
+ }
+ output = s.get(f"{b}/patronaccount/itemsout.aspx", headers=headers).content.decode()
+ soup = BeautifulSoup(output, "html.parser")
+ all_checkouts = soup.find_all("tr",class_=["patron-account__grid-alternating-row","patron-account__grid-row"])
+ for item in all_checkouts:
+ images_hrefs = [i["src"] for i in item.find_all("img",attrs={"aria-label":"Cover Image"})]
+ titles = [i.text for i in item.find_all("span",id="labelTitle")]
+ renewals_lefts = [i.text for i in item.find_all("span",id="labelRenewalsLeft")]
+ due_dates = [i.text for i in item.find_all("span",id="labelDueDate")]
+ formats = item.find_all("img")
+ formats2 = []
+ for i in formats:
+ try:
+ i["title"]
+ formats2.append(i["alt"])
+ except:
+ pass
+ #formats = formats2
+ # for each item we must run the info link:
+ # curl 'https://catalog.example.org/polaris/patronaccount/components/ajaxiteminfo.aspx?RecID=1296014&VendorObjID=&VendorID=0' -H 'Referer: https://catalog.example.org/polaris/patronaccount/itemsout.aspx' -H 'Cookie: ASP.NET_SessionId=b0ssctcysdrbcfy3jlwwdvr0; OrgID=1'
+ info_links = [i["href"].replace("javascript:showModalBasic('","").replace("')","") for i in item.find_all("a",title="Item Details")]
+ # obj MUST HAVE patron, title, format, barcode, due, img
+ # obj SHOULD HAVE img50, img_href
+ if verbose:
+ print(f"DEBUG: got images_hrefs {images_hrefs}",file=sys.stderr)
+ print(f"DEBUG: got titles {titles}",file=sys.stderr)
+ print(f"DEBUG: got renewals_lefts {renewals_lefts}",file=sys.stderr)
+ print(f"DEBUG: got due_dates {due_dates}",file=sys.stderr)
+ print(f"DEBUG: got formats {formats}",file=sys.stderr)
+ print(f"DEBUG: got formats2 {formats2}",file=sys.stderr)
+ x = -1
+ for i in titles:
+ x += 1
+ img_response = s.get(images_hrefs[x])
+ img_b64 = base64.b64encode(img_response.content).decode()
+ img_type = img_response.headers["Content-Type"]
+ details_response = s.get(info_links[x]).content.decode().replace("&nbsp;","")
+ soup2 = BeautifulSoup(details_response,"html.parser")
+ #details_labels = [i.text for i in soup2.find_all("td",class_="nsm-label") if i.text]
+ #details_texts = [i.text for i in soup2.find_all("td",class_="") if i.text]
+ #details_texts = [i.text for i in soup2.find_all("td") if ("class" in i and "nsm-label" not in i["class"]) or ("class" not in i)]
+ details = [i.text for i in soup2.find_all("td")]
+ details_labels = details[::3]
+ details_texts = details[2::3]
+ details_dict = dict(zip(details_labels,details_texts))
+ if verbose:
+ print(f"DEBUG: labels {details_labels}",file=sys.stderr)
+ print(f"DEBUG: texts {details_texts}",file=sys.stderr)
+ print(f"DEBUG: details_dict {details_dict}",file=sys.stderr)
+ obj = {
+ "patron": self.alias,
+ "title": i,
+ "format": formats2[x],
+ "barcode": details_dict["Item Barcode"],
+ "due": dateutil.parser.parse(due_dates[x]),
+ "img_href": images_hrefs[x],
+ "renewals_left": renewals_lefts[x],
+ "img_type": img_type,
+ "img50": img_b64[:50],
+ "img": img_b64,
+ "checkout_date": dateutil.parser.parse(details_dict["Date of Checkout"]),
+ "renewals_left": details_dict["Renewals Permitted"],
+ }
+ checked_out_objects.append(obj)
+ return checked_out_objects
+
+ # Boilerplate
+ def get_class_name(self):
+ return os.path.basename(__file__).replace(".py","")
+
+ def login(self):
+ b = self.baseurl
+ s = self.session
+ # step 1: visit logon page
+ response = s.get(f"{b}/logon.aspx?src={self.src2}").content
+ # probably will need to get the __VIEWSTATE, VIEWSTATEGENERATOR, and Eventvalidation
+ soup = BeautifulSoup(response, "html.parser")
+ form = soup.find("form", id = "formMain")
+ viewstate = form.find("input", id = "__VIEWSTATE")["value"]
+ viewstategenerator = form.find("input", id = "__VIEWSTATEGENERATOR")["value"]
+ eventvalidation = form.find("input", id = "__EVENTVALIDATION")["value"]
+ # step 2: submit logon
+ # curl 'https://catalog.example.org/polaris/logon.aspx?src=http%3a%2f%2fcatalog.example.org%2fpolaris%2fpatronaccount%2fdefault.aspx%3fctx%3d1.1033.0.0.3&ctx=1.1033.0.0.3' -X POST -H 'Content-Type: application/x-www-form-urlencoded' -H 'Referer: https://catalog.example.org/polaris/logon.aspx?src=http%3a%2f%2fcatalog.example.org%2fpolaris%2fpatronaccount%2fdefault.aspx%3fctx%3d1.1033.0.0.3&ctx=1.1033.0.0.3' -H 'Cookie: ASP.NET_SessionId=vxcsdp1cj0hx4pw5xzyvjzmv; OrgID=1' --data-raw '__VIEWSTATE=%2FwEPDwUTRUNCATED_TRUNCATEDAFFNraXAgdG8gbWFpbiBjb250ZW50EVNraXAgdG8gbWFpbiBtZW51ZGR1k%2BAxg5Y9OX3bD7t2P%2FT5kMtk3%2F5W7qyJnA%2B8VzrtGg%3D%3D&__VIEWSTATEGENERATOR=ADF38500&__EVENTVALIDATION=%2FwEdAAR9XqzqHC%2FBfgLuDOYb7iPsH5Q20m6JqSZMIYtkRxbfSedJDH80kUkzbS%2FLyzKWHn1t7yJCbczH%2Bwz7aZL%2F8kJqv109lw0hamEH0qk8Qgc0RXgof%2BWcR4FnrI1R3xMIkD4%3D&ctl00%24BodyMainContent%24textboxBarcodeUsername=userNameHere&ctl00%24BodyMainContent%24textboxPassword=12Pass34&ctl00%24BodyMainContent%24buttonSubmit=Log+In'
+ headers = {
+ "Content-Type": "application/x-www-form-urlencoded",
+ "Referer": f"{b}/logon.aspx?src={self.src2}",
+ #"User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:127.0) Gecko/20100101 Firefox/127.0",
+ }
+ data = {
+ "__VIEWSTATE": viewstate,
+ "__VIEWSTATEGENERATOR": viewstategenerator,
+ "__EVENTVALIDATION": eventvalidation,
+ "ctl00$BodyMainContent$textboxBarcodeUsername": self.username,
+ "ctl00$BodyMainContent$textboxPassword": self.password,
+ "ctl00$BodyMainContent$buttonSubmit": "Log In",
+ }
+ # this always returns a 200, even if the user login failed
+ url = f"{b}/logon.aspx?src={self.src2}"
+ response = s.post(url, headers=headers, data=data).content.decode()
+ # success is determined by absence of "invalid Library Card" or "Please try again"
+ for msg in ["invalid Library Card", "Please enter your Library", "Please try again"]:
+ if msg in response:
+ raise Exception(f"Failed to log in to {self.alias}")
bgstack15