From 516263ea51350514571deafc70b14f9f19d760d8 Mon Sep 17 00:00:00 2001
From: "B. Stack"
Date: Tue, 9 Jul 2024 15:11:26 -0400
Subject: initial commit

---
 libraries/__init__.py |   6 ++
 libraries/aspen.py    | 125 ++++++++++++++++++++++++++++++++++++++
 libraries/base.py     |  50 +++++++++++++++
 libraries/polaris.py  | 164 ++++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 345 insertions(+)
 create mode 100644 libraries/__init__.py
 create mode 100644 libraries/aspen.py
 create mode 100644 libraries/base.py
 create mode 100644 libraries/polaris.py
(limited to 'libraries')

diff --git a/libraries/__init__.py b/libraries/__init__.py
new file mode 100644
index 0000000..db032cd
--- /dev/null
+++ b/libraries/__init__.py
@@ -0,0 +1,6 @@
+# File: libraries/__init__.py
+# Project: library_info
+import os.path as _path
+import glob as _glob
+modules = _glob.glob(_path.join(_path.dirname(__file__), "*.py"))
+__all__ = [ _path.basename(f)[:-3] for f in modules if _path.isfile(f) and not _path.basename(f).startswith("_")]
diff --git a/libraries/aspen.py b/libraries/aspen.py
new file mode 100644
index 0000000..1d8de39
--- /dev/null
+++ b/libraries/aspen.py
@@ -0,0 +1,125 @@
+#!/usr/bin/env python3
+# File: libraries/aspen.py
+# Author: bgstack15
+# Startdate: 2024-07-06-7 08:06
+# SPDX-License-Identifier: GPL-3.0-only
+# Title: Library Plugin for Aspen
+# Project: library_info
+# Purpose: plugin for aspen-based library websites
+# History:
+# Usage:
+# Reference:
+# Improve:
+# Dependencies:
+# dep-devuan: python3-bs4
+
+from .base import *
+import requests, json, dateutil.parser, base64, os, sys
+from bs4 import BeautifulSoup
+
+class Library(BaseLibrary):
+
+    def __init__(self, config_obj = None, alias = None, username = None, password = None, baseurl = None, session = None):
+        if config_obj and "username" in config_obj:
+            self.username = config_obj["username"]
+        else:
+            self.username = username
+        if config_obj and "password" in config_obj:
+            self.password = config_obj["password"]
+        else:
+            self.password = password
+        if config_obj and "baseurl" in config_obj:
+            self.baseurl = config_obj["baseurl"]
+        else:
+            self.baseurl = baseurl if baseurl else "https://aspen.example.org"
+        self.baseurl = self.baseurl.rstrip("/")
+        if session and type(session) == requests.sessions.Session:
+            self.session = session
+        else:
+            self.session = requests.Session()
+        if config_obj and "alias" in config_obj:
+            self.alias = config_obj["alias"]
+        else:
+            self.alias = alias if alias else "Aspen-based library"
+        # log in now. Why would we not?
+        self.login()
+
+    def get_checkouts(self, verbose = False):
+        # WORKHERE: no example of possible/completed renewals at this time
+        checked_out_objects = []
+        b = self.baseurl
+        s = self.session
+        # step 1: visit the "checked out" web page, so it doesn't freak out that I am taking shortcuts
+        headers = {
+            "Content-Type": "application/x-www-form-urlencoded",
+            "Referer": f"{b}/MyAccount/Home",
+            "Priority": "u=1"
+        }
+        s.get(f"{b}/MyAccount/CheckedOut", headers = headers)
+        # step 2: visit the checkout list which is a cruddy html-inside-json garbage
+        headers = {
+            "Referer": f"{b}/MyAccount/CheckedOut?source=all"
+        }
+        params = {
+            "method": "getCheckouts",
+            "source": "all"
+        }
+        output = s.get(f"{b}/MyAccount/AJAX",params=params,headers=headers)
+        output = json.loads(output.content)["checkouts"].replace("\xa0"," ")
+        soup = BeautifulSoup(output, "html.parser")
+        # goals: get title, format, picture, barcode, due date, possible renewal date, times_renewed, when_checked_out
+        results = soup.find_all("div", class_ = "result row")
+        #results = soup.find_all("span",class_="result-index")
+        #results = [i.parent.parent.parent for i in results]
+        for i in results:
+            title = i.find(class_ = "result-title").contents[0]
+            labels = [j.contents[0] for j in i.find_all("div", class_ = "result-label")]
+            values = [j.contents[0] for j in i.find_all("div", class_ = "result-value")]
+            values_dict = dict(map(lambda i,j:(i,j),labels,values))
+            if verbose:
+                print(f"DEBUG: Values_dict: {values_dict}",file=sys.stderr)
+            # contains Call number, Format, Barcode, Due
+            img_href = i.find("img", class_="listResultImage")["src"]
+            img_response = s.get(img_href)
+            img_b64 = base64.b64encode(img_response.content).decode()
+            img_type = img_response.headers["Content-Type"]
+            # normalize format
+            item_format = ""
+            item_format = "book" if "book" in values_dict["Format"].lower() else ""
+            if not item_format:
+                item_format = values_dict["Format"]
+            obj = {
+                "patron": self.alias,
+                "title": title,
+                "format": item_format,
+                "barcode": values_dict["Barcode"],
+                "due": dateutil.parser.parse(values_dict["Due"]),
+                "img_href": img_href,
+                "img50": img_b64[:50],
+                "img": img_b64,
+                "img_type": img_type,
+            }
+            checked_out_objects.append(obj)
+        return checked_out_objects
+
+    def get_class_name(self):
+        return os.path.basename(__file__).replace(".py","")
+
+    def login(self):
+        b = self.baseurl
+        s = self.session
+        # step 1: visit login page
+        s.get(f"{b}/MyAccount/Home")
+        # step 2: log in
+        # curl 'https://aspen.example.org/MyAccount/Home' -X POST -H 'Content-Type: application/x-www-form-urlencoded' -H 'Referer: https://aspen.example.org/MyAccount/Home' -H 'Priority: u=1' --data-raw 'username=987213497234&password=1234&submit=Login'
+        data = {
+            "username": self.username,
+            "password": self.password,
+            "submit": "Login"
+        }
+        headers = {
+            "Content-Type": "application/x-www-form-urlencoded",
+            "Referer": f"{b}/MyAccount/Home",
+            "Priority": "u=1"
+        }
+        s.post(f"{b}/MyAccount/Home", headers = headers, data = data)
diff --git a/libraries/base.py b/libraries/base.py
new file mode 100644
index 0000000..74394be
--- /dev/null
+++ b/libraries/base.py
@@ -0,0 +1,50 @@
+#!/usr/bin/env python3
+# File: libraries/base.py
+# Author: bgstack15
+# Startdate: 2024-07-06-7 08:08
+# SPDX-License-Identifier: GPL-3.0-only
+# Title: Library Plugin example
+# Project: library_info
+# Purpose: base class for library plugins
+# History:
+# Usage:
+# Reference:
+# Improve:
+# Dependencies:
+# dep-devuan: python3-bs4
+
+# For a real library you will need this entry too:
+#from .base import *
+import requests, json, dateutil, base64, os
+from bs4 import BeautifulSoup
+
+class BaseLibrary:
+
+    def __init__(self, username = None, password = None, baseurl = None):
+        self.username = username
+        self.password = password
+        self.baseurl = baseurl
+        # will need cookies or session manager here.
+
+    def get_checkouts(self):
+        """ STUB """
+        sample = {
+            "title": "sample book 1",
+            "format": "book",
+            "picture": "DUMMYIMAGEprobablybase64ed",
+            "barcode": 912738490172349,
+            "duedate": "2024-07-12",
+            "possible_renewal_date": "2024-07-11",
+            "times_renewed": 0,
+            "checkout_date": "2024-07-02"
+        }
+        return [sample]
+
+    def get_class_name(self):
+        """ Leave this function as is. It will return the filename. """
+        return os.path.basename(__file__).replace(".py","")
+
+    def login(self):
+        """
+        This is where the login interaction should happen.
+        """
diff --git a/libraries/polaris.py b/libraries/polaris.py
new file mode 100644
index 0000000..8b3037e
--- /dev/null
+++ b/libraries/polaris.py
@@ -0,0 +1,164 @@
+#!/usr/bin/env python3
+# File: libraries/polaris.py
+# Author: bgstack15
+# Startdate: 2024-07-06-7 19:11
+# SPDX-License-Identifier: GPL-3.0-only
+# Title: Library Plugin for Polaris
+# Purpose: plugin for polaris-based library websites
+# History:
+# Usage:
+# Reference:
+# Improve:
+# Dependencies:
+# dep-devuan: python3-bs4
+
+from .base import *
+import requests, dateutil.parser, base64, os, urllib.parse, sys
+from bs4 import BeautifulSoup
+
+class Library(BaseLibrary):
+
+    def __init__(self, config_obj = None, alias = None, username = None, password = None, baseurl = None, session = None):
+        if config_obj and "username" in config_obj:
+            self.username = config_obj["username"]
+        else:
+            self.username = username
+        if config_obj and "password" in config_obj:
+            self.password = config_obj["password"]
+        else:
+            self.password = password
+        if config_obj and "baseurl" in config_obj:
+            self.baseurl = config_obj["baseurl"]
+        else:
+            self.baseurl = baseurl if baseurl else "https://catalog.example.org/polaris"
+            #/logon.aspx?src=http%3a%2f%2fcatalog.example.org%2fpolaris%2fpatronaccount%2fdefault.aspx%3fctx%3d1.1033.0.0.3&ctx=1.1033.0.0.3"
+        self.baseurl = self.baseurl.rstrip("/")
+        if session and type(session) == requests.sessions.Session:
+            self.session = session
+        else:
+            self.session = requests.Session()
+        if config_obj and "alias" in config_obj:
+            self.alias = config_obj["alias"]
+        else:
+            self.alias = alias if alias else "Polaris-based library"
+        # asp/M$ components
+        self.language_code = "1.1033.0.0.3"
+        self.baseurl_http = self.baseurl.replace("https://","http://")
+        self.src = f"{self.baseurl_http}/patronaccount/default.aspx?ctx={self.language_code}&ctx={self.language_code}"
+        self.src2 = urllib.parse.unquote_plus(self.src)
+        # log in now. Why would we not?
+        self.login()
+
+    def get_checkouts(self, verbose=False):
+        checked_out_objects = []
+        b = self.baseurl
+        s = self.session
+        # step 1: visit the checked out webpage
+        # curl 'https://catalog.example.org/polaris/patronaccount/itemsout.aspx' -H 'Referer: https://catalog.example.org/polaris/patronaccount/default.aspx?ctx=1.1033.0.0.3' -H 'Cookie: ASP.NET_SessionId=f4gn4iqzma4ftv3i3x3qo4k3; OrgID=1'
+        headers = {
+            "Referer": self.src2,
+            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
+            "Priority": "u=1",
+        }
+        output = s.get(f"{b}/patronaccount/itemsout.aspx", headers=headers).content.decode()
+        soup = BeautifulSoup(output, "html.parser")
+        all_checkouts = soup.find_all("tr",class_=["patron-account__grid-alternating-row","patron-account__grid-row"])
+        for item in all_checkouts:
+            images_hrefs = [i["src"] for i in item.find_all("img",attrs={"aria-label":"Cover Image"})]
+            titles = [i.text for i in item.find_all("span",id="labelTitle")]
+            renewals_lefts = [i.text for i in item.find_all("span",id="labelRenewalsLeft")]
+            due_dates = [i.text for i in item.find_all("span",id="labelDueDate")]
+            formats = item.find_all("img")
+            formats2 = []
+            for i in formats:
+                try:
+                    i["title"]
+                    formats2.append(i["alt"])
+                except KeyError:
+                    pass
+            #formats = formats2
+            # for each item we must run the info link:
+            # curl 'https://catalog.example.org/polaris/patronaccount/components/ajaxiteminfo.aspx?RecID=1296014&VendorObjID=&VendorID=0' -H 'Referer: https://catalog.example.org/polaris/patronaccount/itemsout.aspx' -H 'Cookie: ASP.NET_SessionId=b0ssctcysdrbcfy3jlwwdvr0; OrgID=1'
+            info_links = [i["href"].replace("javascript:showModalBasic('","").replace("')","") for i in item.find_all("a",title="Item Details")]
+            # obj MUST HAVE patron, title, format, barcode, due, img
+            # obj SHOULD HAVE img50, img_href
+            if verbose:
+                print(f"DEBUG: got images_hrefs {images_hrefs}",file=sys.stderr)
+                print(f"DEBUG: got titles {titles}",file=sys.stderr)
+                print(f"DEBUG: got renewals_lefts {renewals_lefts}",file=sys.stderr)
+                print(f"DEBUG: got due_dates {due_dates}",file=sys.stderr)
+                print(f"DEBUG: got formats {formats}",file=sys.stderr)
+                print(f"DEBUG: got formats2 {formats2}",file=sys.stderr)
+            x = -1
+            for i in titles:
+                x += 1
+                img_response = s.get(images_hrefs[x])
+                img_b64 = base64.b64encode(img_response.content).decode()
+                img_type = img_response.headers["Content-Type"]
+                details_response = s.get(info_links[x]).content.decode().replace("\xa0","")
+                soup2 = BeautifulSoup(details_response,"html.parser")
+                #details_labels = [i.text for i in soup2.find_all("td",class_="nsm-label") if i.text]
+                #details_texts = [i.text for i in soup2.find_all("td",class_="") if i.text]
+                #details_texts = [i.text for i in soup2.find_all("td") if ("class" in i and "nsm-label" not in i["class"]) or ("class" not in i)]
+                details = [i.text for i in soup2.find_all("td")]
+                details_labels = details[::3]
+                details_texts = details[2::3]
+                details_dict = dict(zip(details_labels,details_texts))
+                if verbose:
+                    print(f"DEBUG: labels {details_labels}",file=sys.stderr)
+                    print(f"DEBUG: texts {details_texts}",file=sys.stderr)
+                    print(f"DEBUG: details_dict {details_dict}",file=sys.stderr)
+                obj = {
+                    "patron": self.alias,
+                    "title": i,
+                    "format": formats2[x],
+                    "barcode": details_dict["Item Barcode"],
+                    "due": dateutil.parser.parse(due_dates[x]),
+                    "img_href": images_hrefs[x],
+                    "renewals_left": renewals_lefts[x],
+                    "img_type": img_type,
+                    "img50": img_b64[:50],
+                    "img": img_b64,
+                    "checkout_date": dateutil.parser.parse(details_dict["Date of Checkout"]),
+                    "renewals_permitted": details_dict["Renewals Permitted"],
+                }
+                checked_out_objects.append(obj)
+        return checked_out_objects
+
+    # Boilerplate
+    def get_class_name(self):
+        return os.path.basename(__file__).replace(".py","")
+
+    def login(self):
+        b = self.baseurl
+        s = self.session
+        # step 1: visit logon page
+        response = s.get(f"{b}/logon.aspx?src={self.src2}").content
+        # probably will need to get the __VIEWSTATE, VIEWSTATEGENERATOR, and Eventvalidation
+        soup = BeautifulSoup(response, "html.parser")
+        form = soup.find("form", id = "formMain")
+        viewstate = form.find("input", id = "__VIEWSTATE")["value"]
+        viewstategenerator = form.find("input", id = "__VIEWSTATEGENERATOR")["value"]
+        eventvalidation = form.find("input", id = "__EVENTVALIDATION")["value"]
+        # step 2: submit logon
+        # curl 'https://catalog.example.org/polaris/logon.aspx?src=http%3a%2f%2fcatalog.example.org%2fpolaris%2fpatronaccount%2fdefault.aspx%3fctx%3d1.1033.0.0.3&ctx=1.1033.0.0.3' -X POST -H 'Content-Type: application/x-www-form-urlencoded' -H 'Referer: https://catalog.example.org/polaris/logon.aspx?src=http%3a%2f%2fcatalog.example.org%2fpolaris%2fpatronaccount%2fdefault.aspx%3fctx%3d1.1033.0.0.3&ctx=1.1033.0.0.3' -H 'Cookie: ASP.NET_SessionId=vxcsdp1cj0hx4pw5xzyvjzmv; OrgID=1' --data-raw '__VIEWSTATE=%2FwEPDwUTRUNCATED_TRUNCATEDAFFNraXAgdG8gbWFpbiBjb250ZW50EVNraXAgdG8gbWFpbiBtZW51ZGR1k%2BAxg5Y9OX3bD7t2P%2FT5kMtk3%2F5W7qyJnA%2B8VzrtGg%3D%3D&__VIEWSTATEGENERATOR=ADF38500&__EVENTVALIDATION=%2FwEdAAR9XqzqHC%2FBfgLuDOYb7iPsH5Q20m6JqSZMIYtkRxbfSedJDH80kUkzbS%2FLyzKWHn1t7yJCbczH%2Bwz7aZL%2F8kJqv109lw0hamEH0qk8Qgc0RXgof%2BWcR4FnrI1R3xMIkD4%3D&ctl00%24BodyMainContent%24textboxBarcodeUsername=userNameHere&ctl00%24BodyMainContent%24textboxPassword=12Pass34&ctl00%24BodyMainContent%24buttonSubmit=Log+In'
+        headers = {
+            "Content-Type": "application/x-www-form-urlencoded",
+            "Referer": f"{b}/logon.aspx?src={self.src2}",
+            #"User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:127.0) Gecko/20100101 Firefox/127.0",
+        }
+        data = {
+            "__VIEWSTATE": viewstate,
+            "__VIEWSTATEGENERATOR": viewstategenerator,
+            "__EVENTVALIDATION": eventvalidation,
+            "ctl00$BodyMainContent$textboxBarcodeUsername": self.username,
+            "ctl00$BodyMainContent$textboxPassword": self.password,
+            "ctl00$BodyMainContent$buttonSubmit": "Log In",
+        }
+        # this always returns a 200, even if the user login failed
+        url = f"{b}/logon.aspx?src={self.src2}"
+        response = s.post(url, headers=headers, data=data).content.decode()
+        # success is determined by absence of "invalid Library Card" or "Please try again"
+        for msg in ["invalid Library Card", "Please enter your Library", "Please try again"]:
+            if msg in response:
+                raise Exception(f"Failed to log in to {self.alias}")
--
cgit
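
For context, not part of the commit above: a minimal sketch of how a caller might consume these plugins, using the module list built by libraries/__init__.py and each plugin's Library class. The driver script, the config dict, and the alias are illustrative assumptions; the username and password simply echo the placeholder values from the curl comment in aspen.py.

#!/usr/bin/env python3
# Hypothetical driver script (not part of this commit); assumes it sits next to the libraries/ package.
import importlib
import libraries

# Illustrative per-patron config; keys mirror what each plugin's __init__ reads from config_obj.
config = {
    "aspen": {
        "alias": "Town library",
        "username": "987213497234",
        "password": "1234",
        "baseurl": "https://aspen.example.org",
    },
}

for plugin_name, plugin_config in config.items():
    if plugin_name not in libraries.__all__:
        continue
    module = importlib.import_module(f"libraries.{plugin_name}")
    lib = module.Library(config_obj = plugin_config)  # logs in during __init__
    for item in lib.get_checkouts():
        print(f'{item["patron"]}: {item["title"]} due {item["due"]:%Y-%m-%d}')

Note that each plugin logs in from its constructor, so a failed Polaris login would surface as an exception at construction time rather than when get_checkouts() is called.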