#!/usr/bin/env python3
# File: libraries/polaris.py
# Author: bgstack15
# Startdate: 2024-07-06-7 19:11
# SPDX-License-Identifier: GPL-3.0-only
# Title: Library Plugin for Polaris
# Purpose: plugin for polaris-based library websites
# History:
# Usage:
# Reference:
# Improve:
# Dependencies:
#    dep-devuan: python3-bs4

from .base import *
import base64
import os
import sys
import urllib.parse   # explicit submodule import: "import urllib" alone does not load urllib.parse

import dateutil.parser  # FIX: "import dateutil" alone does not load dateutil.parser (AttributeError)
import requests
from bs4 import BeautifulSoup


class Library(BaseLibrary):
    """Plugin for Polaris-based library catalog websites (classic ASP.NET front end).

    Scrapes the patron-account pages: logs in with the patron
    barcode/username and password, then lists currently checked-out items.
    """

    def __init__(self, config_obj=None, alias=None, username=None, password=None,
                 baseurl=None, session=None):
        """Store credentials/URLs and immediately log in.

        config_obj: optional dict; its "username", "password", "baseurl", and
            "alias" keys take precedence over the matching keyword arguments.
        session: optional requests.Session to reuse; a new one is created
            otherwise.
        Raises Exception (via login()) when the site rejects the credentials.
        """
        def _pick(key, fallback):
            # config_obj wins over the bare keyword argument
            if config_obj and key in config_obj:
                return config_obj[key]
            return fallback

        self.username = _pick("username", username)
        self.password = _pick("password", password)
        self.baseurl = _pick("baseurl",
                             baseurl if baseurl else "https://catalog.example.org/polaris")
        # /logon.aspx?src=http%3a%2f%2fcatalog.example.org%2fpolaris%2fpatronaccount%2fdefault.aspx%3fctx%3d1.1033.0.0.3&ctx=1.1033.0.0.3
        self.baseurl = self.baseurl.rstrip("/")
        # isinstance (not type ==) so requests.Session subclasses are accepted too
        if isinstance(session, requests.Session):
            self.session = session
        else:
            self.session = requests.Session()
        self.alias = _pick("alias", alias if alias else "Polaris-based library")
        # asp/M$ components
        self.language_code = "1.1033.0.0.3"
        self.baseurl_http = self.baseurl.replace("https://", "http://")
        self.src = (f"{self.baseurl_http}/patronaccount/default.aspx"
                    f"?ctx={self.language_code}&ctx={self.language_code}")
        self.src2 = urllib.parse.unquote_plus(self.src)
        # log in now. Why would we not?
        self.login()

    def get_checkouts(self, verbose=False):
        """Return a list of dicts describing the patron's checked-out items.

        Each dict has: patron, title, format, barcode, due (datetime),
        checkout_date (datetime), img (base64 str), img50, img_type,
        img_href, renewals_left, renewals_permitted.
        verbose: when True, dump scraped intermediates to stderr.
        """
        checked_out_objects = []
        b = self.baseurl
        s = self.session
        # step 1: visit the checked out webpage
        # curl 'https://catalog.example.org/polaris/patronaccount/itemsout.aspx' -H 'Referer: https://catalog.example.org/polaris/patronaccount/default.aspx?ctx=1.1033.0.0.3' -H 'Cookie: ASP.NET_SessionId=f4gn4iqzma4ftv3i3x3qo4k3; OrgID=1'
        headers = {
            "Referer": self.src2,
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
            "Priority": "u=1",
        }
        output = s.get(f"{b}/patronaccount/itemsout.aspx", headers=headers).content.decode()
        soup = BeautifulSoup(output, "html.parser")
        all_checkouts = soup.find_all(
            "tr",
            class_=["patron-account__grid-alternating-row", "patron-account__grid-row"])
        for item in all_checkouts:
            images_hrefs = [i["src"] for i in
                            item.find_all("img", attrs={"aria-label": "Cover Image"})]
            titles = [i.text for i in item.find_all("span", id="labelTitle")]
            renewals_lefts = [i.text for i in item.find_all("span", id="labelRenewalsLeft")]
            due_dates = [i.text for i in item.find_all("span", id="labelDueDate")]
            formats = item.find_all("img")
            formats2 = []
            for i in formats:
                # only icons carrying a "title" attribute are format markers;
                # FIX: narrowed the bare except to the KeyError bs4 raises for
                # a missing attribute, so real bugs are no longer swallowed
                try:
                    i["title"]
                    formats2.append(i["alt"])
                except KeyError:
                    pass
            # for each item we must run the info link:
            # curl 'https://catalog.example.org/polaris/patronaccount/components/ajaxiteminfo.aspx?RecID=1296014&VendorObjID=&VendorID=0' -H 'Referer: https://catalog.example.org/polaris/patronaccount/itemsout.aspx' -H 'Cookie: ASP.NET_SessionId=b0ssctcysdrbcfy3jlwwdvr0; OrgID=1'
            info_links = [i["href"].replace("javascript:showModalBasic('", "").replace("')", "")
                          for i in item.find_all("a", title="Item Details")]
            # obj MUST HAVE patron, title, format, barcode, due, img
            # obj SHOULD HAVE img50, img_href
            if verbose:
                print(f"DEBUG: got images_hrefs {images_hrefs}", file=sys.stderr)
                print(f"DEBUG: got titles {titles}", file=sys.stderr)
                print(f"DEBUG: got renewals_lefts {renewals_lefts}", file=sys.stderr)
                print(f"DEBUG: got due_dates {due_dates}", file=sys.stderr)
                print(f"DEBUG: got formats {formats}", file=sys.stderr)
                print(f"DEBUG: got formats2 {formats2}", file=sys.stderr)
            for x, title in enumerate(titles):
                img_response = s.get(images_hrefs[x])
                img_b64 = base64.b64encode(img_response.content).decode()
                img_type = img_response.headers["Content-Type"]
                # strip non-breaking spaces (&nbsp;) the details pane pads with
                # -- NOTE(review): the original replaced a mangled whitespace
                # char; \xa0 is the only value that leaves the HTML parseable
                details_response = s.get(info_links[x]).content.decode().replace("\xa0", "")
                soup2 = BeautifulSoup(details_response, "html.parser")
                # details table rows come in (label, spacer, value) triples
                details = [i.text for i in soup2.find_all("td")]
                details_labels = details[::3]
                details_texts = details[2::3]
                details_dict = dict(zip(details_labels, details_texts))
                if verbose:
                    print(f"DEBUG: labels {details_labels}", file=sys.stderr)
                    print(f"DEBUG: texts {details_texts}", file=sys.stderr)
                    print(f"DEBUG: details_dict {details_dict}", file=sys.stderr)
                obj = {
                    "patron": self.alias,
                    "title": title,
                    "format": formats2[x],
                    "barcode": details_dict["Item Barcode"],
                    "due": dateutil.parser.parse(due_dates[x]),
                    "img_href": images_hrefs[x],
                    # FIX: the original dict literal listed "renewals_left"
                    # twice, so the on-page labelRenewalsLeft value was
                    # silently overwritten by the "Renewals Permitted" total
                    # (a different quantity). Expose both.
                    "renewals_left": renewals_lefts[x],
                    "renewals_permitted": details_dict["Renewals Permitted"],
                    "img_type": img_type,
                    "img50": img_b64[:50],
                    "img": img_b64,
                    "checkout_date": dateutil.parser.parse(details_dict["Date of Checkout"]),
                }
                checked_out_objects.append(obj)
        return checked_out_objects

    # Boilerplate
    def get_class_name(self):
        """Return this plugin's name, derived from its filename."""
        return os.path.basename(__file__).replace(".py", "")

    def login(self):
        """Log in to the Polaris patron account using the ASP.NET form flow.

        Fetches the logon page to harvest the __VIEWSTATE /
        __VIEWSTATEGENERATOR / __EVENTVALIDATION hidden fields, then POSTs the
        credentials. Raises Exception when the response body contains a
        known failure message (the site always answers HTTP 200).
        """
        b = self.baseurl
        s = self.session
        # step 1: visit logon page
        response = s.get(f"{b}/logon.aspx?src={self.src2}").content
        # probably will need to get the __VIEWSTATE, VIEWSTATEGENERATOR, and Eventvalidation
        soup = BeautifulSoup(response, "html.parser")
        form = soup.find("form", id="formMain")
        viewstate = form.find("input", id="__VIEWSTATE")["value"]
        viewstategenerator = form.find("input", id="__VIEWSTATEGENERATOR")["value"]
        eventvalidation = form.find("input", id="__EVENTVALIDATION")["value"]
        # step 2: submit logon
        # curl 'https://catalog.example.org/polaris/logon.aspx?src=...' -X POST -H 'Content-Type: application/x-www-form-urlencoded' --data-raw '__VIEWSTATE=...&__VIEWSTATEGENERATOR=...&__EVENTVALIDATION=...&ctl00%24BodyMainContent%24textboxBarcodeUsername=userNameHere&ctl00%24BodyMainContent%24textboxPassword=12Pass34&ctl00%24BodyMainContent%24buttonSubmit=Log+In'
        headers = {
            "Content-Type": "application/x-www-form-urlencoded",
            "Referer": f"{b}/logon.aspx?src={self.src2}",
            #"User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:127.0) Gecko/20100101 Firefox/127.0",
        }
        data = {
            "__VIEWSTATE": viewstate,
            "__VIEWSTATEGENERATOR": viewstategenerator,
            "__EVENTVALIDATION": eventvalidation,
            "ctl00$BodyMainContent$textboxBarcodeUsername": self.username,
            "ctl00$BodyMainContent$textboxPassword": self.password,
            "ctl00$BodyMainContent$buttonSubmit": "Log In",
        }
        # this always returns a 200, even if the user login failed
        url = f"{b}/logon.aspx?src={self.src2}"
        response = s.post(url, headers=headers, data=data).content.decode()
        # success is determined by absence of "invalid Library Card" or "Please try again"
        for msg in ["invalid Library Card", "Please enter your Library", "Please try again"]:
            if msg in response:
                raise Exception(f"Failed to log in to {self.alias}")