diff options
-rw-r--r-- | README.md | 59 | ||||
-rwxr-xr-x | coupons.py | 122 | ||||
-rw-r--r-- | coupons_web.py | 73 | ||||
-rw-r--r-- | extra/wsgi-coupons.conf | 7 | ||||
-rw-r--r-- | static/coupons.css | 27 | ||||
-rw-r--r-- | static/images/food lion.png | bin | 0 -> 7372 bytes | |||
-rw-r--r-- | static/images/ingles.png | bin | 0 -> 15401 bytes | |||
-rw-r--r-- | static/images/lidl.png | bin | 0 -> 7974 bytes | |||
-rw-r--r-- | static/images/publix.png | bin | 0 -> 12467 bytes | |||
-rw-r--r-- | static/results.js | 22 | ||||
-rw-r--r-- | templates/index.html | 19 | ||||
-rw-r--r-- | templates/results.html | 12 |
12 files changed, 312 insertions, 29 deletions
@@ -1,18 +1,40 @@ +<!-- + -- Filename: README.md + -- Location: coupons/ + -- Author: bgstack15 + -- Startdate: 2022-09-08 -- Title: README for coupons + -- Project: coupons + -- Purpose: Document the coupons project + -- History: + -- Usage: + -- Reference: + stackbin/README.md + -- Improve: + -- Documentation: + -- Dependencies: + --> # README for coupons -This project exists to make it easy to query the current sale papers for select grocery stores, including specifically Publix. +This project exists to make it easy to query the current sale papers for select grocery stores, including specifically Publix. Frontends include cli and a webapp (python flask). ## Upstream This project's upstream is at <https://bgstack15.ddns.net/cgit/coupons>. +## Features + +* simple cli +* simple wsgi web app with minimal javascript +* caching of results for faster lookups during the same day + ## Alternatives Visiting <https://southernsavers.com> manually, or each store's website. I previously started using selenium (see [aux/ads1.py](aux/ads1.py) in initial commit) but that was not necessary. ## Reason for existence -To simplify and automate searching for items currently on sale +To simplify and automate searching for items currently on sale. ## Using +### Command line interface Basic usage is pretty simple. You need to specify a store at a minimum. When the script visits the webpage, it will cache the file to `$XDG_CACHE_DIR/coupons/` to reduce the amount of work needed during the same day. ./coupons.py --store "publix" --search "candy" @@ -25,11 +47,38 @@ An already-cleaned json file would not need the **--clean** flag. But the cached See also `./coupons.py --help`. +### Web app + +Run a simple dev environment. + + FLASK_APP=coupons_web.py FLASK_DEBUG=True flask run --host='0.0.0.0' + +For a more robust production environment, drop the `extra/wsgi-coupons.conf` apache config into your apache httpd conf directory and configure it as documented in there. 
+ +### Visiting the web app with curl + + $ curl http://d2-03a/coupons/search/dog --header 'Accept: application/json' ; printf '\n' + [{"publix": {"Publix Weekly Ad: 9/7-9/13 or 9/8-9/14": {"Buy One Get Ones": ["Healthy Hide Good'n Fun Dog Treats Triple Flavor Ribs or Wings, 12 oz, at $11.99 <small>($5.99)</small>"]}, "Unadvertised Deals: 9/7-9/13": {"Pet Care": ["Blue Dry Dog Food, 5 lb, $14.99"]}, "Extra Savings Flyer: 8/27-9/9": {"Meat": ["Greenfield Natural Meat Co. Bratwurst 12 oz, Hot Dogs 13 oz or Lunchmeat, 7 oz, $4"]}}}, {"ingles": {"Ingles Ad & Coupons: 9/7-9/13": {"Meat": ["Gwaltney Hot Dogs, 12 oz, $1.48"]}}}, {"food lion": {"Food Lion Ad & Coupons: 9/7-9/13": {"Pet Care": ["Rachael Ray Nutrish Dry Dog Food, 5.5-6 lb, $10.99"]}}}] + + $ curl -X POST http://d2-03a/coupons/search/ --data 'mayo' --header 'Accept: application/json' ; printf '\n' + [{"publix": {"Publix Weekly Ad: 9/7-9/13 or 9/8-9/14": {"Buy One Get Ones": ["Duke's Mayonnaise, 30-32 oz, at $5.23 <small>($3.11)</small>", "Hellmann's Mayonnaise, 15-20 oz, at $5.75 <small>($2.87)</small>"]}, "Extra Savings Flyer: 9/10-9/23": {"Grocery": ["Hellmann's Mayonnaise, 24-30 oz, $3.99", "Hellmann's Mayonnaise, Spicy Mayonnaise or Vegan Dressing & Spread, 11.5 oz, $3", "Primal Kitchen Mayonnaise, 12 oz, $8.99", "Sir Kensington's Special Sauce or Chipotle Mayonnaise, 12 oz, $4"]}}}] + +The API accepts a few URL parameters, including `&date=YYYY-MM-DD` to use that day's cached results (it does not time travel for you) and `?nocache=1`. + +## Improvements + +* Add flask-correct logger mechanisms? + ## Dependencies -A chart for distros, or maybe just a simple package list. +For the web app: + +* apache with `mod_wsgi` +* python3-flask ## Building or changing -Only two stores are currently supported. The southernsavers.com website lists other stores that are probably drop-in capable. 
To learn the widgets.json path needed, use Developer Tools in a web browser to capture the full widgets.json path and add it to the **stores_url** dict. +Only a few stores are currently supported. The southernsavers.com website lists other stores that are probably drop-in capable. To learn the widgets.json path needed, use Developer Tools in a web browser to capture the full widgets.json path and add it to the **stores_url** dict. ## References -Developer Tools in Firefox + +* Developer Tools in Firefox +* [stackbin](https://bgstack15.ddns.net/cgit/stackbin/) @@ -18,27 +18,79 @@ # Documentation: README.md import sys, json, requests, os, datetime, re, textwrap +# Ripped from https://stackoverflow.com/questions/26105659/how-to-convert-the-unicode-to-latin-characters-python/61551939#61551939 +from typing import Optional +import html, unicodedata + +class Config: + def __init__(self, cache_dir = None): + if "" == cache_dir or cache_dir is None: + try: + cache_dir = os.environ.get("XDG_CACHE_HOME") # defaults to ~/.cache + except: + pass + if "" == cache_dir or cache_dir is None: + try: + cache_dir = os.path.join(os.environ.get("HOME"),".cache") + except: + cache_dir = "/tmp" + #print(f"DEBUG(Config.init): cache_dir {cache_dir}") + self.cache_dir = cache_dir + +def normalize(value: str, encoding: Optional[str] = None) -> str: + """ + Normalize characters not maintainable when encode. + The default encoding is "ascii". 
+ """ + if encoding is None: + return unicodedata.normalize('NFKD', value).encode('ascii', 'ignore').decode('ascii') + value = value.encode(encoding, 'backslashreplace').decode(encoding) + value = value.encode('ascii', 'xmlcharrefreplace').decode('unicode-escape') + value = unicodedata.normalize('NFKD', value).encode('ascii', 'ignore').decode('ascii') + return html.unescape(value) + store_urls = { "publix": "https://clipmunk.southernsavers.com/stores/4f823db2090c841ce0000013/widget.json?callback=jQuery111106644488051860198_1661993569105&_=1661993569106", - "ingles": "https://clipmunk.southernsavers.com/stores/4f823db2090c841ce000000f/widget.json?callback=jQuery11110011370202243518035_1662043118344&_=1662043118345" + "ingles": "https://clipmunk.southernsavers.com/stores/4f823db2090c841ce000000f/widget.json?callback=jQuery11110011370202243518035_1662043118344&_=1662043118345", + "food lion": "https://clipmunk.southernsavers.com/stores/4f823db2090c841ce000000a/widget.json?callback=jQuery111104817919592912373_1662672814198&_=1662672814199", + "lidl": "https://clipmunk.southernsavers.com/stores/59405bea724edc4175003366/widget.json?callback=jQuery111104720958887493587_1662672848590&_=1662672848591" } coupons_version = "2022-09-01a" -def fetch(store, force = False, date = None): +def fetch(store, force = False, date = None, config = None): """ Given a store name, visit the url and clean the json. 
If force, then update cached response.""" # Reference: # curl 'https://clipmunk.southernsavers.com/stores/4f823db2090c841ce0000013/widget.json?callback=jQuery111106644488051860198_1661993569105&_=1661993569106' | LANG=C sed -r -e 's/\\\\[uU]003[eE]/>/g;' -e 's/\\\\[uU]003[cC]/</g;' -e 's/^.*lists: \[/\[/;' -e 's/\],\\n.*$/\]/;' -e 's/\\\\"/\\"/g;' -e 's/\\"/"/g;' | ./coupons.py 'candy' | jq if store is None or store not in [f for f in store_urls]: print(f"ERROR (fetch): store {store} not a valid option.",file=sys.stderr) return -1 + # validate date + today = datetime.datetime.today().date() + if date is None: + date = today.strftime("%F") + try: + if date is not None: + ddate = datetime.datetime.strptime(date,"%Y-%m-%d").date() + if ddate > today: + ddate = today + print(f"WARNING(fetch): date {date} is in the future. Using {today} instead.") + except: + ddate = today + print(f"WARNING(fetch): date {date} is invalid YYYY-MM-DD. Using {today} instead.") + try: + date = ddate.strftime("%F") + except: + pass + if date is not None and date != today.strftime("%F"): + print(f"DEBUG(fetch): using date {date}") # try to use cache at first contents = None if not force: contents = None - contents = get_cached_contents(store, date) # it is safe to return None + contents = get_cached_contents(store, date, config) # it is safe to return None # So if force == True, or the cache failed if contents is None or "" == contents: - print(f"INFO (fetch): no cached content, so visiting url",file=sys.stderr) + print(f"INFO (fetch): no cached content for {store},{date}, so visiting url",file=sys.stderr) try: url = store_urls[store.lower()] except: @@ -46,10 +98,11 @@ def fetch(store, force = False, date = None): r = requests.get(url) contents = r.text # try to save to cache, but it is not a blocker - try: - set_cached_contents(store, date, contents) - except: - pass + #try: + if True: + set_cached_contents(store, date, contents, config) + #except: + # pass return contents def 
clean(contents): @@ -60,18 +113,22 @@ def clean(contents): a = re.sub("\],\\\\n.*$","]",a) a = re.sub("\\\\\\\\[uU]003[eE]",">",a) a = re.sub("\\\\\\\\[uU]003[cC]","<",a) + a = re.sub("\\\\\\\\[uU]0026","&",a) + a = re.sub("\\\\\\\\[uU]201[cCdDeEfF]",'"',a) + a = re.sub("\\\\\\\\[uU]201[89aA]","'",a) contents = re.sub('\\\\"','"',re.sub('\\\\\\\\"','\\\\"',a)) return contents -def get_cached_name(store, date = None): +def get_cached_name(store, date = None, config = None): """ Given store name, return cache filename regardless of existence or contents. """ USE_CACHE = True store = store.lower() - cache_dir = os.environ.get("XDG_CACHE_HOME") # defaults to ~/.cache - if "" == cache_dir or cache_dir is None: - cache_dir = os.path.join(os.environ.get("HOME"),".cache") + if config is None: + #print(f"DEBUG(get_cached_name): must generate new config") + config = Config() + cache_dir = config.cache_dir # use an app-specific dir underneath it cache_dir = os.path.join(cache_dir, "coupons") if not os.path.isdir(cache_dir): @@ -79,20 +136,24 @@ def get_cached_name(store, date = None): os.mkdir(cache_dir) except: # caching is not available; but this should not stop the program + print(f"INFO(get_cached_name): cannot create cache directory {cache_dir}.") USE_CACHE = False if USE_CACHE: if date is None: date = datetime.datetime.today().strftime("%F") cache_file = os.path.join(cache_dir,"_".join([store,date]) + ".json") - return cache_file + #print(f"DEBUG(get_cached_name): generated path is {cache_file}") + return cache_file + print(f"DEBUG(get_cached_name): no cache filename generated.") + return None -def get_cached_contents(store, date = None): +def get_cached_contents(store, date = None, config = None): """ Given store name, get cached contents Also, use today's date if not given a specific one. 
""" - cache_file = get_cached_name(store, date) - if os.path.exists(cache_file): + cache_file = get_cached_name(store, date, config) + if cache_file is not None and os.path.exists(cache_file): try: print(f"INFO(get_cached_contents): using cache {cache_file}",file=sys.stderr) return open(cache_file,"r").read() @@ -100,14 +161,24 @@ def get_cached_contents(store, date = None): print(f"INFO(get_cached_contents): unable to open existing cache file {cache_file}",file=sys.stderr) return None -def set_cached_contents(store, date = None, contents = None): +def set_cached_contents(store, date = None, contents = None, config = None): + """ + Write the large js+json payload to a cache file, if possible. This is low-priority. + """ if contents is None or "" == contents: return True # cache nothing so short-circuit + #print(f"DEBUG(set_cached_contents): contents length {len(contents)}") if date is None: date = datetime.datetime.today().strftime("%F") + #print(f"DEBUG(set_cached_contents): using date {date}") store = store.lower() - cache_file = get_cached_name(store, date) - open(cache_file,"w").write(contents) + cache_file = get_cached_name(store, date, config) + if cache_file is not None: + print(f"DEBUG(set_cached_contents): saving cache {cache_file} size {len(contents)}") + with open(cache_file,"w") as w: + # flatten weird characters into normal ones + #w.write(contents.encode('utf-8').decode('latin-1')) + w.write(normalize(contents)) def parse_coupons(inputobject, searchstring = None): """ @@ -126,7 +197,10 @@ def parse_coupons(inputobject, searchstring = None): #response[rgroup][rcat] = [] for i in c["items"]: text = i["html"] - if searchstring in text.lower(): + if "notes" in i and i["notes"] != "": + text = text + " <small>" + i["notes"] + "</small>" + #if searchstring in text.lower(): + if re.match(".*(" + searchstring.lower() + ").*", text.lower()): # only make this group and category if we have a match if rgroup not in response: response[rgroup] = {} @@ -135,9 +209,9 
@@ def parse_coupons(inputobject, searchstring = None): response[rgroup][rcat].append(text) return(json.dumps(response)) -def fetch_and_search(store, force = False, date = None, searchstring = None): - """ Main usage of the whole library. """ - a = clean(fetch(store, force, date)) +def fetch_and_search(store, force = False, date = None, searchstring = None, config = None): + """ Main usage of the whole library for cli. """ + a = clean(fetch(store, force, date, config)) return parse_coupons(a, searchstring) if "__main__" == __name__: @@ -165,7 +239,7 @@ Basic usage: else: a = parse_coupons(sys.stdin.read(),args.search) else: - a = fetch_and_search(args.store,args.nocache,args.date,args.search) + a = fetch_and_search(args.store,args.nocache,args.date,args.search,config = None) if args.pretty: print(json.dumps(json.loads(a),indent=3)) else: diff --git a/coupons_web.py b/coupons_web.py new file mode 100644 index 0000000..16e89eb --- /dev/null +++ b/coupons_web.py @@ -0,0 +1,73 @@ +#!/usr/bin/env python3 +# Startdate: 2022-09-08 +# Improve: +# confirm date logic works. + +from flask import Flask, Response, request, url_for, render_template +import json, sys, os +# Load coupons from same directory as coupons_web.py +sys.path.append(os.path.dirname(os.path.abspath(__file__))) +import coupons +from urllib import parse + +app = Flask(__name__) +config = coupons.Config() +# satisfies mod_wsgi: +application = app +@app.route("/") +def root(): + set_cache_dir_from_apache_environ(request, config) + return render_template("index.html") + +def set_cache_dir_from_apache_environ(_request, _config): + """ + Given the request and config objects, derive the SetEnv XDG_CACHE_HOME for coupons web app. 
+ """ + try: + cache_dir = _request.environ.get("XDG_CACHE_HOME") + if cache_dir is not None and cache_dir != _config.cache_dir: + print(f"DEBUG(web set_cache_dir): setting cache dir to {cache_dir}") + _config.cache_dir = cache_dir + except: + pass + + +@app.route("/search/", methods=["GET","POST"]) +@app.route("/search/<searchstring>", methods=["GET"]) +def search(searchstring = ""): + """ + Pass searchstring to fetch_and_search. Returns json unless html is requested. + Accepts parameters nocache=1 and date=YYYY-MM-DD + """ + if request.method in ["POST"]: + searchstring = request.get_data().decode("utf-8") + set_cache_dir_from_apache_environ(request, config) + print(f"DEBUG(web search): {request}") + #print(f"DEBUG(web search): {request.method} searchstring={searchstring}") + force = False + nocache = request.args.get('nocache') + date = request.args.get('date') # in YYYY-MM-DD form + if nocache in ["1",1,"yes","YES","true","TRUE",True,"True","on"]: + print(f"DEBUG(web search): Caching is disabled for this request.") + force = True + results = [] + for store in coupons.store_urls: + a = coupons.fetch_and_search(store = store, force = force, searchstring = searchstring, date = date, config = config) + if len(a) > 2: # an empty a is "{}" which is a string?! 
+ results.append({store: json.loads(a)}) + accept = request.headers.get("Accept") + from_js = True if request.headers.get("From") else False + #print(f"DEBUG(web search): Accept only type {accept}") + #print(f"DEBUG(web search): Results: {results}") + #print(f"DEBUG(web search): From_js is {from_js}") + if accept in ["text/html", "application/html"] or "text/html" in accept: + #print(f"DEBUG(web search): Must convert the json to html...") + savedlink = url_for("search") + searchstring # because url_for handles searchstring= weird + #print(f"DEBUG(web search): Preparing saved search link: {savedlink}") + return render_template("index.html", results = results, from_js = from_js, savedlink = savedlink) + else: + # json is default + return json.dumps(results),200 + +if __name__ == "__main__": + app.run() diff --git a/extra/wsgi-coupons.conf b/extra/wsgi-coupons.conf new file mode 100644 index 0000000..1a3b17c --- /dev/null +++ b/extra/wsgi-coupons.conf @@ -0,0 +1,7 @@ +WSGIScriptAlias /coupons /usr/libexec/coupons/coupons_web.py +# The application will try to use a "coupons" directory underneath here. +# You should make the coupons directory beforehand, and set permissions for apache to write to it. 
+SetEnv XDG_CACHE_HOME /var/cache/apache2 +<Directory /usr/libexec/coupons> + Require all granted +</Directory> diff --git a/static/coupons.css b/static/coupons.css new file mode 100644 index 0000000..ea225ff --- /dev/null +++ b/static/coupons.css @@ -0,0 +1,27 @@ +/* +publix #64953F; +ingles #EA2754; +food lion #004FAC; +lidl #00508D; + */ + +div .store { + border-style: solid; + border-width: 5px; +} +div .publix { + border-color: #64953F; +} +div .ingles { + border-color: #EA2754; +} +div .food.lion { + border-color: #004FAC; +} +div .lidl { + border-color: #00508D; +} + +.storeimg { + max-height: 60px; +} diff --git a/static/images/food lion.png b/static/images/food lion.png Binary files differ new file mode 100644 index 0000000..293f019 --- /dev/null +++ b/static/images/food lion.png diff --git a/static/images/ingles.png b/static/images/ingles.png Binary files differ new file mode 100644 index 0000000..53a2a74 --- /dev/null +++ b/static/images/ingles.png diff --git a/static/images/lidl.png b/static/images/lidl.png Binary files differ new file mode 100644 index 0000000..5ba8cfa --- /dev/null +++ b/static/images/lidl.png diff --git a/static/images/publix.png b/static/images/publix.png Binary files differ new file mode 100644 index 0000000..488fdf4 --- /dev/null +++ b/static/images/publix.png diff --git a/static/results.js b/static/results.js new file mode 100644 index 0000000..5d0be34 --- /dev/null +++ b/static/results.js @@ -0,0 +1,22 @@ +/* References: + * formie/static/new_form.js + */ +function react() { + event.preventDefault(); + // App uses all lowercase matches but does not convert input to lowercase so we must do it here. 
+ var ss = document.getElementById("search").value.toLowerCase(); + var topurl = document.getElementById("topurl").innerHTML; + var path = (topurl + "/search/").replace("//","/"); + let xhr = new XMLHttpRequest(); + xhr.open("POST", path, false); + xhr.setRequestHeader("Accept", "application/html"); + xhr.setRequestHeader("From", "javascript"); + // the API supports parameter ?nocache=1 but this is not supported here. + xhr.onload = function() { + var r = document.getElementById("results"); + //console.log("Got result:"); + //console.log(xhr.responseText); + r.innerHTML = xhr.responseText; + }; + xhr.send(ss); +}; diff --git a/templates/index.html b/templates/index.html new file mode 100644 index 0000000..1194fc6 --- /dev/null +++ b/templates/index.html @@ -0,0 +1,19 @@ +<!DOCTYPE html> +<html> +<head> +<meta name="viewport" content="width=device-width, initial-scale=1"> +<script src="{{ url_for('static', filename='results.js') }}"></script> +<link rel=stylesheet type=text/css href="{{ url_for('static', filename='coupons.css') }}"> +</head> +<body> +<!-- these hidden div power the static javascript to send searches to the correct spot --> +<div id="topurl" hidden>{{ url_for('root') }}</div> +{% if not from_js %}<form> +<input type="text" id="search"/> +<input onclick="react(); return false;" type="submit" value="search"> +</form>{% endif %} +<div id="results"> +{% if results %}{% include 'results.html' %}{% else %}no results{% endif %} +</div> +</body> +</html> diff --git a/templates/results.html b/templates/results.html new file mode 100644 index 0000000..c635a75 --- /dev/null +++ b/templates/results.html @@ -0,0 +1,12 @@ +{% if not from_js %} +<a href="{{ url_for('root') }}">clear search</a> +{% else %} +<a href="{{ savedlink }}">{{ savedlink }}</a> +{% endif %} +{# {% for r in results %}{% for store in r %}<div class="{{ store }} store"><h1>{{ store | capitalize }}</h1> #} +{% for r in results %}{% for store in r %}<div class="{{ store }} store"><h1><img 
class="storeimg" src="{{ url_for('static', filename = 'images/' + store + '.png') }}"/>{{ store | capitalize }}</h1> +{% for salepaper in r[store] %}<h2>{{ salepaper | safe }}</h2> +{% for grouping in r[store][salepaper] %}<h3>{{ grouping | safe }}</h3> +{% for coupon in r[store][salepaper][grouping] %}{{ coupon | safe }}<br/>{% endfor %} +{% endfor %}{% endfor %} +</div>{% endfor %}{% endfor %} |