author     B. Stack <bgstack15@gmail.com>  2022-09-01 14:05:50 -0400
committer  B. Stack <bgstack15@gmail.com>  2022-09-01 14:05:50 -0400
commit     b13c9d59c64df1e06c5890895a44d3c3a538178e (patch)
tree       9bc9a44bfe9eda7396a91fbdfe326f1d4777caf8 /coupons.py
initial commit
Diffstat (limited to 'coupons.py')
-rwxr-xr-x  coupons.py  172
1 file changed, 172 insertions(+), 0 deletions(-)
diff --git a/coupons.py b/coupons.py
new file mode 100755
index 0000000..061f998
--- /dev/null
+++ b/coupons.py
@@ -0,0 +1,172 @@
+#!/usr/bin/env python3
+# File: coupons.py
+# Location: .
+# Author: bgstack15
+# Startdate: 2022-08-31
+# Title: Json Parser of Publix sales
+# Project: coupons
+# Purpose: Parse json for coupons that match lowercase string
+# Usage:
+# called from check-sales.sh
+# Search with a lower-case string, against the lowercase values of coupon titles.
+# <input.json ./coupons.py --stdin --search 'candy'
+# History:
+# I attempted to write similar logic with jq, but python is way easier
+# Reference:
+# [internal] rod2/rod.py
+# Improve:
+# Documentation: README.md
+import sys, json, requests, os, datetime, re, textwrap
+
+store_urls = {
+    "publix": "https://clipmunk.southernsavers.com/stores/4f823db2090c841ce0000013/widget.json?callback=jQuery111106644488051860198_1661993569105&_=1661993569106",
+    "ingles": "https://clipmunk.southernsavers.com/stores/4f823db2090c841ce000000f/widget.json?callback=jQuery11110011370202243518035_1662043118344&_=1662043118345"
+}
+coupons_version = "2022-09-01a"
+
+def fetch(store, force = False, date = None):
+    """ Given a store name, return the raw widget.json response text, preferring the cache. If force, then skip the cache and refresh it. """
+    # Reference:
+    # curl 'https://clipmunk.southernsavers.com/stores/4f823db2090c841ce0000013/widget.json?callback=jQuery111106644488051860198_1661993569105&_=1661993569106' | LANG=C sed -r -e 's/\\\\[uU]003[eE]/>/g;' -e 's/\\\\[uU]003[cC]/</g;' -e 's/^.*lists: \[/\[/;' -e 's/\],\\n.*$/\]/;' -e 's/\\\\"/\\"/g;' -e 's/\\"/"/g;' | ./coupons.py 'candy' | jq
+    if store is None or store.lower() not in store_urls:
+        print(f"ERROR (fetch): store {store} not a valid option.",file=sys.stderr)
+        return None
+    # try to use cache at first
+    contents = None
+    if not force:
+        contents = get_cached_contents(store, date) # it is safe for this to return None
+    # So if force == True, or the cache failed
+    if contents is None or "" == contents:
+        print("INFO (fetch): no cached content, so visiting url",file=sys.stderr)
+        try:
+            url = store_urls[store.lower()]
+        except KeyError:
+            print(f"ERROR (fetch): no url saved for store {store}",file=sys.stderr)
+            return None
+        r = requests.get(url)
+        contents = r.text
+        # try to save to cache, but a failure here is not a blocker
+        try:
+            set_cached_contents(store, date, contents)
+        except OSError:
+            pass
+    return contents
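+# Usage sketch (illustrative only; nothing else in this file calls fetch
+# directly like this): force a refresh of the Publix payload and confirm
+# something came back.
+#   raw = fetch("publix", force = True)
+#   print("fetch failed" if raw is None else f"{len(raw)} characters")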
+
+def clean(contents):
+    """ Clean the javascript from southernsavers.com widget.json response. """
+    # Reference:
+    # curl 'https://clipmunk.southernsavers.com/stores/4f823db2090c841ce0000013/widget.json?callback=jQuery111106644488051860198_1661993569105&_=1661993569106' | LANG=C sed -r -e 's/\\\\[uU]003[eE]/>/g;' -e 's/\\\\[uU]003[cC]/</g;' -e 's/^.*lists: \[/\[/;' -e 's/\],\\n.*$/\]/;' -e 's/\\\\"/\\"/g;' -e 's/\\"/"/g;' | ./coupons.py 'candy' | jq
+    # strip the jsonp wrapper: keep only the array assigned to "lists"
+    a = re.sub(r"^.*lists: \[", "[", contents)
+    a = re.sub(r"\],\\n.*$", "]", a)
+    # unescape the doubly-escaped angle brackets, backslashes, and quotes
+    a = re.sub(r"\\\\[uU]003[eE]", ">", a)
+    a = re.sub(r"\\\\[uU]003[cC]", "<", a)
+    contents = re.sub(r'\\"', '"', re.sub(r'\\\\"', r'\\"', a))
+    return contents
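+# Rough before/after sketch of clean(); the wrapper shape is inferred from
+# the reference curl command above and abbreviated with "..." here:
+#   before: 'jQuery111106644488051860198_1661993569105({ ... lists: [ ... ], ... });'
+#   after:  '[ ... ]'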
+
+def get_cached_name(store, date = None):
+    """
+    Given store name, return cache filename regardless of existence or contents.
+    Returns None when no usable cache directory is available.
+    """
+    USE_CACHE = True
+    cache_file = None
+    store = store.lower()
+    cache_dir = os.environ.get("XDG_CACHE_HOME") # per the XDG spec; defaults to ~/.cache
+    if "" == cache_dir or cache_dir is None:
+        cache_dir = os.path.join(os.environ.get("HOME"),".cache")
+    # use an app-specific dir underneath it
+    cache_dir = os.path.join(cache_dir, "coupons")
+    if not os.path.isdir(cache_dir):
+        try:
+            os.makedirs(cache_dir, exist_ok = True)
+        except OSError:
+            # caching is not available; but this should not stop the program
+            USE_CACHE = False
+    if USE_CACHE:
+        if date is None:
+            date = datetime.datetime.today().strftime("%F")
+        cache_file = os.path.join(cache_dir,"_".join([store,date]) + ".json")
+    return cache_file
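+# Example result with default XDG paths on 2022-09-01:
+#   get_cached_name("Publix")  =>  ~/.cache/coupons/publix_2022-09-01.json
+# which is the same file the --help epilog below feeds to --stdin.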
+
+def get_cached_contents(store, date = None):
+    """
+    Given store name, get cached contents.
+    Also, use today's date if not given a specific one.
+    """
+    cache_file = get_cached_name(store, date)
+    if cache_file is not None and os.path.exists(cache_file):
+        try:
+            print(f"INFO(get_cached_contents): using cache {cache_file}",file=sys.stderr)
+            with open(cache_file,"r") as f:
+                return f.read()
+        except OSError:
+            print(f"INFO(get_cached_contents): unable to open existing cache file {cache_file}",file=sys.stderr)
+    return None
+
+def set_cached_contents(store, date = None, contents = None):
+    """ Write contents to the cache file for this store and date, when a cache path is available. """
+    if contents is None or "" == contents:
+        return True # nothing to cache, so short-circuit
+    if date is None:
+        date = datetime.datetime.today().strftime("%F")
+    cache_file = get_cached_name(store, date) # already lowercases the store name
+    if cache_file is not None:
+        with open(cache_file,"w") as f:
+            f.write(contents)
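+# Round-trip sketch (hypothetical values): a write for a store/date pair is
+# readable back through get_cached_contents with the same pair.
+#   set_cached_contents("publix", "2022-09-01", '[{"name": "Weekly Ad"}]')
+#   get_cached_contents("publix", "2022-09-01")  # => '[{"name": "Weekly Ad"}]'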
+
+def parse_coupons(inputobject, searchstring = None):
+    """
+    Main logic to simplify the json down as well as return only results that match searchstring, which should be lowercase.
+    """
+    b = json.loads(inputobject)
+    if searchstring is None:
+        searchstring = ""
+    response = {}
+    for group in b:
+        rgroup = group["name"]
+        for c in group["categories"]:
+            rcat = c["name"]
+            for i in c["items"]:
+                text = i["html"]
+                if searchstring in text.lower():
+                    # only make this group and category if we have a match
+                    if rgroup not in response:
+                        response[rgroup] = {}
+                    if rcat not in response[rgroup]:
+                        response[rgroup][rcat] = []
+                    response[rgroup][rcat].append(text)
+    return json.dumps(response)
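+# Worked example; the input is invented to match the group/categories/items
+# shape parsed above, not real sale data:
+#   parse_coupons('[{"name": "Weekly Ad", "categories": [{"name": "Candy",
+#                 "items": [{"html": "Brand candy, $1"}]}]}]', "candy")
+#   => '{"Weekly Ad": {"Candy": ["Brand candy, $1"]}}'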
+
+def fetch_and_search(store, force = False, date = None, searchstring = None):
+    """ Main usage of the whole library. """
+    contents = fetch(store, force, date)
+    if contents is None or "" == contents:
+        return json.dumps({}) # fetch already logged the problem
+    return parse_coupons(clean(contents), searchstring)
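+# Library equivalent of the CLI's basic usage shown in the epilog below:
+#   print(fetch_and_search("publix", searchstring = "candy"))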
+
+if "__main__" == __name__:
+ import argparse
+ parser = argparse.ArgumentParser(prog = sys.argv[0], description = "Search currently listed sales/coupons on SouthernSavers.com", formatter_class=argparse.RawDescriptionHelpFormatter, epilog=textwrap.dedent("""To use the cache file as standard input, run:
+ <~/.cache/coupons/publix_2022-09-01.json ./coupons.py --stdin --clean --search "candy" --pretty
+
+Basic usage:
+ ./coupons.py --store "publix" --search "candy"
+"""))
+ parser.add_argument("-n","--nocache","--nc","--no-cache", action = "store_true", help = "Skip the cache and always visit site.")
+ parser.add_argument("-d", "--date", help = "Use different YYYY-MM-DD than today, for cache purposes. Does not affect visiting the website")
+ parser.add_argument("-s","--search", help = "Search for items that match this, when converted to lowercase. Can leave blank to display all items")
+ parser.add_argument("--store", help = f"Select sales from this store.", choices = [f for f in store_urls])
+ parser.add_argument("--stdin", action = "store_true", help = "Pipe stdin to parse_coupons. Can still use --search")
+ parser.add_argument("--clean", action = "store_true", help = "If using --stdin, also clean the whole javascript input into just the useful json part.")
+ parser.add_argument("-p","--pretty","--prettyprint","--pretty-print", action = "store_true", help = "Pretty-print json output")
+ parser.add_argument("-V|--version", action = "version", version = coupons_version)
+ args = parser.parse_args()
+ #print(args,file=sys.stderr)
+ a = None
+ if args.stdin:
+ if args.clean:
+ a = parse_coupons(clean(sys.stdin.read()),args.search)
+ else:
+ a = parse_coupons(sys.stdin.read(),args.search)
+ else:
+ a = fetch_and_search(args.store,args.nocache,args.date,args.search)
+ if args.pretty:
+ print(json.dumps(json.loads(a),indent=3))
+ else:
+ print(a)