diff options
author | B. Stack <bgstack15@gmail.com> | 2022-09-01 14:05:50 -0400 |
---|---|---|
committer | B. Stack <bgstack15@gmail.com> | 2022-09-01 14:05:50 -0400 |
commit | b13c9d59c64df1e06c5890895a44d3c3a538178e (patch) | |
tree | 9bc9a44bfe9eda7396a91fbdfe326f1d4777caf8 /aux | |
download | coupons-b13c9d59c64df1e06c5890895a44d3c3a538178e.tar.gz coupons-b13c9d59c64df1e06c5890895a44d3c3a538178e.tar.bz2 coupons-b13c9d59c64df1e06c5890895a44d3c3a538178e.zip |
initial commit
Diffstat (limited to 'aux')
-rwxr-xr-x | aux/ads1.py | 28 | ||||
-rw-r--r-- | aux/notes | 21 |
2 files changed, 49 insertions, 0 deletions
```diff
diff --git a/aux/ads1.py b/aux/ads1.py
new file mode 100755
index 0000000..9a35dd3
--- /dev/null
+++ b/aux/ads1.py
@@ -0,0 +1,28 @@
+#!/usr/bin/env python3
+from pyvirtualdisplay import Display
+from selenium import webdriver
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support import expected_conditions as EC
+from selenium.webdriver.common.keys import Keys
+from bs4 import BeautifulSoup
+import time, json, configparser, sys, os, argparse, textwrap
+from json import JSONEncoder
+from sys import argv
+
+display = Display(visible=0, size=(1024,768))
+display.start()
+
+def find_string(instring):
+    a = ""
+    with webdriver.Firefox() as browser:
+        browser.get("https://www.southernsavers.com/publix-weekly-ad-deals/")
+        #wait = WebDriverWait(browser, 8)
+        #wait.until(EC.element_to_be_clickable((By.CLASS_NAME, "selectAllLink"))).click()
+        time.sleep(15) # wait 15 seconds for good measure
+        a = browser.page_source
+        #return browser.page_source
+    return a
+
+if __name__ == "__main__":
+    loop1()
diff --git a/aux/notes b/aux/notes
new file mode 100644
index 0000000..4a89b79
--- /dev/null
+++ b/aux/notes
@@ -0,0 +1,21 @@
+# startdate: 2022-08-31 21:49
+# Extracted from visiting https://www.southernsavers.com/publix-weekly-ad-deals/# in firefox
+# Ref:
+# https://serverfault.com/questions/991982/jq-get-values-from-children-array-and-display-on-parent-array/991996#991996
+curl 'https://clipmunk.southernsavers.com/stores/4f823db2090c841ce0000013/widget.json?callback=jQuery111106644488051860198_1661993569105&_=1661993569106' > ~/foo34
+LANG=C sed -r -e 's/\\\\[uU]003[eE]/>/g;' -e 's/\\\\[uU]003[cC]/</g;' -e 's/^.*lists: \[/\[/;' -e 's/\],\\n.*$/\]/;' -e 's/\\\\"/\\"/g;' -e 's/\\"/"/g;' ~/foo34 > ~/foo35
+<~/foo35 jq
+cl ; <~/foo35 jq '. as $input | $input[].categories[] | {name,items}'
+# find items where "Cracker" shows up in text of "html" tag.
+cl ; <~/foo35 jq '.[].categories[].items[] | select( .html | strings | test("Cracker")?)'
+# all things in an easier format but not yet limited to "Cracker" search
+cl ; <~/foo35 jq '.[].categories[] as $cat | $cat | [del(.items,.id), (.items[] | { deal: .html }) ] | add'
+# does not do what i want
+cl ; <~/foo35 jq '.[] | [del(.id,.kind,.categories), (.categories[]|{ category: .name}), (.categories[].items[]|{html: .html}) ] | add'
+# instead of all this crazy jq above, use python to process and search
+<~/foo35 ./coupons.py 'zevia' | jq
+
+# all together:
+curl 'https://clipmunk.southernsavers.com/stores/4f823db2090c841ce0000013/widget.json?callback=jQuery111106644488051860198_1661993569105&_=1661993569106' | LANG=C sed -r -e 's/\\\\[uU]003[eE]/>/g;' -e 's/\\\\[uU]003[cC]/</g;' -e 's/^.*lists: \[/\[/;' -e 's/\],\\n.*$/\]/;' -e 's/\\\\"/\\"/g;' -e 's/\\"/"/g;' | ./coupons.py 'candy' | jq
+
+ingles url is https://clipmunk.southernsavers.com/stores/4f823db2090c841ce000000f/widget.json?callback=jQuery11110011370202243518035_1662043118344&_=1662043118345
```