Diffstat (limited to 'pyaggr3g470r/lib/utils.py')
-rw-r--r--  pyaggr3g470r/lib/utils.py  84
1 file changed, 14 insertions(+), 70 deletions(-)
diff --git a/pyaggr3g470r/lib/utils.py b/pyaggr3g470r/lib/utils.py
index a0154b7f..280256f6 100644
--- a/pyaggr3g470r/lib/utils.py
+++ b/pyaggr3g470r/lib/utils.py
@@ -1,8 +1,11 @@
 import types
 import urllib
+import base64
+import logging
 import requests
-import feedparser
-from bs4 import BeautifulSoup, SoupStrainer
+from hashlib import md5
+
+logger = logging.getLogger(__name__)
 
 
 def default_handler(obj):
@@ -37,73 +40,14 @@ def rebuild_url(url, base_split):
     return urllib.parse.urlunsplit(new_split)
 
 
-def construct_feed_from(url=None, fp_parsed=None, feed=None, query_site=True):
-    if url is None and fp_parsed is not None:
-        url = fp_parsed.get('url')
-    if url is not None and fp_parsed is None:
-        response = requests.get(url, verify=False)
-        fp_parsed = feedparser.parse(response.content)
-    assert url is not None and fp_parsed is not None
-    feed = feed or {}
-    split = urllib.parse.urlsplit(url)
-    if not fp_parsed['bozo']:
-        feed['link'] = url
-        feed['site_link'] = try_keys(fp_parsed['feed'], 'href', 'link')
-        feed['title'] = fp_parsed['feed'].get('title')
-        feed['description'] = try_keys(fp_parsed['feed'], 'subtitle', 'title')
-        feed['icon'] = try_keys(fp_parsed['feed'], 'icon')
-    else:
-        feed['site_link'] = url
-
-    if feed.get('site_link'):
-        feed['site_link'] = rebuild_url(feed['site_link'], split)
-        split = urllib.parse.urlsplit(feed['site_link'])
-
-    if feed.get('icon'):
-        feed['icon'] = rebuild_url(feed['icon'], split)
-
-    if not feed.get('site_link') or not query_site \
-            or all(bool(feed.get(key)) for key in ('link', 'title', 'icon')):
-        return feed
-
-    response = requests.get(feed['site_link'], verify=False)
-    bs_parsed = BeautifulSoup(response.content, 'html.parser',
-                              parse_only=SoupStrainer('head'))
-
-    if not feed.get('title'):
-        try:
-            feed['title'] = bs_parsed.find_all('title')[0].text
-        except Exception:
-            pass
-
-    def check_keys(**kwargs):
-        def wrapper(elem):
-            for key, vals in kwargs.items():
-                if not elem.has_attr(key):
-                    return False
-                if not all(val in elem.attrs[key] for val in vals):
-                    return False
-            return True
-        return wrapper
+def try_splits(url, *splits):
+    for split in splits:
+        rb_url = rebuild_url(url, split)
+        response = requests.get(rb_url, verify=False, timeout=10)
+        if response.ok and 'html' not in response.headers['content-type']:
+            return base64.b64encode(response.content).decode('utf8')
+    return None
 
 
-    if not feed.get('icon'):
-        icons = bs_parsed.find_all(check_keys(rel=['icon', 'shortcut']))
-        if not len(icons):
-            icons = bs_parsed.find_all(check_keys(rel=['icon']))
-        if len(icons) >= 1:
-            feed['icon'] = rebuild_url(icons[0].attrs['href'], split)
-        else:  # trying the default one
-            icon = rebuild_url('/favicon.ico', split)
-            if requests.get(icon, verify=False).ok:
-                feed['icon'] = icon
-    if not feed.get('link'):
-        alternate = bs_parsed.find_all(check_keys(rel=['alternate'],
-                                                  type=['application/rss+xml']))
-        if len(alternate) == 1:
-            feed['link'] = rebuild_url(alternate[0].attrs['href'], split)
-        elif len(alternate) > 1:
-            feed['link'] = rebuild_url(alternate[0].attrs['href'], split)
-            feed['other_link'] = [rebuild_url(al.attrs['href'], split)
-                                  for al in alternate[1:]]
-    return feed
+def to_hash(text):
+    return md5(text.encode('utf8')).hexdigest()
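
Read together, the change drops the inline feed/icon discovery and leaves two small helpers: try_splits resolves a (possibly relative) URL against each urlsplit result in turn and returns the first successful non-HTML response base64-encoded, while to_hash md5-fingerprints a string. A minimal usage sketch, assuming the module is importable as pyaggr3g470r.lib.utils; the feed values below are hypothetical and not part of this change:

    import urllib.parse

    from pyaggr3g470r.lib.utils import try_splits, to_hash

    # Hypothetical feed row; a real caller would load this from the database.
    feed = {'link': 'https://example.org/rss.xml',
            'site_link': 'https://example.org/',
            'icon': '/favicon.ico'}

    # Try the icon URL relative to the feed URL, then the site URL; the
    # first OK, non-HTML response comes back base64-encoded, else None.
    splits = [urllib.parse.urlsplit(feed['link']),
              urllib.parse.urlsplit(feed['site_link'])]
    icon_b64 = try_splits(feed['icon'], *splits)

    # to_hash yields a stable 32-character md5 hex digest of the text.
    fingerprint = to_hash(feed['link'])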