diff options
Diffstat (limited to 'pyaggr3g470r/lib')
-rw-r--r-- | pyaggr3g470r/lib/feed_utils.py | 9 | ||||
-rw-r--r-- | pyaggr3g470r/lib/utils.py | 9 |
2 files changed, 11 insertions, 7 deletions
diff --git a/pyaggr3g470r/lib/feed_utils.py b/pyaggr3g470r/lib/feed_utils.py index 367fd4b5..28123f66 100644 --- a/pyaggr3g470r/lib/feed_utils.py +++ b/pyaggr3g470r/lib/feed_utils.py @@ -4,7 +4,7 @@ import requests import feedparser from bs4 import BeautifulSoup, SoupStrainer -from pyaggr3g470r.lib.utils import try_keys, try_splits, rebuild_url +from pyaggr3g470r.lib.utils import try_keys, try_get_b64icon, rebuild_url logger = logging.getLogger(__name__) @@ -38,7 +38,7 @@ def construct_feed_from(url=None, fp_parsed=None, feed=None, query_site=True): site_split = urllib.parse.urlsplit(feed['site_link']) if feed.get('icon'): - feed['icon'] = try_splits(feed['icon'], site_split, feed_split) + feed['icon'] = try_get_b64icon(feed['icon'], site_split, feed_split) if feed['icon'] is None: del feed['icon'] @@ -72,13 +72,14 @@ def construct_feed_from(url=None, fp_parsed=None, feed=None, query_site=True): icons = bs_parsed.find_all(check_keys(rel=['icon'])) if len(icons) >= 1: for icon in icons: - feed['icon'] = try_splits(icon.attrs['href'], + feed['icon'] = try_get_b64icon(icon.attrs['href'], site_split, feed_split) if feed['icon'] is not None: break if feed.get('icon') is None: - feed['icon'] = try_splits('/favicon.ico', site_split, feed_split) + feed['icon'] = try_get_b64icon('/favicon.ico', + site_split, feed_split) if 'icon' in feed and feed['icon'] is None: del feed['icon'] diff --git a/pyaggr3g470r/lib/utils.py b/pyaggr3g470r/lib/utils.py index 62284de1..a51b6c3e 100644 --- a/pyaggr3g470r/lib/utils.py +++ b/pyaggr3g470r/lib/utils.py @@ -40,14 +40,17 @@ def rebuild_url(url, base_split): return urllib.parse.urlunsplit(new_split) -def try_splits(url, *splits): +def try_get_b64icon(url, *splits): for split in splits: if split is None: continue rb_url = rebuild_url(url, split) response = requests.get(rb_url, verify=False, timeout=10) - if response.ok and 'html' not in response.headers['content-type']: - return base64.b64encode(response.content).decode('utf8') + # if html in content-type, we assume it's a fancy 404 page + content_type = response.headers.get('content-type', '') + if response.ok and 'html' not in content_type: + return content_type + ( + '\n%s' % base64.b64encode(response.content).decode('utf8')) return None |