about | summary | refs | log | tree | commit | diff
path: root/pyaggr3g470r/lib
diff options
context:
space:
mode:
Diffstat (limited to 'pyaggr3g470r/lib')
-rw-r--r-- pyaggr3g470r/lib/feed_utils.py | 9
-rw-r--r-- pyaggr3g470r/lib/utils.py | 9
2 files changed, 11 insertions, 7 deletions
diff --git a/pyaggr3g470r/lib/feed_utils.py b/pyaggr3g470r/lib/feed_utils.py
index 367fd4b5..28123f66 100644
--- a/pyaggr3g470r/lib/feed_utils.py
+++ b/pyaggr3g470r/lib/feed_utils.py
@@ -4,7 +4,7 @@ import requests
import feedparser
from bs4 import BeautifulSoup, SoupStrainer
-from pyaggr3g470r.lib.utils import try_keys, try_splits, rebuild_url
+from pyaggr3g470r.lib.utils import try_keys, try_get_b64icon, rebuild_url
logger = logging.getLogger(__name__)
@@ -38,7 +38,7 @@ def construct_feed_from(url=None, fp_parsed=None, feed=None, query_site=True):
site_split = urllib.parse.urlsplit(feed['site_link'])
if feed.get('icon'):
- feed['icon'] = try_splits(feed['icon'], site_split, feed_split)
+ feed['icon'] = try_get_b64icon(feed['icon'], site_split, feed_split)
if feed['icon'] is None:
del feed['icon']
@@ -72,13 +72,14 @@ def construct_feed_from(url=None, fp_parsed=None, feed=None, query_site=True):
icons = bs_parsed.find_all(check_keys(rel=['icon']))
if len(icons) >= 1:
for icon in icons:
- feed['icon'] = try_splits(icon.attrs['href'],
+ feed['icon'] = try_get_b64icon(icon.attrs['href'],
site_split, feed_split)
if feed['icon'] is not None:
break
if feed.get('icon') is None:
- feed['icon'] = try_splits('/favicon.ico', site_split, feed_split)
+ feed['icon'] = try_get_b64icon('/favicon.ico',
+ site_split, feed_split)
if 'icon' in feed and feed['icon'] is None:
del feed['icon']
diff --git a/pyaggr3g470r/lib/utils.py b/pyaggr3g470r/lib/utils.py
index 62284de1..a51b6c3e 100644
--- a/pyaggr3g470r/lib/utils.py
+++ b/pyaggr3g470r/lib/utils.py
@@ -40,14 +40,17 @@ def rebuild_url(url, base_split):
return urllib.parse.urlunsplit(new_split)
-def try_splits(url, *splits):
+def try_get_b64icon(url, *splits):
for split in splits:
if split is None:
continue
rb_url = rebuild_url(url, split)
response = requests.get(rb_url, verify=False, timeout=10)
- if response.ok and 'html' not in response.headers['content-type']:
- return base64.b64encode(response.content).decode('utf8')
+ # if html in content-type, we assume it's a fancy 404 page
+ content_type = response.headers.get('content-type', '')
+ if response.ok and 'html' not in content_type:
+ return content_type + (
+ '\n%s' % base64.b64encode(response.content).decode('utf8'))
return None
bgstack15