From 92289b32248f4568579edfd5a301e571ade0c284 Mon Sep 17 00:00:00 2001 From: François Schmidts Date: Tue, 21 Jul 2015 14:44:11 +0200 Subject: fetching mimetypes with images --- pyaggr3g470r/lib/feed_utils.py | 9 +++++---- pyaggr3g470r/lib/utils.py | 9 ++++++--- pyaggr3g470r/views/feed.py | 7 ++++++- 3 files changed, 17 insertions(+), 8 deletions(-) diff --git a/pyaggr3g470r/lib/feed_utils.py b/pyaggr3g470r/lib/feed_utils.py index 367fd4b5..28123f66 100644 --- a/pyaggr3g470r/lib/feed_utils.py +++ b/pyaggr3g470r/lib/feed_utils.py @@ -4,7 +4,7 @@ import requests import feedparser from bs4 import BeautifulSoup, SoupStrainer -from pyaggr3g470r.lib.utils import try_keys, try_splits, rebuild_url +from pyaggr3g470r.lib.utils import try_keys, try_get_b64icon, rebuild_url logger = logging.getLogger(__name__) @@ -38,7 +38,7 @@ def construct_feed_from(url=None, fp_parsed=None, feed=None, query_site=True): site_split = urllib.parse.urlsplit(feed['site_link']) if feed.get('icon'): - feed['icon'] = try_splits(feed['icon'], site_split, feed_split) + feed['icon'] = try_get_b64icon(feed['icon'], site_split, feed_split) if feed['icon'] is None: del feed['icon'] @@ -72,13 +72,14 @@ def construct_feed_from(url=None, fp_parsed=None, feed=None, query_site=True): icons = bs_parsed.find_all(check_keys(rel=['icon'])) if len(icons) >= 1: for icon in icons: - feed['icon'] = try_splits(icon.attrs['href'], + feed['icon'] = try_get_b64icon(icon.attrs['href'], site_split, feed_split) if feed['icon'] is not None: break if feed.get('icon') is None: - feed['icon'] = try_splits('/favicon.ico', site_split, feed_split) + feed['icon'] = try_get_b64icon('/favicon.ico', + site_split, feed_split) if 'icon' in feed and feed['icon'] is None: del feed['icon'] diff --git a/pyaggr3g470r/lib/utils.py b/pyaggr3g470r/lib/utils.py index 62284de1..a51b6c3e 100644 --- a/pyaggr3g470r/lib/utils.py +++ b/pyaggr3g470r/lib/utils.py @@ -40,14 +40,17 @@ def rebuild_url(url, base_split): return urllib.parse.urlunsplit(new_split) -def try_splits(url, *splits): +def try_get_b64icon(url, *splits): for split in splits: if split is None: continue rb_url = rebuild_url(url, split) response = requests.get(rb_url, verify=False, timeout=10) - if response.ok and 'html' not in response.headers['content-type']: - return base64.b64encode(response.content).decode('utf8') + # if html in content-type, we assume it's a fancy 404 page + content_type = response.headers.get('content-type', '') + if response.ok and 'html' not in content_type: + return content_type + ( + '\n%s' % base64.b64encode(response.content).decode('utf8')) return None diff --git a/pyaggr3g470r/views/feed.py b/pyaggr3g470r/views/feed.py index 99986fe7..1a3ad938 100644 --- a/pyaggr3g470r/views/feed.py +++ b/pyaggr3g470r/views/feed.py @@ -207,4 +207,9 @@ def icon(feed_id): headers = {'Cache-Control': 'max-age=86400', 'etag': etag} if request.headers.get('if-none-match') == etag: return Response(status=304, headers=headers) - return Response(base64.b64decode(icon), mimetype='image', headers=headers) + if '\n' in icon: + content_type, icon = icon.split() + headers['content-type'] = content_type + else: + headers['content-type'] = 'application/image' + return Response(base64.b64decode(icon), headers=headers) -- cgit