From cbe51f5f50bceff02f48dfbdd3c09e1660063c4a Mon Sep 17 00:00:00 2001 From: François Schmidts Date: Tue, 21 Jul 2015 10:51:45 +0200 Subject: retrieved date is now when retrieved and comparison are made on the date of articles --- pyaggr3g470r/lib/article_utils.py | 16 +++++++--------- pyaggr3g470r/models/article.py | 8 +++----- 2 files changed, 10 insertions(+), 14 deletions(-) diff --git a/pyaggr3g470r/lib/article_utils.py b/pyaggr3g470r/lib/article_utils.py index 115b6058..dbe29659 100644 --- a/pyaggr3g470r/lib/article_utils.py +++ b/pyaggr3g470r/lib/article_utils.py @@ -2,7 +2,6 @@ import logging import requests import dateutil.parser from datetime import datetime -from bs4 import BeautifulSoup import conf from pyaggr3g470r.lib.utils import to_hash @@ -10,9 +9,8 @@ from pyaggr3g470r.lib.utils import to_hash logger = logging.getLogger(__name__) -def extract_id(entry, keys=[('link', 'link'), - ('published', 'retrieved_date'), - ('updated', 'retrieved_date')], force_id=False): +def extract_id(entry, keys=[('link', 'link'), ('published', 'date'), + ('updated', 'date')], force_id=False): """For a given entry will return a dict that allows to identify it. The dict will be constructed on the uid of the entry. if that identifier is absent, the dict will be constructed upon the values of "keys". @@ -21,7 +19,7 @@ def extract_id(entry, keys=[('link', 'link'), if entry_id: return {'entry_id': entry_id} if not entry_id and force_id: - entry_id = to_hash("".join(entry[entry_key] for _, entry_key in keys + return to_hash("".join(entry[entry_key] for _, entry_key in keys if entry_key in entry).encode('utf8')) else: ids = {} @@ -38,8 +36,8 @@ def construct_article(entry, feed): if hasattr(feed, 'dump'): # this way can be a sqlalchemy obj or a dict feed = feed.dump() "Safe method to transorm a feedparser entry into an article" - date = datetime.now() - + now = datetime.now() + date = None for date_key in ('published', 'updated'): if entry.get(date_key): try: @@ -72,5 +70,5 @@ def construct_article(entry, feed): 'title': entry.get('title', 'No title'), 'readed': False, 'like': False, 'content': content, - 'retrieved_date': date.isoformat(), - 'date': date.isoformat()} + 'retrieved_date': now.isoformat(), + 'date': (date or now).isoformat()} diff --git a/pyaggr3g470r/models/article.py b/pyaggr3g470r/models/article.py index 16f15b9e..c98164d0 100644 --- a/pyaggr3g470r/models/article.py +++ b/pyaggr3g470r/models/article.py @@ -26,7 +26,6 @@ __revision__ = "$Date: 2014/04/12 $" __copyright__ = "Copyright (c) Cedric Bonhomme" __license__ = "GPLv3" -import json from bootstrap import db from datetime import datetime from sqlalchemy import asc, desc @@ -66,10 +65,9 @@ class Article(db.Model): .order_by(asc("Article.date")).first() def __repr__(self): - return json.dumps({"title": self.title, - "link": self.link, - "content": self.content - }) + return "" % (self.id, self.entry_id, + self.title, self.date, self.retrieved_date) def dump(self): return {"id": self.id, -- cgit From 8541cbde854fc129cd4009d443b547eac59ffbee Mon Sep 17 00:00:00 2001 From: François Schmidts Date: Tue, 21 Jul 2015 12:35:06 +0200 Subject: adding a no change return policy --- pyaggr3g470r/views/feed.py | 10 ++++++---- pyaggr3g470r/views/views.py | 31 ++++++++++++++++++++----------- 2 files changed, 26 insertions(+), 15 deletions(-) diff --git a/pyaggr3g470r/views/feed.py b/pyaggr3g470r/views/feed.py index 11f549e6..99986fe7 100644 --- a/pyaggr3g470r/views/feed.py +++ b/pyaggr3g470r/views/feed.py @@ -105,13 +105,15 @@ def bookmarklet(): try: feed = construct_feed_from(url) - except requests.exceptions.ConnectionError as e: - flash(gettext("Impossible to connect to the address: {}.".format(url)), "danger") + except requests.exceptions.ConnectionError: + flash(gettext("Impossible to connect to the address: {}.".format(url)), + "danger") return redirect(url_for('home')) if not feed.get('link'): feed['enabled'] = False flash(gettext("Couldn't find a feed url, you'll need to find a Atom or" - " RSS link manually and reactivate this feed"), 'warning') + " RSS link manually and reactivate this feed"), + 'warning') feed = feed_contr.create(**feed) flash(gettext('Feed was successfully created.'), 'success') if feed.enabled and conf.CRAWLING_METHOD == "classic": @@ -202,7 +204,7 @@ def icon(feed_id): icon = FeedController(None if g.user.is_admin() else g.user.id)\ .get(id=feed_id).icon etag = md5(icon.encode('utf8')).hexdigest() - headers = {'Cache-Control': 'max-age=86400', 'ETag': etag} + headers = {'Cache-Control': 'max-age=86400', 'etag': etag} if request.headers.get('if-none-match') == etag: return Response(status=304, headers=headers) return Response(base64.b64decode(icon), mimetype='image', headers=headers) diff --git a/pyaggr3g470r/views/views.py b/pyaggr3g470r/views/views.py index 189fd53f..2aeaaa20 100644 --- a/pyaggr3g470r/views/views.py +++ b/pyaggr3g470r/views/views.py @@ -30,11 +30,11 @@ import os import string import random import hashlib +import logging import datetime -from collections import namedtuple from bootstrap import application as app, db from flask import render_template, request, flash, session, \ - url_for, redirect, g, current_app, make_response + url_for, redirect, g, current_app, make_response, Response from flask.ext.login import LoginManager, login_user, logout_user, \ login_required, current_user, AnonymousUserMixin from flask.ext.principal import Principal, Identity, AnonymousIdentity, \ @@ -46,12 +46,13 @@ from sqlalchemy.exc import IntegrityError from werkzeug import generate_password_hash import conf +from pyaggr3g470r.lib.utils import to_hash from pyaggr3g470r import utils, notifications, export from pyaggr3g470r.models import User, Feed, Article, Role from pyaggr3g470r.decorators import feed_access_required -from pyaggr3g470r.forms import SignupForm, SigninForm, \ - ProfileForm, UserForm, RecoverPasswordForm, \ - AddFeedForm, InformationMessageForm +from pyaggr3g470r.forms import SignupForm, SigninForm, InformationMessageForm,\ + ProfileForm, UserForm, RecoverPasswordForm \ + from pyaggr3g470r.controllers import UserController, FeedController, \ ArticleController @@ -62,6 +63,7 @@ admin_permission = Permission(RoleNeed('admin')) login_manager = LoginManager() login_manager.init_app(app) +logger = logging.getLogger(__name__) # # Management of the user's session. @@ -235,7 +237,6 @@ def render_home(filters=None, head_titles=None, arti_contr = ArticleController(g.user.id) feeds = {feed.id: feed.title for feed in feed_contr.read()} - unread = arti_contr.count_by_feed(readed=False) in_error = {feed.id: feed.error_count for feed in feed_contr.read(error_count__gt=2)} @@ -282,11 +283,19 @@ def render_home(filters=None, head_titles=None, and filter_ != 'all' and not articles: return redirect(gen_url(filter_='all')) - return render_template('home.html', page_to_render=page_to_render, - gen_url=gen_url, feed_id=feed_id, - filter_=filter_, limit=limit, feeds=feeds, - unread=unread, articles=articles, in_error=in_error, - head_titles=head_titles, sort_=sort_, **kwargs) + etag = to_hash("".join([str(filters[key]) for key in sorted(filters)]) + + "".join([str(art.id) for art in articles])) + if request.headers.get('if-none-match') == etag: + return Response(status=304, headers={'etag': etag, + 'Cache-Control': 'pragma: no-cache'}) + response = make_response(render_template('home.html', gen_url=gen_url, + feed_id=feed_id, page_to_render=page_to_render, + filter_=filter_, limit=limit, feeds=feeds, + unread=arti_contr.count_by_feed(readed=False), + articles=articles, in_error=in_error, + head_titles=head_titles, sort_=sort_, **kwargs)) + response.headers['etag'] = etag + return response @app.route('/') -- cgit From 92289b32248f4568579edfd5a301e571ade0c284 Mon Sep 17 00:00:00 2001 From: François Schmidts Date: Tue, 21 Jul 2015 14:44:11 +0200 Subject: fetching mimetypes with images --- pyaggr3g470r/lib/feed_utils.py | 9 +++++---- pyaggr3g470r/lib/utils.py | 9 ++++++--- pyaggr3g470r/views/feed.py | 7 ++++++- 3 files changed, 17 insertions(+), 8 deletions(-) diff --git a/pyaggr3g470r/lib/feed_utils.py b/pyaggr3g470r/lib/feed_utils.py index 367fd4b5..28123f66 100644 --- a/pyaggr3g470r/lib/feed_utils.py +++ b/pyaggr3g470r/lib/feed_utils.py @@ -4,7 +4,7 @@ import requests import feedparser from bs4 import BeautifulSoup, SoupStrainer -from pyaggr3g470r.lib.utils import try_keys, try_splits, rebuild_url +from pyaggr3g470r.lib.utils import try_keys, try_get_b64icon, rebuild_url logger = logging.getLogger(__name__) @@ -38,7 +38,7 @@ def construct_feed_from(url=None, fp_parsed=None, feed=None, query_site=True): site_split = urllib.parse.urlsplit(feed['site_link']) if feed.get('icon'): - feed['icon'] = try_splits(feed['icon'], site_split, feed_split) + feed['icon'] = try_get_b64icon(feed['icon'], site_split, feed_split) if feed['icon'] is None: del feed['icon'] @@ -72,13 +72,14 @@ def construct_feed_from(url=None, fp_parsed=None, feed=None, query_site=True): icons = bs_parsed.find_all(check_keys(rel=['icon'])) if len(icons) >= 1: for icon in icons: - feed['icon'] = try_splits(icon.attrs['href'], + feed['icon'] = try_get_b64icon(icon.attrs['href'], site_split, feed_split) if feed['icon'] is not None: break if feed.get('icon') is None: - feed['icon'] = try_splits('/favicon.ico', site_split, feed_split) + feed['icon'] = try_get_b64icon('/favicon.ico', + site_split, feed_split) if 'icon' in feed and feed['icon'] is None: del feed['icon'] diff --git a/pyaggr3g470r/lib/utils.py b/pyaggr3g470r/lib/utils.py index 62284de1..a51b6c3e 100644 --- a/pyaggr3g470r/lib/utils.py +++ b/pyaggr3g470r/lib/utils.py @@ -40,14 +40,17 @@ def rebuild_url(url, base_split): return urllib.parse.urlunsplit(new_split) -def try_splits(url, *splits): +def try_get_b64icon(url, *splits): for split in splits: if split is None: continue rb_url = rebuild_url(url, split) response = requests.get(rb_url, verify=False, timeout=10) - if response.ok and 'html' not in response.headers['content-type']: - return base64.b64encode(response.content).decode('utf8') + # if html in content-type, we assume it's a fancy 404 page + content_type = response.headers.get('content-type', '') + if response.ok and 'html' not in content_type: + return content_type + ( + '\n%s' % base64.b64encode(response.content).decode('utf8')) return None diff --git a/pyaggr3g470r/views/feed.py b/pyaggr3g470r/views/feed.py index 99986fe7..1a3ad938 100644 --- a/pyaggr3g470r/views/feed.py +++ b/pyaggr3g470r/views/feed.py @@ -207,4 +207,9 @@ def icon(feed_id): headers = {'Cache-Control': 'max-age=86400', 'etag': etag} if request.headers.get('if-none-match') == etag: return Response(status=304, headers=headers) - return Response(base64.b64decode(icon), mimetype='image', headers=headers) + if '\n' in icon: + content_type, icon = icon.split() + headers['content-type'] = content_type + else: + headers['content-type'] = 'application/image' + return Response(base64.b64decode(icon), headers=headers) -- cgit From cbd1d29bc4796410223897a3269ba470943bf16f Mon Sep 17 00:00:00 2001 From: François Schmidts Date: Tue, 21 Jul 2015 15:38:12 +0200 Subject: making gen_url get args order predictable so cache generation works --- pyaggr3g470r/views/views.py | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/pyaggr3g470r/views/views.py b/pyaggr3g470r/views/views.py index 2aeaaa20..0071f887 100644 --- a/pyaggr3g470r/views/views.py +++ b/pyaggr3g470r/views/views.py @@ -32,6 +32,8 @@ import random import hashlib import logging import datetime +from collections import OrderedDict + from bootstrap import application as app, db from flask import render_template, request, flash, session, \ url_for, redirect, g, current_app, make_response, Response @@ -268,15 +270,22 @@ def render_home(filters=None, head_titles=None, def gen_url(filter_=filter_, sort_=sort_, limit=limit, feed_id=feed_id, **kwargs): + o_kwargs = OrderedDict() + for key in sorted(kwargs): + o_kwargs[key] = kwargs[key] if page_to_render == 'search': - kwargs['query'] = request.args.get('query', '') - kwargs['search_title'] = request.args.get('search_title', 'off') - kwargs['search_content'] = request.args.get( + o_kwargs['query'] = request.args.get('query', '') + o_kwargs['search_title'] = request.args.get('search_title', 'off') + o_kwargs['search_content'] = request.args.get( 'search_content', 'off') - if kwargs['search_title'] == kwargs['search_content'] == 'off': - kwargs['search_title'] = 'on' - return url_for(page_to_render, filter_=filter_, sort_=sort_, - limit=limit, feed_id=feed_id, **kwargs) + # if nor title and content are selected, selecting title + if o_kwargs['search_title'] == o_kwargs['search_content'] == 'off': + o_kwargs['search_title'] = 'on' + o_kwargs['filter_'] = filter_ + o_kwargs['sort_'] = sort_ + o_kwargs['limit'] = limit + o_kwargs['feed_id'] = feed_id + return url_for(page_to_render, **o_kwargs) articles = list(articles) if (page_to_render == 'home' and feed_id or page_to_render == 'search') \ -- cgit From d54d24eaced0d6965351ad289ef4d4f00980c8b3 Mon Sep 17 00:00:00 2001 From: François Schmidts Date: Tue, 21 Jul 2015 16:10:14 +0200 Subject: fixing overlapping title on small screens --- pyaggr3g470r/templates/layout.html | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/pyaggr3g470r/templates/layout.html b/pyaggr3g470r/templates/layout.html index c2c0c07e..93d36096 100644 --- a/pyaggr3g470r/templates/layout.html +++ b/pyaggr3g470r/templates/layout.html @@ -27,10 +27,9 @@ pyAggr3g470r {% if head_titles %} - {% for head_title in head_titles %} - - - {{ head_title }} - {% endfor %} + {% endif %} -- cgit From 9dd55f2eb942c7afa03935e9a18d19b028b392bc Mon Sep 17 00:00:00 2001 From: François Schmidts Date: Tue, 21 Jul 2015 16:47:34 +0200 Subject: improving article listing : default icon and title for article link --- pyaggr3g470r/templates/home.html | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/pyaggr3g470r/templates/home.html b/pyaggr3g470r/templates/home.html index 42a5d498..1577bd32 100644 --- a/pyaggr3g470r/templates/home.html +++ b/pyaggr3g470r/templates/home.html @@ -105,11 +105,16 @@ {% if filter_ == 'all' %}{% endif %} {% endif %} - - {% if article.source.icon %}{% endif %} - + + + {% if article.source.icon %} + + {% else %} + + {% endif %} + - {{ article.title|safe }} + {{ article.title|safe }} {{ article.date|datetime }} -- cgit