From 3b3f188e1558987a8a14dc303912e208f77c7b60 Mon Sep 17 00:00:00 2001 From: François Schmidts Date: Fri, 31 Jul 2015 13:15:16 +0200 Subject: adding munin probes --- conf.py | 4 +++ conf/conf.cfg-sample | 2 ++ manager.py | 10 ++++-- pyaggr3g470r/controllers/article.py | 6 ++++ pyaggr3g470r/views/api/feed.py | 3 +- scripts/__init__.py | 0 scripts/probes.py | 72 +++++++++++++++++++++++++++++++++++++ 7 files changed, 93 insertions(+), 4 deletions(-) create mode 100644 scripts/__init__.py create mode 100644 scripts/probes.py diff --git a/conf.py b/conf.py index 5704ab83..73e83eca 100644 --- a/conf.py +++ b/conf.py @@ -28,6 +28,8 @@ DEFAULTS = {"platform_url": "https://pyaggr3g470r.herokuapp.com/", "recaptcha_public_key": "", "recaptcha_private_key": "", "nb_worker": "100", + "api_login": "", + "api_passwd": "", "default_max_error": "3", "log_path": "pyaggr3g470r.log", "log_level": "info", @@ -79,6 +81,8 @@ RECAPTCHA_PRIVATE_KEY = config.get('misc', 'recaptcha_private_key') LOG_PATH = config.get('misc', 'log_path') NB_WORKER = config.getint('misc', 'nb_worker') +API_LOGIN = config.get('crawler', 'api_login') +API_PASSWD = config.get('crawler', 'api_passwd') WHOOSH_ENABLED = True diff --git a/conf/conf.cfg-sample b/conf/conf.cfg-sample index 2f30b04a..acd244c8 100644 --- a/conf/conf.cfg-sample +++ b/conf/conf.cfg-sample @@ -18,6 +18,8 @@ default_max_error = 6 http_proxy = user_agent = pyAggr3g470r (https://bitbucket.org/cedricbonhomme/pyaggr3g470r) resolve_article_url = false +api_login = +api_passwd = [notification] notification_email = pyAggr3g470r@no-reply.com host = smtp.googlemail.com diff --git a/manager.py b/manager.py index 1c038dd6..272dfc2e 100755 --- a/manager.py +++ b/manager.py @@ -1,7 +1,7 @@ #! /usr/bin/env python # -*- coding: utf-8 -*- -from bootstrap import application, db, populate_g +from bootstrap import application, db, populate_g, conf from flask.ext.script import Manager from flask.ext.migrate import Migrate, MigrateCommand @@ -27,10 +27,10 @@ def db_create(): pyaggr3g470r.models.db_create(db) @manager.command -def fetch(user, password, limit=100, retreive_all=False): +def fetch(limit=100, retreive_all=False): "Crawl the feeds with the client crawler." from pyaggr3g470r.lib.crawler import CrawlerScheduler - scheduler = CrawlerScheduler(user, password) + scheduler = CrawlerScheduler(conf.API_LOGIN, conf.API_PASSWD) scheduler.run(limit=limit, retreive_all=retreive_all) scheduler.wait() @@ -66,5 +66,9 @@ def fetch_asyncio(user_id, feed_id): feed_getter = crawler.retrieve_feed(loop, g.user, feed_id) loop.close() +from scripts.probes import ArticleProbe, FeedProbe +manager.add_command('probe_articles', ArticleProbe()) +manager.add_command('probe_feeds', FeedProbe()) + if __name__ == '__main__': manager.run() diff --git a/pyaggr3g470r/controllers/article.py b/pyaggr3g470r/controllers/article.py index 70b9d2dd..21b4b5e7 100644 --- a/pyaggr3g470r/controllers/article.py +++ b/pyaggr3g470r/controllers/article.py @@ -33,6 +33,12 @@ class ArticleController(AbstractController): .filter(*self._to_filters(**filters)) .group_by(Article.feed_id).all()) + def count_by_user_id(self, **filters): + return dict(db.session.query(Article.user_id, + func.count(Article.id)) + .filter(*self._to_filters(**filters)) + .group_by(Article.user_id).all()) + def create(self, **attrs): # handling special denorm for article rights assert 'feed_id' in attrs diff --git a/pyaggr3g470r/views/api/feed.py b/pyaggr3g470r/views/api/feed.py index 530f3fef..ae2cd735 100644 --- a/pyaggr3g470r/views/api/feed.py +++ b/pyaggr3g470r/views/api/feed.py @@ -3,6 +3,7 @@ from flask import g +import conf from pyaggr3g470r.controllers.feed import (FeedController, DEFAULT_MAX_ERROR, DEFAULT_LIMIT, @@ -54,7 +55,7 @@ class FetchableFeedAPI(PyAggAbstractResource): if g.user.refresh_rate: args['refresh_rate'] = g.user.refresh_rate - if args.pop('retreive_all'): + if args.pop('retreive_all', False): contr = self.wider_controller else: contr = self.controller diff --git a/scripts/__init__.py b/scripts/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/scripts/probes.py b/scripts/probes.py new file mode 100644 index 00000000..bfad4e6e --- /dev/null +++ b/scripts/probes.py @@ -0,0 +1,72 @@ +#!/usr/bin/python3 +import sys +from datetime import datetime, timedelta +from flask.ext.script import Command, Option + +from pyaggr3g470r.controllers \ + import UserController, FeedController, ArticleController +DEFAULT_HEADERS = {'Content-Type': 'application/json', 'User-Agent': 'munin'} +LATE_AFTER = 60 +FETCH_RATE = 3 + + +class AbstractMuninPlugin(Command): + urn = None + + def execute(self): + raise NotImplementedError() + + def config(self): + raise NotImplementedError() + + def get_options(self): + if sys.argv[-1] == 'config': + return [Option(dest='config', default=sys.argv[-1] == 'config')] + return [] + + def run(self, config=False): + if config: + self.config() + else: + self.execute() + + +class FeedProbe(AbstractMuninPlugin): + + def config(self): + print("graph_title PyAgg - Feeds counts") + print("graph_vlabel feeds") + print("feeds.label Late feeds") + print("feeds_total.label Total feeds") + print("feeds.warning 15") + print("feeds.critical 30") + print("graph_category web") + print("graph_scale yes") + + def execute(self): + delta = datetime.now() - timedelta(minutes=LATE_AFTER + FETCH_RATE + 1) + + print("feeds.value %d" % len(FeedController().list_late(delta))) + print("feeds_total.value %d" % FeedController().read().count()) + + +class ArticleProbe(AbstractMuninPlugin): + + def config(self): + print("graph_title Pyagg - Articles adding rate") + print("graph_vlabel Articles per sec") + print("articles.label Overall rate") + print("articles.type DERIVE") + print("articles.min 0") + for id_ in sorted(user.id for user in UserController().read()): + print("articles_user_%s.label Rate for user %s" % (id_, id_)) + print("articles_user_%s.type DERIVE" % id_) + print("articles_user_%s.min 0" % id_) + print("graph_category web") + print("graph_scale yes") + + def execute(self): + counts = ArticleController().count_by_user_id() + print("articles.value %s" % sum(counts.values())) + for user, count in counts.items(): + print("articles_user_%s.value %s" % (user, count)) -- cgit From 4ad1b29d831633de1430a683c4ad37873007d34c Mon Sep 17 00:00:00 2001 From: François Schmidts Date: Fri, 31 Jul 2015 13:20:55 +0200 Subject: redoing the etag matching mechanism --- pyaggr3g470r/lib/view_utils.py | 20 ++++++++++++++++++++ pyaggr3g470r/views/article.py | 2 ++ pyaggr3g470r/views/feed.py | 4 ++++ pyaggr3g470r/views/views.py | 26 +++++++++++--------------- 4 files changed, 37 insertions(+), 15 deletions(-) create mode 100644 pyaggr3g470r/lib/view_utils.py diff --git a/pyaggr3g470r/lib/view_utils.py b/pyaggr3g470r/lib/view_utils.py new file mode 100644 index 00000000..fa5e1eec --- /dev/null +++ b/pyaggr3g470r/lib/view_utils.py @@ -0,0 +1,20 @@ +from functools import wraps +from flask import request, Response, make_response +from pyaggr3g470r.lib.utils import to_hash + + +def etag_match(func): + @wraps(func) + def wrapper(*args, **kwargs): + response = func(*args, **kwargs) + if not type(response) is str: + return response + etag = to_hash(response) + if request.headers.get('if-none-match') == etag: + response = Response(status=304, headers={'etag': etag, + 'Cache-Control': 'pragma: no-cache'}) + else: + response = make_response(response) + response.headers['etag'] = etag + return response + return wrapper diff --git a/pyaggr3g470r/views/article.py b/pyaggr3g470r/views/article.py index 6de07ad3..524bf6dd 100644 --- a/pyaggr3g470r/views/article.py +++ b/pyaggr3g470r/views/article.py @@ -4,6 +4,7 @@ from flask import Blueprint, g, render_template, redirect from pyaggr3g470r import controllers, utils +from pyaggr3g470r.lib.view_utils import etag_match from pyaggr3g470r.decorators import pyagg_default_decorator articles_bp = Blueprint('articles', __name__, url_prefix='/articles') @@ -19,6 +20,7 @@ def redirect_to_article(article_id): @article_bp.route('/', methods=['GET']) @pyagg_default_decorator +@etag_match def article(article_id=None): """ Presents the content of an article. diff --git a/pyaggr3g470r/views/feed.py b/pyaggr3g470r/views/feed.py index fb3ea4c7..38c7869c 100644 --- a/pyaggr3g470r/views/feed.py +++ b/pyaggr3g470r/views/feed.py @@ -14,6 +14,7 @@ from flask.ext.login import login_required import conf from pyaggr3g470r import utils +from pyaggr3g470r.lib.view_utils import etag_match from pyaggr3g470r.lib.feed_utils import construct_feed_from from pyaggr3g470r.forms import AddFeedForm from pyaggr3g470r.controllers import FeedController, ArticleController @@ -24,6 +25,7 @@ feed_bp = Blueprint('feed', __name__, url_prefix='/feed') @feeds_bp.route('/', methods=['GET']) @login_required +@etag_match def feeds(): "Lists the subscribed feeds in a table." art_contr = ArticleController(g.user.id) @@ -35,6 +37,7 @@ def feeds(): @feed_bp.route('/', methods=['GET']) @login_required +@etag_match def feed(feed_id=None): "Presents detailed information about a feed." feed = FeedController(g.user.id).get(id=feed_id) @@ -138,6 +141,7 @@ def update(action, feed_id=None): @feed_bp.route('/create', methods=['GET']) @feed_bp.route('/edit/', methods=['GET']) @login_required +@etag_match def form(feed_id=None): action = gettext("Add a feed") head_titles = [action] diff --git a/pyaggr3g470r/views/views.py b/pyaggr3g470r/views/views.py index 0071f887..b649d5c6 100644 --- a/pyaggr3g470r/views/views.py +++ b/pyaggr3g470r/views/views.py @@ -36,7 +36,7 @@ from collections import OrderedDict from bootstrap import application as app, db from flask import render_template, request, flash, session, \ - url_for, redirect, g, current_app, make_response, Response + url_for, redirect, g, current_app, make_response from flask.ext.login import LoginManager, login_user, logout_user, \ login_required, current_user, AnonymousUserMixin from flask.ext.principal import Principal, Identity, AnonymousIdentity, \ @@ -48,8 +48,8 @@ from sqlalchemy.exc import IntegrityError from werkzeug import generate_password_hash import conf -from pyaggr3g470r.lib.utils import to_hash from pyaggr3g470r import utils, notifications, export +from pyaggr3g470r.lib.view_utils import etag_match from pyaggr3g470r.models import User, Feed, Article, Role from pyaggr3g470r.decorators import feed_access_required from pyaggr3g470r.forms import SignupForm, SigninForm, InformationMessageForm,\ @@ -229,6 +229,7 @@ def signup(): return render_template('signup.html', form=form) +@etag_match def render_home(filters=None, head_titles=None, page_to_render='home', **kwargs): if filters is None: @@ -292,19 +293,12 @@ def render_home(filters=None, head_titles=None, and filter_ != 'all' and not articles: return redirect(gen_url(filter_='all')) - etag = to_hash("".join([str(filters[key]) for key in sorted(filters)]) - + "".join([str(art.id) for art in articles])) - if request.headers.get('if-none-match') == etag: - return Response(status=304, headers={'etag': etag, - 'Cache-Control': 'pragma: no-cache'}) - response = make_response(render_template('home.html', gen_url=gen_url, - feed_id=feed_id, page_to_render=page_to_render, - filter_=filter_, limit=limit, feeds=feeds, - unread=arti_contr.count_by_feed(readed=False), - articles=articles, in_error=in_error, - head_titles=head_titles, sort_=sort_, **kwargs)) - response.headers['etag'] = etag - return response + return render_template('home.html', gen_url=gen_url, + feed_id=feed_id, page_to_render=page_to_render, + filter_=filter_, limit=limit, feeds=feeds, + unread=arti_contr.count_by_feed(readed=False), + articles=articles, in_error=in_error, + head_titles=head_titles, sort_=sort_, **kwargs) @app.route('/') @@ -362,7 +356,9 @@ def fetch(feed_id=None): "for administrator, on the Heroku platform."), "info") return redirect(redirect_url()) + @app.route('/about', methods=['GET']) +@etag_match def about(): """ 'About' page. -- cgit From 60052ffca30ec33b79eb36b0fe4d49b338f73ca1 Mon Sep 17 00:00:00 2001 From: François Schmidts Date: Fri, 31 Jul 2015 13:21:33 +0200 Subject: ensuring the icon isn't empty and redoing a bit of logging --- pyaggr3g470r/lib/crawler.py | 7 ++++--- pyaggr3g470r/lib/utils.py | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/pyaggr3g470r/lib/crawler.py b/pyaggr3g470r/lib/crawler.py index e5998776..e4dc5955 100644 --- a/pyaggr3g470r/lib/crawler.py +++ b/pyaggr3g470r/lib/crawler.py @@ -125,7 +125,7 @@ class PyAggUpdater(AbstractCrawler): entry = construct_article( self.entries[tuple(sorted(id_to_create.items()))], self.feed) - logger.warn('%r %r - creating %r for %r - %r', self.feed['id'], + logger.info('%r %r - creating %r for %r - %r', self.feed['id'], self.feed['title'], entry['title'], entry['user_id'], id_to_create) self.query_pyagg('post', 'article', entry) @@ -152,11 +152,12 @@ class PyAggUpdater(AbstractCrawler): up_feed['last_retrieved'] \ = (datetime.now() - timedelta(minutes=45)).isoformat() - logger.info('%r %r - pushing feed attrs %r', + if any([up_feed[key] != self.feed.get(key) for key in up_feed]): + logger.warn('%r %r - pushing feed attrs %r', self.feed['id'], self.feed['title'], {key: "%s -> %s" % (up_feed[key], self.feed.get(key)) for key in up_feed if up_feed[key] != self.feed.get(key)}) - if any([up_feed[key] != self.feed.get(key) for key in up_feed]): + future = self.query_pyagg('put', 'feed/%d' % self.feed['id'], up_feed) future.add_done_callback(self.get_counter_callback()) diff --git a/pyaggr3g470r/lib/utils.py b/pyaggr3g470r/lib/utils.py index a51b6c3e..b937b5a9 100644 --- a/pyaggr3g470r/lib/utils.py +++ b/pyaggr3g470r/lib/utils.py @@ -48,7 +48,7 @@ def try_get_b64icon(url, *splits): response = requests.get(rb_url, verify=False, timeout=10) # if html in content-type, we assume it's a fancy 404 page content_type = response.headers.get('content-type', '') - if response.ok and 'html' not in content_type: + if response.ok and 'html' not in content_type and response.content: return content_type + ( '\n%s' % base64.b64encode(response.content).decode('utf8')) return None -- cgit From 0caffceec8b58bc3f78c0d8ea36d2f7e9da668ec Mon Sep 17 00:00:00 2001 From: François Schmidts Date: Mon, 3 Aug 2015 14:36:13 +0200 Subject: sqlalchemy was requesting icons everytime feed where listed so i choosed to move the icons into their own table --- migrations/versions/19bdaa6208e_add_icon_column.py | 2 +- ...ca960a207_mv_icons_from_feed_tbl_to_icon_tbl.py | 36 ++++++++++++++++++++++ pyaggr3g470r/controllers/__init__.py | 4 ++- pyaggr3g470r/controllers/abstract.py | 10 ++++-- pyaggr3g470r/controllers/feed.py | 19 ++++++++++++ pyaggr3g470r/controllers/icon.py | 23 ++++++++++++++ pyaggr3g470r/lib/crawler.py | 2 +- pyaggr3g470r/lib/feed_utils.py | 33 ++++++++++---------- pyaggr3g470r/lib/utils.py | 9 +++--- pyaggr3g470r/lib/view_utils.py | 18 +++++++---- pyaggr3g470r/models/__init__.py | 3 +- pyaggr3g470r/models/feed.py | 8 ++--- pyaggr3g470r/models/icon.py | 7 +++++ pyaggr3g470r/templates/feeds.html | 2 +- pyaggr3g470r/templates/home.html | 4 +-- pyaggr3g470r/views/__init__.py | 1 + pyaggr3g470r/views/api/feed.py | 3 +- pyaggr3g470r/views/feed.py | 17 ---------- pyaggr3g470r/views/icon.py | 14 +++++++++ runserver.py | 1 + scripts/probes.py | 4 ++- 21 files changed, 159 insertions(+), 61 deletions(-) create mode 100644 migrations/versions/25ca960a207_mv_icons_from_feed_tbl_to_icon_tbl.py create mode 100644 pyaggr3g470r/controllers/icon.py create mode 100644 pyaggr3g470r/models/icon.py create mode 100644 pyaggr3g470r/views/icon.py diff --git a/migrations/versions/19bdaa6208e_add_icon_column.py b/migrations/versions/19bdaa6208e_add_icon_column.py index 5762d6d7..2efa376f 100644 --- a/migrations/versions/19bdaa6208e_add_icon_column.py +++ b/migrations/versions/19bdaa6208e_add_icon_column.py @@ -1,4 +1,4 @@ -"""empty message +"""adding icon columns Revision ID: 19bdaa6208e Revises: 422da2d0234 diff --git a/migrations/versions/25ca960a207_mv_icons_from_feed_tbl_to_icon_tbl.py b/migrations/versions/25ca960a207_mv_icons_from_feed_tbl_to_icon_tbl.py new file mode 100644 index 00000000..2c8eeda5 --- /dev/null +++ b/migrations/versions/25ca960a207_mv_icons_from_feed_tbl_to_icon_tbl.py @@ -0,0 +1,36 @@ +"""moving icons to their own table + +Revision ID: 25ca960a207 +Revises: 19bdaa6208e +Create Date: 2015-08-03 14:36:21.626411 + +""" + +# revision identifiers, used by Alembic. +revision = '25ca960a207' +down_revision = '19bdaa6208e' + +from alembic import op +import sqlalchemy as sa + +import conf + + +def upgrade(): + op.create_table('icon', + sa.Column('url', sa.String(), nullable=False), + sa.Column('content', sa.String(), nullable=True), + sa.Column('mimetype', sa.String(), nullable=True), + sa.PrimaryKeyConstraint('url')) + op.add_column('feed', sa.Column('icon_url', sa.String(), nullable=True)) + if 'sqlite' not in conf.SQLALCHEMY_DATABASE_URI: + op.create_foreign_key(None, 'feed', 'icon', ['icon_url'], ['url']) + op.drop_column('feed', 'icon') + + +def downgrade(): + op.add_column('feed', sa.Column('icon', sa.VARCHAR(), nullable=True)) + if 'sqlite' not in conf.SQLALCHEMY_DATABASE_URI: + op.drop_constraint(None, 'feed', type_='foreignkey') + op.drop_column('feed', 'icon_url') + op.drop_table('icon') diff --git a/pyaggr3g470r/controllers/__init__.py b/pyaggr3g470r/controllers/__init__.py index d8d1a104..ad77fa1d 100644 --- a/pyaggr3g470r/controllers/__init__.py +++ b/pyaggr3g470r/controllers/__init__.py @@ -1,6 +1,8 @@ from .feed import FeedController from .article import ArticleController from .user import UserController +from .icon import IconController -__all__ = ['FeedController', 'ArticleController', 'UserController'] +__all__ = ['FeedController', 'ArticleController', 'UserController', + 'IconController'] diff --git a/pyaggr3g470r/controllers/abstract.py b/pyaggr3g470r/controllers/abstract.py index 281e1415..f33d241e 100644 --- a/pyaggr3g470r/controllers/abstract.py +++ b/pyaggr3g470r/controllers/abstract.py @@ -65,7 +65,8 @@ class AbstractController(object): dependant) and the user is not an admin and the filters doesn't already contains a filter for that user. """ - if self.user_id and filters.get(self._user_id_key) != self.user_id: + if self._user_id_key is not None and self.user_id \ + and filters.get(self._user_id_key) != self.user_id: filters[self._user_id_key] = self.user_id return self._db_cls.query.filter(*self._to_filters(**filters)) @@ -82,10 +83,11 @@ class AbstractController(object): return obj def create(self, **attrs): - assert self._user_id_key in attrs or self.user_id is not None, \ + assert self._user_id_key is None or self._user_id_key in attrs \ + or self.user_id is not None, \ "You must provide user_id one way or another" - if self._user_id_key not in attrs: + if self._user_id_key is not None and self._user_id_key not in attrs: attrs[self._user_id_key] = self.user_id obj = self._db_cls(**attrs) db.session.add(obj) @@ -108,5 +110,7 @@ class AbstractController(object): def _has_right_on(self, obj): # user_id == None is like being admin + if self._user_id_key is None: + return True return self.user_id is None \ or getattr(obj, self._user_id_key, None) == self.user_id diff --git a/pyaggr3g470r/controllers/feed.py b/pyaggr3g470r/controllers/feed.py index 82714e39..6b3c4fb5 100644 --- a/pyaggr3g470r/controllers/feed.py +++ b/pyaggr3g470r/controllers/feed.py @@ -21,9 +21,11 @@ import logging from datetime import datetime, timedelta +from werkzeug.exceptions import NotFound import conf from .abstract import AbstractController +from .icon import IconController from pyaggr3g470r.models import Feed logger = logging.getLogger(__name__) @@ -52,3 +54,20 @@ class FeedController(AbstractController): self.update({'id__in': [feed.id for feed in feeds]}, {'last_retrieved': now}) return feeds + + def _ensure_icon(self, attrs): + if not attrs.get('icon_url'): + return + icon_contr = IconController() + try: + icon_contr.get(url=attrs['icon_url']) + except NotFound: + icon_contr.create(**{'url': attrs['icon_url']}) + + def create(self, **attrs): + self._ensure_icon(attrs) + return super().create(**attrs) + + def update(self, filters, attrs): + self._ensure_icon(attrs) + return super().update(filters, attrs) diff --git a/pyaggr3g470r/controllers/icon.py b/pyaggr3g470r/controllers/icon.py new file mode 100644 index 00000000..194c601c --- /dev/null +++ b/pyaggr3g470r/controllers/icon.py @@ -0,0 +1,23 @@ +import base64 +import requests +from pyaggr3g470r.models import Icon +from .abstract import AbstractController + + +class IconController(AbstractController): + _db_cls = Icon + _user_id_key = None + + def _build_from_url(self, attrs): + if 'url' in attrs and 'content' not in attrs: + resp = requests.get(attrs['url'], verify=False) + attrs.update({'url': resp.url, + 'mimetype': resp.headers.get('content-type', None), + 'content': base64.b64encode(resp.content).decode('utf8')}) + return attrs + + def create(self, **attrs): + return super().create(**self._build_from_url(attrs)) + + def update(self, filters, attrs): + return super().update(filters, self._build_from_url(attrs)) diff --git a/pyaggr3g470r/lib/crawler.py b/pyaggr3g470r/lib/crawler.py index e4dc5955..216e7a96 100644 --- a/pyaggr3g470r/lib/crawler.py +++ b/pyaggr3g470r/lib/crawler.py @@ -141,7 +141,7 @@ class PyAggUpdater(AbstractCrawler): strftime('%a, %d %b %Y %X %Z', gmtime()))} fresh_feed = construct_feed_from(url=self.feed['link'], fp_parsed=self.parsed_feed) - for key in ('description', 'site_link', 'icon'): + for key in ('description', 'site_link', 'icon_url'): if fresh_feed.get(key) and fresh_feed[key] != self.feed.get(key): up_feed[key] = fresh_feed[key] if not self.feed.get('title'): diff --git a/pyaggr3g470r/lib/feed_utils.py b/pyaggr3g470r/lib/feed_utils.py index 28123f66..aa9db29c 100644 --- a/pyaggr3g470r/lib/feed_utils.py +++ b/pyaggr3g470r/lib/feed_utils.py @@ -4,7 +4,7 @@ import requests import feedparser from bs4 import BeautifulSoup, SoupStrainer -from pyaggr3g470r.lib.utils import try_keys, try_get_b64icon, rebuild_url +from pyaggr3g470r.lib.utils import try_keys, try_get_icon_url, rebuild_url logger = logging.getLogger(__name__) @@ -29,7 +29,7 @@ def construct_feed_from(url=None, fp_parsed=None, feed=None, query_site=True): feed['site_link'] = try_keys(fp_parsed['feed'], 'href', 'link') feed['title'] = fp_parsed['feed'].get('title') feed['description'] = try_keys(fp_parsed['feed'], 'subtitle', 'title') - feed['icon'] = try_keys(fp_parsed['feed'], 'icon') + feed['icon_url'] = try_keys(fp_parsed['feed'], 'icon') else: feed['site_link'] = url @@ -37,13 +37,14 @@ def construct_feed_from(url=None, fp_parsed=None, feed=None, query_site=True): feed['site_link'] = rebuild_url(feed['site_link'], feed_split) site_split = urllib.parse.urlsplit(feed['site_link']) - if feed.get('icon'): - feed['icon'] = try_get_b64icon(feed['icon'], site_split, feed_split) - if feed['icon'] is None: - del feed['icon'] + if feed.get('icon_url'): + feed['icon_url'] = try_get_icon_url( + feed['icon_url'], site_split, feed_split) + if feed['icon_url'] is None: + del feed['icon_url'] if not feed.get('site_link') or not query_site \ - or all(bool(feed.get(key)) for key in ('link', 'title', 'icon')): + or all(bool(feed.get(k)) for k in ('link', 'title', 'icon_url')): return feed response = requests.get(feed['site_link'], verify=False) @@ -66,22 +67,22 @@ def construct_feed_from(url=None, fp_parsed=None, feed=None, query_site=True): return True return wrapper - if not feed.get('icon'): + if not feed.get('icon_url'): icons = bs_parsed.find_all(check_keys(rel=['icon', 'shortcut'])) if not len(icons): icons = bs_parsed.find_all(check_keys(rel=['icon'])) if len(icons) >= 1: for icon in icons: - feed['icon'] = try_get_b64icon(icon.attrs['href'], - site_split, feed_split) - if feed['icon'] is not None: + feed['icon_url'] = try_get_icon_url(icon.attrs['href'], + site_split, feed_split) + if feed['icon_url'] is not None: break - if feed.get('icon') is None: - feed['icon'] = try_get_b64icon('/favicon.ico', - site_split, feed_split) - if 'icon' in feed and feed['icon'] is None: - del feed['icon'] + if feed.get('icon_url') is None: + feed['icon_url'] = try_get_icon_url('/favicon.ico', + site_split, feed_split) + if 'icon_url' in feed and feed['icon_url'] is None: + del feed['icon_url'] if not feed.get('link'): alternates = bs_parsed.find_all(check_keys(rel=['alternate'], diff --git a/pyaggr3g470r/lib/utils.py b/pyaggr3g470r/lib/utils.py index b937b5a9..aa552a12 100644 --- a/pyaggr3g470r/lib/utils.py +++ b/pyaggr3g470r/lib/utils.py @@ -1,6 +1,5 @@ import types import urllib -import base64 import logging import requests from hashlib import md5 @@ -40,7 +39,7 @@ def rebuild_url(url, base_split): return urllib.parse.urlunsplit(new_split) -def try_get_b64icon(url, *splits): +def try_get_icon_url(url, *splits): for split in splits: if split is None: continue @@ -49,10 +48,10 @@ def try_get_b64icon(url, *splits): # if html in content-type, we assume it's a fancy 404 page content_type = response.headers.get('content-type', '') if response.ok and 'html' not in content_type and response.content: - return content_type + ( - '\n%s' % base64.b64encode(response.content).decode('utf8')) + return response.url return None def to_hash(text): - return md5(text.encode('utf8')).hexdigest() + return md5(text.encode('utf8') if hasattr(text, 'encode') else text)\ + .hexdigest() diff --git a/pyaggr3g470r/lib/view_utils.py b/pyaggr3g470r/lib/view_utils.py index fa5e1eec..0cfe62c4 100644 --- a/pyaggr3g470r/lib/view_utils.py +++ b/pyaggr3g470r/lib/view_utils.py @@ -7,14 +7,20 @@ def etag_match(func): @wraps(func) def wrapper(*args, **kwargs): response = func(*args, **kwargs) - if not type(response) is str: + if isinstance(response, Response): + etag = to_hash(response.data) + headers = response.headers + elif type(response) is str: + etag = to_hash(response) + headers = {} + else: return response - etag = to_hash(response) if request.headers.get('if-none-match') == etag: - response = Response(status=304, headers={'etag': etag, - 'Cache-Control': 'pragma: no-cache'}) - else: + response = Response(status=304) + response.headers['Cache-Control'] \ + = headers.get('Cache-Control', 'pragma: no-cache') + elif not isinstance(response, Response): response = make_response(response) - response.headers['etag'] = etag + response.headers['etag'] = etag return response return wrapper diff --git a/pyaggr3g470r/models/__init__.py b/pyaggr3g470r/models/__init__.py index ba52b0de..b578094c 100644 --- a/pyaggr3g470r/models/__init__.py +++ b/pyaggr3g470r/models/__init__.py @@ -30,8 +30,9 @@ from .feed import Feed from .role import Role from .user import User from .article import Article +from .icon import Icon -__all__ = ['Feed', 'Role', 'User', 'Article'] +__all__ = ['Feed', 'Role', 'User', 'Article', 'Icon'] import os diff --git a/pyaggr3g470r/models/feed.py b/pyaggr3g470r/models/feed.py index 75e55df1..07b0fc99 100644 --- a/pyaggr3g470r/models/feed.py +++ b/pyaggr3g470r/models/feed.py @@ -35,7 +35,7 @@ class Feed(db.Model): """ Represent a feed. """ - id = db.Column(db.Integer, primary_key=True) + id = db.Column(db.Integer(), primary_key=True) title = db.Column(db.String(), default="") description = db.Column(db.String(), default="FR") link = db.Column(db.String()) @@ -43,7 +43,6 @@ class Feed(db.Model): enabled = db.Column(db.Boolean(), default=True) created_date = db.Column(db.DateTime(), default=datetime.now) filters = db.Column(db.PickleType, default=[]) - icon = db.Column(db.String(), default="") # cache handling etag = db.Column(db.String(), default="") @@ -55,7 +54,8 @@ class Feed(db.Model): error_count = db.Column(db.Integer(), default=0) # relationship - user_id = db.Column(db.Integer, db.ForeignKey('user.id')) + icon_url = db.Column(db.String(), db.ForeignKey('icon.url'), default=None) + user_id = db.Column(db.Integer(), db.ForeignKey('user.id')) articles = db.relationship('Article', backref='source', lazy='dynamic', cascade='all,delete-orphan', order_by=desc("Article.date")) @@ -71,7 +71,7 @@ class Feed(db.Model): "link": self.link, "site_link": self.site_link, "etag": self.etag, - "icon": self.icon, + "icon_url": self.icon_url, "error_count": self.error_count, "last_modified": self.last_modified, "last_retrieved": self.last_retrieved} diff --git a/pyaggr3g470r/models/icon.py b/pyaggr3g470r/models/icon.py new file mode 100644 index 00000000..22ef1164 --- /dev/null +++ b/pyaggr3g470r/models/icon.py @@ -0,0 +1,7 @@ +from bootstrap import db + + +class Icon(db.Model): + url = db.Column(db.String(), primary_key=True) + content = db.Column(db.String(), default=None) + mimetype = db.Column(db.String(), default="application/image") diff --git a/pyaggr3g470r/templates/feeds.html b/pyaggr3g470r/templates/feeds.html index 20e0cccb..82af2411 100644 --- a/pyaggr3g470r/templates/feeds.html +++ b/pyaggr3g470r/templates/feeds.html @@ -29,7 +29,7 @@ {% endif %} - {% if feed.icon %}{% endif %} + {% if feed.icon_url %}{% endif %} {{ feed.title }} {{ feed.site_link }} diff --git a/pyaggr3g470r/templates/home.html b/pyaggr3g470r/templates/home.html index 1577bd32..86d96e94 100644 --- a/pyaggr3g470r/templates/home.html +++ b/pyaggr3g470r/templates/home.html @@ -107,8 +107,8 @@ - {% if article.source.icon %} - + {% if article.source.icon_url %} + {% else %} {% endif %} diff --git a/pyaggr3g470r/views/__init__.py b/pyaggr3g470r/views/__init__.py index 029dcb7d..36d382bd 100644 --- a/pyaggr3g470r/views/__init__.py +++ b/pyaggr3g470r/views/__init__.py @@ -3,3 +3,4 @@ from .api import * from .article import article_bp, articles_bp from .feed import feed_bp, feeds_bp +from .icon import icon_bp diff --git a/pyaggr3g470r/views/api/feed.py b/pyaggr3g470r/views/api/feed.py index ae2cd735..7d8cdf38 100644 --- a/pyaggr3g470r/views/api/feed.py +++ b/pyaggr3g470r/views/api/feed.py @@ -3,7 +3,6 @@ from flask import g -import conf from pyaggr3g470r.controllers.feed import (FeedController, DEFAULT_MAX_ERROR, DEFAULT_LIMIT, @@ -21,7 +20,7 @@ FEED_ATTRS = {'title': {'type': str}, 'site_link': {'type': str}, 'enabled': {'type': bool, 'default': True}, 'etag': {'type': str, 'default': ''}, - 'icon': {'type': str, 'default': ''}, + 'icon_url': {'type': str, 'default': ''}, 'last_modified': {'type': str}, 'last_retrieved': {'type': str}, 'last_error': {'type': str}, diff --git a/pyaggr3g470r/views/feed.py b/pyaggr3g470r/views/feed.py index 38c7869c..afb51903 100644 --- a/pyaggr3g470r/views/feed.py +++ b/pyaggr3g470r/views/feed.py @@ -200,20 +200,3 @@ def process_form(feed_id=None): flash(gettext("Downloading articles for the new feed..."), 'info') return redirect(url_for('feed.form', feed_id=new_feed.id)) - - -@feed_bp.route('/icon/', methods=['GET']) -@login_required -def icon(feed_id): - icon = FeedController(None if g.user.is_admin() else g.user.id)\ - .get(id=feed_id).icon - etag = md5(icon.encode('utf8')).hexdigest() - headers = {'Cache-Control': 'max-age=86400', 'etag': etag} - if request.headers.get('if-none-match') == etag: - return Response(status=304, headers=headers) - if '\n' in icon: - content_type, *_, icon = icon.split() - headers['content-type'] = content_type - else: - headers['content-type'] = 'application/image' - return Response(base64.b64decode(icon), headers=headers) diff --git a/pyaggr3g470r/views/icon.py b/pyaggr3g470r/views/icon.py new file mode 100644 index 00000000..2f51304a --- /dev/null +++ b/pyaggr3g470r/views/icon.py @@ -0,0 +1,14 @@ +import base64 +from flask import Blueprint, Response, request +from pyaggr3g470r.controllers import IconController +from pyaggr3g470r.lib.view_utils import etag_match + +icon_bp = Blueprint('icon', __name__, url_prefix='/icon') + +@icon_bp.route('/', methods=['GET']) +@etag_match +def icon(): + icon = IconController().get(url=request.args['url']) + headers = {'Cache-Control': 'max-age=86400', + 'Content-Type': icon.mimetype} + return Response(base64.b64decode(icon.content), headers=headers) diff --git a/runserver.py b/runserver.py index 5f20ddd4..ccd8bc60 100755 --- a/runserver.py +++ b/runserver.py @@ -51,6 +51,7 @@ with application.app_context(): application.register_blueprint(views.article_bp) application.register_blueprint(views.feeds_bp) application.register_blueprint(views.feed_bp) + application.register_blueprint(views.icon_bp) if __name__ == '__main__': diff --git a/scripts/probes.py b/scripts/probes.py index bfad4e6e..4c632184 100644 --- a/scripts/probes.py +++ b/scripts/probes.py @@ -45,8 +45,10 @@ class FeedProbe(AbstractMuninPlugin): def execute(self): delta = datetime.now() - timedelta(minutes=LATE_AFTER + FETCH_RATE + 1) + total = FeedController().read().count() - print("feeds.value %d" % len(FeedController().list_late(delta))) + print("feeds.value %d" + % len(FeedController().list_late(delta, limit=total))) print("feeds_total.value %d" % FeedController().read().count()) -- cgit