aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author François Schmidts <francois.schmidts@gmail.com> 2015-08-03 14:36:13 +0200
committer François Schmidts <francois.schmidts@gmail.com> 2015-08-03 15:50:41 +0200
commit 0caffceec8b58bc3f78c0d8ea36d2f7e9da668ec (patch)
tree 25ede52ae4b02a2377ae40d2c146c7ed2e9abe2a
parent ensuring the icon isn't empty and redoing a bit of logging (diff)
download newspipe-0caffceec8b58bc3f78c0d8ea36d2f7e9da668ec.tar.gz
newspipe-0caffceec8b58bc3f78c0d8ea36d2f7e9da668ec.tar.bz2
newspipe-0caffceec8b58bc3f78c0d8ea36d2f7e9da668ec.zip
SQLAlchemy was requesting icons every time feeds were listed,
so I chose to move the icons into their own table
-rw-r--r-- migrations/versions/19bdaa6208e_add_icon_column.py 2
-rw-r--r-- migrations/versions/25ca960a207_mv_icons_from_feed_tbl_to_icon_tbl.py 36
-rw-r--r-- pyaggr3g470r/controllers/__init__.py 4
-rw-r--r-- pyaggr3g470r/controllers/abstract.py 10
-rw-r--r-- pyaggr3g470r/controllers/feed.py 19
-rw-r--r-- pyaggr3g470r/controllers/icon.py 23
-rw-r--r-- pyaggr3g470r/lib/crawler.py 2
-rw-r--r-- pyaggr3g470r/lib/feed_utils.py 33
-rw-r--r-- pyaggr3g470r/lib/utils.py 9
-rw-r--r-- pyaggr3g470r/lib/view_utils.py 18
-rw-r--r-- pyaggr3g470r/models/__init__.py 3
-rw-r--r-- pyaggr3g470r/models/feed.py 8
-rw-r--r-- pyaggr3g470r/models/icon.py 7
-rw-r--r-- pyaggr3g470r/templates/feeds.html 2
-rw-r--r-- pyaggr3g470r/templates/home.html 4
-rw-r--r-- pyaggr3g470r/views/__init__.py 1
-rw-r--r-- pyaggr3g470r/views/api/feed.py 3
-rw-r--r-- pyaggr3g470r/views/feed.py 17
-rw-r--r-- pyaggr3g470r/views/icon.py 14
-rwxr-xr-x runserver.py 1
-rw-r--r-- scripts/probes.py 4
21 files changed, 159 insertions, 61 deletions
diff --git a/migrations/versions/19bdaa6208e_add_icon_column.py b/migrations/versions/19bdaa6208e_add_icon_column.py
index 5762d6d7..2efa376f 100644
--- a/migrations/versions/19bdaa6208e_add_icon_column.py
+++ b/migrations/versions/19bdaa6208e_add_icon_column.py
@@ -1,4 +1,4 @@
-"""empty message
+"""adding icon columns
Revision ID: 19bdaa6208e
Revises: 422da2d0234
diff --git a/migrations/versions/25ca960a207_mv_icons_from_feed_tbl_to_icon_tbl.py b/migrations/versions/25ca960a207_mv_icons_from_feed_tbl_to_icon_tbl.py
new file mode 100644
index 00000000..2c8eeda5
--- /dev/null
+++ b/migrations/versions/25ca960a207_mv_icons_from_feed_tbl_to_icon_tbl.py
@@ -0,0 +1,36 @@
+"""moving icons to their own table
+
+Revision ID: 25ca960a207
+Revises: 19bdaa6208e
+Create Date: 2015-08-03 14:36:21.626411
+
+"""
+
+# revision identifiers, used by Alembic.
+revision = '25ca960a207'
+down_revision = '19bdaa6208e'
+
+from alembic import op
+import sqlalchemy as sa
+
+import conf
+
+
+def upgrade():
+ op.create_table('icon',
+ sa.Column('url', sa.String(), nullable=False),
+ sa.Column('content', sa.String(), nullable=True),
+ sa.Column('mimetype', sa.String(), nullable=True),
+ sa.PrimaryKeyConstraint('url'))
+ op.add_column('feed', sa.Column('icon_url', sa.String(), nullable=True))
+ if 'sqlite' not in conf.SQLALCHEMY_DATABASE_URI:
+ op.create_foreign_key(None, 'feed', 'icon', ['icon_url'], ['url'])
+ op.drop_column('feed', 'icon')
+
+
+def downgrade():
+ op.add_column('feed', sa.Column('icon', sa.VARCHAR(), nullable=True))
+ if 'sqlite' not in conf.SQLALCHEMY_DATABASE_URI:
+ op.drop_constraint(None, 'feed', type_='foreignkey')
+ op.drop_column('feed', 'icon_url')
+ op.drop_table('icon')
diff --git a/pyaggr3g470r/controllers/__init__.py b/pyaggr3g470r/controllers/__init__.py
index d8d1a104..ad77fa1d 100644
--- a/pyaggr3g470r/controllers/__init__.py
+++ b/pyaggr3g470r/controllers/__init__.py
@@ -1,6 +1,8 @@
from .feed import FeedController
from .article import ArticleController
from .user import UserController
+from .icon import IconController
-__all__ = ['FeedController', 'ArticleController', 'UserController']
+__all__ = ['FeedController', 'ArticleController', 'UserController',
+ 'IconController']
diff --git a/pyaggr3g470r/controllers/abstract.py b/pyaggr3g470r/controllers/abstract.py
index 281e1415..f33d241e 100644
--- a/pyaggr3g470r/controllers/abstract.py
+++ b/pyaggr3g470r/controllers/abstract.py
@@ -65,7 +65,8 @@ class AbstractController(object):
dependant) and the user is not an admin and the filters doesn't already
contains a filter for that user.
"""
- if self.user_id and filters.get(self._user_id_key) != self.user_id:
+ if self._user_id_key is not None and self.user_id \
+ and filters.get(self._user_id_key) != self.user_id:
filters[self._user_id_key] = self.user_id
return self._db_cls.query.filter(*self._to_filters(**filters))
@@ -82,10 +83,11 @@ class AbstractController(object):
return obj
def create(self, **attrs):
- assert self._user_id_key in attrs or self.user_id is not None, \
+ assert self._user_id_key is None or self._user_id_key in attrs \
+ or self.user_id is not None, \
"You must provide user_id one way or another"
- if self._user_id_key not in attrs:
+ if self._user_id_key is not None and self._user_id_key not in attrs:
attrs[self._user_id_key] = self.user_id
obj = self._db_cls(**attrs)
db.session.add(obj)
@@ -108,5 +110,7 @@ class AbstractController(object):
def _has_right_on(self, obj):
# user_id == None is like being admin
+ if self._user_id_key is None:
+ return True
return self.user_id is None \
or getattr(obj, self._user_id_key, None) == self.user_id
diff --git a/pyaggr3g470r/controllers/feed.py b/pyaggr3g470r/controllers/feed.py
index 82714e39..6b3c4fb5 100644
--- a/pyaggr3g470r/controllers/feed.py
+++ b/pyaggr3g470r/controllers/feed.py
@@ -21,9 +21,11 @@
import logging
from datetime import datetime, timedelta
+from werkzeug.exceptions import NotFound
import conf
from .abstract import AbstractController
+from .icon import IconController
from pyaggr3g470r.models import Feed
logger = logging.getLogger(__name__)
@@ -52,3 +54,20 @@ class FeedController(AbstractController):
self.update({'id__in': [feed.id for feed in feeds]},
{'last_retrieved': now})
return feeds
+
+ def _ensure_icon(self, attrs):
+ if not attrs.get('icon_url'):
+ return
+ icon_contr = IconController()
+ try:
+ icon_contr.get(url=attrs['icon_url'])
+ except NotFound:
+ icon_contr.create(**{'url': attrs['icon_url']})
+
+ def create(self, **attrs):
+ self._ensure_icon(attrs)
+ return super().create(**attrs)
+
+ def update(self, filters, attrs):
+ self._ensure_icon(attrs)
+ return super().update(filters, attrs)
diff --git a/pyaggr3g470r/controllers/icon.py b/pyaggr3g470r/controllers/icon.py
new file mode 100644
index 00000000..194c601c
--- /dev/null
+++ b/pyaggr3g470r/controllers/icon.py
@@ -0,0 +1,23 @@
+import base64
+import requests
+from pyaggr3g470r.models import Icon
+from .abstract import AbstractController
+
+
+class IconController(AbstractController):
+ _db_cls = Icon
+ _user_id_key = None
+
+ def _build_from_url(self, attrs):
+ if 'url' in attrs and 'content' not in attrs:
+ resp = requests.get(attrs['url'], verify=False)
+ attrs.update({'url': resp.url,
+ 'mimetype': resp.headers.get('content-type', None),
+ 'content': base64.b64encode(resp.content).decode('utf8')})
+ return attrs
+
+ def create(self, **attrs):
+ return super().create(**self._build_from_url(attrs))
+
+ def update(self, filters, attrs):
+ return super().update(filters, self._build_from_url(attrs))
diff --git a/pyaggr3g470r/lib/crawler.py b/pyaggr3g470r/lib/crawler.py
index e4dc5955..216e7a96 100644
--- a/pyaggr3g470r/lib/crawler.py
+++ b/pyaggr3g470r/lib/crawler.py
@@ -141,7 +141,7 @@ class PyAggUpdater(AbstractCrawler):
strftime('%a, %d %b %Y %X %Z', gmtime()))}
fresh_feed = construct_feed_from(url=self.feed['link'],
fp_parsed=self.parsed_feed)
- for key in ('description', 'site_link', 'icon'):
+ for key in ('description', 'site_link', 'icon_url'):
if fresh_feed.get(key) and fresh_feed[key] != self.feed.get(key):
up_feed[key] = fresh_feed[key]
if not self.feed.get('title'):
diff --git a/pyaggr3g470r/lib/feed_utils.py b/pyaggr3g470r/lib/feed_utils.py
index 28123f66..aa9db29c 100644
--- a/pyaggr3g470r/lib/feed_utils.py
+++ b/pyaggr3g470r/lib/feed_utils.py
@@ -4,7 +4,7 @@ import requests
import feedparser
from bs4 import BeautifulSoup, SoupStrainer
-from pyaggr3g470r.lib.utils import try_keys, try_get_b64icon, rebuild_url
+from pyaggr3g470r.lib.utils import try_keys, try_get_icon_url, rebuild_url
logger = logging.getLogger(__name__)
@@ -29,7 +29,7 @@ def construct_feed_from(url=None, fp_parsed=None, feed=None, query_site=True):
feed['site_link'] = try_keys(fp_parsed['feed'], 'href', 'link')
feed['title'] = fp_parsed['feed'].get('title')
feed['description'] = try_keys(fp_parsed['feed'], 'subtitle', 'title')
- feed['icon'] = try_keys(fp_parsed['feed'], 'icon')
+ feed['icon_url'] = try_keys(fp_parsed['feed'], 'icon')
else:
feed['site_link'] = url
@@ -37,13 +37,14 @@ def construct_feed_from(url=None, fp_parsed=None, feed=None, query_site=True):
feed['site_link'] = rebuild_url(feed['site_link'], feed_split)
site_split = urllib.parse.urlsplit(feed['site_link'])
- if feed.get('icon'):
- feed['icon'] = try_get_b64icon(feed['icon'], site_split, feed_split)
- if feed['icon'] is None:
- del feed['icon']
+ if feed.get('icon_url'):
+ feed['icon_url'] = try_get_icon_url(
+ feed['icon_url'], site_split, feed_split)
+ if feed['icon_url'] is None:
+ del feed['icon_url']
if not feed.get('site_link') or not query_site \
- or all(bool(feed.get(key)) for key in ('link', 'title', 'icon')):
+ or all(bool(feed.get(k)) for k in ('link', 'title', 'icon_url')):
return feed
response = requests.get(feed['site_link'], verify=False)
@@ -66,22 +67,22 @@ def construct_feed_from(url=None, fp_parsed=None, feed=None, query_site=True):
return True
return wrapper
- if not feed.get('icon'):
+ if not feed.get('icon_url'):
icons = bs_parsed.find_all(check_keys(rel=['icon', 'shortcut']))
if not len(icons):
icons = bs_parsed.find_all(check_keys(rel=['icon']))
if len(icons) >= 1:
for icon in icons:
- feed['icon'] = try_get_b64icon(icon.attrs['href'],
- site_split, feed_split)
- if feed['icon'] is not None:
+ feed['icon_url'] = try_get_icon_url(icon.attrs['href'],
+ site_split, feed_split)
+ if feed['icon_url'] is not None:
break
- if feed.get('icon') is None:
- feed['icon'] = try_get_b64icon('/favicon.ico',
- site_split, feed_split)
- if 'icon' in feed and feed['icon'] is None:
- del feed['icon']
+ if feed.get('icon_url') is None:
+ feed['icon_url'] = try_get_icon_url('/favicon.ico',
+ site_split, feed_split)
+ if 'icon_url' in feed and feed['icon_url'] is None:
+ del feed['icon_url']
if not feed.get('link'):
alternates = bs_parsed.find_all(check_keys(rel=['alternate'],
diff --git a/pyaggr3g470r/lib/utils.py b/pyaggr3g470r/lib/utils.py
index b937b5a9..aa552a12 100644
--- a/pyaggr3g470r/lib/utils.py
+++ b/pyaggr3g470r/lib/utils.py
@@ -1,6 +1,5 @@
import types
import urllib
-import base64
import logging
import requests
from hashlib import md5
@@ -40,7 +39,7 @@ def rebuild_url(url, base_split):
return urllib.parse.urlunsplit(new_split)
-def try_get_b64icon(url, *splits):
+def try_get_icon_url(url, *splits):
for split in splits:
if split is None:
continue
@@ -49,10 +48,10 @@ def try_get_b64icon(url, *splits):
# if html in content-type, we assume it's a fancy 404 page
content_type = response.headers.get('content-type', '')
if response.ok and 'html' not in content_type and response.content:
- return content_type + (
- '\n%s' % base64.b64encode(response.content).decode('utf8'))
+ return response.url
return None
def to_hash(text):
- return md5(text.encode('utf8')).hexdigest()
+ return md5(text.encode('utf8') if hasattr(text, 'encode') else text)\
+ .hexdigest()
diff --git a/pyaggr3g470r/lib/view_utils.py b/pyaggr3g470r/lib/view_utils.py
index fa5e1eec..0cfe62c4 100644
--- a/pyaggr3g470r/lib/view_utils.py
+++ b/pyaggr3g470r/lib/view_utils.py
@@ -7,14 +7,20 @@ def etag_match(func):
@wraps(func)
def wrapper(*args, **kwargs):
response = func(*args, **kwargs)
- if not type(response) is str:
+ if isinstance(response, Response):
+ etag = to_hash(response.data)
+ headers = response.headers
+ elif type(response) is str:
+ etag = to_hash(response)
+ headers = {}
+ else:
return response
- etag = to_hash(response)
if request.headers.get('if-none-match') == etag:
- response = Response(status=304, headers={'etag': etag,
- 'Cache-Control': 'pragma: no-cache'})
- else:
+ response = Response(status=304)
+ response.headers['Cache-Control'] \
+ = headers.get('Cache-Control', 'pragma: no-cache')
+ elif not isinstance(response, Response):
response = make_response(response)
- response.headers['etag'] = etag
+ response.headers['etag'] = etag
return response
return wrapper
diff --git a/pyaggr3g470r/models/__init__.py b/pyaggr3g470r/models/__init__.py
index ba52b0de..b578094c 100644
--- a/pyaggr3g470r/models/__init__.py
+++ b/pyaggr3g470r/models/__init__.py
@@ -30,8 +30,9 @@ from .feed import Feed
from .role import Role
from .user import User
from .article import Article
+from .icon import Icon
-__all__ = ['Feed', 'Role', 'User', 'Article']
+__all__ = ['Feed', 'Role', 'User', 'Article', 'Icon']
import os
diff --git a/pyaggr3g470r/models/feed.py b/pyaggr3g470r/models/feed.py
index 75e55df1..07b0fc99 100644
--- a/pyaggr3g470r/models/feed.py
+++ b/pyaggr3g470r/models/feed.py
@@ -35,7 +35,7 @@ class Feed(db.Model):
"""
Represent a feed.
"""
- id = db.Column(db.Integer, primary_key=True)
+ id = db.Column(db.Integer(), primary_key=True)
title = db.Column(db.String(), default="")
description = db.Column(db.String(), default="FR")
link = db.Column(db.String())
@@ -43,7 +43,6 @@ class Feed(db.Model):
enabled = db.Column(db.Boolean(), default=True)
created_date = db.Column(db.DateTime(), default=datetime.now)
filters = db.Column(db.PickleType, default=[])
- icon = db.Column(db.String(), default="")
# cache handling
etag = db.Column(db.String(), default="")
@@ -55,7 +54,8 @@ class Feed(db.Model):
error_count = db.Column(db.Integer(), default=0)
# relationship
- user_id = db.Column(db.Integer, db.ForeignKey('user.id'))
+ icon_url = db.Column(db.String(), db.ForeignKey('icon.url'), default=None)
+ user_id = db.Column(db.Integer(), db.ForeignKey('user.id'))
articles = db.relationship('Article', backref='source', lazy='dynamic',
cascade='all,delete-orphan',
order_by=desc("Article.date"))
@@ -71,7 +71,7 @@ class Feed(db.Model):
"link": self.link,
"site_link": self.site_link,
"etag": self.etag,
- "icon": self.icon,
+ "icon_url": self.icon_url,
"error_count": self.error_count,
"last_modified": self.last_modified,
"last_retrieved": self.last_retrieved}
diff --git a/pyaggr3g470r/models/icon.py b/pyaggr3g470r/models/icon.py
new file mode 100644
index 00000000..22ef1164
--- /dev/null
+++ b/pyaggr3g470r/models/icon.py
@@ -0,0 +1,7 @@
+from bootstrap import db
+
+
+class Icon(db.Model):
+ url = db.Column(db.String(), primary_key=True)
+ content = db.Column(db.String(), default=None)
+ mimetype = db.Column(db.String(), default="application/image")
diff --git a/pyaggr3g470r/templates/feeds.html b/pyaggr3g470r/templates/feeds.html
index 20e0cccb..82af2411 100644
--- a/pyaggr3g470r/templates/feeds.html
+++ b/pyaggr3g470r/templates/feeds.html
@@ -29,7 +29,7 @@
{% endif %}
</td>
<td><a href="{{ url_for("feed.feed", feed_id=feed.id) }}" {% if feed.description %}title="{{ feed.description }}"{% endif %}>
- {% if feed.icon %}<img src="{{ url_for('feed.icon', feed_id=feed.id) }}" width="16px" />{% endif %}
+ {% if feed.icon_url %}<img src="{{ url_for('icon.icon', url=feed.icon_url) }}" width="16px" />{% endif %}
{{ feed.title }}
</a></td>
<td><a href="{{ feed.site_link }}">{{ feed.site_link }}</a></td>
diff --git a/pyaggr3g470r/templates/home.html b/pyaggr3g470r/templates/home.html
index 1577bd32..86d96e94 100644
--- a/pyaggr3g470r/templates/home.html
+++ b/pyaggr3g470r/templates/home.html
@@ -107,8 +107,8 @@
</td>
<td>
<a class="open-article" href="{{ url_for("article.redirect_to_article", article_id=article.id)}}" target="_blank" title="{{article.link}}" alt="{{article.link}}">
- {% if article.source.icon %}
- <img src="{{ url_for('feed.icon', feed_id=article.feed_id) }}" width="16px" />
+ {% if article.source.icon_url %}
+ <img src="{{ url_for('icon.icon', url=article.source.icon_url) }}" width="16px" />
{% else %}
<span class="glyphicon glyphicon-ban-circle" title='{{_("No icon found for this feed")}}' alt='{{_("No icon found for this feed")}}'></span>
{% endif %}
diff --git a/pyaggr3g470r/views/__init__.py b/pyaggr3g470r/views/__init__.py
index 029dcb7d..36d382bd 100644
--- a/pyaggr3g470r/views/__init__.py
+++ b/pyaggr3g470r/views/__init__.py
@@ -3,3 +3,4 @@ from .api import *
from .article import article_bp, articles_bp
from .feed import feed_bp, feeds_bp
+from .icon import icon_bp
diff --git a/pyaggr3g470r/views/api/feed.py b/pyaggr3g470r/views/api/feed.py
index ae2cd735..7d8cdf38 100644
--- a/pyaggr3g470r/views/api/feed.py
+++ b/pyaggr3g470r/views/api/feed.py
@@ -3,7 +3,6 @@
from flask import g
-import conf
from pyaggr3g470r.controllers.feed import (FeedController,
DEFAULT_MAX_ERROR,
DEFAULT_LIMIT,
@@ -21,7 +20,7 @@ FEED_ATTRS = {'title': {'type': str},
'site_link': {'type': str},
'enabled': {'type': bool, 'default': True},
'etag': {'type': str, 'default': ''},
- 'icon': {'type': str, 'default': ''},
+ 'icon_url': {'type': str, 'default': ''},
'last_modified': {'type': str},
'last_retrieved': {'type': str},
'last_error': {'type': str},
diff --git a/pyaggr3g470r/views/feed.py b/pyaggr3g470r/views/feed.py
index 38c7869c..afb51903 100644
--- a/pyaggr3g470r/views/feed.py
+++ b/pyaggr3g470r/views/feed.py
@@ -200,20 +200,3 @@ def process_form(feed_id=None):
flash(gettext("Downloading articles for the new feed..."), 'info')
return redirect(url_for('feed.form', feed_id=new_feed.id))
-
-
-@feed_bp.route('/icon/<int:feed_id>', methods=['GET'])
-@login_required
-def icon(feed_id):
- icon = FeedController(None if g.user.is_admin() else g.user.id)\
- .get(id=feed_id).icon
- etag = md5(icon.encode('utf8')).hexdigest()
- headers = {'Cache-Control': 'max-age=86400', 'etag': etag}
- if request.headers.get('if-none-match') == etag:
- return Response(status=304, headers=headers)
- if '\n' in icon:
- content_type, *_, icon = icon.split()
- headers['content-type'] = content_type
- else:
- headers['content-type'] = 'application/image'
- return Response(base64.b64decode(icon), headers=headers)
diff --git a/pyaggr3g470r/views/icon.py b/pyaggr3g470r/views/icon.py
new file mode 100644
index 00000000..2f51304a
--- /dev/null
+++ b/pyaggr3g470r/views/icon.py
@@ -0,0 +1,14 @@
+import base64
+from flask import Blueprint, Response, request
+from pyaggr3g470r.controllers import IconController
+from pyaggr3g470r.lib.view_utils import etag_match
+
+icon_bp = Blueprint('icon', __name__, url_prefix='/icon')
+
+@icon_bp.route('/', methods=['GET'])
+@etag_match
+def icon():
+ icon = IconController().get(url=request.args['url'])
+ headers = {'Cache-Control': 'max-age=86400',
+ 'Content-Type': icon.mimetype}
+ return Response(base64.b64decode(icon.content), headers=headers)
diff --git a/runserver.py b/runserver.py
index 5f20ddd4..ccd8bc60 100755
--- a/runserver.py
+++ b/runserver.py
@@ -51,6 +51,7 @@ with application.app_context():
application.register_blueprint(views.article_bp)
application.register_blueprint(views.feeds_bp)
application.register_blueprint(views.feed_bp)
+ application.register_blueprint(views.icon_bp)
if __name__ == '__main__':
diff --git a/scripts/probes.py b/scripts/probes.py
index bfad4e6e..4c632184 100644
--- a/scripts/probes.py
+++ b/scripts/probes.py
@@ -45,8 +45,10 @@ class FeedProbe(AbstractMuninPlugin):
def execute(self):
delta = datetime.now() - timedelta(minutes=LATE_AFTER + FETCH_RATE + 1)
+ total = FeedController().read().count()
- print("feeds.value %d" % len(FeedController().list_late(delta)))
+ print("feeds.value %d"
+ % len(FeedController().list_late(delta, limit=total)))
print("feeds_total.value %d" % FeedController().read().count())
bgstack15