From 5572851eca3b2f1bc56aed7232284acc436d2f49 Mon Sep 17 00:00:00 2001 From: François Schmidts Date: Sun, 1 Mar 2015 03:20:12 +0100 Subject: new crawler with cache control and error handling --- pyaggr3g470r/controllers/abstract.py | 15 ++++++++++++--- pyaggr3g470r/controllers/article.py | 11 +++++++++-- pyaggr3g470r/controllers/feed.py | 17 ++++++----------- 3 files changed, 27 insertions(+), 16 deletions(-) (limited to 'pyaggr3g470r/controllers') diff --git a/pyaggr3g470r/controllers/abstract.py b/pyaggr3g470r/controllers/abstract.py index 6fe45461..ebb73e30 100644 --- a/pyaggr3g470r/controllers/abstract.py +++ b/pyaggr3g470r/controllers/abstract.py @@ -1,5 +1,9 @@ +import logging from bootstrap import db -from pyaggr3g470r.lib.exceptions import Forbidden, NotFound +from sqlalchemy import update +from werkzeug.exceptions import Forbidden, NotFound + +logger = logging.getLogger(__name__) class AbstractController(object): @@ -9,7 +13,7 @@ class AbstractController(object): def __init__(self, user_id): self.user_id = user_id - def _get(self, **filters): + def _to_filters(self, **filters): if self.user_id: filters[self._user_id_key] = self.user_id db_filters = set() @@ -28,7 +32,10 @@ class AbstractController(object): db_filters.add(getattr(self._db_cls, key[:-4]).in_(value)) else: db_filters.add(getattr(self._db_cls, key) == value) - return self._db_cls.query.filter(*db_filters) + return db_filters + + def _get(self, **filters): + return self._db_cls.query.filter(*self._to_filters(**filters)) def get(self, **filters): obj = self._get(**filters).first() @@ -41,7 +48,9 @@ class AbstractController(object): return obj def create(self, **attrs): + attrs['user_id'] = self.user_id obj = self._db_cls(**attrs) + db.session.add(obj) db.session.commit() return obj diff --git a/pyaggr3g470r/controllers/article.py b/pyaggr3g470r/controllers/article.py index 0de223ee..46ca0988 100644 --- a/pyaggr3g470r/controllers/article.py +++ b/pyaggr3g470r/controllers/article.py @@ -7,9 +7,9 @@ class ArticleController(AbstractController): _db_cls = Article def get(self, **filters): - article = super(ArticleController, self).read(**filters) + article = super(ArticleController, self).get(**filters) if not article.readed: - self.update(article.id, readed=True) + self.update({'id': article.id}, {'readed': True}) return article def delete(self, obj_id): @@ -18,3 +18,10 @@ class ArticleController(AbstractController): import pyaggr3g470r.search as fastsearch fastsearch.delete_article(self.user_id, obj.feed_id, obj_id) return obj + + def challenge(self, ids): + """Will return each id that wasn't found in the database.""" + for id_ in ids: + if self.read(**id_).first(): + continue + yield id_ diff --git a/pyaggr3g470r/controllers/feed.py b/pyaggr3g470r/controllers/feed.py index 56cef997..286bea1e 100644 --- a/pyaggr3g470r/controllers/feed.py +++ b/pyaggr3g470r/controllers/feed.py @@ -13,17 +13,12 @@ class FeedController(AbstractController): from pyaggr3g470r.controllers import UserController now = datetime.now() user = UserController(self.user_id).get(id=self.user_id) - max_last_refresh = now - timedelta(minutes=user.refresh_rate or 60) + max_last = now - timedelta(minutes=user.refresh_rate or 60) feeds = [feed for feed in self.read(user_id=self.user_id, - error_count__le=max_error, - last_modified=max_last_refresh).limit(limit)] + error_count__le=max_error, enabled=True, + last_retreived__lt=max_last).limit(limit)] - self.update({'id__in': [feed.id for feed in feeds]}, - {'last_modified': now}) + if feeds: + self.update({'id__in': [feed.id for feed in feeds]}, + {'last_retreived': now}) return feeds - - def list_last_articles(self, feed_id, limit=50): - from pyaggr3g470r.controllers import ArticleController - return ArticleController(self.user_id)._get(feed_id=feed_id)\ - .order_by(ArticleController._db_cls.retrieved_date.desc())\ - .limit(limit) -- cgit