From 2d72f44a90a76fe7450e59fdfdf4d42f44b9cd96 Mon Sep 17 00:00:00 2001 From: Cédric Bonhomme Date: Tue, 8 Nov 2016 14:39:47 +0100 Subject: various improvements to the crawler (better use of coroutines, test if an article should be updated). tags are now retrieved for the k-means clustering (previously achieved with the content of articles) --- src/web/controllers/abstract.py | 7 ++----- src/web/controllers/article.py | 29 ++++++----------------------- 2 files changed, 8 insertions(+), 28 deletions(-) (limited to 'src/web/controllers') diff --git a/src/web/controllers/abstract.py b/src/web/controllers/abstract.py index 58532660..3c91e08a 100644 --- a/src/web/controllers/abstract.py +++ b/src/web/controllers/abstract.py @@ -91,11 +91,8 @@ class AbstractController: obj = self._db_cls(**attrs) db.session.add(obj) - try: - db.session.commit() - except Exception as e: - db.session.rollback() - logger.exception(str(e)) + db.session.flush() + db.session.commit() return obj def read(self, **filters): diff --git a/src/web/controllers/article.py b/src/web/controllers/article.py index 02c8fc75..4607b225 100644 --- a/src/web/controllers/article.py +++ b/src/web/controllers/article.py @@ -6,6 +6,7 @@ from collections import Counter from bootstrap import db from .abstract import AbstractController +from web.lib.article_utils import process_filters from web.controllers import CategoryController, FeedController from web.models import Article @@ -43,29 +44,11 @@ class ArticleController(AbstractController): "no right on feed %r" % feed.id attrs['user_id'], attrs['category_id'] = feed.user_id, feed.category_id - # handling feed's filters - for filter_ in feed.filters or []: - match = False - if filter_.get('type') == 'regex': - match = re.match(filter_['pattern'], attrs.get('title', '')) - elif filter_.get('type') == 'simple match': - match = filter_['pattern'] in attrs.get('title', '') - take_action = match and filter_.get('action on') == 'match' \ - or not match and
filter_.get('action on') == 'no match' - - if not take_action: - continue - - if filter_.get('action') == 'mark as read': - attrs['readed'] = True - logger.warn("article %s will be created as read", - attrs['link']) - elif filter_.get('action') == 'mark as favorite': - attrs['like'] = True - logger.warn("article %s will be created as liked", - attrs['link']) - - return super().create(**attrs) + skipped, read, liked = process_filters(feed.filters, attrs) + if skipped: + return None + article = super().create(**attrs) + return article def update(self, filters, attrs): user_id = attrs.get('user_id', self.user_id) -- cgit