From 44edd51e211ab4c0e192a41423b1e57474754198 Mon Sep 17 00:00:00 2001 From: Cédric Bonhomme Date: Mon, 4 Jan 2016 22:39:48 +0100 Subject: Minor improvements to the crawler (logging of errors). --- src/manager.py | 2 +- src/web/crawler.py | 32 ++++++++++++++++++-------------- src/web/templates/feed.html | 2 +- 3 files changed, 20 insertions(+), 16 deletions(-) diff --git a/src/manager.py b/src/manager.py index 5aedbe18..6967176e 100755 --- a/src/manager.py +++ b/src/manager.py @@ -44,7 +44,7 @@ def fetch_asyncio(user_id, feed_id): from flask import g from web.models import User from web import crawler - users, feed_id = [], None + users = [] try: users = User.query.filter(User.id == int(user_id)).all() except: diff --git a/src/web/crawler.py b/src/web/crawler.py index 47ce97ce..ad27fd4b 100644 --- a/src/web/crawler.py +++ b/src/web/crawler.py @@ -70,38 +70,42 @@ async def get(*args, **kwargs): async def parse_feed(user, feed): """ Fetch a feed. + Update the feed and return the articles. """ - a_feed = None + parsed_feed = None + up_feed = {} + articles = [] with (await sem): try: - a_feed = await get(feed.link) + parsed_feed = await get(feed.link) except Exception as e: - feed.last_error = str(e) + up_feed['last_error'] = str(e) + up_feed['error_count'] = feed.error_count + 1 finally: - if a_feed is None: - feed.error_count += 1 - db.session.commit() + up_feed['last_retrieved'] = datetime.now(dateutil.tz.tzlocal()) + if parsed_feed is None: + FeedController().update({'id': feed.id}, up_feed) return - up_feed = {} - if a_feed['bozo'] == 1: - up_feed['last_error'] = str(a_feed['bozo_exception']) + if parsed_feed['bozo'] == 1: + up_feed['last_error'] = str(parsed_feed['bozo_exception']) up_feed['error_count'] = feed.error_count + 1 - db.session.commit() - if a_feed['entries'] == []: + FeedController().update({'id': feed.id}, up_feed) return + if parsed_feed['entries'] != []: + articles = parsed_feed['entries'] - up_feed['last_retrieved'] = datetime.now(dateutil.tz.tzlocal()) up_feed['error_count'] = 0 up_feed['last_error'] = "" # Feed informations - up_feed.update(construct_feed_from(feed.link, a_feed)) + construct_feed_from(feed.link, parsed_feed).update(up_feed) if feed.title and 'title' in up_feed: + # do not override the title set by the user del up_feed['title'] FeedController().update({'id': feed.id}, up_feed) - return a_feed['entries'] + return articles async def insert_database(user, feed): diff --git a/src/web/templates/feed.html b/src/web/templates/feed.html index c421a411..cce74b19 100644 --- a/src/web/templates/feed.html +++ b/src/web/templates/feed.html @@ -21,7 +21,7 @@ {{ _("Last download:") }} {{ feed.last_retrieved | datetime }}
{% endif %} - {% if feed.error_count >= conf.DEFAULT_MAX_ERROR %} + {% if feed.error_count > conf.DEFAULT_MAX_ERROR %} {{ _("That feed has encountered too much consecutive errors and won't be retrieved anymore.") }}
{{ _("You can click here to reset the error count and reactivate the feed.", reset_error_url=url_for("feed.reset_errors", feed_id=feed.id)) }} {% elif feed.error_count > 0 %} -- cgit