author     Cédric Bonhomme <cedric@cedricbonhomme.org>  2016-01-04 22:39:48 +0100
committer  Cédric Bonhomme <cedric@cedricbonhomme.org>  2016-01-04 22:39:48 +0100
commit     44edd51e211ab4c0e192a41423b1e57474754198 (patch)
tree       125fc2f3223781bec30ea82116fdfa13984784a9
parent     Add the possibility to mark as read articles older than 5 days. (diff)
Minor improvements to the crawler (logging of errors).
-rwxr-xr-x  src/manager.py               |  2
-rw-r--r--  src/web/crawler.py           | 32
-rw-r--r--  src/web/templates/feed.html  |  2
3 files changed, 20 insertions, 16 deletions
diff --git a/src/manager.py b/src/manager.py
index 5aedbe18..6967176e 100755
--- a/src/manager.py
+++ b/src/manager.py
@@ -44,7 +44,7 @@ def fetch_asyncio(user_id, feed_id):
     from flask import g
     from web.models import User
     from web import crawler
-    users, feed_id = [], None
+    users = []
     try:
         users = User.query.filter(User.id == int(user_id)).all()
     except:
diff --git a/src/web/crawler.py b/src/web/crawler.py
index 47ce97ce..ad27fd4b 100644
--- a/src/web/crawler.py
+++ b/src/web/crawler.py
@@ -70,38 +70,42 @@ async def get(*args, **kwargs):
 async def parse_feed(user, feed):
     """
     Fetch a feed.
+    Update the feed and return the articles.
     """
-    a_feed = None
+    parsed_feed = None
+    up_feed = {}
+    articles = []
     with (await sem):
         try:
-            a_feed = await get(feed.link)
+            parsed_feed = await get(feed.link)
         except Exception as e:
-            feed.last_error = str(e)
+            up_feed['last_error'] = str(e)
+            up_feed['error_count'] = feed.error_count + 1
         finally:
-            if a_feed is None:
-                feed.error_count += 1
-                db.session.commit()
+            up_feed['last_retrieved'] = datetime.now(dateutil.tz.tzlocal())
+            if parsed_feed is None:
+                FeedController().update({'id': feed.id}, up_feed)
                 return

-    up_feed = {}
-    if a_feed['bozo'] == 1:
-        up_feed['last_error'] = str(a_feed['bozo_exception'])
+    if parsed_feed['bozo'] == 1:
+        up_feed['last_error'] = str(parsed_feed['bozo_exception'])
         up_feed['error_count'] = feed.error_count + 1
-        db.session.commit()
-    if a_feed['entries'] == []:
+        FeedController().update({'id': feed.id}, up_feed)
         return
+    if parsed_feed['entries'] != []:
+        articles = parsed_feed['entries']

-    up_feed['last_retrieved'] = datetime.now(dateutil.tz.tzlocal())
     up_feed['error_count'] = 0
     up_feed['last_error'] = ""

     # Feed informations
-    up_feed.update(construct_feed_from(feed.link, a_feed))
+    construct_feed_from(feed.link, parsed_feed).update(up_feed)
     if feed.title and 'title' in up_feed:
+        # do not override the title set by the user
         del up_feed['title']
     FeedController().update({'id': feed.id}, up_feed)
-    return a_feed['entries']
+    return articles


 async def insert_database(user, feed):
diff --git a/src/web/templates/feed.html b/src/web/templates/feed.html
index c421a411..cce74b19 100644
--- a/src/web/templates/feed.html
+++ b/src/web/templates/feed.html
@@ -21,7 +21,7 @@
         {{ _("Last download:") }} {{ feed.last_retrieved | datetime }}<br />
     {% endif %}
-    {% if feed.error_count >= conf.DEFAULT_MAX_ERROR %}
+    {% if feed.error_count > conf.DEFAULT_MAX_ERROR %}
         <b>{{ _("That feed has encountered too much consecutive errors and won't be retrieved anymore.") }}</b><br />
         {{ _("You can click <a href='%(reset_error_url)s'>here</a> to reset the error count and reactivate the feed.", reset_error_url=url_for("feed.reset_errors", feed_id=feed.id)) }}
     {% elif feed.error_count > 0 %}
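For context, the change in parse_feed moves error bookkeeping from mutating the Feed model and committing through db.session to accumulating the new column values in a plain dict (up_feed) and persisting them in a single FeedController().update() call. The following is a minimal standalone sketch of that pattern, not the project's actual code: the Feed, FeedController and fetch names below are hypothetical stand-ins, and only the control flow mirrors the patch.

    # Sketch of the "accumulate updates in a dict, write once" pattern
    # introduced in parse_feed. All classes here are simplified stand-ins.
    from datetime import datetime


    class Feed:
        """Hypothetical stand-in for the web.models Feed object."""
        def __init__(self, id, link, title="", error_count=0):
            self.id = id
            self.link = link
            self.title = title
            self.error_count = error_count


    class FeedController:
        """Hypothetical stand-in: persists a dict of column/value pairs."""
        def update(self, filters, values):
            print("UPDATE feed", filters, "SET", values)


    def fetch(link):
        """Hypothetical fetcher; always fails to show the error path."""
        raise IOError("connection refused")


    def parse_feed(feed):
        parsed_feed = None
        up_feed = {}
        articles = []

        try:
            parsed_feed = fetch(feed.link)
        except Exception as e:
            # Record the error on the feed instead of dropping it silently.
            up_feed['last_error'] = str(e)
            up_feed['error_count'] = feed.error_count + 1
        finally:
            up_feed['last_retrieved'] = datetime.now()
            if parsed_feed is None:
                # One write persists both the error and the retrieval time.
                FeedController().update({'id': feed.id}, up_feed)
                return articles

        # Successful fetch: reset the error counters in the same update dict.
        up_feed['error_count'] = 0
        up_feed['last_error'] = ""
        articles = parsed_feed.get('entries', [])
        FeedController().update({'id': feed.id}, up_feed)
        return articles


    if __name__ == '__main__':
        parse_feed(Feed(id=1, link="http://example.org/feed"))

Collecting the changes in one dict means a failed fetch still records last_retrieved, last_error and error_count in a single database write, which is what the template change relies on when it compares feed.error_count against conf.DEFAULT_MAX_ERROR.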