about summary refs log tree commit diff
path: root/src/web/crawler.py
diff options
context:
space:
mode:
author Cédric Bonhomme <cedric@cedricbonhomme.org> 2016-01-04 22:39:48 +0100
committer Cédric Bonhomme <cedric@cedricbonhomme.org> 2016-01-04 22:39:48 +0100
commit 44edd51e211ab4c0e192a41423b1e57474754198 (patch)
tree 125fc2f3223781bec30ea82116fdfa13984784a9 /src/web/crawler.py
parent Add the possibility to mark as read articles older than 5 days. (diff)
download newspipe-44edd51e211ab4c0e192a41423b1e57474754198.tar.gz
newspipe-44edd51e211ab4c0e192a41423b1e57474754198.tar.bz2
newspipe-44edd51e211ab4c0e192a41423b1e57474754198.zip
Minor improvements to the crawler (logging of errors).
Diffstat (limited to 'src/web/crawler.py')
-rw-r--r-- src/web/crawler.py 32
1 files changed, 18 insertions, 14 deletions
diff --git a/src/web/crawler.py b/src/web/crawler.py
index 47ce97ce..ad27fd4b 100644
--- a/src/web/crawler.py
+++ b/src/web/crawler.py
@@ -70,38 +70,42 @@ async def get(*args, **kwargs):
async def parse_feed(user, feed):
"""
Fetch a feed.
+ Update the feed and return the articles.
"""
- a_feed = None
+ parsed_feed = None
+ up_feed = {}
+ articles = []
with (await sem):
try:
- a_feed = await get(feed.link)
+ parsed_feed = await get(feed.link)
except Exception as e:
- feed.last_error = str(e)
+ up_feed['last_error'] = str(e)
+ up_feed['error_count'] = feed.error_count + 1
finally:
- if a_feed is None:
- feed.error_count += 1
- db.session.commit()
+ up_feed['last_retrieved'] = datetime.now(dateutil.tz.tzlocal())
+ if parsed_feed is None:
+ FeedController().update({'id': feed.id}, up_feed)
return
- up_feed = {}
- if a_feed['bozo'] == 1:
- up_feed['last_error'] = str(a_feed['bozo_exception'])
+ if parsed_feed['bozo'] == 1:
+ up_feed['last_error'] = str(parsed_feed['bozo_exception'])
up_feed['error_count'] = feed.error_count + 1
- db.session.commit()
- if a_feed['entries'] == []:
+ FeedController().update({'id': feed.id}, up_feed)
return
+ if parsed_feed['entries'] != []:
+ articles = parsed_feed['entries']
- up_feed['last_retrieved'] = datetime.now(dateutil.tz.tzlocal())
up_feed['error_count'] = 0
up_feed['last_error'] = ""
# Feed informations
- up_feed.update(construct_feed_from(feed.link, a_feed))
+ construct_feed_from(feed.link, parsed_feed).update(up_feed)
if feed.title and 'title' in up_feed:
+ # do not override the title set by the user
del up_feed['title']
FeedController().update({'id': feed.id}, up_feed)
- return a_feed['entries']
+ return articles
async def insert_database(user, feed):
bgstack15