diff options
author | François Schmidts <francois.schmidts@gmail.com> | 2015-07-07 11:21:51 +0200 |
---|---|---|
committer | François Schmidts <francois.schmidts@gmail.com> | 2015-07-07 11:49:25 +0200 |
commit | dba5533af05a63cd2cb8ad8bdaf62c38b19ea71b (patch) | |
tree | 7469c1ae181cc13993f8f936ea33b3e974c0fe7a /pyaggr3g470r/lib | |
parent | protecting pyagg against empty feed (diff) | |
download | newspipe-dba5533af05a63cd2cb8ad8bdaf62c38b19ea71b.tar.gz newspipe-dba5533af05a63cd2cb8ad8bdaf62c38b19ea71b.tar.bz2 newspipe-dba5533af05a63cd2cb8ad8bdaf62c38b19ea71b.zip |
making the crawler fetch high-traffic feeds earlier
Diffstat (limited to 'pyaggr3g470r/lib')
-rw-r--r-- | pyaggr3g470r/lib/crawler.py | 7 |
1 file changed, 7 insertions, 0 deletions
diff --git a/pyaggr3g470r/lib/crawler.py b/pyaggr3g470r/lib/crawler.py index de557e45..e5998776 100644 --- a/pyaggr3g470r/lib/crawler.py +++ b/pyaggr3g470r/lib/crawler.py @@ -17,6 +17,7 @@ import conf import json import logging import feedparser +from datetime import datetime, timedelta from functools import wraps from time import strftime, gmtime from concurrent.futures import ThreadPoolExecutor @@ -118,7 +119,9 @@ class PyAggUpdater(AbstractCrawler): results = response.result().json() logger.debug('%r %r - %d entries were not matched and will be created', self.feed['id'], self.feed['title'], len(results)) + article_created = False for id_to_create in results: + article_created = True entry = construct_article( self.entries[tuple(sorted(id_to_create.items()))], self.feed) @@ -144,6 +147,10 @@ class PyAggUpdater(AbstractCrawler): if not self.feed.get('title'): up_feed['title'] = fresh_feed.get('title', '') up_feed['user_id'] = self.feed['user_id'] + # re-getting that feed earlier since new entries appeared + if article_created: + up_feed['last_retrieved'] \ + = (datetime.now() - timedelta(minutes=45)).isoformat() logger.info('%r %r - pushing feed attrs %r', self.feed['id'], self.feed['title'], |