aboutsummaryrefslogtreecommitdiff
path: root/pyaggr3g470r/lib/crawler.py
diff options
context:
space:
mode:
author François Schmidts <francois.schmidts@gmail.com> 2015-03-06 11:07:43 +0100
committer François Schmidts <francois.schmidts@gmail.com> 2015-03-06 11:07:43 +0100
commit 822e59f043ba7b12962c5e65f59f2fd33a339f54 (patch)
tree e92c92daa8e81d7b52640d301dc615d9911ce902 /pyaggr3g470r/lib/crawler.py
parent correcting wait counter and reactivating last_retrieved (diff)
download newspipe-822e59f043ba7b12962c5e65f59f2fd33a339f54.tar.gz
newspipe-822e59f043ba7b12962c5e65f59f2fd33a339f54.tar.bz2
newspipe-822e59f043ba7b12962c5e65f59f2fd33a339f54.zip
better crawling control
Diffstat (limited to 'pyaggr3g470r/lib/crawler.py')
-rw-r--r-- pyaggr3g470r/lib/crawler.py 12
1 files changed, 8 insertions, 4 deletions
diff --git a/pyaggr3g470r/lib/crawler.py b/pyaggr3g470r/lib/crawler.py
index 97f14363..8e61b7cf 100644
--- a/pyaggr3g470r/lib/crawler.py
+++ b/pyaggr3g470r/lib/crawler.py
@@ -111,16 +111,16 @@ class AbstractCrawler:
"See count_on_me, that method will just wait for the counter to be 0"
time.sleep(1)
while cls.__counter__:
- print('running %d' % cls.__counter__)
time.sleep(1)
class PyAggUpdater(AbstractCrawler):
- def __init__(self, feed, entries, headers, auth):
+ def __init__(self, feed, entries, headers, parsed_feed, auth):
self.feed = feed
self.entries = entries
self.headers = headers
+ self.parsed_feed = parsed_feed.get('feed', {})
super(PyAggUpdater, self).__init__(auth)
def to_article(self, entry):
@@ -171,7 +171,10 @@ class PyAggUpdater(AbstractCrawler):
dico = {'error_count': 0, 'last_error': '',
'etag': self.headers.get('etag', ''),
- 'last_modified': self.headers.get('last-modified', '')}
+ 'last_modified': self.headers.get('last-modified', ''),
+ 'site_link': self.parsed_feed.get('link')}
+ if not self.feed.get('title'):
+ dico['title'] = self.parsed_feed.get('title', '')
if any([dico[key] == self.feed.get(key) for key in dico]):
future = self.query_pyagg('put', 'feed/%d' % self.feed['id'], dico)
future.add_done_callback(self.get_counter_callback())
@@ -229,7 +232,8 @@ class FeedCrawler(AbstractCrawler):
logger.debug('%r %r - found %d entries %r',
self.feed['id'], self.feed['title'], len(ids), ids)
future = self.query_pyagg('get', 'articles/challenge', {'ids': ids})
- updater = PyAggUpdater(self.feed, entries, response.headers, self.auth)
+ updater = PyAggUpdater(self.feed, entries, response.headers,
+ parsed_response, self.auth)
future.add_done_callback(updater.callback)
bgstack15