diff options
author | François Schmidts <francois.schmidts@gmail.com> | 2015-10-21 17:03:56 +0200 |
---|---|---|
committer | François Schmidts <francois.schmidts@gmail.com> | 2015-10-21 17:03:56 +0200 |
commit | d119bf7a5bc33e6419b0f9b2e06ede5ad8ab4704 (patch) | |
tree | 3a97b069f7f28d0680ccaf0513dde52a89dc7cc9 /pyaggr3g470r/lib/crawler.py | |
parent | fixing warning SAWarning: Can't resolve label reference 'Feed.last_retrieved' (diff) | |
download | newspipe-d119bf7a5bc33e6419b0f9b2e06ede5ad8ab4704.tar.gz newspipe-d119bf7a5bc33e6419b0f9b2e06ede5ad8ab4704.tar.bz2 newspipe-d119bf7a5bc33e6419b0f9b2e06ede5ad8ab4704.zip |
Limit feed-editing requests by skipping updates that only change etag and last_modified, which are meaningless when no new article has been published
Diffstat (limited to 'pyaggr3g470r/lib/crawler.py')
-rw-r--r-- | pyaggr3g470r/lib/crawler.py | 20 |
1 files changed, 12 insertions, 8 deletions
```diff
diff --git a/pyaggr3g470r/lib/crawler.py b/pyaggr3g470r/lib/crawler.py
index 3f62792f..62d41494 100644
--- a/pyaggr3g470r/lib/crawler.py
+++ b/pyaggr3g470r/lib/crawler.py
@@ -121,14 +121,18 @@ class PyAggUpdater(AbstractCrawler):
             up_feed['last_retrieved'] \
                     = (datetime.now() - timedelta(minutes=45)).isoformat()
 
-        if any([up_feed[key] != self.feed.get(key) for key in up_feed]):
-            logger.warn('%r %r - pushing feed attrs %r',
-                    self.feed['id'], self.feed['title'],
-                    {key: "%s -> %s" % (up_feed[key], self.feed.get(key))
-                     for key in up_feed if up_feed[key] != self.feed.get(key)})
-
-            future = self.query_pyagg('put',
-                    'feed/%d' % self.feed['id'], up_feed)
+        diff_keys = {key for key in up_feed
+                     if up_feed[key] != self.feed.get(key)}
+        if not diff_keys:
+            return # no change in the feed, no update
+        if not article_created and diff_keys == {'last_modified', 'etag'}:
+            return # meaningless if no new article has been published
+        logger.info('%r %r - pushing feed attrs %r',
+                self.feed['id'], self.feed['title'],
+                {key: "%s -> %s" % (up_feed[key], self.feed.get(key))
+                 for key in up_feed if up_feed[key] != self.feed.get(key)})
+
+        future = self.query_pyagg('put', 'feed/%d' % self.feed['id'], up_feed)
 
 
 class FeedCrawler(AbstractCrawler):
```