about | summary | refs | log | tree | commit | diff
path: root/pyaggr3g470r/lib/crawler.py
diff options
context:
space:
mode:
author: François Schmidts <francois.schmidts@gmail.com> 2015-10-21 17:03:56 +0200
committer: François Schmidts <francois.schmidts@gmail.com> 2015-10-21 17:03:56 +0200
commit: d119bf7a5bc33e6419b0f9b2e06ede5ad8ab4704 (patch)
tree: 3a97b069f7f28d0680ccaf0513dde52a89dc7cc9 /pyaggr3g470r/lib/crawler.py
parent: fixing warning SAWarning: Can't resolve label reference 'Feed.last_retrieved' (diff)
download: newspipe-d119bf7a5bc33e6419b0f9b2e06ede5ad8ab4704.tar.gz
newspipe-d119bf7a5bc33e6419b0f9b2e06ede5ad8ab4704.tar.bz2
newspipe-d119bf7a5bc33e6419b0f9b2e06ede5ad8ab4704.zip
Limit feed-editing requests: skip the update when nothing changed, and skip the pointless update of only etag and last_modified when no new article was created.
Diffstat (limited to 'pyaggr3g470r/lib/crawler.py')
-rw-r--r-- pyaggr3g470r/lib/crawler.py 20
1 file changed, 12 insertions, 8 deletions
diff --git a/pyaggr3g470r/lib/crawler.py b/pyaggr3g470r/lib/crawler.py
index 3f62792f..62d41494 100644
--- a/pyaggr3g470r/lib/crawler.py
+++ b/pyaggr3g470r/lib/crawler.py
@@ -121,14 +121,18 @@ class PyAggUpdater(AbstractCrawler):
up_feed['last_retrieved'] \
= (datetime.now() - timedelta(minutes=45)).isoformat()
- if any([up_feed[key] != self.feed.get(key) for key in up_feed]):
- logger.warn('%r %r - pushing feed attrs %r',
- self.feed['id'], self.feed['title'],
- {key: "%s -> %s" % (up_feed[key], self.feed.get(key))
- for key in up_feed if up_feed[key] != self.feed.get(key)})
-
- future = self.query_pyagg('put',
- 'feed/%d' % self.feed['id'], up_feed)
+ diff_keys = {key for key in up_feed
+ if up_feed[key] != self.feed.get(key)}
+ if not diff_keys:
+ return # no change in the feed, no update
+ if not article_created and diff_keys == {'last_modified', 'etag'}:
+ return # meaningless if no new article has been published
+ logger.info('%r %r - pushing feed attrs %r',
+ self.feed['id'], self.feed['title'],
+ {key: "%s -> %s" % (up_feed[key], self.feed.get(key))
+ for key in up_feed if up_feed[key] != self.feed.get(key)})
+
+ future = self.query_pyagg('put', 'feed/%d' % self.feed['id'], up_feed)
class FeedCrawler(AbstractCrawler):
bgstack15