diffstat
 pyaggr3g470r/lib/crawler.py | 7 +++----
 pyaggr3g470r/lib/utils.py   | 2 +-
 2 files changed, 5 insertions(+), 4 deletions(-)
diff --git a/pyaggr3g470r/lib/crawler.py b/pyaggr3g470r/lib/crawler.py
index e5998776..e4dc5955 100644
--- a/pyaggr3g470r/lib/crawler.py
+++ b/pyaggr3g470r/lib/crawler.py
@@ -125,7 +125,7 @@ class PyAggUpdater(AbstractCrawler):
             entry = construct_article(
                     self.entries[tuple(sorted(id_to_create.items()))],
                     self.feed)
-            logger.warn('%r %r - creating %r for %r - %r', self.feed['id'],
+            logger.info('%r %r - creating %r for %r - %r', self.feed['id'],
                         self.feed['title'], entry['title'],
                         entry['user_id'], id_to_create)
             self.query_pyagg('post', 'article', entry)
@@ -152,11 +152,12 @@ class PyAggUpdater(AbstractCrawler):
             up_feed['last_retrieved'] \
                     = (datetime.now() - timedelta(minutes=45)).isoformat()
 
-        logger.info('%r %r - pushing feed attrs %r',
+        if any([up_feed[key] != self.feed.get(key) for key in up_feed]):
+            logger.warn('%r %r - pushing feed attrs %r',
                 self.feed['id'], self.feed['title'],
                 {key: "%s -> %s" % (up_feed[key], self.feed.get(key))
                  for key in up_feed if up_feed[key] != self.feed.get(key)})
-        if any([up_feed[key] != self.feed.get(key) for key in up_feed]):
+
             future = self.query_pyagg('put', 'feed/%d' % self.feed['id'], up_feed)
             future.add_done_callback(self.get_counter_callback())
 
diff --git a/pyaggr3g470r/lib/utils.py b/pyaggr3g470r/lib/utils.py
index a51b6c3e..b937b5a9 100644
--- a/pyaggr3g470r/lib/utils.py
+++ b/pyaggr3g470r/lib/utils.py
@@ -48,7 +48,7 @@ def try_get_b64icon(url, *splits):
         response = requests.get(rb_url, verify=False, timeout=10)
         # if html in content-type, we assume it's a fancy 404 page
         content_type = response.headers.get('content-type', '')
-        if response.ok and 'html' not in content_type:
+        if response.ok and 'html' not in content_type and response.content:
             return content_type + (
                 '\n%s' % base64.b64encode(response.content).decode('utf8'))
         return None