aboutsummaryrefslogtreecommitdiff
path: root/pyaggr3g470r/lib/crawler.py
diff options
context:
space:
mode:
author: Cédric Bonhomme <kimble.mandel+bitbucket@gmail.com>, 2015-07-03 15:27:36 +0200
committer: Cédric Bonhomme <kimble.mandel+bitbucket@gmail.com>, 2015-07-03 15:27:36 +0200
commit: 7f3c9790fcbe513436cb515b31b4ad81f2d10336 (patch)
tree: b621c1e4cbd1698bfcf52695e3cc8b547cfd5e9e /pyaggr3g470r/lib/crawler.py
parent: Minor improvements to the edit feed forms. (diff)
parent: fixing bug on reset link for feeds and hiding feed title for eXtraSmall device (diff)
download: newspipe-7f3c9790fcbe513436cb515b31b4ad81f2d10336.tar.gz
newspipe-7f3c9790fcbe513436cb515b31b4ad81f2d10336.tar.bz2
newspipe-7f3c9790fcbe513436cb515b31b4ad81f2d10336.zip
Merged in jaesivsm/pyaggr3g470r (pull request #15)
the icon feature
Diffstat (limited to 'pyaggr3g470r/lib/crawler.py')
-rw-r--r-- pyaggr3g470r/lib/crawler.py 33
1 files changed, 20 insertions, 13 deletions
diff --git a/pyaggr3g470r/lib/crawler.py b/pyaggr3g470r/lib/crawler.py
index 324f0d8e..2ba5403a 100644
--- a/pyaggr3g470r/lib/crawler.py
+++ b/pyaggr3g470r/lib/crawler.py
@@ -24,7 +24,7 @@ from datetime import datetime
from time import strftime, gmtime
from concurrent.futures import ThreadPoolExecutor
from requests_futures.sessions import FuturesSession
-from pyaggr3g470r.lib.utils import default_handler
+from pyaggr3g470r.lib.utils import default_handler, construct_feed_from
logger = logging.getLogger(__name__)
logging.captureWarnings(True)
@@ -136,7 +136,7 @@ class PyAggUpdater(AbstractCrawler):
self.feed = feed
self.entries = entries
self.headers = headers
- self.parsed_feed = parsed_feed.get('feed', {})
+ self.parsed_feed = parsed_feed
super(PyAggUpdater, self).__init__(auth)
def to_article(self, entry):
@@ -188,19 +188,26 @@ class PyAggUpdater(AbstractCrawler):
self.headers.get('etag', ''),
self.headers.get('last-modified', ''))
- dico = {'error_count': 0, 'last_error': None,
- 'etag': self.headers.get('etag', ''),
- 'last_modified': self.headers.get('last-modified',
- strftime('%a, %d %b %Y %X %Z', gmtime())),
- 'site_link': self.parsed_feed.get('link')}
+ up_feed = {'error_count': 0, 'last_error': None,
+ 'etag': self.headers.get('etag', ''),
+ 'last_modified': self.headers.get('last-modified',
+ strftime('%a, %d %b %Y %X %Z', gmtime()))}
+ fresh_feed = construct_feed_from(url=self.feed['link'],
+ fp_parsed=self.parsed_feed,
+ feed=self.feed)
+ for key in ('description', 'site_link', 'icon'):
+ if fresh_feed.get(key) and fresh_feed[key] != self.feed.get(key):
+ up_feed[key] = fresh_feed[key]
if not self.feed.get('title'):
- dico['title'] = self.parsed_feed.get('title', '')
+ up_feed['title'] = fresh_feed.get('title', '')
+
logger.info('%r %r - pushing feed attrs %r',
self.feed['id'], self.feed['title'],
- {key: "%s -> %s" % (dico[key], self.feed.get(key))
- for key in dico if dico[key] != self.feed.get(key)})
- if any([dico[key] != self.feed.get(key) for key in dico]):
- future = self.query_pyagg('put', 'feed/%d' % self.feed['id'], dico)
+ {key: "%s -> %s" % (up_feed[key], self.feed.get(key))
+ for key in up_feed if up_feed[key] != self.feed.get(key)})
+ if any([up_feed[key] != self.feed.get(key) for key in up_feed]):
+ future = self.query_pyagg('put',
+ 'feed/%d' % self.feed['id'], up_feed)
future.add_done_callback(self.get_counter_callback())
@@ -265,7 +272,7 @@ class FeedCrawler(AbstractCrawler):
self.feed['id'], self.feed['title'])
ids, entries = [], {}
- parsed_response = feedparser.parse(response.text)
+ parsed_response = feedparser.parse(response.content)
for entry in parsed_response['entries']:
entry_ids = extract_id(entry)
entry_ids['feed_id'] = self.feed['id']
bgstack15