about | summary | refs | log | tree | commit | diff
path: root/pyaggr3g470r/crawler.py
diff options
context:
space:
mode:
author: Cédric Bonhomme <cedric@cedricbonhomme.org> 2015-02-11 23:27:26 +0100
committer: Cédric Bonhomme <cedric@cedricbonhomme.org> 2015-02-11 23:27:26 +0100
commit: 41d6d9726b9526d633fde816fd012491c30584f6 (patch)
tree: 639b690d161694f0d77e419ffd1c69efc39ca27d /pyaggr3g470r/crawler.py
parent: In the case it is not possible to resolve the URL of an article we just ignor... (diff)
download: newspipe-41d6d9726b9526d633fde816fd012491c30584f6.tar.gz
newspipe-41d6d9726b9526d633fde816fd012491c30584f6.tar.bz2
newspipe-41d6d9726b9526d633fde816fd012491c30584f6.zip
Some minor improvements concerning the parsing of the article publication date.
Diffstat (limited to 'pyaggr3g470r/crawler.py')
-rw-r--r-- pyaggr3g470r/crawler.py 19
1 files changed, 12 insertions, 7 deletions
diff --git a/pyaggr3g470r/crawler.py b/pyaggr3g470r/crawler.py
index 74e0a71c..119a40f5 100644
--- a/pyaggr3g470r/crawler.py
+++ b/pyaggr3g470r/crawler.py
@@ -128,6 +128,7 @@ def parse_feed(user, feed):
logger.error("Problem when sanitizing the content of the article %s (%s)",
article_title, nice_url)
+ # Get the date of publication of the article
post_date = None
for date_key in ('published_parsed', 'published',
'updated_parsed', 'updated'):
@@ -146,6 +147,8 @@ def parse_feed(user, feed):
break
except:
pass
+ finally:
+ post_date = datetime.now(dateutil.tz.tzlocal())
# create the models.Article object and append it to the list of articles
article = Article(link=nice_url, title=article_title,
@@ -204,18 +207,20 @@ def retrieve_feed(user, feed_id=None):
if feed_id is not None:
feeds = [feed for feed in feeds if feed.id == feed_id]
- if feeds != []:
- # 2 - Fetch the feeds.
- loop = asyncio.get_event_loop()
- f = asyncio.wait([init_process(user, feed) for feed in feeds])
- loop.run_until_complete(f)
+ if feeds == []:
+ return
+
+ # 2 - Fetch the feeds.
+ loop = asyncio.get_event_loop()
+ f = asyncio.wait([init_process(user, feed) for feed in feeds])
+ loop.run_until_complete(f)
"""
- # 4 - Indexation
+ # 3 - Indexation
if not conf.ON_HEROKU:
self.index(new_articles)
- # 5 - Mail notification
+ # 4 - Mail notification
if not conf.ON_HEROKU and conf.NOTIFICATION_ENABLED:
self.mail_notification(new_articles)
"""
bgstack15