From 80834199a4177db0f19263bfb76b0e87b102caf4 Mon Sep 17 00:00:00 2001 From: Cédric Bonhomme Date: Mon, 30 Mar 2020 22:45:33 +0200 Subject: wip --- newspipe/crawler/default_crawler.py | 5 ++++- newspipe/lib/article_utils.py | 4 ++-- newspipe/lib/feed_utils.py | 5 ++++- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/newspipe/crawler/default_crawler.py b/newspipe/crawler/default_crawler.py index 42b1450f..a76eca9c 100644 --- a/newspipe/crawler/default_crawler.py +++ b/newspipe/crawler/default_crawler.py @@ -103,7 +103,10 @@ async def parse_feed(user, feed): if feed.title and "title" in up_feed: # do not override the title set by the user del up_feed["title"] - FeedController().update({"id": feed.id}, up_feed) + try: + FeedController().update({"id": feed.id}, up_feed) + except: + logger.exception("error when updating feed: {}".format(feed.link)) return articles diff --git a/newspipe/lib/article_utils.py b/newspipe/lib/article_utils.py index 7e1b2a36..d343f0a1 100644 --- a/newspipe/lib/article_utils.py +++ b/newspipe/lib/article_utils.py @@ -49,9 +49,9 @@ async def construct_article(entry, feed, fields=None, fetch=True): timezone.utc ) except ParserError: - logger.exception("Error when parsing date {}".format(entry[date_key])) + logger.exception("Error when parsing date: {}".format(entry[date_key])) except Exception as e: - logger.exception(e) + pass else: break push_in_article("content", get_article_content(entry)) diff --git a/newspipe/lib/feed_utils.py b/newspipe/lib/feed_utils.py index 70ded817..0de78580 100644 --- a/newspipe/lib/feed_utils.py +++ b/newspipe/lib/feed_utils.py @@ -42,6 +42,7 @@ def escape_keys(*keys): def construct_feed_from(url=None, fp_parsed=None, feed=None, query_site=True): requests_kwargs = { "headers": {"User-Agent": application.config["CRAWLER_USER_AGENT"]}, + "timeout": application.config["CRAWLER_TIMEOUT"], "verify": False, } if url is None and fp_parsed is not None: @@ -87,7 +88,9 @@ def construct_feed_from(url=None, fp_parsed=None, feed=None, query_site=True): try: response = requests.get(feed["site_link"], **requests_kwargs) - except requests.exceptions.InvalidSchema as e: + except requests.exceptions.InvalidSchema: + return feed + except requests.exceptions.ConnectionError: return feed except: logger.exception("failed to retrieve %r", feed["site_link"]) -- cgit