author    Cédric Bonhomme <cedric@cedricbonhomme.org>  2020-03-30 22:45:33 +0200
committer Cédric Bonhomme <cedric@cedricbonhomme.org>  2020-03-30 22:45:33 +0200
commit    80834199a4177db0f19263bfb76b0e87b102caf4 (patch)
tree      2e2e8d3f80b08fe85d4864670cbc72059d18ac3d
parent    catch ParseError from dateutil (diff)
wip: harden crawler error handling and add a request timeout
 newspipe/crawler/default_crawler.py | 5 ++++-
 newspipe/lib/article_utils.py       | 4 ++--
 newspipe/lib/feed_utils.py          | 5 ++++-
 3 files changed, 10 insertions(+), 4 deletions(-)
diff --git a/newspipe/crawler/default_crawler.py b/newspipe/crawler/default_crawler.py
index 42b1450f..a76eca9c 100644
--- a/newspipe/crawler/default_crawler.py
+++ b/newspipe/crawler/default_crawler.py
@@ -103,7 +103,10 @@ async def parse_feed(user, feed):
     if feed.title and "title" in up_feed:
         # do not override the title set by the user
         del up_feed["title"]
-    FeedController().update({"id": feed.id}, up_feed)
+    try:
+        FeedController().update({"id": feed.id}, up_feed)
+    except Exception:
+        logger.exception("error when updating feed: {}".format(feed.link))
     return articles
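
Wrapping FeedController().update() in a try/except keeps one broken feed from aborting the rest of the crawl, but catching Exception is still broad. A minimal sketch of a narrower handler, assuming the controller surfaces database failures as SQLAlchemyError (an assumption; the controller's exception hierarchy is not shown in this diff), with lazy %s formatting so the message is only built when the record is emitted:

# Sketch only: FeedController, feed, up_feed and logger come from the
# surrounding parse_feed() scope; SQLAlchemyError is an assumed failure mode.
from sqlalchemy.exc import SQLAlchemyError

try:
    FeedController().update({"id": feed.id}, up_feed)
except SQLAlchemyError:
    logger.exception("error when updating feed: %s", feed.link)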
diff --git a/newspipe/lib/article_utils.py b/newspipe/lib/article_utils.py
index 7e1b2a36..d343f0a1 100644
--- a/newspipe/lib/article_utils.py
+++ b/newspipe/lib/article_utils.py
@@ -49,9 +49,9 @@ async def construct_article(entry, feed, fields=None, fetch=True):
                     timezone.utc
                 )
             except ParserError:
-                logger.exception("Error when parsing date {}".format(entry[date_key]))
-            except Exception as e:
-                logger.exception(e)
+                logger.exception("Error when parsing date: {}".format(entry[date_key]))
+            except Exception:
+                pass
             else:
                 break
     push_in_article("content", get_article_content(entry))
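
After this hunk, a date string that dateutil rejects is logged via the ParserError branch, and any other failure is skipped quietly so the loop can try the entry's next date field. A self-contained sketch of the same parsing pattern, assuming dateutil >= 2.8.1 (the release that introduced ParserError); the helper name parse_entry_date is hypothetical:

# Hypothetical helper illustrating the date-parsing pattern; not newspipe code.
from datetime import timezone

import dateutil.parser
from dateutil.parser import ParserError  # available since dateutil 2.8.1

def parse_entry_date(value):
    """Return an aware UTC datetime, or None when the value is unparsable."""
    try:
        return dateutil.parser.parse(value).astimezone(timezone.utc)
    except (ParserError, OverflowError):
        return None

parse_entry_date("2020-03-30 22:45:33 +0200")  # -> 2020-03-30 20:45:33+00:00
parse_entry_date("not a date")                 # -> None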
diff --git a/newspipe/lib/feed_utils.py b/newspipe/lib/feed_utils.py
index 70ded817..0de78580 100644
--- a/newspipe/lib/feed_utils.py
+++ b/newspipe/lib/feed_utils.py
@@ -42,6 +42,7 @@ def escape_keys(*keys):
 def construct_feed_from(url=None, fp_parsed=None, feed=None, query_site=True):
     requests_kwargs = {
         "headers": {"User-Agent": application.config["CRAWLER_USER_AGENT"]},
+        "timeout": application.config["CRAWLER_TIMEOUT"],
         "verify": False,
     }
     if url is None and fp_parsed is not None:
@@ -87,7 +88,9 @@ def construct_feed_from(url=None, fp_parsed=None, feed=None, query_site=True):
     try:
         response = requests.get(feed["site_link"], **requests_kwargs)
-    except requests.exceptions.InvalidSchema as e:
+    except requests.exceptions.InvalidSchema:
+        return feed
+    except requests.exceptions.ConnectionError:
         return feed
     except:
         logger.exception("failed to retrieve %r", feed["site_link"])
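
The two feed_utils.py hunks work together: requests waits indefinitely unless a timeout kwarg is passed, and once one is set, a slow read raises requests.exceptions.ReadTimeout, which the new ConnectionError branch does not catch (only ConnectTimeout inherits from ConnectionError). A minimal sketch of both pieces; the URL, User-Agent and 10-second value are placeholders rather than values from the newspipe config:

# Sketch with placeholder values; newspipe reads the real ones from
# application.config.
import requests

requests_kwargs = {
    "headers": {"User-Agent": "newspipe-crawler"},
    "timeout": 10,    # seconds, applied to the connect and read phases separately
    "verify": False,  # as in the original code: TLS verification stays disabled
}

try:
    response = requests.get("https://example.org/feed", **requests_kwargs)
except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
    response = None  # unreachable or too slow: give up on this feed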