author    | Cédric Bonhomme <cedric@cedricbonhomme.org> | 2020-04-02 22:35:43 +0200
committer | Cédric Bonhomme <cedric@cedricbonhomme.org> | 2020-04-02 22:35:43 +0200
commit    | 0b6ee9a1c44c802a63e790f9fd9602133b121ce0 (patch)
tree      | 739bac7039b39cd84c2593c08c5e8470f988d999
parent    | Improved the feed creation form. (diff)
parent    | wip (diff)
Merge branch 'master' of git.sr.ht:~cedric/newspipe
-rw-r--r-- | newspipe/controllers/icon.py        | 20
-rw-r--r-- | newspipe/crawler/default_crawler.py |  7
-rw-r--r-- | newspipe/lib/article_utils.py       |  5
-rw-r--r-- | newspipe/lib/feed_utils.py          |  5
4 files changed, 25 insertions(+), 12 deletions(-)
diff --git a/newspipe/controllers/icon.py b/newspipe/controllers/icon.py
index d5dd7fe6..b0fad5ac 100644
--- a/newspipe/controllers/icon.py
+++ b/newspipe/controllers/icon.py
@@ -2,6 +2,7 @@ import base64
 
 import requests
 
+from newspipe.lib.utils import newspipe_get
 from newspipe.models import Icon
 
 from .abstract import AbstractController
@@ -13,14 +14,17 @@ class IconController(AbstractController):
 
     def _build_from_url(self, attrs):
         if "url" in attrs and "content" not in attrs:
-            resp = requests.get(attrs["url"], verify=False)
-            attrs.update(
-                {
-                    "url": resp.url,
-                    "mimetype": resp.headers.get("content-type", None),
-                    "content": base64.b64encode(resp.content).decode("utf8"),
-                }
-            )
+            try:
+                resp = newspipe_get(attrs["url"], timeout=5)
+                attrs.update(
+                    {
+                        "url": resp.url,
+                        "mimetype": resp.headers.get("content-type", None),
+                        "content": base64.b64encode(resp.content).decode("utf8"),
+                    }
+                )
+            except requests.exceptions.ConnectionError:
+                pass
         return attrs
 
     def create(self, **attrs):
diff --git a/newspipe/crawler/default_crawler.py b/newspipe/crawler/default_crawler.py
index 0584fb90..a76eca9c 100644
--- a/newspipe/crawler/default_crawler.py
+++ b/newspipe/crawler/default_crawler.py
@@ -103,7 +103,10 @@ async def parse_feed(user, feed):
     if feed.title and "title" in up_feed:
         # do not override the title set by the user
         del up_feed["title"]
-    FeedController().update({"id": feed.id}, up_feed)
+    try:
+        FeedController().update({"id": feed.id}, up_feed)
+    except:
+        logger.exception("error when updating feed: {}".format(feed.link))
 
     return articles
 
@@ -180,7 +183,7 @@ async def retrieve_feed(queue, users, feed_id=None):
                 continue
             if None is feed_id or (feed_id and feed_id == feed.id):
                 feeds.append(feed)
-    logger.info(feeds)
+
     if feeds == []:
         logger.info("No feed to retrieve for {}".format(user.nickname))
 
diff --git a/newspipe/lib/article_utils.py b/newspipe/lib/article_utils.py
index 0490d4d7..d343f0a1 100644
--- a/newspipe/lib/article_utils.py
+++ b/newspipe/lib/article_utils.py
@@ -2,6 +2,7 @@ import html
 import logging
 import re
 from datetime import datetime, timezone
+from dateutil.parser._parser import ParserError
 from enum import Enum
 from urllib.parse import SplitResult, urlsplit, urlunsplit
 
@@ -47,8 +48,10 @@ async def construct_article(entry, feed, fields=None, fetch=True):
                 article["date"] = dateutil.parser.parse(entry[date_key]).astimezone(
                     timezone.utc
                 )
+            except ParserError:
+                logger.exception("Error when parsing date: {}".format(entry[date_key]))
             except Exception as e:
-                logger.exception(e)
+                pass
             else:
                 break
     push_in_article("content", get_article_content(entry))
diff --git a/newspipe/lib/feed_utils.py b/newspipe/lib/feed_utils.py
index 70ded817..0de78580 100644
--- a/newspipe/lib/feed_utils.py
+++ b/newspipe/lib/feed_utils.py
@@ -42,6 +42,7 @@ def escape_keys(*keys):
 def construct_feed_from(url=None, fp_parsed=None, feed=None, query_site=True):
     requests_kwargs = {
         "headers": {"User-Agent": application.config["CRAWLER_USER_AGENT"]},
+        "timeout": application.config["CRAWLER_TIMEOUT"],
         "verify": False,
     }
     if url is None and fp_parsed is not None:
@@ -87,7 +88,9 @@
 
     try:
         response = requests.get(feed["site_link"], **requests_kwargs)
-    except requests.exceptions.InvalidSchema as e:
+    except requests.exceptions.InvalidSchema:
+        return feed
+    except requests.exceptions.ConnectionError:
         return feed
     except:
         logger.exception("failed to retrieve %r", feed["site_link"])
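Note: the icon controller now goes through `newspipe_get` instead of calling `requests.get` directly. That helper lives in `newspipe/lib/utils.py` and is not part of this diff; below is a minimal sketch of what such a wrapper might look like, assuming it only centralizes the crawler's User-Agent, timeout, and TLS settings. The default values are illustrative assumptions, not the project's real configuration.

```python
# Hedged sketch only: the real newspipe_get is defined in newspipe/lib/utils.py
# and may differ. The defaults below are assumptions for illustration.
import requests

ASSUMED_USER_AGENT = "Newspipe"  # real value comes from the CRAWLER_USER_AGENT config
ASSUMED_TIMEOUT = 5              # seconds; real value comes from CRAWLER_TIMEOUT


def newspipe_get(url, **kwargs):
    """requests.get with the crawler's shared defaults applied."""
    request_kwargs = {
        "verify": False,  # many feeds sit behind misconfigured TLS
        "timeout": ASSUMED_TIMEOUT,
        "headers": {"User-Agent": ASSUMED_USER_AGENT},
    }
    request_kwargs.update(kwargs)  # per-call overrides win, e.g. timeout=5
    return requests.get(url, **request_kwargs)
```

With the shared defaults in one place, callers such as `IconController._build_from_url` only need to handle `requests.exceptions.ConnectionError`, as the diff does. Separately, note that since dateutil 2.8.1 `ParserError` is re-exported publicly as `from dateutil.parser import ParserError`; the `dateutil.parser._parser` module imported in `article_utils.py` is private.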