author     Cédric Bonhomme <cedric@cedricbonhomme.org>  2020-04-02 22:35:43 +0200
committer  Cédric Bonhomme <cedric@cedricbonhomme.org>  2020-04-02 22:35:43 +0200
commit     0b6ee9a1c44c802a63e790f9fd9602133b121ce0 (patch)
tree       739bac7039b39cd84c2593c08c5e8470f988d999
parent     Improved the feed creation form. (diff)
parent     wip (diff)
Merge branch 'master' of git.sr.ht:~cedric/newspipe
-rw-r--r--  newspipe/controllers/icon.py         20
-rw-r--r--  newspipe/crawler/default_crawler.py   7
-rw-r--r--  newspipe/lib/article_utils.py         5
-rw-r--r--  newspipe/lib/feed_utils.py            5
4 files changed, 25 insertions(+), 12 deletions(-)
diff --git a/newspipe/controllers/icon.py b/newspipe/controllers/icon.py
index d5dd7fe6..b0fad5ac 100644
--- a/newspipe/controllers/icon.py
+++ b/newspipe/controllers/icon.py
@@ -2,6 +2,7 @@ import base64
 
 import requests
 
+from newspipe.lib.utils import newspipe_get
 from newspipe.models import Icon
 
 from .abstract import AbstractController
@@ -13,14 +14,17 @@ class IconController(AbstractController):
     def _build_from_url(self, attrs):
         if "url" in attrs and "content" not in attrs:
-            resp = requests.get(attrs["url"], verify=False)
-            attrs.update(
-                {
-                    "url": resp.url,
-                    "mimetype": resp.headers.get("content-type", None),
-                    "content": base64.b64encode(resp.content).decode("utf8"),
-                }
-            )
+            try:
+                resp = newspipe_get(attrs["url"], timeout=5)
+                attrs.update(
+                    {
+                        "url": resp.url,
+                        "mimetype": resp.headers.get("content-type", None),
+                        "content": base64.b64encode(resp.content).decode("utf8"),
+                    }
+                )
+            except requests.exceptions.ConnectionError:
+                pass
         return attrs
 
     def create(self, **attrs):
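
The newspipe_get helper imported above lives elsewhere in the tree and is not part of this diff. A minimal sketch of what such a wrapper plausibly looks like, assuming it centralizes the crawler's request defaults (the real signature and behavior in newspipe/lib/utils.py may differ):

# Hypothetical sketch only; newspipe/lib/utils.py is not shown in this diff.
import requests

from newspipe.bootstrap import application  # assumed location of the app config


def newspipe_get(url, **kwargs):
    # Assumed behavior: apply the crawler's default timeout and user agent,
    # letting callers override either (as icon.py does with timeout=5).
    kwargs.setdefault("timeout", application.config.get("CRAWLER_TIMEOUT", 5))
    kwargs.setdefault(
        "headers", {"User-Agent": application.config["CRAWLER_USER_AGENT"]}
    )
    return requests.get(url, **kwargs)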
diff --git a/newspipe/crawler/default_crawler.py b/newspipe/crawler/default_crawler.py
index 0584fb90..a76eca9c 100644
--- a/newspipe/crawler/default_crawler.py
+++ b/newspipe/crawler/default_crawler.py
@@ -103,7 +103,10 @@ async def parse_feed(user, feed):
     if feed.title and "title" in up_feed:
         # do not override the title set by the user
         del up_feed["title"]
-    FeedController().update({"id": feed.id}, up_feed)
+    try:
+        FeedController().update({"id": feed.id}, up_feed)
+    except:
+        logger.exception("error when updating feed: {}".format(feed.link))
 
     return articles
@@ -180,7 +183,7 @@ async def retrieve_feed(queue, users, feed_id=None):
             continue
         if None is feed_id or (feed_id and feed_id == feed.id):
             feeds.append(feed)
-    logger.info(feeds)
+
     if feeds == []:
         logger.info("No feed to retrieve for {}".format(user.nickname))
diff --git a/newspipe/lib/article_utils.py b/newspipe/lib/article_utils.py
index 0490d4d7..d343f0a1 100644
--- a/newspipe/lib/article_utils.py
+++ b/newspipe/lib/article_utils.py
@@ -2,6 +2,7 @@ import html
 import logging
 import re
 from datetime import datetime, timezone
+from dateutil.parser._parser import ParserError
 from enum import Enum
 from urllib.parse import SplitResult, urlsplit, urlunsplit
@@ -47,8 +48,10 @@ async def construct_article(entry, feed, fields=None, fetch=True):
                 article["date"] = dateutil.parser.parse(entry[date_key]).astimezone(
                     timezone.utc
                 )
+            except ParserError:
+                logger.exception("Error when parsing date: {}".format(entry[date_key]))
             except Exception as e:
-                logger.exception(e)
+                pass
             else:
                 break
     push_in_article("content", get_article_content(entry))
diff --git a/newspipe/lib/feed_utils.py b/newspipe/lib/feed_utils.py
index 70ded817..0de78580 100644
--- a/newspipe/lib/feed_utils.py
+++ b/newspipe/lib/feed_utils.py
@@ -42,6 +42,7 @@ def escape_keys(*keys):
 def construct_feed_from(url=None, fp_parsed=None, feed=None, query_site=True):
     requests_kwargs = {
         "headers": {"User-Agent": application.config["CRAWLER_USER_AGENT"]},
+        "timeout": application.config["CRAWLER_TIMEOUT"],
         "verify": False,
     }
     if url is None and fp_parsed is not None:
@@ -87,7 +88,9 @@ def construct_feed_from(url=None, fp_parsed=None, feed=None, query_site=True):
     try:
         response = requests.get(feed["site_link"], **requests_kwargs)
-    except requests.exceptions.InvalidSchema as e:
+    except requests.exceptions.InvalidSchema:
+        return feed
+    except requests.exceptions.ConnectionError:
         return feed
     except:
         logger.exception("failed to retrieve %r", feed["site_link"])