author    | Cédric Bonhomme <cedric@cedricbonhomme.org> | 2020-04-02 22:35:43 +0200
committer | Cédric Bonhomme <cedric@cedricbonhomme.org> | 2020-04-02 22:35:43 +0200
commit    | 0b6ee9a1c44c802a63e790f9fd9602133b121ce0 (patch)
tree      | 739bac7039b39cd84c2593c08c5e8470f988d999
parent    | Improved the feed creation form. (diff)
parent    | wip (diff)
Merge branch 'master' of git.sr.ht:~cedric/newspipe
-rw-r--r-- | newspipe/controllers/icon.py        | 20
-rw-r--r-- | newspipe/crawler/default_crawler.py |  7
-rw-r--r-- | newspipe/lib/article_utils.py       |  5
-rw-r--r-- | newspipe/lib/feed_utils.py          |  5
4 files changed, 25 insertions(+), 12 deletions(-)
diff --git a/newspipe/controllers/icon.py b/newspipe/controllers/icon.py
index d5dd7fe6..b0fad5ac 100644
--- a/newspipe/controllers/icon.py
+++ b/newspipe/controllers/icon.py
@@ -2,6 +2,7 @@ import base64
 
 import requests
 
+from newspipe.lib.utils import newspipe_get
 from newspipe.models import Icon
 
 from .abstract import AbstractController
@@ -13,14 +14,17 @@ class IconController(AbstractController):
 
     def _build_from_url(self, attrs):
         if "url" in attrs and "content" not in attrs:
-            resp = requests.get(attrs["url"], verify=False)
-            attrs.update(
-                {
-                    "url": resp.url,
-                    "mimetype": resp.headers.get("content-type", None),
-                    "content": base64.b64encode(resp.content).decode("utf8"),
-                }
-            )
+            try:
+                resp = newspipe_get(attrs["url"], timeout=5)
+                attrs.update(
+                    {
+                        "url": resp.url,
+                        "mimetype": resp.headers.get("content-type", None),
+                        "content": base64.b64encode(resp.content).decode("utf8"),
+                    }
+                )
+            except requests.exceptions.ConnectionError:
+                pass
         return attrs
 
     def create(self, **attrs):
diff --git a/newspipe/crawler/default_crawler.py b/newspipe/crawler/default_crawler.py
index 0584fb90..a76eca9c 100644
--- a/newspipe/crawler/default_crawler.py
+++ b/newspipe/crawler/default_crawler.py
@@ -103,7 +103,10 @@ async def parse_feed(user, feed):
     if feed.title and "title" in up_feed:
         # do not override the title set by the user
         del up_feed["title"]
-    FeedController().update({"id": feed.id}, up_feed)
+    try:
+        FeedController().update({"id": feed.id}, up_feed)
+    except:
+        logger.exception("error when updating feed: {}".format(feed.link))
 
     return articles
 
@@ -180,7 +183,7 @@ async def retrieve_feed(queue, users, feed_id=None):
                 continue
             if None is feed_id or (feed_id and feed_id == feed.id):
                 feeds.append(feed)
-    logger.info(feeds)
+
     if feeds == []:
         logger.info("No feed to retrieve for {}".format(user.nickname))
 
diff --git a/newspipe/lib/article_utils.py b/newspipe/lib/article_utils.py
index 0490d4d7..d343f0a1 100644
--- a/newspipe/lib/article_utils.py
+++ b/newspipe/lib/article_utils.py
@@ -2,6 +2,7 @@ import html
 import logging
 import re
 from datetime import datetime, timezone
+from dateutil.parser._parser import ParserError
 from enum import Enum
 from urllib.parse import SplitResult, urlsplit, urlunsplit
 
@@ -47,8 +48,10 @@ async def construct_article(entry, feed, fields=None, fetch=True):
                 article["date"] = dateutil.parser.parse(entry[date_key]).astimezone(
                     timezone.utc
                 )
+            except ParserError:
+                logger.exception("Error when parsing date: {}".format(entry[date_key]))
             except Exception as e:
-                logger.exception(e)
+                pass
             else:
                 break
     push_in_article("content", get_article_content(entry))
diff --git a/newspipe/lib/feed_utils.py b/newspipe/lib/feed_utils.py
index 70ded817..0de78580 100644
--- a/newspipe/lib/feed_utils.py
+++ b/newspipe/lib/feed_utils.py
@@ -42,6 +42,7 @@ def escape_keys(*keys):
 def construct_feed_from(url=None, fp_parsed=None, feed=None, query_site=True):
     requests_kwargs = {
         "headers": {"User-Agent": application.config["CRAWLER_USER_AGENT"]},
+        "timeout": application.config["CRAWLER_TIMEOUT"],
         "verify": False,
     }
     if url is None and fp_parsed is not None:
@@ -87,7 +88,9 @@
 
     try:
         response = requests.get(feed["site_link"], **requests_kwargs)
-    except requests.exceptions.InvalidSchema as e:
+    except requests.exceptions.InvalidSchema:
+        return feed
+    except requests.exceptions.ConnectionError:
         return feed
     except:
         logger.exception("failed to retrieve %r", feed["site_link"])
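Note: the icon controller now goes through `newspipe_get` instead of calling `requests.get` directly. That helper lives in `newspipe/lib/utils.py` and is not part of this diff; below is a minimal sketch of what such a wrapper might look like, assuming it only centralizes the crawler's User-Agent, timeout, and TLS settings. The default values are illustrative assumptions, not the project's real configuration.

```python
# Hedged sketch only: the real newspipe_get is defined in newspipe/lib/utils.py
# and may differ. The defaults below are assumptions for illustration.
import requests

ASSUMED_USER_AGENT = "Newspipe"  # real value comes from the CRAWLER_USER_AGENT config
ASSUMED_TIMEOUT = 5              # seconds; real value comes from CRAWLER_TIMEOUT


def newspipe_get(url, **kwargs):
    """requests.get with the crawler's shared defaults applied."""
    request_kwargs = {
        "verify": False,  # many feeds sit behind misconfigured TLS
        "timeout": ASSUMED_TIMEOUT,
        "headers": {"User-Agent": ASSUMED_USER_AGENT},
    }
    request_kwargs.update(kwargs)  # per-call overrides win, e.g. timeout=5
    return requests.get(url, **request_kwargs)
```

With the shared defaults in one place, callers such as `IconController._build_from_url` only need to handle `requests.exceptions.ConnectionError`, as the diff does. Separately, note that since dateutil 2.8.1 `ParserError` is re-exported publicly as `from dateutil.parser import ParserError`; the `dateutil.parser._parser` module imported in `article_utils.py` is private.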