 newspipe/crawler/default_crawler.py | 11 ++++++++++-
 newspipe/lib/article_utils.py       | 10 ++++++++--
 2 files changed, 18 insertions(+), 3 deletions(-)
diff --git a/newspipe/crawler/default_crawler.py b/newspipe/crawler/default_crawler.py
index 3d6222e9..b1153582 100644
--- a/newspipe/crawler/default_crawler.py
+++ b/newspipe/crawler/default_crawler.py
@@ -169,7 +169,16 @@ async def retrieve_feed(queue, users, feed_id=None):
         filters["last_retrieved__lt"] = datetime.now() - timedelta(
             minutes=application.config["FEED_REFRESH_INTERVAL"]
         )
-        feeds = FeedController().read(**filters).all()
+        #feeds = FeedController().read(**filters).all()
+        feeds = []  # temporary fix for: sqlalchemy.exc.OperationalError: (psycopg2.OperationalError) SSL SYSCALL error: EOF detected
+        for feed in user.feeds:
+            if not feed.enabled:
+                continue
+            if feed.error_count > application.config["DEFAULT_MAX_ERROR"]:
+                continue
+            if feed.last_retrieved > (datetime.now() - timedelta(minutes=application.config["FEED_REFRESH_INTERVAL"])):
+                continue
+            feeds.append(feed)
 
         if feeds == []:
             logger.info("No feed to retrieve for {}".format(user.nickname))
diff --git a/newspipe/lib/article_utils.py b/newspipe/lib/article_utils.py
index 3f6ee2ba..ec074fa9 100644
--- a/newspipe/lib/article_utils.py
+++ b/newspipe/lib/article_utils.py
@@ -19,7 +19,12 @@ PROCESSED_DATE_KEYS = {"published", "created", "updated"}
 
 def extract_id(entry):
     """ extract a value from an entry that will identify it among the other of that feed"""
-    return entry.get("entry_id") or entry.get("id") or entry["link"]
+    entry_id = 'undefined'
+    try:
+        entry_id = entry.get("entry_id") or entry.get("id") or entry["link"]
+    except:
+        pass
+    return entry_id
 
 
 async def construct_article(entry, feed, fields=None, fetch=True):
@@ -85,12 +90,13 @@ async def get_article_details(entry, fetch=True):
     ):
         try:
             # resolves URL behind proxies (like feedproxy.google.com)
+            print('trying to resolve URL...')
             response = await newspipe_get(article_link, timeout=5)
         except MissingSchema:
             split, failed = urlsplit(article_link), False
             for scheme in "https", "http":
-                new_link = urlunsplit(SplitResult(scheme, *split[1:]))
                 try:
+                    new_link = urlunsplit(SplitResult(scheme, *split[1:]))
                     response = await newspipe_get(new_link, timeout=5)
                 except Exception:
                     failed = True
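
Editor's note: a minimal standalone sketch of the in-Python filtering that replaces the FeedController().read(**filters) query above. The Feed namedtuple and the two constants are hypothetical stand-ins for Newspipe's actual model and application.config values; the three continue checks mirror the enabled, error_count and last_retrieved filters previously evaluated by the database, and the sketch assumes last_retrieved is always a datetime.

    from collections import namedtuple
    from datetime import datetime, timedelta

    # Stand-ins for Newspipe's feed model and application.config values.
    Feed = namedtuple("Feed", "enabled error_count last_retrieved")
    DEFAULT_MAX_ERROR = 6
    FEED_REFRESH_INTERVAL = 120  # minutes

    def refreshable_feeds(feeds):
        """Keep only feeds that are enabled, below the error threshold,
        and stale enough to refresh (filtered in Python, not in SQL)."""
        cutoff = datetime.now() - timedelta(minutes=FEED_REFRESH_INTERVAL)
        result = []
        for feed in feeds:
            if not feed.enabled:
                continue
            if feed.error_count > DEFAULT_MAX_ERROR:
                continue
            if feed.last_retrieved > cutoff:
                continue
            result.append(feed)
        return result

    stale = datetime.now() - timedelta(minutes=300)
    print(refreshable_feeds([Feed(True, 0, stale), Feed(False, 0, stale)]))
    # -> only the first feed survives the three checks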
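The hardened extract_id guards the entry["link"] lookup, which raises KeyError when a malformed feed entry carries none of the three identifiers. A quick illustration on plain dicts (feedparser entries are dict-like and behave the same way); the sketch narrows the commit's bare except to KeyError, the error the final lookup raises here.

    def extract_id(entry):
        """Return the first available identifier, falling back to
        'undefined' instead of raising on entries with no id or link."""
        entry_id = "undefined"
        try:
            entry_id = entry.get("entry_id") or entry.get("id") or entry["link"]
        except KeyError:
            pass
        return entry_id

    print(extract_id({"id": "tag:example.org,2020:1"}))  # tag:example.org,2020:1
    print(extract_id({}))  # undefined -- the old version raised KeyError here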
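The MissingSchema branch retries a scheme-less or protocol-relative link under https and then http; the commit also moves the urlunsplit call inside the try so any error it raises is caught with the rest of the attempt. A self-contained sketch of that pattern, using synchronous requests in place of Newspipe's async newspipe_get helper (an assumption made for illustration).

    from urllib.parse import SplitResult, urlsplit, urlunsplit

    import requests

    def resolve_schemeless(link, timeout=5):
        """Rebuild a link under https, then http, and return the first
        response that succeeds; re-raise the last error if both fail."""
        split = urlsplit(link)
        last_error = None
        for scheme in ("https", "http"):
            try:
                # Keep netloc/path/query/fragment, swap in the candidate scheme.
                new_link = urlunsplit(SplitResult(scheme, *split[1:]))
                return requests.get(new_link, timeout=timeout)
            except Exception as error:
                last_error = error
        raise last_error

    # A protocol-relative link such as "//feedproxy.google.com/r/x" is
    # retried as "https://feedproxy.google.com/r/x" first.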