From a55fab48c46a7e358ec1506ca6a407f3428ca489 Mon Sep 17 00:00:00 2001 From: Cédric Bonhomme Date: Thu, 10 Nov 2016 07:54:50 +0100 Subject: It is now possible to configure the refresh interval of feeds (in minutes). --- src/conf.py | 4 +++- src/conf/conf.cfg-sample | 1 + src/crawler/classic_crawler.py | 5 ++++- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/conf.py b/src/conf.py index 807c97e5..466d9c88 100644 --- a/src/conf.py +++ b/src/conf.py @@ -46,7 +46,8 @@ DEFAULTS = {"platform_url": "https://www.newspipe.org/", "crawling_method": "classic", "crawler_user_agent": "Newspipe (https://github.com/newspipe)", "crawler_timeout": "30", - "crawler_resolv": "false" + "crawler_resolv": "false", + "feed_refresh_interval": "120" } if not ON_HEROKU: @@ -98,6 +99,7 @@ DEFAULT_MAX_ERROR = config.getint('crawler', 'default_max_error') ERROR_THRESHOLD = int(DEFAULT_MAX_ERROR / 2) CRAWLER_TIMEOUT = config.get('crawler', 'timeout') CRAWLER_RESOLV = config.getboolean('crawler', 'resolv') +FEED_REFRESH_INTERVAL = config.getint('crawler', 'feed_refresh_interval') WEBSERVER_HOST = config.get('webserver', 'host') WEBSERVER_PORT = config.getint('webserver', 'port') diff --git a/src/conf/conf.cfg-sample b/src/conf/conf.cfg-sample index c3cce42d..6fae48b5 100644 --- a/src/conf/conf.cfg-sample +++ b/src/conf/conf.cfg-sample @@ -22,6 +22,7 @@ api_login = api_passwd = timeout = 30 resolv = true +feed_refresh_interval = 120 [notification] notification_email = Newspipe@no-reply.com host = smtp.googlemail.com diff --git a/src/crawler/classic_crawler.py b/src/crawler/classic_crawler.py index 7d29d462..eb75b78f 100644 --- a/src/crawler/classic_crawler.py +++ b/src/crawler/classic_crawler.py @@ -30,7 +30,7 @@ import asyncio import logging import feedparser import dateutil.parser -from datetime import datetime, timezone +from datetime import datetime, timezone, timedelta from sqlalchemy import or_ import conf @@ -184,9 +184,12 @@ def retrieve_feed(loop, user, feed_id=None): filters['id'] = feed_id filters['enabled'] = True filters['error_count__lt'] = conf.DEFAULT_MAX_ERROR + filters['last_retrieved__lt'] = datetime.now() - \ + timedelta(minutes=conf.FEED_REFRESH_INTERVAL) feeds = FeedController().read(**filters).all() if feeds == []: + logger.info('No feed to retrieve for {}'.format(user.nickname)) return # Launch the process for all the feeds -- cgit