about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Cédric Bonhomme <cedric@cedricbonhomme.org> 2016-11-10 07:54:50 +0100
committer: Cédric Bonhomme <cedric@cedricbonhomme.org> 2016-11-10 07:54:50 +0100
commit: a55fab48c46a7e358ec1506ca6a407f3428ca489 (patch)
tree: 24d1d7c2347d90247282d5facbbdf8a07eb7ebf8
parent: Set the initial value of the reduce. (diff)
download: newspipe-a55fab48c46a7e358ec1506ca6a407f3428ca489.tar.gz
newspipe-a55fab48c46a7e358ec1506ca6a407f3428ca489.tar.bz2
newspipe-a55fab48c46a7e358ec1506ca6a407f3428ca489.zip
It is now possible to configure the refresh interval of feeds (in minutes).
-rw-r--r-- src/conf.py 4
-rw-r--r-- src/conf/conf.cfg-sample 1
-rw-r--r-- src/crawler/classic_crawler.py 5
3 files changed, 8 insertions, 2 deletions
diff --git a/src/conf.py b/src/conf.py
index 807c97e5..466d9c88 100644
--- a/src/conf.py
+++ b/src/conf.py
@@ -46,7 +46,8 @@ DEFAULTS = {"platform_url": "https://www.newspipe.org/",
"crawling_method": "classic",
"crawler_user_agent": "Newspipe (https://github.com/newspipe)",
"crawler_timeout": "30",
- "crawler_resolv": "false"
+ "crawler_resolv": "false",
+ "feed_refresh_interval": "120"
}
if not ON_HEROKU:
@@ -98,6 +99,7 @@ DEFAULT_MAX_ERROR = config.getint('crawler', 'default_max_error')
ERROR_THRESHOLD = int(DEFAULT_MAX_ERROR / 2)
CRAWLER_TIMEOUT = config.get('crawler', 'timeout')
CRAWLER_RESOLV = config.getboolean('crawler', 'resolv')
+FEED_REFRESH_INTERVAL = config.getint('crawler', 'feed_refresh_interval')
WEBSERVER_HOST = config.get('webserver', 'host')
WEBSERVER_PORT = config.getint('webserver', 'port')
diff --git a/src/conf/conf.cfg-sample b/src/conf/conf.cfg-sample
index c3cce42d..6fae48b5 100644
--- a/src/conf/conf.cfg-sample
+++ b/src/conf/conf.cfg-sample
@@ -22,6 +22,7 @@ api_login =
api_passwd =
timeout = 30
resolv = true
+feed_refresh_interval = 120
[notification]
notification_email = Newspipe@no-reply.com
host = smtp.googlemail.com
diff --git a/src/crawler/classic_crawler.py b/src/crawler/classic_crawler.py
index 7d29d462..eb75b78f 100644
--- a/src/crawler/classic_crawler.py
+++ b/src/crawler/classic_crawler.py
@@ -30,7 +30,7 @@ import asyncio
import logging
import feedparser
import dateutil.parser
-from datetime import datetime, timezone
+from datetime import datetime, timezone, timedelta
from sqlalchemy import or_
import conf
@@ -184,9 +184,12 @@ def retrieve_feed(loop, user, feed_id=None):
filters['id'] = feed_id
filters['enabled'] = True
filters['error_count__lt'] = conf.DEFAULT_MAX_ERROR
+ filters['last_retrieved__lt'] = datetime.now() - \
+ timedelta(minutes=conf.FEED_REFRESH_INTERVAL)
feeds = FeedController().read(**filters).all()
if feeds == []:
+ logger.info('No feed to retrieve for {}'.format(user.nickname))
return
# Launch the process for all the feeds
bgstack15