author     Cédric Bonhomme <cedric@cedricbonhomme.org>  2016-11-03 13:39:27 +0100
committer  Cédric Bonhomme <cedric@cedricbonhomme.org>  2016-11-03 13:39:27 +0100
commit     955b126903eb46065a1e90a986e6af197481437b (patch)
tree       11ecf96023d6f3c136bd83147081df72f5956984 /src
parent     Fixed an error in the logging after the insertion of an article (diff)
Some improvements for the manager and the asyncio crawler.
Diffstat (limited to 'src')
-rw-r--r--  src/crawler/classic_crawler.py  19
-rwxr-xr-x  src/manager.py                   9
2 files changed, 18 insertions(+), 10 deletions(-)
diff --git a/src/crawler/classic_crawler.py b/src/crawler/classic_crawler.py
index 4f21a29f..dac34e8c 100644
--- a/src/crawler/classic_crawler.py
+++ b/src/crawler/classic_crawler.py
@@ -118,7 +118,6 @@ async def insert_database(user, feed):
logger.info('Inserting articles for {}'.format(feed.title))
- logger.info('Database insertion for {}'.format(feed.title))
new_articles = []
art_contr = ArticleController(user.id)
for article in articles:
@@ -133,8 +132,8 @@ async def insert_database(user, feed):
if exist:
# if the article has been already retrieved, we only update
# the content or the title
- logger.debug("Article %r (%r) already in the database.",
- article['title'], article['link'])
+ logger.debug('Article already in the database: {}'.format(article['title']))
existing_article = existing_article_req.first()
new_updated_date = None
try:
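A side note on the two logging styles in the hunk above: the removed call passes %-style arguments, which the logging module interpolates lazily, only if a handler actually emits the record, while str.format() builds the message eagerly even when DEBUG is disabled. A minimal, self-contained illustration (the values are made up):

    import logging

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__name__)

    title, link = 'Some article', 'https://example.org/a'

    # %-style arguments: interpolation is deferred until the record is
    # emitted, so nothing is formatted here because the level is INFO.
    logger.debug("Article %r (%r) already in the database.", title, link)

    # str.format(): the message is built eagerly, even though the
    # DEBUG record is then discarded.
    logger.debug('Article already in the database: {}'.format(title))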
@@ -187,11 +186,13 @@ def retrieve_feed(loop, user, feed_id=None):
logger.info('Starting to retrieve feeds for {}'.format(user.nickname))
# Get the list of feeds to fetch
- user = User.query.filter(User.email == user.email).first()
- feeds = [feed for feed in user.feeds if
- feed.error_count <= conf.DEFAULT_MAX_ERROR and feed.enabled]
+ filters = {}
+ filters['user_id'] = user.id
if feed_id is not None:
- feeds = [feed for feed in feeds if feed.id == feed_id]
+ filters['id'] = feed_id
+ filters['enabled'] = True
+ filters['error_count__lt'] = conf.DEFAULT_MAX_ERROR
+ feeds = FeedController().read(**filters).all()
if feeds == []:
return
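The rewritten query above replaces the in-Python list comprehension with FeedController().read(**filters), using Django-style lookup suffixes such as error_count__lt; note this also tightens the error-count test from <= to a strict <. How the controller maps those suffixes onto the ORM is not shown in this diff; below is a hypothetical sketch of the common translation into SQLAlchemy clauses (names and operator set are assumptions, not newspipe's actual code):

    from sqlalchemy import and_

    # Hypothetical lookup table: 'field__op' suffixes -> SQLAlchemy operators.
    _OPERATORS = {
        'lt': lambda col, val: col < val,
        'le': lambda col, val: col <= val,
        'gt': lambda col, val: col > val,
        'in': lambda col, val: col.in_(val),
    }

    def build_clauses(model, **filters):
        clauses = []
        for key, value in filters.items():
            field, _, op = key.partition('__')
            column = getattr(model, field)
            clauses.append(_OPERATORS[op](column, value) if op
                           else column == value)
        return and_(*clauses)

    # e.g. session.query(Feed).filter(
    #     build_clauses(Feed, user_id=user.id, enabled=True,
    #                   error_count__lt=3))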
@@ -203,5 +204,5 @@ def retrieve_feed(loop, user, feed_id=None):
loop.run_until_complete(asyncio.wait(tasks))
except Exception:
logger.exception('an error occurred')
-
- logger.info("All articles retrieved. End of the processus.")
+ finally:
+ logger.info('Articles retrieved for {}'.format(user.nickname))
diff --git a/src/manager.py b/src/manager.py
index 47a88339..46f8fe10 100755
--- a/src/manager.py
+++ b/src/manager.py
@@ -3,6 +3,7 @@
import os
import logging
+from datetime import datetime
from werkzeug import generate_password_hash
from bootstrap import application, db, conf, set_logging
from flask_script import Manager
@@ -67,11 +68,17 @@ def fetch_asyncio(user_id=None, feed_id=None):
except:
feed_id = None
+ logger.info('Starting crawler.')
+
+ start = datetime.now()
loop = asyncio.get_event_loop()
for user in users:
- logger.info("Fetching articles for " + user.nickname)
classic_crawler.retrieve_feed(loop, user, feed_id)
loop.close()
+ end = datetime.now()
+
+ logger.info('Crawler finished in {} seconds.'
+             .format((end - start).total_seconds()))
if __name__ == '__main__':
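One detail about the duration reported above: timedelta.seconds is only the seconds component of the delta and wraps past one day, while total_seconds() returns the whole duration as a float, which is why it is the safer accessor here. A quick illustration:

    from datetime import timedelta

    delta = timedelta(days=1, seconds=5)
    print(delta.seconds)          # 5 -- just the seconds component
    print(delta.total_seconds())  # 86405.0 -- the full duration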