diff options
author | Cédric Bonhomme <cedric@cedricbonhomme.org> | 2016-11-03 13:39:27 +0100 |
---|---|---|
committer | Cédric Bonhomme <cedric@cedricbonhomme.org> | 2016-11-03 13:39:27 +0100 |
commit | 955b126903eb46065a1e90a986e6af197481437b (patch) | |
tree | 11ecf96023d6f3c136bd83147081df72f5956984 /src/crawler | |
parent | Fixed an error in the logging after the insertion of an article (diff) | |
download | newspipe-955b126903eb46065a1e90a986e6af197481437b.tar.gz newspipe-955b126903eb46065a1e90a986e6af197481437b.tar.bz2 newspipe-955b126903eb46065a1e90a986e6af197481437b.zip |
Some improvements for the manager and the asyncio crawler.
Diffstat (limited to 'src/crawler')
-rw-r--r-- | src/crawler/classic_crawler.py | 19 |
1 file changed, 10 insertions, 9 deletions
diff --git a/src/crawler/classic_crawler.py b/src/crawler/classic_crawler.py index 4f21a29f..dac34e8c 100644 --- a/src/crawler/classic_crawler.py +++ b/src/crawler/classic_crawler.py @@ -118,7 +118,6 @@ async def insert_database(user, feed): logger.info('Inserting articles for {}'.format(feed.title)) - logger.info('Database insertion for {}'.format(feed.title)) new_articles = [] art_contr = ArticleController(user.id) for article in articles: @@ -133,8 +132,8 @@ async def insert_database(user, feed): if exist: # if the article has been already retrieved, we only update # the content or the title - logger.debug("Article %r (%r) already in the database.", - article['title'], article['link']) + logger.debug('Article already in the database: '. \ + format(article['title'])) existing_article = existing_article_req.first() new_updated_date = None try: @@ -187,11 +186,13 @@ def retrieve_feed(loop, user, feed_id=None): logger.info('Starting to retrieve feeds for {}'.format(user.nickname)) # Get the list of feeds to fetch - user = User.query.filter(User.email == user.email).first() - feeds = [feed for feed in user.feeds if - feed.error_count <= conf.DEFAULT_MAX_ERROR and feed.enabled] + filters = {} + filters['user_id'] = user.id if feed_id is not None: - feeds = [feed for feed in feeds if feed.id == feed_id] + filters['id'] = feed_id + filters['enabled'] = True + filters['error_count__lt'] = conf.DEFAULT_MAX_ERROR + feeds = FeedController().read(**filters).all() if feeds == []: return @@ -203,5 +204,5 @@ def retrieve_feed(loop, user, feed_id=None): loop.run_until_complete(asyncio.wait(tasks)) except Exception: logger.exception('an error occured') - - logger.info("All articles retrieved. End of the processus.") + finally: + logger.info('Articles retrieved for {}'.format(user.nickname)) |