From 8eb8b7377802f5ae2e74796773e1bb972435cfb7 Mon Sep 17 00:00:00 2001 From: Cédric Bonhomme Date: Sat, 10 Mar 2018 22:24:33 +0100 Subject: Ignore duplicate. --- src/crawler/default_crawler.py | 1 + 1 file changed, 1 insertion(+) (limited to 'src') diff --git a/src/crawler/default_crawler.py b/src/crawler/default_crawler.py index 34726a83..3a60ede4 100644 --- a/src/crawler/default_crawler.py +++ b/src/crawler/default_crawler.py @@ -130,6 +130,7 @@ async def insert_database(user, feed): continue exist = existing_article_req.count() != 0 if exist: + continue # if the article has been already retrieved, we only update # the content or the title logger.info('Article already in the database: {}'. \ -- cgit