diff options
author | Cédric Bonhomme <cedric@cedricbonhomme.org> | 2018-03-10 22:24:33 +0100 |
---|---|---|
committer | Cédric Bonhomme <cedric@cedricbonhomme.org> | 2018-03-10 22:24:33 +0100 |
commit | 8eb8b7377802f5ae2e74796773e1bb972435cfb7 (patch) | |
tree | c95e5f0374dc2deba3651527a71de4c39ee8c962 /src/crawler | |
parent | Limit the search for duplicates to 1000 articles. (diff) | |
download | newspipe-8eb8b7377802f5ae2e74796773e1bb972435cfb7.tar.gz newspipe-8eb8b7377802f5ae2e74796773e1bb972435cfb7.tar.bz2 newspipe-8eb8b7377802f5ae2e74796773e1bb972435cfb7.zip |
Ignore duplicate.
Diffstat (limited to 'src/crawler')
-rw-r--r-- | src/crawler/default_crawler.py | 1 |
1 file changed, 1 insertion, 0 deletions
diff --git a/src/crawler/default_crawler.py b/src/crawler/default_crawler.py
index 34726a83..3a60ede4 100644
--- a/src/crawler/default_crawler.py
+++ b/src/crawler/default_crawler.py
@@ -130,6 +130,7 @@ async def insert_database(user, feed):
             continue
         exist = existing_article_req.count() != 0
         if exist:
+            continue
             # if the article has been already retrieved, we only update
             # the content or the title
             logger.info('Article already in the database: {}'. \