aboutsummaryrefslogtreecommitdiff
path: root/src/crawler
diff options
context:
space:
mode:
author: Cédric Bonhomme <cedric@cedricbonhomme.org> 2018-03-10 22:24:33 +0100
committer: Cédric Bonhomme <cedric@cedricbonhomme.org> 2018-03-10 22:24:33 +0100
commit: 8eb8b7377802f5ae2e74796773e1bb972435cfb7 (patch)
tree: c95e5f0374dc2deba3651527a71de4c39ee8c962 /src/crawler
parent: Limit the search for duplicates to 1000 articles. (diff)
download: newspipe-8eb8b7377802f5ae2e74796773e1bb972435cfb7.tar.gz
          newspipe-8eb8b7377802f5ae2e74796773e1bb972435cfb7.tar.bz2
          newspipe-8eb8b7377802f5ae2e74796773e1bb972435cfb7.zip
Ignore duplicate.
Diffstat (limited to 'src/crawler')
-rw-r--r-- src/crawler/default_crawler.py | 1
1 file changed, 1 insertion, 0 deletions
diff --git a/src/crawler/default_crawler.py b/src/crawler/default_crawler.py
index 34726a83..3a60ede4 100644
--- a/src/crawler/default_crawler.py
+++ b/src/crawler/default_crawler.py
@@ -130,6 +130,7 @@ async def insert_database(user, feed):
continue
exist = existing_article_req.count() != 0
if exist:
+ continue
# if the article has been already retrieved, we only update
# the content or the title
logger.info('Article already in the database: {}'. \
bgstack15