diff options
author | Cédric Bonhomme <cedric@cedricbonhomme.org> | 2015-02-19 18:31:51 +0100 |
---|---|---|
committer | Cédric Bonhomme <cedric@cedricbonhomme.org> | 2015-02-19 18:31:51 +0100 |
commit | d0b1505f10488d8b426eb442367fed7c63a870cd (patch) | |
tree | eac6e58e2b25249cfa1c996b161762bb19c917a2 | |
parent | It is now useless to test the value of article.date at this point. (diff) | |
download | newspipe-d0b1505f10488d8b426eb442367fed7c63a870cd.tar.gz newspipe-d0b1505f10488d8b426eb442367fed7c63a870cd.tar.bz2 newspipe-d0b1505f10488d8b426eb442367fed7c63a870cd.zip |
This test will be used for some weeks in order to avoid duplicates with the new article id (entry_id).
-rw-r--r-- | pyaggr3g470r/crawler.py | 3 |
1 files changed, 2 insertions, 1 deletions
diff --git a/pyaggr3g470r/crawler.py b/pyaggr3g470r/crawler.py index 3309f4ab..5d7261ff 100644 --- a/pyaggr3g470r/crawler.py +++ b/pyaggr3g470r/crawler.py @@ -34,6 +34,7 @@ import feedparser import dateutil.parser from datetime import datetime from bs4 import BeautifulSoup +from sqlalchemy import or_ from pyaggr3g470r import utils from pyaggr3g470r import conf @@ -177,7 +178,7 @@ def insert_database(user, feed): query1 = Article.query.filter(Article.user_id == user.id) query2 = query1.filter(Article.feed_id == feed.id) for article in articles: - exist = query2.filter(Article.entry_id == article.entry_id).count() != 0 + exist = query2.filter(or_(Article.entry_id==article.entry_id, Article.link==article.link)).count() != 0 if exist: #logger.debug("Article %r (%r) already in the database.", article.title, article.link) continue |