aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCédric Bonhomme <cedric@cedricbonhomme.org>2015-02-19 18:31:51 +0100
committerCédric Bonhomme <cedric@cedricbonhomme.org>2015-02-19 18:31:51 +0100
commitd0b1505f10488d8b426eb442367fed7c63a870cd (patch)
treeeac6e58e2b25249cfa1c996b161762bb19c917a2
parentIt is now useless to test the value of article.date at this point. (diff)
downloadnewspipe-d0b1505f10488d8b426eb442367fed7c63a870cd.tar.gz
newspipe-d0b1505f10488d8b426eb442367fed7c63a870cd.tar.bz2
newspipe-d0b1505f10488d8b426eb442367fed7c63a870cd.zip
This test will be used for a few weeks in order to avoid duplicates with the new article id (entry_id).
-rw-r--r--pyaggr3g470r/crawler.py3
1 files changed, 2 insertions, 1 deletions
diff --git a/pyaggr3g470r/crawler.py b/pyaggr3g470r/crawler.py
index 3309f4ab..5d7261ff 100644
--- a/pyaggr3g470r/crawler.py
+++ b/pyaggr3g470r/crawler.py
@@ -34,6 +34,7 @@ import feedparser
import dateutil.parser
from datetime import datetime
from bs4 import BeautifulSoup
+from sqlalchemy import or_
from pyaggr3g470r import utils
from pyaggr3g470r import conf
@@ -177,7 +178,7 @@ def insert_database(user, feed):
query1 = Article.query.filter(Article.user_id == user.id)
query2 = query1.filter(Article.feed_id == feed.id)
for article in articles:
- exist = query2.filter(Article.entry_id == article.entry_id).count() != 0
+ exist = query2.filter(or_(Article.entry_id==article.entry_id, Article.link==article.link)).count() != 0
if exist:
#logger.debug("Article %r (%r) already in the database.", article.title, article.link)
continue
bgstack15