From d0b1505f10488d8b426eb442367fed7c63a870cd Mon Sep 17 00:00:00 2001
From: Cédric Bonhomme
Date: Thu, 19 Feb 2015 18:31:51 +0100
Subject: This check will be used for a few weeks in order to avoid
 duplicates with the new article id (entry_id).

---
 pyaggr3g470r/crawler.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pyaggr3g470r/crawler.py b/pyaggr3g470r/crawler.py
index 3309f4ab..5d7261ff 100644
--- a/pyaggr3g470r/crawler.py
+++ b/pyaggr3g470r/crawler.py
@@ -34,6 +34,7 @@ import feedparser
 import dateutil.parser
 from datetime import datetime
 from bs4 import BeautifulSoup
+from sqlalchemy import or_
 
 from pyaggr3g470r import utils
 from pyaggr3g470r import conf
@@ -177,7 +178,7 @@ def insert_database(user, feed):
     query1 = Article.query.filter(Article.user_id == user.id)
     query2 = query1.filter(Article.feed_id == feed.id)
     for article in articles:
-        exist = query2.filter(Article.entry_id == article.entry_id).count() != 0
+        exist = query2.filter(or_(Article.entry_id==article.entry_id, Article.link==article.link)).count() != 0
         if exist:
             #logger.debug("Article %r (%r) already in the database.", article.title, article.link)
             continue
-- 
cgit
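
For readers who want to see the widened duplicate check in isolation, below is a
minimal standalone sketch. It assumes a simplified, hypothetical Article schema
and plain SQLAlchemy 1.4+ sessions; the project itself queries through
Flask-SQLAlchemy's Article.query, and the real model has more columns than
shown here.

    # Sketch only: simplified schema, not pyaggr3g470r's actual model.
    from sqlalchemy import Column, Integer, String, create_engine, or_
    from sqlalchemy.orm import declarative_base, Session

    Base = declarative_base()

    class Article(Base):
        __tablename__ = "article"
        id = Column(Integer, primary_key=True)
        user_id = Column(Integer)
        feed_id = Column(Integer)
        entry_id = Column(String)  # new identifier from the feed entry
        link = Column(String)      # old de-duplication key (article URL)

    engine = create_engine("sqlite://")  # in-memory database for the demo
    Base.metadata.create_all(engine)

    with Session(engine) as session:
        # An article stored earlier, before the entry_id migration settled.
        session.add(Article(user_id=1, feed_id=2,
                            entry_id="tag:example.org,2015:1",
                            link="http://example.org/post/1"))
        session.commit()

        # Incoming candidate: same entry_id, different link.
        candidate_entry_id = "tag:example.org,2015:1"
        candidate_link = "http://example.org/post/1?utm_source=rss"

        # Matching on either key flags the article as a duplicate whether it
        # was stored under the old link-based scheme or the new entry_id one.
        exist = (
            session.query(Article)
            .filter(Article.user_id == 1, Article.feed_id == 2)
            .filter(or_(Article.entry_id == candidate_entry_id,
                        Article.link == candidate_link))
            .count() != 0
        )
        print(exist)  # True: the entry_id matches even though the link differs

The or_ makes the check deliberately conservative during the transition: an
article already stored under its link is not re-inserted just because it now
also carries an entry_id, at the cost of one slightly broader query per
candidate article.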