From 02709a4c1e3074f75809997445bee2b1214fff51 Mon Sep 17 00:00:00 2001
From: Cédric Bonhomme
Date: Sun, 13 Jul 2014 15:02:07 +0200
Subject: Performance improvement for the crawler (database insertion step).

---
 pyaggr3g470r/crawler.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'pyaggr3g470r')

diff --git a/pyaggr3g470r/crawler.py b/pyaggr3g470r/crawler.py
index 0f820373..c3978413 100644
--- a/pyaggr3g470r/crawler.py
+++ b/pyaggr3g470r/crawler.py
@@ -230,11 +230,11 @@ class FeedGetter(object):
         """
         logger.info("Database insertion...")
         new_articles = []
+        query1 = Article.query.filter(Article.user_id == self.user.id)
         for feed, articles in elements:
+            query2 = query1.filter(Article.feed_id == feed.id)
             for article in articles:
-                exist = Article.query.filter(Article.user_id == self.user.id,
-                                             Article.feed_id == feed.id,
-                                             Article.link == article.link).count() != 0
+                exist = query2.filter(Article.link == article.link).count() != 0
                 if exist:
                     logger.debug("Article %r (%r) already in the database.",
                                  article.title, article.link)
--
cgit