diff options
author | Cédric Bonhomme <cedric@cedricbonhomme.org> | 2014-07-13 10:16:36 +0200 |
---|---|---|
committer | Cédric Bonhomme <cedric@cedricbonhomme.org> | 2014-07-13 10:16:36 +0200 |
commit | bab3cad4aef5e26e5c0662dbc1fc4775b4356b21 (patch) | |
tree | 1491f9593c22c5177a97dcdf2696b108b1a833af /pyaggr3g470r | |
parent | Fixed a bad link. (diff) | |
download | newspipe-bab3cad4aef5e26e5c0662dbc1fc4775b4356b21.tar.gz newspipe-bab3cad4aef5e26e5c0662dbc1fc4775b4356b21.tar.bz2 newspipe-bab3cad4aef5e26e5c0662dbc1fc4775b4356b21.zip |
Minor improvements for the crawler.
Diffstat (limited to 'pyaggr3g470r')
-rw-r--r-- | pyaggr3g470r/crawler.py | 17 |
1 files changed, 4 insertions, 13 deletions
diff --git a/pyaggr3g470r/crawler.py b/pyaggr3g470r/crawler.py index 70c2b441..0f820373 100644 --- a/pyaggr3g470r/crawler.py +++ b/pyaggr3g470r/crawler.py @@ -231,14 +231,11 @@ class FeedGetter(object): logger.info("Database insertion...") new_articles = [] for feed, articles in elements: - for article in articles: - - exist = Article.query.filter(Article.user_id == self.user.id, Article.feed_id == feed.id, - Article.link == article.link).first() - if exist is not None: + Article.link == article.link).count() != 0 + if exist: logger.debug("Article %r (%r) already in the database.", article.title, article.link) continue @@ -251,14 +248,8 @@ class FeedGetter(object): feed.articles.append(article) #db.session.merge(article) db.session.commit() - logger.info("New article %r (%r) added.", - article.title, article.link) - except IntegrityError: - logger.debug("Article %r (%r) already in the database.", - article.title, article.link) - articles.remove(article) - db.session.rollback() - continue + logger.info("New article %r (%r) added.", article.title, + article.link) except Exception as e: logger.error("Error when inserting article in database: " + str(e)) continue |