From 2091623e0b7663e45b0d116db410ee8cbd101a4a Mon Sep 17 00:00:00 2001
From: Cédric Bonhomme
Date: Wed, 23 Apr 2014 13:30:17 +0200
Subject: Autoindexation of new articles (not on Heroku).

---
 pyaggr3g470r/crawler.py | 28 +++++++++++++++++++++++-----
 1 file changed, 23 insertions(+), 5 deletions(-)

(limited to 'pyaggr3g470r')

diff --git a/pyaggr3g470r/crawler.py b/pyaggr3g470r/crawler.py
index 072a8f25..661eddf0 100644
--- a/pyaggr3g470r/crawler.py
+++ b/pyaggr3g470r/crawler.py
@@ -54,7 +54,7 @@ requests_log.propagate = True
 import models
 import conf
 if not conf.ON_HEROKU:
-    import search
+    import search as fastsearch
 import utils
 
 if not conf.ON_HEROKU:
@@ -112,10 +112,15 @@ class FeedGetter(object):
         # 2 - Fetch the feeds.
         # 'responses' contains all the jobs returned by the function retrieve_async()
         responses = self.retrieve_async(feeds)
+        elements = [item.value for item in responses if item.value is not None]
 
         # 3 - Insert articles in the database
-        self.insert_database([item.value for item in responses if item.value is not None])
-
+        self.insert_database(elements)
+
+        # 4 - Indexation
+        if not conf.ON_HEROKU:
+            self.index(elements)
+
         pyaggr3g470r_log.info("All articles retrieved. End of the processus.")
 
     def retrieve_async(self, feeds):
@@ -227,6 +232,19 @@ class FeedGetter(object):
                 except Exception as e:
                     pyaggr3g470r_log.error("Error when inserting article in database: " + str(e))
                     continue
-        db.session.close()
+        #db.session.close()
         return True
-
\ No newline at end of file
+
+    def index(self, elements):
+        """
+        Index new articles.
+        """
+        pyaggr3g470r_log.info("Indexing new articles.")
+        for feed, articles in elements:
+            for element in articles:
+                article = Article.query.filter(Article.user_id == self.user.id, Article.link == element.link).first()
+                try:
+                    fastsearch.add_to_index([article], article.source)
+                except:
+                    pyaggr3g470r_log.error("Problem during indexation.")
+        return True
\ No newline at end of file
-- 
cgit