diff options
author | Cédric Bonhomme <kimble.mandel@gmail.com> | 2014-04-23 13:30:17 +0200 |
---|---|---|
committer | Cédric Bonhomme <kimble.mandel@gmail.com> | 2014-04-23 13:30:17 +0200 |
commit | 2091623e0b7663e45b0d116db410ee8cbd101a4a (patch) | |
tree | fa064c6ad4dc8336672c9485695246eb53df5168 | |
parent | Updated README. (diff) | |
download | newspipe-2091623e0b7663e45b0d116db410ee8cbd101a4a.tar.gz newspipe-2091623e0b7663e45b0d116db410ee8cbd101a4a.tar.bz2 newspipe-2091623e0b7663e45b0d116db410ee8cbd101a4a.zip |
Automatic indexing of new articles (not on Heroku).
-rw-r--r-- | pyaggr3g470r/crawler.py | 28 |
1 files changed, 23 insertions, 5 deletions
diff --git a/pyaggr3g470r/crawler.py b/pyaggr3g470r/crawler.py index 072a8f25..661eddf0 100644 --- a/pyaggr3g470r/crawler.py +++ b/pyaggr3g470r/crawler.py @@ -54,7 +54,7 @@ requests_log.propagate = True import models import conf if not conf.ON_HEROKU: - import search + import search as fastsearch import utils if not conf.ON_HEROKU: @@ -112,10 +112,15 @@ class FeedGetter(object): # 2 - Fetch the feeds. # 'responses' contains all the jobs returned by the function retrieve_async() responses = self.retrieve_async(feeds) + elements = [item.value for item in responses if item.value is not None] # 3 - Insert articles in the database - self.insert_database([item.value for item in responses if item.value is not None]) - + self.insert_database(elements) + + # 4 - Indexation + if not conf.ON_HEROKU: + self.index(elements) + pyaggr3g470r_log.info("All articles retrieved. End of the processus.") def retrieve_async(self, feeds): @@ -227,6 +232,19 @@ class FeedGetter(object): except Exception as e: pyaggr3g470r_log.error("Error when inserting article in database: " + str(e)) continue - db.session.close() + #db.session.close() return True -
\ No newline at end of file + + def index(self, elements): + """ + Index new articles. + """ + pyaggr3g470r_log.info("Indexing new articles.") + for feed, articles in elements: + for element in articles: + article = Article.query.filter(Article.user_id == self.user.id, Article.link == element.link).first() + try: + fastsearch.add_to_index([article], article.source) + except: + pyaggr3g470r_log.error("Problem during indexation.") + return True
\ No newline at end of file |