Automatic indexing of new articles (only when not running on Heroku).

author: Cédric Bonhomme <kimble.mandel@gmail.com> 2014-04-23 13:30:17 +0200
committer: Cédric Bonhomme <kimble.mandel@gmail.com> 2014-04-23 13:30:17 +0200
commit: 2091623e0b7663e45b0d116db410ee8cbd101a4a (patch)
tree: fa064c6ad4dc8336672c9485695246eb53df5168 /pyaggr3g470r
parent: Updated README. (diff)
download: newspipe-2091623e0b7663e45b0d116db410ee8cbd101a4a.tar.gz
newspipe-2091623e0b7663e45b0d116db410ee8cbd101a4a.tar.bz2
newspipe-2091623e0b7663e45b0d116db410ee8cbd101a4a.zip
1 file changed, 23 insertions, 5 deletions
diff --git a/pyaggr3g470r/crawler.py b/pyaggr3g470r/crawler.py
index 072a8f25..661eddf0 100644
--- a/pyaggr3g470r/crawler.py
+++ b/pyaggr3g470r/crawler.py
@@ -54,7 +54,7 @@ requests_log.propagate = True
 import models
 import conf
 if not conf.ON_HEROKU:
-    import search
+    import search as fastsearch
 import utils
 
 if not conf.ON_HEROKU:
@@ -112,10 +112,15 @@ class FeedGetter(object):
         # 2 - Fetch the feeds.
         # 'responses' contains all the jobs returned by the function retrieve_async()
         responses = self.retrieve_async(feeds)
+        elements = [item.value for item in responses if item.value is not None]
 
         # 3 - Insert articles in the database
-        self.insert_database([item.value for item in responses if item.value is not None])
-        
+        self.insert_database(elements)
+
+        # 4 - Indexation
+        if not conf.ON_HEROKU:
+            self.index(elements)
+
         pyaggr3g470r_log.info("All articles retrieved. End of the processus.")
 
     def retrieve_async(self, feeds):
@@ -227,6 +232,19 @@ class FeedGetter(object):
                 except Exception as e:
                     pyaggr3g470r_log.error("Error when inserting article in database: " + str(e))
                     continue
-        db.session.close()
+        #db.session.close()
         return True
-        
-\ No newline at end of file
+
+    def index(self, elements):
+        """
+        Index new articles.
+        """
+        pyaggr3g470r_log.info("Indexing new articles.")
+        for feed, articles in elements:
+            for element in articles:
+                article = Article.query.filter(Article.user_id == self.user.id, Article.link == element.link).first()
+                try:
+                    fastsearch.add_to_index([article], article.source)
+                except:
+                    pyaggr3g470r_log.error("Problem during indexation.")
+        return True
+\ No newline at end of file
author	Cédric Bonhomme <kimble.mandel@gmail.com>	2014-04-23 13:30:17 +0200
committer	Cédric Bonhomme <kimble.mandel@gmail.com>	2014-04-23 13:30:17 +0200
commit	2091623e0b7663e45b0d116db410ee8cbd101a4a (patch)
tree	fa064c6ad4dc8336672c9485695246eb53df5168 /pyaggr3g470r
parent	Updated README. (diff)
download	newspipe-2091623e0b7663e45b0d116db410ee8cbd101a4a.tar.gz newspipe-2091623e0b7663e45b0d116db410ee8cbd101a4a.tar.bz2 newspipe-2091623e0b7663e45b0d116db410ee8cbd101a4a.zip