Search function now using MongoDB indexed fulltext searching.

author: Cédric Bonhomme <kimble.mandel@gmail.com> 2013-01-11 18:35:50 +0100
committer: Cédric Bonhomme <kimble.mandel@gmail.com> 2013-01-11 18:35:50 +0100
commit: 216315958b8af26150562eee634bf4774904c0a6 (patch)
tree: ee4799e8f4a84a5c874aa4cd9d30e2c2536ea65d
parent: We'll try a simple index searching algorithm. (diff)
download: newspipe-216315958b8af26150562eee634bf4774904c0a6.tar.gz
newspipe-216315958b8af26150562eee634bf4774904c0a6.tar.bz2
newspipe-216315958b8af26150562eee634bf4774904c0a6.zip
3 files changed, 52 insertions, 41 deletions
diff --git a/source/mongodb.py b/source/mongodb.py
index 846a7a82..e91fb90b 100644
--- a/source/mongodb.py
+++ b/source/mongodb.py
@@ -47,7 +47,8 @@ class Articles(object):
         Creates a new collection for a new feed.
         """
         collection = self.db[new_collection["feed_id"]]
-        #collection.create_index([("feed_link", pymongo.ASCENDING)], {"unique":True, "sparse":True})
+        collection.create_index([("article_date", pymongo.DESCENDING)], {"unique":False, "sparse":False})
+        collection.ensure_index('article_content', pymongo.ASCENDING)
         collection.insert(new_collection)
 
     def add_articles(self, articles, feed_id):
@@ -56,6 +57,8 @@ class Articles(object):
         """
         collection = self.db[str(feed_id)]
         #collection.create_index([("article_date", pymongo.DESCENDING)], {"unique":False, "sparse":False})
+        #collection.ensure_index('article_content', pymongo.ASCENDING)
+        print(collection.index_information())
         for article in articles:
             cursor = collection.find({"article_id":article["article_id"]})
             if cursor.count() == 0:
@@ -219,6 +222,17 @@ class Articles(object):
         collection = self.db[str(feed_id)]
         collection.update({"type": 0, "feed_id":feed_id}, {"$set": changes}, multi=True)
 
+    def full_search(self, term):
+        """
+        Case-insensitive regex search through the content of the articles of every feed.
+        """
+        articles = {}
+        for collection in self.get_all_feeds():
+            result = self.db[collection["feed_id"]].find({'article_content': {'$regex': term, "$options": 'i' }})
+            if result.count() != 0:
+                articles[collection["feed_id"]] = result.sort([("article_date", pymongo.DESCENDING)])
+        return articles
+
     # Functions on database
     def drop_database(self):
         """
diff --git a/source/pyAggr3g470r.py b/source/pyAggr3g470r.py
index 479d8562..6bc45bfd 100755
--- a/source/pyAggr3g470r.py
+++ b/source/pyAggr3g470r.py
@@ -162,14 +162,12 @@ class pyAggr3g470r(object):
         in the description of the article.
         """
         param, _, value = query.partition(':')
-        wordre = re.compile(r'\b%s\b' % param, re.I)
         feed_id = None
         if param == "Feed":
             feed_id, _, query = value.partition(':')
-        feeds = self.mongo.get_all_feeds()
+        search_result = self.mongo.full_search(param)
         tmpl = lookup.get_template("search.html")
-        return tmpl.render(feeds=feeds, feed_id=feed_id, query=query, \
-                        wordre=wordre, mongo=self.mongo)
+        return tmpl.render(search_result=search_result, query=query, feed_id=feed_id, mongo=self.mongo)
 
     search.exposed = True
 
diff --git a/source/templates/search.html b/source/templates/search.html
index 01043f0e..30ec77c1 100644
--- a/source/templates/search.html
+++ b/source/templates/search.html
@@ -9,44 +9,43 @@ import utils
 <br />
 <%
     html = ""
+    feed_id = None
 %>
-%if feed_id is None:
-    %for feed in feeds:
-        <%
-            new_feed_section = True
-            for article in mongo.get_articles(feed["feed_id"]):
-                article_content = utils.clear_string(article["article_content"])
-                if not article_content:
-                    article_content = utils.clear_string(article["article_title"])
-                if wordre.findall(article_content) != []:
-                    if new_feed_section is True:
-                        new_feed_section = False
-                        html += """<h2><a href="/articles/%s" rel="noreferrer" target="_blank">%s</a><a href="%s" rel="noreferrer" target="_blank"><img src="%s" width="28" height="28" /></a></h2>\n""" % \
-                            (feed["feed_id"], feed["feed_title"], feed["feed_link"], feed["feed_image"])
 
-                    if article["article_readed"] == False:
-                        # not readed articles are in bold
-                        not_read_begin, not_read_end = "<b>", "</b>"
-                    else:
-                        not_read_begin, not_read_end = "", ""
+%for feed_id in search_result.keys():
+    <%
+        new_feed_section = True
+        feed = mongo.get_feed(feed_id)
+        print(search_result[feed["feed_id"]].count())
+        for article in search_result[feed["feed_id"]]:
+            article_content = utils.clear_string(article["article_content"])
+            if new_feed_section is True:
+                new_feed_section = False
+                html += """<h2><a href="/articles/%s" rel="noreferrer" target="_blank">%s</a><a href="%s" rel="noreferrer" target="_blank"><img src="%s" width="28" height="28" /></a></h2>\n""" % \
+                    (feed["feed_id"], feed["feed_title"], feed["feed_link"], feed["feed_image"])
 
-                    # display a heart for faved articles
-                    if article["article_like"] == True:
-                        like = """ <img src="/img/heart.png" title="I like this article!" />"""
-                    else:
-                        like = ""
+            if article["article_readed"] == False:
+                # unread articles are in bold
+                not_read_begin, not_read_end = "<b>", "</b>"
+            else:
+                not_read_begin, not_read_end = "", ""
 
-                    # descrition for the CSS ToolTips
-                    description = " ".join(article_content[:500].split(' ')[:-1])
+            # display a heart for faved articles
+            if article["article_like"] == True:
+                like = """ <img src="/img/heart.png" title="I like this article!" />"""
+            else:
+                like = ""
 
-                    # a description line per article (date, title of the article and
-                    # CSS description tooltips on mouse over)
-                    html += article["article_date"].strftime('%Y-%m-%d %H:%M') + " - " + \
-                            """<a class="tooltip" href="/article/%s:%s" rel="noreferrer" target="_blank">%s%s%s<span class="classic">%s</span></a>""" % \
-                                    (feed["feed_id"], article["article_id"], not_read_begin, \
-                                    article["article_title"][:150], not_read_end, description) + like + "<br />\n"
-        %>
-    %endfor
-%endif
-    ${html}
-        
-\ No newline at end of file
+            # description for the CSS tooltips
+            description = " ".join(article_content[:500].split(' ')[:-1])
+
+            # a description line per article (date, title of the article and
+            # CSS description tooltips on mouse over)
+            html += article["article_date"].strftime('%Y-%m-%d %H:%M') + " - " + \
+                    """<a class="tooltip" href="/article/%s:%s" rel="noreferrer" target="_blank">%s%s%s<span class="classic">%s</span></a>""" % \
+                            (feed["feed_id"], article["article_id"], not_read_begin, \
+                            article["article_title"][:150], not_read_end, description) + like + "<br />\n"
+    %>
+%endfor
+
+${html}
+\ No newline at end of file
author	Cédric Bonhomme <kimble.mandel@gmail.com>	2013-01-11 18:35:50 +0100
committer	Cédric Bonhomme <kimble.mandel@gmail.com>	2013-01-11 18:35:50 +0100
commit	216315958b8af26150562eee634bf4774904c0a6 (patch)
tree	ee4799e8f4a84a5c874aa4cd9d30e2c2536ea65d
parent	We'll try a simple index searching algorithm. (diff)
download	newspipe-216315958b8af26150562eee634bf4774904c0a6.tar.gz newspipe-216315958b8af26150562eee634bf4774904c0a6.tar.bz2 newspipe-216315958b8af26150562eee634bf4774904c0a6.zip