From 216315958b8af26150562eee634bf4774904c0a6 Mon Sep 17 00:00:00 2001 From: Cédric Bonhomme Date: Fri, 11 Jan 2013 18:35:50 +0100 Subject: Search function now using MongoDB indexed fulltext searching. --- source/mongodb.py | 16 +++++++++- source/pyAggr3g470r.py | 6 ++-- source/templates/search.html | 71 ++++++++++++++++++++++---------------------- 3 files changed, 52 insertions(+), 41 deletions(-) diff --git a/source/mongodb.py b/source/mongodb.py index 846a7a82..e91fb90b 100644 --- a/source/mongodb.py +++ b/source/mongodb.py @@ -47,7 +47,8 @@ class Articles(object): Creates a new collection for a new feed. """ collection = self.db[new_collection["feed_id"]] - #collection.create_index([("feed_link", pymongo.ASCENDING)], {"unique":True, "sparse":True}) + collection.create_index([("article_date", pymongo.DESCENDING)], {"unique":False, "sparse":False}) + collection.ensure_index('article_content', pymongo.ASCENDING) collection.insert(new_collection) def add_articles(self, articles, feed_id): @@ -56,6 +57,8 @@ class Articles(object): """ collection = self.db[str(feed_id)] #collection.create_index([("article_date", pymongo.DESCENDING)], {"unique":False, "sparse":False}) + #collection.ensure_index('article_content', pymongo.ASCENDING) + print(collection.index_information()) for article in articles: cursor = collection.find({"article_id":article["article_id"]}) if cursor.count() == 0: @@ -219,6 +222,17 @@ class Articles(object): collection = self.db[str(feed_id)] collection.update({"type": 0, "feed_id":feed_id}, {"$set": changes}, multi=True) + def full_search(self, term): + """ + Indexed full text search through content of articles. + """ + articles = {} + for collection in self.get_all_feeds(): + result = self.db[collection["feed_id"]].find({'article_content': {'$regex': term, "$options": 'i' }}) + if result.count() != 0: + articles[collection["feed_id"]] = result.sort([("article_date", pymongo.DESCENDING)]) + return articles + # Functions on database def drop_database(self): """ diff --git a/source/pyAggr3g470r.py b/source/pyAggr3g470r.py index 479d8562..6bc45bfd 100755 --- a/source/pyAggr3g470r.py +++ b/source/pyAggr3g470r.py @@ -162,14 +162,12 @@ class pyAggr3g470r(object): in the description of the article. """ param, _, value = query.partition(':') - wordre = re.compile(r'\b%s\b' % param, re.I) feed_id = None if param == "Feed": feed_id, _, query = value.partition(':') - feeds = self.mongo.get_all_feeds() + search_result = self.mongo.full_search(param) tmpl = lookup.get_template("search.html") - return tmpl.render(feeds=feeds, feed_id=feed_id, query=query, \ - wordre=wordre, mongo=self.mongo) + return tmpl.render(search_result=search_result, query=query, feed_id=feed_id, mongo=self.mongo) search.exposed = True diff --git a/source/templates/search.html b/source/templates/search.html index 01043f0e..30ec77c1 100644 --- a/source/templates/search.html +++ b/source/templates/search.html @@ -9,44 +9,43 @@ import utils
<% html = "" + feed_id = None %> -%if feed_id is None: - %for feed in feeds: - <% - new_feed_section = True - for article in mongo.get_articles(feed["feed_id"]): - article_content = utils.clear_string(article["article_content"]) - if not article_content: - article_content = utils.clear_string(article["article_title"]) - if wordre.findall(article_content) != []: - if new_feed_section is True: - new_feed_section = False - html += """

%s

\n""" % \ - (feed["feed_id"], feed["feed_title"], feed["feed_link"], feed["feed_image"]) - if article["article_readed"] == False: - # not readed articles are in bold - not_read_begin, not_read_end = "", "" - else: - not_read_begin, not_read_end = "", "" +%for feed_id in search_result.keys(): + <% + new_feed_section = True + feed = mongo.get_feed(feed_id) + print(search_result[feed["feed_id"]].count()) + for article in search_result[feed["feed_id"]]: + article_content = utils.clear_string(article["article_content"]) + if new_feed_section is True: + new_feed_section = False + html += """

%s

\n""" % \ + (feed["feed_id"], feed["feed_title"], feed["feed_link"], feed["feed_image"]) - # display a heart for faved articles - if article["article_like"] == True: - like = """ """ - else: - like = "" + if article["article_readed"] == False: + # not readed articles are in bold + not_read_begin, not_read_end = "", "" + else: + not_read_begin, not_read_end = "", "" - # descrition for the CSS ToolTips - description = " ".join(article_content[:500].split(' ')[:-1]) + # display a heart for faved articles + if article["article_like"] == True: + like = """ """ + else: + like = "" - # a description line per article (date, title of the article and - # CSS description tooltips on mouse over) - html += article["article_date"].strftime('%Y-%m-%d %H:%M') + " - " + \ - """%s%s%s%s""" % \ - (feed["feed_id"], article["article_id"], not_read_begin, \ - article["article_title"][:150], not_read_end, description) + like + "
\n" - %> - %endfor -%endif - ${html} - \ No newline at end of file + # descrition for the CSS ToolTips + description = " ".join(article_content[:500].split(' ')[:-1]) + + # a description line per article (date, title of the article and + # CSS description tooltips on mouse over) + html += article["article_date"].strftime('%Y-%m-%d %H:%M') + " - " + \ + """%s%s%s%s""" % \ + (feed["feed_id"], article["article_id"], not_read_begin, \ + article["article_title"][:150], not_read_end, description) + like + "
\n" + %> +%endfor + +${html} \ No newline at end of file -- cgit