about summary refs log tree commit diff
diff options
context:
space:
mode:
author Cédric Bonhomme <kimble.mandel@gmail.com> 2013-01-11 18:35:50 +0100
committer Cédric Bonhomme <kimble.mandel@gmail.com> 2013-01-11 18:35:50 +0100
commit 216315958b8af26150562eee634bf4774904c0a6 (patch)
tree ee4799e8f4a84a5c874aa4cd9d30e2c2536ea65d
parent We'll try a simple index searching algorithm. (diff)
download newspipe-216315958b8af26150562eee634bf4774904c0a6.tar.gz
newspipe-216315958b8af26150562eee634bf4774904c0a6.tar.bz2
newspipe-216315958b8af26150562eee634bf4774904c0a6.zip
Search function now using MongoDB indexed fulltext searching.
-rw-r--r-- source/mongodb.py 16
-rwxr-xr-x source/pyAggr3g470r.py 6
-rw-r--r-- source/templates/search.html 71
3 files changed, 52 insertions, 41 deletions
diff --git a/source/mongodb.py b/source/mongodb.py
index 846a7a82..e91fb90b 100644
--- a/source/mongodb.py
+++ b/source/mongodb.py
@@ -47,7 +47,8 @@ class Articles(object):
Creates a new collection for a new feed.
"""
collection = self.db[new_collection["feed_id"]]
- #collection.create_index([("feed_link", pymongo.ASCENDING)], {"unique":True, "sparse":True})
+ collection.create_index([("article_date", pymongo.DESCENDING)], {"unique":False, "sparse":False})
+ collection.ensure_index('article_content', pymongo.ASCENDING)
collection.insert(new_collection)
def add_articles(self, articles, feed_id):
@@ -56,6 +57,8 @@ class Articles(object):
"""
collection = self.db[str(feed_id)]
#collection.create_index([("article_date", pymongo.DESCENDING)], {"unique":False, "sparse":False})
+ #collection.ensure_index('article_content', pymongo.ASCENDING)
+ print(collection.index_information())
for article in articles:
cursor = collection.find({"article_id":article["article_id"]})
if cursor.count() == 0:
@@ -219,6 +222,17 @@ class Articles(object):
collection = self.db[str(feed_id)]
collection.update({"type": 0, "feed_id":feed_id}, {"$set": changes}, multi=True)
+ def full_search(self, term):
+ """
+ Indexed full text search through content of articles.
+ """
+ articles = {}
+ for collection in self.get_all_feeds():
+ result = self.db[collection["feed_id"]].find({'article_content': {'$regex': term, "$options": 'i' }})
+ if result.count() != 0:
+ articles[collection["feed_id"]] = result.sort([("article_date", pymongo.DESCENDING)])
+ return articles
+
# Functions on database
def drop_database(self):
"""
diff --git a/source/pyAggr3g470r.py b/source/pyAggr3g470r.py
index 479d8562..6bc45bfd 100755
--- a/source/pyAggr3g470r.py
+++ b/source/pyAggr3g470r.py
@@ -162,14 +162,12 @@ class pyAggr3g470r(object):
in the description of the article.
"""
param, _, value = query.partition(':')
- wordre = re.compile(r'\b%s\b' % param, re.I)
feed_id = None
if param == "Feed":
feed_id, _, query = value.partition(':')
- feeds = self.mongo.get_all_feeds()
+ search_result = self.mongo.full_search(param)
tmpl = lookup.get_template("search.html")
- return tmpl.render(feeds=feeds, feed_id=feed_id, query=query, \
- wordre=wordre, mongo=self.mongo)
+ return tmpl.render(search_result=search_result, query=query, feed_id=feed_id, mongo=self.mongo)
search.exposed = True
diff --git a/source/templates/search.html b/source/templates/search.html
index 01043f0e..30ec77c1 100644
--- a/source/templates/search.html
+++ b/source/templates/search.html
@@ -9,44 +9,43 @@ import utils
<br />
<%
html = ""
+ feed_id = None
%>
-%if feed_id is None:
- %for feed in feeds:
- <%
- new_feed_section = True
- for article in mongo.get_articles(feed["feed_id"]):
- article_content = utils.clear_string(article["article_content"])
- if not article_content:
- article_content = utils.clear_string(article["article_title"])
- if wordre.findall(article_content) != []:
- if new_feed_section is True:
- new_feed_section = False
- html += """<h2><a href="/articles/%s" rel="noreferrer" target="_blank">%s</a><a href="%s" rel="noreferrer" target="_blank"><img src="%s" width="28" height="28" /></a></h2>\n""" % \
- (feed["feed_id"], feed["feed_title"], feed["feed_link"], feed["feed_image"])
- if article["article_readed"] == False:
- # not readed articles are in bold
- not_read_begin, not_read_end = "<b>", "</b>"
- else:
- not_read_begin, not_read_end = "", ""
+%for feed_id in search_result.keys():
+ <%
+ new_feed_section = True
+ feed = mongo.get_feed(feed_id)
+ print(search_result[feed["feed_id"]].count())
+ for article in search_result[feed["feed_id"]]:
+ article_content = utils.clear_string(article["article_content"])
+ if new_feed_section is True:
+ new_feed_section = False
+ html += """<h2><a href="/articles/%s" rel="noreferrer" target="_blank">%s</a><a href="%s" rel="noreferrer" target="_blank"><img src="%s" width="28" height="28" /></a></h2>\n""" % \
+ (feed["feed_id"], feed["feed_title"], feed["feed_link"], feed["feed_image"])
- # display a heart for faved articles
- if article["article_like"] == True:
- like = """ <img src="/img/heart.png" title="I like this article!" />"""
- else:
- like = ""
+ if article["article_readed"] == False:
+ # not readed articles are in bold
+ not_read_begin, not_read_end = "<b>", "</b>"
+ else:
+ not_read_begin, not_read_end = "", ""
- # descrition for the CSS ToolTips
- description = " ".join(article_content[:500].split(' ')[:-1])
+ # display a heart for faved articles
+ if article["article_like"] == True:
+ like = """ <img src="/img/heart.png" title="I like this article!" />"""
+ else:
+ like = ""
- # a description line per article (date, title of the article and
- # CSS description tooltips on mouse over)
- html += article["article_date"].strftime('%Y-%m-%d %H:%M') + " - " + \
- """<a class="tooltip" href="/article/%s:%s" rel="noreferrer" target="_blank">%s%s%s<span class="classic">%s</span></a>""" % \
- (feed["feed_id"], article["article_id"], not_read_begin, \
- article["article_title"][:150], not_read_end, description) + like + "<br />\n"
- %>
- %endfor
-%endif
- ${html}
- \ No newline at end of file
+ # descrition for the CSS ToolTips
+ description = " ".join(article_content[:500].split(' ')[:-1])
+
+ # a description line per article (date, title of the article and
+ # CSS description tooltips on mouse over)
+ html += article["article_date"].strftime('%Y-%m-%d %H:%M') + " - " + \
+ """<a class="tooltip" href="/article/%s:%s" rel="noreferrer" target="_blank">%s%s%s<span class="classic">%s</span></a>""" % \
+ (feed["feed_id"], article["article_id"], not_read_begin, \
+ article["article_title"][:150], not_read_end, description) + like + "<br />\n"
+ %>
+%endfor
+
+${html} \ No newline at end of file
bgstack15