aboutsummaryrefslogtreecommitdiff
path: root/benchmark/testwhoosh.py
diff options
context:
space:
mode:
author Cédric Bonhomme <cedric@cedricbonhomme.org> 2014-02-03 19:38:50 +0100
committer Cédric Bonhomme <cedric@cedricbonhomme.org> 2014-02-03 19:38:50 +0100
commit a6bc5bf8d7d003b6cf4e623485b330e1e2830703 (patch)
tree 0495b71bcb0ec6e9b72dfa954d11c7ed857aee7a /benchmark/testwhoosh.py
parent Removed import of 'conf' module. (diff)
download newspipe-a6bc5bf8d7d003b6cf4e623485b330e1e2830703.tar.gz
newspipe-a6bc5bf8d7d003b6cf4e623485b330e1e2830703.tar.bz2
newspipe-a6bc5bf8d7d003b6cf4e623485b330e1e2830703.zip
Added naive benchmarks for Whoosh and ElasticSearch.
Diffstat (limited to 'benchmark/testwhoosh.py')
-rw-r--r-- benchmark/testwhoosh.py 50
1 files changed, 50 insertions, 0 deletions
diff --git a/benchmark/testwhoosh.py b/benchmark/testwhoosh.py
new file mode 100644
index 00000000..b488dcd6
--- /dev/null
+++ b/benchmark/testwhoosh.py
@@ -0,0 +1,50 @@
+#! /usr/bin/env python
+#-*- coding: utf-8 -*-
+
+
+import os
+
+from whoosh.index import create_in, open_dir
+from whoosh.index import EmptyIndexError
+from whoosh.fields import *
+from whoosh.query import *
+from whoosh.qparser import QueryParser
+
+from pyaggr3g470r import utils
+
+# Directory holding the Whoosh index; passed to create_in() and open_dir().
+indexdir = "./pyaggr3g470r/var/indexdir"
+
+# Index schema: a stored text field `title` and a text field `content`.
+schema = Schema(title=TEXT(stored=True), content=TEXT)
+
+def create_index(articles):
+ """
+ Creates the index.
+ """
+ ix = create_in(indexdir, schema)
+ writer = ix.writer()
+ for article in articles:
+ writer.add_document(content=utils.clear_string(article.content))
+ writer.commit()
+
+def search(term):
+ """
+ Search for `term` in the index.
+ Returns a list of articles.
+ """
+ try:
+ ix = open_dir(indexdir)
+ except (EmptyIndexError, OSError) as e:
+ raise EmptyIndexError
+ with ix.searcher() as searcher:
+ query = QueryParser("content", ix.schema).parse(term)
+ results = searcher.search(query, limit=None)
+ #return [(article["feed_id"], article["article_id"]) for article in results]
+
+
+if __name__ == "__main__":
+ # Point of entry in execution mode.
+ #create_index()
+ print(nb_documents())
+ results = search("Nothomb")
+ for article in results:
+ print(article)
bgstack15