aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Cédric Bonhomme <cedric@cedricbonhomme.org> 2014-02-03 19:38:50 +0100
committer Cédric Bonhomme <cedric@cedricbonhomme.org> 2014-02-03 19:38:50 +0100
commit a6bc5bf8d7d003b6cf4e623485b330e1e2830703 (patch)
tree 0495b71bcb0ec6e9b72dfa954d11c7ed857aee7a
parent Removed import of 'conf' module. (diff)
download newspipe-a6bc5bf8d7d003b6cf4e623485b330e1e2830703.tar.gz
newspipe-a6bc5bf8d7d003b6cf4e623485b330e1e2830703.tar.bz2
newspipe-a6bc5bf8d7d003b6cf4e623485b330e1e2830703.zip
Added naive benchmarks for Whoosh and ElasticSearch.
-rw-r--r-- benchmark/__init__.py 1
-rw-r--r-- benchmark/testelasticsearch.py 58
-rw-r--r-- benchmark/testwhoosh.py 50
-rw-r--r-- runbenchmark.py 73
4 files changed, 182 insertions, 0 deletions
diff --git a/benchmark/__init__.py b/benchmark/__init__.py
new file mode 100644
index 00000000..8d1c8b69
--- /dev/null
+++ b/benchmark/__init__.py
@@ -0,0 +1 @@
+
diff --git a/benchmark/testelasticsearch.py b/benchmark/testelasticsearch.py
new file mode 100644
index 00000000..7a5d1d5b
--- /dev/null
+++ b/benchmark/testelasticsearch.py
@@ -0,0 +1,58 @@
+#! /usr/bin/env python
+#-*- coding: utf-8 -*-
+
+import elasticsearch
+from elasticsearch import client
+
+from pyaggr3g470r import utils
+
+# Connect to the local Elasticsearch node. The address is hard-coded for the
+# benchmark; it is not read from the configuration file.
+# `hosts` is a list of host entries: a plain dict would be iterated as bare
+# host names and any port stored in it would be ignored.
+es = elasticsearch.Elasticsearch(hosts=[{"host": "127.0.0.1", "port": 9200}])
+
+def delete_index():
+    """
+    Deletes all indexes of the Elasticsearch node.
+
+    This is a best-effort cleanup: any error raised by the request is
+    ignored so that the benchmark can carry on.
+    """
+    try:
+        # `es.indices` already is the indices client of the module-level
+        # connection; "_all" targets every index.
+        es.indices.delete(index="_all")
+    except Exception:
+        # Unlike a bare `except:`, this still lets KeyboardInterrupt and
+        # SystemExit propagate.
+        pass
+
+def create_index(articles):
+    """
+    Indexes every article into the "pyaggr3g470r" Elasticsearch index.
+
+    Each article is sent as one document of type "text", identified by the
+    article id, holding its title and its content passed through
+    `utils.clear_string`. Always returns True.
+    """
+    for entry in articles:
+        doc_id = str(entry.id)
+        document = {
+            "title": entry.title,
+            "content": utils.clear_string(entry.content)
+        }
+        es.index(index="pyaggr3g470r", doc_type="text",
+                 id=doc_id, body=document)
+    return True
+
+def search(term):
+    """
+    Searches `term` in the content of the indexed articles.
+
+    Runs a `query_string` query on the "content" field of the
+    "pyaggr3g470r" index and asks for at most 5000 hits. Returns the
+    response of Elasticsearch, or None when the request fails with a
+    NotFoundError (which is logged as a warning).
+    """
+    # Imported here because this module defines no module-level `logger`.
+    import logging
+
+    query = {
+        "query": {
+            "filtered": {
+                "query": {
+                    "query_string": {
+                        "default_field": "content",
+                        "query": term
+                    }
+                }
+            }
+        }
+    }
+    try:
+        return es.search(index="pyaggr3g470r", body=query, size=5000)
+    except elasticsearch.exceptions.NotFoundError as e:
+        logging.getLogger(__name__).warning(str(e))
+        return None
\ No newline at end of file
diff --git a/benchmark/testwhoosh.py b/benchmark/testwhoosh.py
new file mode 100644
index 00000000..b488dcd6
--- /dev/null
+++ b/benchmark/testwhoosh.py
@@ -0,0 +1,50 @@
+#! /usr/bin/env python
+#-*- coding: utf-8 -*-
+
+
+import os
+
+from whoosh.index import create_in, open_dir
+from whoosh.index import EmptyIndexError
+from whoosh.fields import *
+from whoosh.query import *
+from whoosh.qparser import QueryParser
+
+from pyaggr3g470r import utils
+
+# Directory holding the Whoosh index, relative to the working directory.
+indexdir = "./pyaggr3g470r/var/indexdir"
+
+# Layout of the index: a stored `title` field and a `content` field.
+schema = Schema(
+    title=TEXT(stored=True),
+    content=TEXT
+)
+
+def create_index(articles):
+    """
+    Creates the index from scratch and adds one document per article.
+
+    The index directory is created when it is missing, since `create_in`
+    expects an existing directory. Each document carries the title of the
+    article and its content passed through `utils.clear_string`.
+    """
+    if not os.path.isdir(indexdir):
+        os.makedirs(indexdir)
+    ix = create_in(indexdir, schema)
+    # As a context manager the writer commits on success and cancels (thus
+    # releasing its lock) when an exception escapes the block.
+    with ix.writer() as writer:
+        for article in articles:
+            writer.add_document(title=article.title,
+                                content=utils.clear_string(article.content))
+
+def search(term):
+    """
+    Search for `term` in the "content" field of the index.
+
+    Returns a list with one dictionary of stored fields per matching
+    document. Raises EmptyIndexError when the index cannot be opened.
+    """
+    try:
+        ix = open_dir(indexdir)
+    except (EmptyIndexError, OSError) as e:
+        # Keep a single exception type for the callers, but carry the cause.
+        raise EmptyIndexError(str(e))
+    with ix.searcher() as searcher:
+        query = QueryParser("content", ix.schema).parse(term)
+        results = searcher.search(query, limit=None)
+        # Hits are only readable while the searcher is open, so the stored
+        # fields are copied before leaving the `with` block.
+        return [hit.fields() for hit in results]
+
+
+if __name__ == "__main__":
+    # Point of entry in execution mode: print the number of documents in
+    # the index, then the hits of a sample query. The index must have been
+    # built beforehand with create_index(articles).
+    print(open_dir(indexdir).doc_count())
+    results = search("Nothomb")
+    # Tolerate a search that yields nothing.
+    for article in results or []:
+        print(article)
diff --git a/runbenchmark.py b/runbenchmark.py
new file mode 100644
index 00000000..8241e52b
--- /dev/null
+++ b/runbenchmark.py
@@ -0,0 +1,73 @@
+#! /usr/bin/env python
+#-*- coding: utf-8 -*-
+
+
+import time
+from benchmark import testelasticsearch
+from benchmark import testwhoosh
+
+import conf
+from pyaggr3g470r import models
+models.connect(conf.DATABASE_NAME)
+
+# Load every article up front so that fetching them from the database is not
+# billed to whichever indexer happens to iterate over them first.
+# NOTE(review): assumes `objects()` is evaluated lazily and that the whole
+# collection fits in memory - confirm.
+articles = list(models.Article.objects())
+
+
+
+#
+# Index generation
+#
+
+print "Indexes generation..."
+
+# Whoosh: time the complete build of the index.
+print "Whoosh"
+started = time.time()
+testwhoosh.create_index(articles)
+elapsed = time.time() - started
+print elapsed
+
+print
+
+# ElasticSearch: drop the existing indexes first (not timed), then time the
+# indexing of all the articles.
+print "ElasticSearch"
+testelasticsearch.delete_index()
+started = time.time()
+testelasticsearch.create_index(articles)
+elapsed = time.time() - started
+print elapsed
+
+# Three empty lines separate the two parts of the report.
+print
+print
+print
+
+
+
+#
+# Search
+#
+# Time a few sample queries against both search engines.
+# NOTE(review): the indentation of this listing is flattened, so which of the
+# statements below belong to the inner loops cannot be read from it - confirm
+# against the real file before restructuring.
+print "Search..."
+for query in ["Edward Snowden", "Saint-Pierre-et-Miquelon", "micropatronage"]:
+ print "Query:", query
+
+ # Whoosh: 5 runs of the same query, timed with time.time()
+ # (presumably one timing is printed per run).
+ print "with Whoosh"
+ for _ in range(5):
+ begin = time.time()
+ testwhoosh.search(query)
+ end = time.time()
+ print end - begin
+
+ print
+
+ # ElasticSearch: same measurement with the Elasticsearch benchmark module.
+ print "with ElasticSearch"
+ for _ in range(5):
+ begin = time.time()
+ testelasticsearch.search(query)
+ end = time.time()
+ print end - begin
+
+ # Empty lines to separate the output of one query from the next
+ # (presumably printed once per query).
+ print
+ print \ No newline at end of file
bgstack15