diff options
-rw-r--r-- | benchmark/__init__.py | 1 | ||||
-rw-r--r-- | benchmark/result1.txt | 56 | ||||
-rw-r--r-- | benchmark/testelasticsearch.py | 57 | ||||
-rw-r--r-- | benchmark/testwhoosh.py | 50 | ||||
-rw-r--r-- | runbenchmark.py | 73 |
5 files changed, 0 insertions, 237 deletions
diff --git a/benchmark/__init__.py b/benchmark/__init__.py deleted file mode 100644 index 8d1c8b69..00000000 --- a/benchmark/__init__.py +++ /dev/null @@ -1 +0,0 @@ - diff --git a/benchmark/result1.txt b/benchmark/result1.txt deleted file mode 100644 index 69e022c0..00000000 --- a/benchmark/result1.txt +++ /dev/null @@ -1,56 +0,0 @@ -Indexes generation... -Whoosh -197.024040937 - -ElasticSearch -56.0846140385 - - - -Search... -Query: Edward Snowden -with Whoosh -0.0134670734406 -0.00796294212341 -0.00795412063599 -0.00811314582825 -0.00810718536377 - -with ElasticSearch -0.0482449531555 -0.0591111183167 -0.0442450046539 -0.0511319637299 -0.0456249713898 - - -Query: Saint-Pierre-et-Miquelon -with Whoosh -0.00863313674927 -0.00820684432983 -0.00815606117249 -0.00763082504272 -0.00754308700562 - -with ElasticSearch -0.308684110641 -0.297341108322 -0.336125135422 -0.32342004776 -0.328269958496 - - -Query: micropatronage -with Whoosh -0.00235199928284 -0.00130891799927 -0.0014488697052 -0.00166296958923 -0.00130915641785 - -with ElasticSearch -0.00194191932678 -0.0019211769104 -0.0011739730835 -0.00115418434143 -0.00128579139709 diff --git a/benchmark/testelasticsearch.py b/benchmark/testelasticsearch.py deleted file mode 100644 index accb8792..00000000 --- a/benchmark/testelasticsearch.py +++ /dev/null @@ -1,57 +0,0 @@ -#! /usr/bin/env python -#-*- coding: utf-8 -*- - -import elasticsearch -from elasticsearch import client - -from pyaggr3g470r import utils - -# Connect to Elasticsearch node specified in the configuration file: -es = elasticsearch.Elasticsearch(hosts={"127.0.0.1" : 9200}) - -def delete_index(): - """ - Deletes all indexes. - """ - es = elasticsearch.Elasticsearch(hosts={"127.0.0.1" : 9200}) - ic = client.IndicesClient(es.indices.client) - try: - ic.delete("") - except: - pass - -def create_index(articles): - """ - Creates the index. - """ - for article in articles: - res = es.index( - index="pyaggr3g470r", - doc_type="text", - id=str(article.id), - body={ - "title": article.title, - "content": utils.clear_string(article.content) - } - ) - return True - -def search(term): - """ - Search a term. - """ - try: - es.search(index="pyaggr3g470r", body= - {"query" : { - "filtered" : { - "query" : { - "query_string" : { - "default_field" : "content", - "query" : term - } - } - } - } - }, size=5000) - except elasticsearch.exceptions.NotFoundError as e: - pass
\ No newline at end of file diff --git a/benchmark/testwhoosh.py b/benchmark/testwhoosh.py deleted file mode 100644 index b488dcd6..00000000 --- a/benchmark/testwhoosh.py +++ /dev/null @@ -1,50 +0,0 @@ -#! /usr/bin/env python -#-*- coding: utf-8 -*- - - -import os - -from whoosh.index import create_in, open_dir -from whoosh.index import EmptyIndexError -from whoosh.fields import * -from whoosh.query import * -from whoosh.qparser import QueryParser - -from pyaggr3g470r import utils - -indexdir = "./pyaggr3g470r/var/indexdir" - -schema = Schema(title=TEXT(stored=True), content=TEXT) - -def create_index(articles): - """ - Creates the index. - """ - ix = create_in(indexdir, schema) - writer = ix.writer() - for article in articles: - writer.add_document(content=utils.clear_string(article.content)) - writer.commit() - -def search(term): - """ - Search for `term` in the index. - Returns a list of articles. - """ - try: - ix = open_dir(indexdir) - except (EmptyIndexError, OSError) as e: - raise EmptyIndexError - with ix.searcher() as searcher: - query = QueryParser("content", ix.schema).parse(term) - results = searcher.search(query, limit=None) - #return [(article["feed_id"], article["article_id"]) for article in results] - - -if __name__ == "__main__": - # Point of entry in execution mode. - #create_index() - print(nb_documents()) - results = search("Nothomb") - for article in results: - print(article) diff --git a/runbenchmark.py b/runbenchmark.py deleted file mode 100644 index 8241e52b..00000000 --- a/runbenchmark.py +++ /dev/null @@ -1,73 +0,0 @@ -#! /usr/bin/env python -#-*- coding: utf-8 -*- - - -import time -from benchmark import testelasticsearch -from benchmark import testwhoosh - -import conf -from pyaggr3g470r import models -models.connect(conf.DATABASE_NAME) - -articles = models.Article.objects() - - - -# -# Index generation -# - -print "Indexes generation..." -# Whoosh -print "Whoosh" -begin = time.time() -testwhoosh.create_index(articles) -end = time.time() -print end - begin - -print - -# ElasticSearch -print "ElasticSearch" -testelasticsearch.delete_index() -begin = time.time() -testelasticsearch.create_index(articles) -end = time.time() -print end - begin - - - -print -print -print - - - -# -# Search -# -print "Search..." -for query in ["Edward Snowden", "Saint-Pierre-et-Miquelon", "micropatronage"]: - print "Query:", query - - # Whoosh - print "with Whoosh" - for _ in range(5): - begin = time.time() - testwhoosh.search(query) - end = time.time() - print end - begin - - print - - # ElasticSearch - print "with ElasticSearch" - for _ in range(5): - begin = time.time() - testelasticsearch.search(query) - end = time.time() - print end - begin - - print - print
\ No newline at end of file |