From ae67e23af7993199391b962eca2cbecfe9f8a8d5 Mon Sep 17 00:00:00 2001 From: Cédric Bonhomme Date: Sat, 12 Apr 2014 14:27:10 +0200 Subject: Removed benchmark scripts. --- benchmark/__init__.py | 1 - benchmark/result1.txt | 56 ----------------------------------------- benchmark/testelasticsearch.py | 57 ------------------------------------------ benchmark/testwhoosh.py | 50 ------------------------------------ 4 files changed, 164 deletions(-) delete mode 100644 benchmark/__init__.py delete mode 100644 benchmark/result1.txt delete mode 100644 benchmark/testelasticsearch.py delete mode 100644 benchmark/testwhoosh.py (limited to 'benchmark') diff --git a/benchmark/__init__.py b/benchmark/__init__.py deleted file mode 100644 index 8d1c8b69..00000000 --- a/benchmark/__init__.py +++ /dev/null @@ -1 +0,0 @@ - diff --git a/benchmark/result1.txt b/benchmark/result1.txt deleted file mode 100644 index 69e022c0..00000000 --- a/benchmark/result1.txt +++ /dev/null @@ -1,56 +0,0 @@ -Indexes generation... -Whoosh -197.024040937 - -ElasticSearch -56.0846140385 - - - -Search... -Query: Edward Snowden -with Whoosh -0.0134670734406 -0.00796294212341 -0.00795412063599 -0.00811314582825 -0.00810718536377 - -with ElasticSearch -0.0482449531555 -0.0591111183167 -0.0442450046539 -0.0511319637299 -0.0456249713898 - - -Query: Saint-Pierre-et-Miquelon -with Whoosh -0.00863313674927 -0.00820684432983 -0.00815606117249 -0.00763082504272 -0.00754308700562 - -with ElasticSearch -0.308684110641 -0.297341108322 -0.336125135422 -0.32342004776 -0.328269958496 - - -Query: micropatronage -with Whoosh -0.00235199928284 -0.00130891799927 -0.0014488697052 -0.00166296958923 -0.00130915641785 - -with ElasticSearch -0.00194191932678 -0.0019211769104 -0.0011739730835 -0.00115418434143 -0.00128579139709 diff --git a/benchmark/testelasticsearch.py b/benchmark/testelasticsearch.py deleted file mode 100644 index accb8792..00000000 --- a/benchmark/testelasticsearch.py +++ /dev/null @@ -1,57 +0,0 @@ -#! /usr/bin/env python -#-*- coding: utf-8 -*- - -import elasticsearch -from elasticsearch import client - -from pyaggr3g470r import utils - -# Connect to Elasticsearch node specified in the configuration file: -es = elasticsearch.Elasticsearch(hosts={"127.0.0.1" : 9200}) - -def delete_index(): - """ - Deletes all indexes. - """ - es = elasticsearch.Elasticsearch(hosts={"127.0.0.1" : 9200}) - ic = client.IndicesClient(es.indices.client) - try: - ic.delete("") - except: - pass - -def create_index(articles): - """ - Creates the index. - """ - for article in articles: - res = es.index( - index="pyaggr3g470r", - doc_type="text", - id=str(article.id), - body={ - "title": article.title, - "content": utils.clear_string(article.content) - } - ) - return True - -def search(term): - """ - Search a term. - """ - try: - es.search(index="pyaggr3g470r", body= - {"query" : { - "filtered" : { - "query" : { - "query_string" : { - "default_field" : "content", - "query" : term - } - } - } - } - }, size=5000) - except elasticsearch.exceptions.NotFoundError as e: - pass \ No newline at end of file diff --git a/benchmark/testwhoosh.py b/benchmark/testwhoosh.py deleted file mode 100644 index b488dcd6..00000000 --- a/benchmark/testwhoosh.py +++ /dev/null @@ -1,50 +0,0 @@ -#! /usr/bin/env python -#-*- coding: utf-8 -*- - - -import os - -from whoosh.index import create_in, open_dir -from whoosh.index import EmptyIndexError -from whoosh.fields import * -from whoosh.query import * -from whoosh.qparser import QueryParser - -from pyaggr3g470r import utils - -indexdir = "./pyaggr3g470r/var/indexdir" - -schema = Schema(title=TEXT(stored=True), content=TEXT) - -def create_index(articles): - """ - Creates the index. - """ - ix = create_in(indexdir, schema) - writer = ix.writer() - for article in articles: - writer.add_document(content=utils.clear_string(article.content)) - writer.commit() - -def search(term): - """ - Search for `term` in the index. - Returns a list of articles. - """ - try: - ix = open_dir(indexdir) - except (EmptyIndexError, OSError) as e: - raise EmptyIndexError - with ix.searcher() as searcher: - query = QueryParser("content", ix.schema).parse(term) - results = searcher.search(query, limit=None) - #return [(article["feed_id"], article["article_id"]) for article in results] - - -if __name__ == "__main__": - # Point of entry in execution mode. - #create_index() - print(nb_documents()) - results = search("Nothomb") - for article in results: - print(article) -- cgit