#! /usr/bin/env python
#-*- coding: utf-8 -*-

# Provenance: benchmark/testwhoosh.py, added by commit
# a6bc5bf8d7d003b6cf4e623485b330e1e2830703 (Cédric Bonhomme, 2014-02-03),
# "Added naive benchmarks for Whoosh and ElasticSearch."

"""
Naive benchmark helpers for a Whoosh full-text index.

The module builds an index from article objects (``create_index``), reports
how many documents it holds (``nb_documents``) and runs a query against the
``content`` field (``search``).
"""

import os

from whoosh.index import create_in, open_dir
from whoosh.index import EmptyIndexError
from whoosh.fields import *
from whoosh.query import *
from whoosh.qparser import QueryParser

from pyaggr3g470r import utils

# Directory holding the Whoosh index files (relative to the working directory).
indexdir = "./pyaggr3g470r/var/indexdir"

# ``title`` is stored so it can be read back from search hits; ``content`` is
# indexed for searching only.
schema = Schema(title=TEXT(stored=True), content=TEXT)


def _open_index():
    """
    Open the index stored in `indexdir`.

    Raises EmptyIndexError when the directory holds no index or cannot be
    read, so that callers only have one exception type to handle.
    """
    try:
        return open_dir(indexdir)
    except OSError:
        # A missing/unreadable directory is reported the same way as an
        # empty one, as the original code did, but with a usable message.
        raise EmptyIndexError("no usable index found in %r" % indexdir)


def create_index(articles):
    """
    Creates the index.

    `articles` is an iterable of objects exposing a `content` attribute.
    A `title` attribute, when present and not None, is stored alongside.
    Any index already present in `indexdir` is replaced.
    """
    # create_in() expects the target directory to exist already.
    if not os.path.isdir(indexdir):
        os.makedirs(indexdir)
    ix = create_in(indexdir, schema)
    # The writer context manager commits on success and cancels on error,
    # so a failure half-way through does not leave the writer open.
    with ix.writer() as writer:
        for article in articles:
            fields = {"content": utils.clear_string(article.content)}
            # NOTE(review): assumes articles expose their title as `.title`;
            # confirm against the article model. Skipped when absent.
            title = getattr(article, "title", None)
            if title is not None:
                fields["title"] = title
            writer.add_document(**fields)


def nb_documents():
    """
    Returns the number of documents in the index.

    Raises EmptyIndexError if no index can be opened.
    """
    return _open_index().doc_count()


def search(term):
    """
    Search for `term` in the `content` field of the index.

    Returns a list with one dict of stored fields per matching document.
    Raises EmptyIndexError if no index can be opened.
    """
    ix = _open_index()
    with ix.searcher() as searcher:
        query = QueryParser("content", ix.schema).parse(term)
        results = searcher.search(query, limit=None)
        # Hits are only readable while the searcher is open, so copy the
        # stored fields out before leaving the `with` block.
        return [hit.fields() for hit in results]


if __name__ == "__main__":
    # Point of entry in execution mode.
    # The index must have been built beforehand with create_index(articles).
    print(nb_documents())
    for article in search("Nothomb"):
        print(article)