diff options
author | Cédric Bonhomme <cedric@cedricbonhomme.org> | 2014-02-03 19:38:50 +0100 |
---|---|---|
committer | Cédric Bonhomme <cedric@cedricbonhomme.org> | 2014-02-03 19:38:50 +0100 |
commit | a6bc5bf8d7d003b6cf4e623485b330e1e2830703 (patch) | |
tree | 0495b71bcb0ec6e9b72dfa954d11c7ed857aee7a /benchmark/testwhoosh.py | |
parent | Removed import of 'conf' module. (diff) | |
download | newspipe-a6bc5bf8d7d003b6cf4e623485b330e1e2830703.tar.gz newspipe-a6bc5bf8d7d003b6cf4e623485b330e1e2830703.tar.bz2 newspipe-a6bc5bf8d7d003b6cf4e623485b330e1e2830703.zip |
Added naive benchmarks for Whoosh and ElasticSearch.
Diffstat (limited to 'benchmark/testwhoosh.py')
-rw-r--r-- | benchmark/testwhoosh.py | 50 |
1 file changed, 50 insertions, 0 deletions
#! /usr/bin/env python
#-*- coding: utf-8 -*-

"""
Naive benchmark of a Whoosh full-text index.

The module builds an index from article objects (`create_index`), reports
how many documents the index holds (`nb_documents`) and runs a query on the
"content" field (`search`).
"""

import os

from whoosh.index import create_in, open_dir
from whoosh.index import EmptyIndexError
# The star imports provide `Schema` and `TEXT` used below; they are kept
# as in the original file so that no name disappears from the namespace.
from whoosh.fields import *
from whoosh.query import *
from whoosh.qparser import QueryParser

from pyaggr3g470r import utils

# Directory where the Whoosh index files are written and read.
indexdir = "./pyaggr3g470r/var/indexdir"

# `title` is stored (returned with the hits); `content` is only indexed.
schema = Schema(title=TEXT(stored=True), content=TEXT)


def _open_index():
    """
    Opens the index located in `indexdir`.
    Raises `EmptyIndexError` when the index can not be opened.
    """
    try:
        return open_dir(indexdir)
    except (EmptyIndexError, OSError):
        raise EmptyIndexError


def create_index(articles):
    """
    Creates the index.

    `articles` is an iterable of objects exposing a `content` attribute.
    When an article also has a `title` attribute it is stored in the index
    so that `search` has something to return for each hit.
    """
    # create_in() expects an existing directory.
    if not os.path.isdir(indexdir):
        os.makedirs(indexdir)
    ix = create_in(indexdir, schema)
    # As a context manager the writer commits on success and cancels
    # (releasing its lock) when an exception escapes the loop.
    with ix.writer() as writer:
        for article in articles:
            fields = {"content": utils.clear_string(article.content)}
            # NOTE(review): assumes articles carry a text `title`; articles
            # without one are indexed with their content only.
            title = getattr(article, "title", None)
            if title is not None:
                fields["title"] = title
            writer.add_document(**fields)


def nb_documents():
    """
    Returns the number of documents in the index.
    Raises `EmptyIndexError` when the index can not be opened.
    """
    return _open_index().doc_count()


def search(term):
    """
    Search for `term` in the index.
    Returns a list of articles: one dictionary of stored fields per hit.
    Raises `EmptyIndexError` when the index can not be opened.
    """
    ix = _open_index()
    with ix.searcher() as searcher:
        query = QueryParser("content", ix.schema).parse(term)
        results = searcher.search(query, limit=None)
        # The stored fields must be read while the searcher is still open,
        # so the list is materialized before leaving the `with` block.
        return [hit.fields() for hit in results]


if __name__ == "__main__":
    # Point of entry in execution mode.
    # The index must have been built beforehand with create_index(articles).
    print(nb_documents())
    results = search("Nothomb")
    for article in results:
        print(article)