From df1a68e06275bbc0b1dd2a45a02d05842884551a Mon Sep 17 00:00:00 2001 From: Cédric Bonhomme Date: Mon, 24 Jun 2013 15:05:19 +0200 Subject: A test of the whoosh library --- source/search.py | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) create mode 100644 source/search.py (limited to 'source') diff --git a/source/search.py b/source/search.py new file mode 100644 index 00000000..a870cb23 --- /dev/null +++ b/source/search.py @@ -0,0 +1,62 @@ +#! /usr/bin/env python +#-*- coding: utf-8 -*- + +# pyAggr3g470r - A Web based news aggregator. +# Copyright (C) 2010-2013 Cédric Bonhomme - http://cedricbonhomme.org/ +# +# For more information : http://bitbucket.org/cedricbonhomme/pyaggr3g470r/ +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see + +__author__ = "Cedric Bonhomme" +__version__ = "$Revision: 0.1 $" +__date__ = "$Date: 2010/06/24 $" +__revision__ = "$Date: 2013/06/24 $" +__copyright__ = "Copyright (c) Cedric Bonhomme" +__license__ = "GPLv3" + +from whoosh.index import create_in +from whoosh.fields import * +from whoosh.qparser import QueryParser + +import conf +import utils +import mongodb + +schema = Schema(title=TEXT(stored=True), \ + content=TEXT, \ + article_id=TEXT, \ + feed_id=TEXT) + +def create_index(): + """ + """ + self.mongo = mongodb.Articles(conf.MONGODB_ADDRESS, conf.MONGODB_PORT, \ + conf.MONGODB_DBNAME, conf.MONGODB_USER, conf.MONGODB_PASSWORD) + feeds = self.mongo.get_all_feeds() + ix = create_in("indexdir", schema) + writer = ix.writer() + for article in mongo.get_articles(feed["feed_id"], limit=10) + writer.add_document(title=article["article_title"], \ + content=utils.clear_string(article["article_content"])) + writer.commit() + + +def search(index, term): + """ + """ + with ix.searcher() as searcher: + query = QueryParser("content", ix.schema).parse(term) + results = searcher.search(query) + results[0] \ No newline at end of file -- cgit