diff options
Diffstat (limited to 'source/search.py')
-rw-r--r-- | source/search.py | 129 |
1 files changed, 0 insertions, 129 deletions
diff --git a/source/search.py b/source/search.py
deleted file mode 100644
index a9248a09..00000000
--- a/source/search.py
+++ /dev/null
@@ -1,129 +0,0 @@
-#! /usr/bin/env python
-#-*- coding: utf-8 -*-
-
-# pyAggr3g470r - A Web based news aggregator.
-# Copyright (C) 2010-2013 Cédric Bonhomme - http://cedricbonhomme.org/
-#
-# For more information : https://bitbucket.org/cedricbonhomme/pyaggr3g470r/
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>
-
-__author__ = "Cedric Bonhomme"
-__version__ = "$Revision: 0.2 $"
-__date__ = "$Date: 2013/06/24 $"
-__revision__ = "$Date: 2013/06/25 $"
-__copyright__ = "Copyright (c) Cedric Bonhomme"
-__license__ = "GPLv3"
-
-import os
-
-from whoosh.index import create_in, open_dir
-from whoosh.index import EmptyIndexError
-from whoosh.fields import *
-from whoosh.query import *
-from whoosh.qparser import QueryParser
-from whoosh.writing import AsyncWriter
-
-import conf
-import utils
-import mongodb
-
-indexdir = "./var/indexdir"
-
-schema = Schema(title=TEXT(stored=True), \
-    content=TEXT, \
-    article_id=TEXT(stored=True), \
-    feed_id=TEXT(stored=True))
-
-def create_index():
-    """
-    Creates the index.
-    """
-    mongo = mongodb.Articles(conf.MONGODB_ADDRESS, conf.MONGODB_PORT, \
-        conf.MONGODB_DBNAME, conf.MONGODB_USER, conf.MONGODB_PASSWORD)
-    feeds = mongo.get_all_feeds()
-    if not os.path.exists(indexdir):
-        os.mkdir(indexdir)
-    ix = create_in(indexdir, schema)
-    writer = ix.writer()
-    for feed in feeds:
-        for article in mongo.get_articles(feed["feed_id"]):
-            writer.add_document(title=article["article_title"], \
-                content=utils.clear_string(article["article_content"]), \
-                article_id=article["article_id"] , \
-                feed_id=feed["feed_id"])
-    writer.commit()
-
-def add_to_index(articles, feed):
-    """
-    Add a list of articles to the index.
-    Here an AsyncWriter is used because the function will
-    be called in multiple threads by the feedgetter module.
-    """
-    try:
-        ix = open_dir(indexdir)
-    except (EmptyIndexError, OSError) as e:
-        raise EmptyIndexError
-    writer = AsyncWriter(ix)
-    for article in articles:
-        writer.add_document(title=article["article_title"], \
-            content=utils.clear_string(article["article_content"]), \
-            article_id=article["article_id"] , \
-            feed_id=feed["feed_id"])
-    writer.commit()
-
-def delete_article(feed_id, article_id):
-    """
-    Delete an article from the index.
-    """
-    try:
-        ix = open_dir(indexdir)
-    except (EmptyIndexError, OSError) as e:
-        raise EmptyIndexError
-    writer = ix.writer()
-    document = And([Term("feed_id", feed_id), Term("article_id", article_id)])
-    writer.delete_by_query(document)
-    writer.commit()
-
-def search(term):
-    """
-    Search for `term` in the index.
-    Returns a list of articles.
-    """
-    try:
-        ix = open_dir(indexdir)
-    except (EmptyIndexError, OSError) as e:
-        raise EmptyIndexError
-    with ix.searcher() as searcher:
-        query = QueryParser("content", ix.schema).parse(term)
-        results = searcher.search(query, limit=None)
-        return [(article["feed_id"], article["article_id"]) for article in results]
-
-def nb_documents():
-    """
-    Return the number of undeleted documents.
-    """
-    try:
-        ix = open_dir(indexdir)
-    except (EmptyIndexError, OSError) as e:
-        raise EmptyIndexError
-    return ix.doc_count()
-
-if __name__ == "__main__":
-    # Point of entry in execution mode.
-    #create_index()
-    print(nb_documents())
-    results = search("Nothomb")
-    for article in results:
-        print(article)