diff options
Diffstat (limited to 'pyaggr3g470r/search.py')
-rw-r--r-- | pyaggr3g470r/search.py | 148 |
1 files changed, 0 insertions, 148 deletions
diff --git a/pyaggr3g470r/search.py b/pyaggr3g470r/search.py deleted file mode 100644 index a7f780df..00000000 --- a/pyaggr3g470r/search.py +++ /dev/null @@ -1,148 +0,0 @@ -#! /usr/bin/env python -#-*- coding: utf-8 -*- - -# pyAggr3g470r - A Web based news aggregator. -# Copyright (C) 2010-2015 Cédric Bonhomme - https://www.cedricbonhomme.org -# -# For more information : https://bitbucket.org/cedricbonhomme/pyaggr3g470r/ -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as -# published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see <http://www.gnu.org/licenses/>. - -__author__ = "Cedric Bonhomme" -__version__ = "$Revision: 0.3 $" -__date__ = "$Date: 2013/06/24 $" -__revision__ = "$Date: 2013/11/10 $" -__copyright__ = "Copyright (c) Cedric Bonhomme" -__license__ = "AGPLv3" - -import os -import logging - -from whoosh.index import create_in, open_dir -from whoosh.index import EmptyIndexError -from whoosh.fields import * -from whoosh.query import * -from whoosh.qparser import QueryParser -from whoosh.writing import AsyncWriter -from collections import defaultdict - -from pyaggr3g470r.models import User -from pyaggr3g470r.decorators import async -from pyaggr3g470r import utils - -logger = logging.getLogger(__name__) - -indexdir = "./pyaggr3g470r/var/indexdir" - -schema = Schema(title=TEXT, - content=TEXT, - article_id=NUMERIC(int, stored=True), - feed_id=NUMERIC(int, stored=True), - user_id=NUMERIC(int, stored=True)) - -@async -def create_index(user_id): - """ - Creates the index. - """ - user = User.query.filter(User.id == user_id).first() - logger.info("Starting indexation of the database.") - if not os.path.exists(indexdir): - os.makedirs(indexdir) - ix = create_in(indexdir, schema) - writer = ix.writer() - for feed in user.feeds: - logger.info("Indexing " + feed.title) - for article in feed.articles: - writer.add_document(title=article.title, - content=utils.clear_string(article.content), - article_id=article.id, - feed_id=feed.id, - user_id=user.id) - writer.commit() - logger.info("Indexation done.") - -def add_to_index(user_id, articles, feed): - """ - Add a list of articles to the index. - Here an AsyncWriter is used because the function will - be called in multiple threads by the feedgetter module. - """ - try: - ix = open_dir(indexdir) - except (EmptyIndexError, OSError): - if not os.path.exists(indexdir): - os.makedirs(indexdir) - ix = create_in(indexdir, schema) - writer = AsyncWriter(ix) - for article in articles: - writer.add_document(title=article.title, - content=utils.clear_string(article.content), - article_id=article.id, - feed_id=feed.id, - user_id=user_id) - writer.commit() - - -def delete_article(user_id, feed_id, article_id): - """ - Delete an article from the index. - """ - try: - ix = open_dir(indexdir) - except (EmptyIndexError, OSError): - return - writer = ix.writer() - document = And([Term("user_id", user_id), Term("feed_id", feed_id), - Term("article_id", article_id)]) - writer.delete_by_query(document) - writer.commit() - - -def search(user_id, term): - """ - Search for `term` in the index. - Returns a list of articles. - """ - result_dict = defaultdict(list) - try: - ix = open_dir(indexdir) - except (EmptyIndexError, OSError): - raise EmptyIndexError - with ix.searcher() as searcher: - query = QueryParser("content", ix.schema).parse(term) - user_doc = Term("user_id", user_id) - results = searcher.search(query, filter=user_doc, limit=None) - for article in results: - result_dict[article["feed_id"]].append(article["article_id"]) - return result_dict, len(results) - - -def nb_documents(): - """ - Return the number of undeleted documents. - """ - try: - ix = open_dir(indexdir) - except (EmptyIndexError, OSError): - raise EmptyIndexError - return ix.doc_count() - -if __name__ == "__main__": - # Point of entry in execution mode. - #create_index() - print(nb_documents()) - results = search("Nothomb") - for article in results: - print(article) |