aboutsummaryrefslogtreecommitdiff
path: root/pyaggr3g470r/search.py
diff options
context:
space:
mode:
author Cédric Bonhomme <cedric@cedricbonhomme.org> 2015-04-22 08:36:40 +0200
committer Cédric Bonhomme <cedric@cedricbonhomme.org> 2015-04-22 08:36:40 +0200
commit 58c89a2fe1e1118d6e37551f4d660d5b2e2e0b67 (patch)
tree badbb2486b5cd6c639924a06a01fd185db2dbc7c /pyaggr3g470r/search.py
parent Fixed the default configuration file. Minor update to the install.sh script. (diff)
download newspipe-58c89a2fe1e1118d6e37551f4d660d5b2e2e0b67.tar.gz
newspipe-58c89a2fe1e1118d6e37551f4d660d5b2e2e0b67.tar.bz2
newspipe-58c89a2fe1e1118d6e37551f4d660d5b2e2e0b67.zip
Full text search with Whoosh has been removed.
Diffstat (limited to 'pyaggr3g470r/search.py')
-rw-r--r-- pyaggr3g470r/search.py 148
1 files changed, 0 insertions, 148 deletions
diff --git a/pyaggr3g470r/search.py b/pyaggr3g470r/search.py
deleted file mode 100644
index a7f780df..00000000
--- a/pyaggr3g470r/search.py
+++ /dev/null
@@ -1,148 +0,0 @@
-#! /usr/bin/env python
-#-*- coding: utf-8 -*-
-
-# pyAggr3g470r - A Web based news aggregator.
-# Copyright (C) 2010-2015 Cédric Bonhomme - https://www.cedricbonhomme.org
-#
-# For more information : https://bitbucket.org/cedricbonhomme/pyaggr3g470r/
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Affero General Public License as
-# published by the Free Software Foundation, either version 3 of the
-# License, or (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU Affero General Public License for more details.
-#
-# You should have received a copy of the GNU Affero General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-
-__author__ = "Cedric Bonhomme"
-__version__ = "$Revision: 0.3 $"
-__date__ = "$Date: 2013/06/24 $"
-__revision__ = "$Date: 2013/11/10 $"
-__copyright__ = "Copyright (c) Cedric Bonhomme"
-__license__ = "AGPLv3"
-
-import os
-import logging
-
-from whoosh.index import create_in, open_dir
-from whoosh.index import EmptyIndexError
-from whoosh.fields import *
-from whoosh.query import *
-from whoosh.qparser import QueryParser
-from whoosh.writing import AsyncWriter
-from collections import defaultdict
-
-from pyaggr3g470r.models import User
-from pyaggr3g470r.decorators import async
-from pyaggr3g470r import utils
-
-logger = logging.getLogger(__name__)
-
-indexdir = "./pyaggr3g470r/var/indexdir"
-
-schema = Schema(title=TEXT,
- content=TEXT,
- article_id=NUMERIC(int, stored=True),
- feed_id=NUMERIC(int, stored=True),
- user_id=NUMERIC(int, stored=True))
-
-@async
-def create_index(user_id):
- """
- Creates the index.
- """
- user = User.query.filter(User.id == user_id).first()
- logger.info("Starting indexation of the database.")
- if not os.path.exists(indexdir):
- os.makedirs(indexdir)
- ix = create_in(indexdir, schema)
- writer = ix.writer()
- for feed in user.feeds:
- logger.info("Indexing " + feed.title)
- for article in feed.articles:
- writer.add_document(title=article.title,
- content=utils.clear_string(article.content),
- article_id=article.id,
- feed_id=feed.id,
- user_id=user.id)
- writer.commit()
- logger.info("Indexation done.")
-
-def add_to_index(user_id, articles, feed):
- """
- Add a list of articles to the index.
- Here an AsyncWriter is used because the function will
- be called in multiple threads by the feedgetter module.
- """
- try:
- ix = open_dir(indexdir)
- except (EmptyIndexError, OSError):
- if not os.path.exists(indexdir):
- os.makedirs(indexdir)
- ix = create_in(indexdir, schema)
- writer = AsyncWriter(ix)
- for article in articles:
- writer.add_document(title=article.title,
- content=utils.clear_string(article.content),
- article_id=article.id,
- feed_id=feed.id,
- user_id=user_id)
- writer.commit()
-
-
-def delete_article(user_id, feed_id, article_id):
- """
- Delete an article from the index.
- """
- try:
- ix = open_dir(indexdir)
- except (EmptyIndexError, OSError):
- return
- writer = ix.writer()
- document = And([Term("user_id", user_id), Term("feed_id", feed_id),
- Term("article_id", article_id)])
- writer.delete_by_query(document)
- writer.commit()
-
-
-def search(user_id, term):
- """
- Search for `term` in the index.
- Returns a list of articles.
- """
- result_dict = defaultdict(list)
- try:
- ix = open_dir(indexdir)
- except (EmptyIndexError, OSError):
- raise EmptyIndexError
- with ix.searcher() as searcher:
- query = QueryParser("content", ix.schema).parse(term)
- user_doc = Term("user_id", user_id)
- results = searcher.search(query, filter=user_doc, limit=None)
- for article in results:
- result_dict[article["feed_id"]].append(article["article_id"])
- return result_dict, len(results)
-
-
-def nb_documents():
- """
- Return the number of undeleted documents.
- """
- try:
- ix = open_dir(indexdir)
- except (EmptyIndexError, OSError):
- raise EmptyIndexError
- return ix.doc_count()
-
-if __name__ == "__main__":
- # Point of entry in execution mode.
- #create_index()
- print(nb_documents())
- results = search("Nothomb")
- for article in results:
- print(article)
bgstack15