aboutsummaryrefslogtreecommitdiff
path: root/pyaggr3g470r/search.py
diff options
context:
space:
mode:
author Cédric Bonhomme <kimble.mandel@gmail.com> 2013-11-10 21:49:23 +0100
committer Cédric Bonhomme <kimble.mandel@gmail.com> 2013-11-10 21:49:23 +0100
commit 4c466b3af02063c96675b2fa3fe045b9030d8152 (patch)
tree f99b68decb2e6b884fb8896b64de1956d5778ece /pyaggr3g470r/search.py
parent Email notification. (diff)
download newspipe-4c466b3af02063c96675b2fa3fe045b9030d8152.tar.gz
newspipe-4c466b3af02063c96675b2fa3fe045b9030d8152.tar.bz2
newspipe-4c466b3af02063c96675b2fa3fe045b9030d8152.zip
Whoosh indexing.
Diffstat (limited to 'pyaggr3g470r/search.py')
-rw-r--r-- pyaggr3g470r/search.py 35
1 files changed, 18 insertions, 17 deletions
diff --git a/pyaggr3g470r/search.py b/pyaggr3g470r/search.py
index 0b4d33b6..afb1b6ab 100644
--- a/pyaggr3g470r/search.py
+++ b/pyaggr3g470r/search.py
@@ -20,9 +20,9 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>
__author__ = "Cedric Bonhomme"
-__version__ = "$Revision: 0.2 $"
+__version__ = "$Revision: 0.3 $"
__date__ = "$Date: 2013/06/24 $"
-__revision__ = "$Date: 2013/06/25 $"
+__revision__ = "$Date: 2013/11/10 $"
__copyright__ = "Copyright (c) Cedric Bonhomme"
__license__ = "GPLv3"
@@ -37,8 +37,9 @@ from whoosh.writing import AsyncWriter
import conf
import utils
+import models
-indexdir = "./var/indexdir"
+indexdir = "./pyaggr3g470r/var/indexdir"
schema = Schema(title=TEXT(stored=True), \
content=TEXT, \
@@ -49,19 +50,17 @@ def create_index():
"""
Creates the index.
"""
- mongo = mongodb.Articles(conf.MONGODB_ADDRESS, conf.MONGODB_PORT, \
- conf.MONGODB_DBNAME, conf.MONGODB_USER, conf.MONGODB_PASSWORD)
- feeds = mongo.get_all_feeds()
+ feeds = models.Feed.objects()
if not os.path.exists(indexdir):
os.mkdir(indexdir)
ix = create_in(indexdir, schema)
writer = ix.writer()
for feed in feeds:
- for article in mongo.get_articles(feed["feed_id"]):
- writer.add_document(title=article["article_title"], \
- content=utils.clear_string(article["article_content"]), \
- article_id=article["article_id"] , \
- feed_id=feed["feed_id"])
+ for article in feed.articles:
+ writer.add_document(title=article.title, \
+ content=utils.clear_string(article.content), \
+ article_id=str(article.id).decode(), \
+ feed_id=str(feed.oid).decode())
writer.commit()
def add_to_index(articles, feed):
@@ -73,13 +72,15 @@ def add_to_index(articles, feed):
try:
ix = open_dir(indexdir)
except (EmptyIndexError, OSError) as e:
- raise EmptyIndexError
+ if not os.path.exists(indexdir):
+ os.mkdir(indexdir)
+ ix = create_in(indexdir, schema)
writer = AsyncWriter(ix)
for article in articles:
- writer.add_document(title=article["article_title"], \
- content=utils.clear_string(article["article_content"]), \
- article_id=article["article_id"] , \
- feed_id=feed["feed_id"])
+ writer.add_document(title=article.title, \
+ content=utils.clear_string(article.content), \
+ article_id=str(article.id).decode(), \
+ feed_id=str(feed.oid).decode())
writer.commit()
def delete_article(feed_id, article_id):
@@ -125,4 +126,4 @@ if __name__ == "__main__":
print(nb_documents())
results = search("Nothomb")
for article in results:
- print(article)
+ print(article)
\ No newline at end of file
bgstack15