aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Cédric Bonhomme <kimble.mandel@gmail.com> 2013-11-10 21:49:23 +0100
committer Cédric Bonhomme <kimble.mandel@gmail.com> 2013-11-10 21:49:23 +0100
commit 4c466b3af02063c96675b2fa3fe045b9030d8152 (patch)
tree f99b68decb2e6b884fb8896b64de1956d5778ece
parent Email notification. (diff)
download newspipe-4c466b3af02063c96675b2fa3fe045b9030d8152.tar.gz
newspipe-4c466b3af02063c96675b2fa3fe045b9030d8152.tar.bz2
newspipe-4c466b3af02063c96675b2fa3fe045b9030d8152.zip
Whoosh indexing.
-rw-r--r-- pyaggr3g470r/feedgetter.py 22
-rw-r--r-- pyaggr3g470r/search.py 35
2 files changed, 28 insertions, 29 deletions
diff --git a/pyaggr3g470r/feedgetter.py b/pyaggr3g470r/feedgetter.py
index 850f4449..21cc841d 100644
--- a/pyaggr3g470r/feedgetter.py
+++ b/pyaggr3g470r/feedgetter.py
@@ -20,19 +20,17 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>
__author__ = "Cedric Bonhomme"
-__version__ = "$Revision: 1.9 $"
+__version__ = "$Revision: 2.0 $"
__date__ = "$Date: 2010/09/02 $"
-__revision__ = "$Date: 2013/11/05 $"
+__revision__ = "$Date: 2013/11/10 $"
__copyright__ = "Copyright (c) Cedric Bonhomme"
__license__ = "GPLv3"
-import hashlib
import threading
import feedparser
from BeautifulSoup import BeautifulSoup
from datetime import datetime
-from contextlib import contextmanager
import models
import conf
@@ -81,7 +79,7 @@ class FeedGetter(object):
def process(self, feed):
"""
- Comment
+ Retrieves articles from the feed and adds them to the database.
"""
#a_feed = feedparser.parse(feed_link, handlers = [self.proxy])
a_feed = feedparser.parse(feed.link)
@@ -118,21 +116,21 @@ class FeedGetter(object):
except:
post_date = datetime(*article.updated_parsed[:6])
+ # save the article
article = models.Article(post_date, article.link, article_title, description, False, False)
article.save()
articles.append(article)
- """
# add the article to the Whoosh index
try:
search.add_to_index([article], feed)
- except:
- print("Whoosh error.")
+ except Exception as e:
+ print("Whoosh error: " + str(e))
#pyaggr3g470r_log.error("Whoosh error.")
- continue"""
+ continue
+ # email notification
if conf.MAIL_ENABLED and feed.email_notification:
- # if subscribed to the feed
with app.app_context():
msg = Message('[pyAggr3g470r] ' + feed.title + ' : ' + article.title, \
sender = conf.MAIL_FROM, recipients = [conf.MAIL_TO])
@@ -140,9 +138,9 @@ class FeedGetter(object):
msg.html = description
mail.send(msg)
+ # add the articles to the list of articles for the current feed
feed.articles.extend(articles)
feed.articles = sorted(feed.articles, key=lambda t: t.date, reverse=True)
- #feed.save()
self.user.save()
@@ -150,4 +148,4 @@ if __name__ == "__main__":
# Point of entry in execution mode
feed_getter = FeedGetter()
# Retrieve all feeds
- feed_getter.retrieve_feed()
+ feed_getter.retrieve_feed()
\ No newline at end of file
diff --git a/pyaggr3g470r/search.py b/pyaggr3g470r/search.py
index 0b4d33b6..afb1b6ab 100644
--- a/pyaggr3g470r/search.py
+++ b/pyaggr3g470r/search.py
@@ -20,9 +20,9 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>
__author__ = "Cedric Bonhomme"
-__version__ = "$Revision: 0.2 $"
+__version__ = "$Revision: 0.3 $"
__date__ = "$Date: 2013/06/24 $"
-__revision__ = "$Date: 2013/06/25 $"
+__revision__ = "$Date: 2013/11/10 $"
__copyright__ = "Copyright (c) Cedric Bonhomme"
__license__ = "GPLv3"
@@ -37,8 +37,9 @@ from whoosh.writing import AsyncWriter
import conf
import utils
+import models
-indexdir = "./var/indexdir"
+indexdir = "./pyaggr3g470r/var/indexdir"
schema = Schema(title=TEXT(stored=True), \
content=TEXT, \
@@ -49,19 +50,17 @@ def create_index():
"""
Creates the index.
"""
- mongo = mongodb.Articles(conf.MONGODB_ADDRESS, conf.MONGODB_PORT, \
- conf.MONGODB_DBNAME, conf.MONGODB_USER, conf.MONGODB_PASSWORD)
- feeds = mongo.get_all_feeds()
+ feeds = models.Feed.objects()
if not os.path.exists(indexdir):
os.mkdir(indexdir)
ix = create_in(indexdir, schema)
writer = ix.writer()
for feed in feeds:
- for article in mongo.get_articles(feed["feed_id"]):
- writer.add_document(title=article["article_title"], \
- content=utils.clear_string(article["article_content"]), \
- article_id=article["article_id"] , \
- feed_id=feed["feed_id"])
+ for article in feed.articles:
+ writer.add_document(title=article.title, \
+ content=utils.clear_string(article.content), \
+ article_id=str(article.id).decode(), \
+ feed_id=str(feed.oid).decode())
writer.commit()
def add_to_index(articles, feed):
@@ -73,13 +72,15 @@ def add_to_index(articles, feed):
try:
ix = open_dir(indexdir)
except (EmptyIndexError, OSError) as e:
- raise EmptyIndexError
+ if not os.path.exists(indexdir):
+ os.mkdir(indexdir)
+ ix = create_in(indexdir, schema)
writer = AsyncWriter(ix)
for article in articles:
- writer.add_document(title=article["article_title"], \
- content=utils.clear_string(article["article_content"]), \
- article_id=article["article_id"] , \
- feed_id=feed["feed_id"])
+ writer.add_document(title=article.title, \
+ content=utils.clear_string(article.content), \
+ article_id=str(article.id).decode(), \
+ feed_id=str(feed.oid).decode())
writer.commit()
def delete_article(feed_id, article_id):
@@ -125,4 +126,4 @@ if __name__ == "__main__":
print(nb_documents())
results = search("Nothomb")
for article in results:
- print(article)
+ print(article)
\ No newline at end of file
bgstack15