diff options
-rw-r--r-- | README | 36 | ||||
-rw-r--r-- | articles.py | 63 | ||||
-rw-r--r-- | cfg/cherrypy.cfg | 4 | ||||
-rwxr-xr-x | cfg/pyAggr3g470r.cfg-sample | 6 | ||||
-rwxr-xr-x | feedgetter.py | 110 | ||||
-rw-r--r-- | img/hacker-news.png | bin | 0 -> 265 bytes | |||
-rw-r--r-- | mongodb.py | 253 | ||||
-rwxr-xr-x | pyAggr3g470r.py | 657 | ||||
-rw-r--r-- | sqlite2mongo.py | 75 | ||||
-rwxr-xr-x | utils.py | 164 |
10 files changed, 715 insertions, 653 deletions
@@ -22,14 +22,14 @@ pyAggr3g470r Presentation ------------ -pyAggr3g470r is a multi-threaded news aggregator with a web interface -based on CherryPy. Articles are stored in a SQLite base. +pyAggr3g470r_ is a multi-threaded news aggregator with a web interface +based on CherryPy_. Articles are stored in a MongoDB_ base. Features ------------ -* articles are stored in a SQLite database; +* articles are stored in a MongoDB_ database; * find an article with history; * e-mail notification; * export articles to HTML, EPUB, PDF or raw text; @@ -43,24 +43,23 @@ Requierements ------------- Software required ~~~~~~~~~~~~~~~~~ -* Python 2.7.* -* SQLite -* feedparser (http://feedparser.org/) -* CherryPy (version 3 and up) -* BeautifulSoup +* Python_ 2.7.*; +* MongoDB_ and PyMongo_; +* feedparser_; +* CherryPy_ (version 3 and up); +* BeautifulSoup_. Optional module ~~~~~~~~~~~~~~~ -These modules are not required but provides better features: +These modules are not required but enable more features: * lxml and Genshi; -* Python Imaging Library for the generation of QR codes; -* Gamin, detect changes in the database in a better way. +* Python Imaging Library for the generation of QR codes. If you want to install these modules: -sudo aptitude install python-gamin python-lxml python-genshi + sudo aptitude install python-lxml python-genshi Donnation @@ -72,5 +71,14 @@ Thank you! License ------------ -pyAggr3g470r is under GPLv3 license. -http://www.gnu.org/licenses/gpl-3.0.txt +pyAggr3g470r_ is under GPLv3_ license. + + +.. _Python: http://python.org/ +.. _pyAggr3g470r: https://bitbucket.org/cedricbonhomme/pyaggr3g470r/ +.. _feedparser: http://feedparser.org/ +.. _MongoDB: http://www.mongodb.org/ +.. _PyMongo: https://github.com/mongodb/mongo-python-driver +.. _CherryPy: http://cherrypy.org/ +.. _BeautifulSoup: http://www.crummy.com/software/BeautifulSoup/ +.. 
_GPLv3: http://www.gnu.org/licenses/gpl-3.0.txt diff --git a/articles.py b/articles.py deleted file mode 100644 index c0c4310f..00000000 --- a/articles.py +++ /dev/null @@ -1,63 +0,0 @@ -#! /usr/bin/env python -#-*- coding: utf-8 -*- - -# pyAggr3g470r - A Web based news aggregator. -# Copyright (C) 2010 Cédric Bonhomme - http://cedricbonhomme.org/ -# -# For more information : http://bitbucket.org/cedricbonhomme/pyaggr3g470r/ -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see <http://www.gnu.org/licenses/> - -__author__ = "Cedric Bonhomme" -__version__ = "$Revision: 0.2 $" -__date__ = "$Date: 2010/12/02 $" -__revisios__ = "$Date: 2011/07/07 $" -__copyright__ = "Copyright (c) Cedric Bonhomme" -__license__ = "GPLv3" - -from collections import OrderedDict - -class Feed(object): - """ - Represent a stream (RSS, ATOM, etc.). - """ - def __init__(self): - """ - Represent the components of a feed. - """ - self.feed_id = "" - self.feed_image = "" - self.feed_title = "" - self.feed_link = "" - self.feed_site_link = "" - self.mail = "" - self.nb_articles = "" - self.nb_unread_articles = "" - self.articles = OrderedDict() - -class Article(object): - """ - Represent an article. - """ - def __init__(self): - """ - Represent the components of an article. 
- """ - self.article_id = "" - self.article_date = "" - self.article_title = "" - self.article_link = "" - self.article_description = "" - self.article_readed = "" - self.like = ""
\ No newline at end of file diff --git a/cfg/cherrypy.cfg b/cfg/cherrypy.cfg index 51440d6f..a419504d 100644 --- a/cfg/cherrypy.cfg +++ b/cfg/cherrypy.cfg @@ -10,6 +10,8 @@ engine.timeout_monitor.on = False tools.staticdir.root = os.getcwd() tools.staticdir.on = True tools.staticdir.dir = "." +tools.encode.on = True +tools.encode.encoding = "utf8" [/css] tools.staticdir.on = True @@ -19,4 +21,4 @@ tools.staticdir.match = "(?i)^.+\.css$" [/images] tools.staticdir.on = True tools.staticdir.dir = "img" -tools.staticdir.match = "(?i)^.+\.png$" +tools.staticdir.match = "(?i)^.+\.png$"
\ No newline at end of file diff --git a/cfg/pyAggr3g470r.cfg-sample b/cfg/pyAggr3g470r.cfg-sample index 63de4005..95db75c5 100755 --- a/cfg/pyAggr3g470r.cfg-sample +++ b/cfg/pyAggr3g470r.cfg-sample @@ -1,6 +1,10 @@ [global] -sqlitebase = ./var/feed.db max_nb_articles = 50 +[MongoDB] +address = 127.0.0.1 +port = 27017 +user = username +password = pwd [mail] mail_from = pyAggr3g470r@no-reply.com mail_to = address_of_the_recipient@example.com diff --git a/feedgetter.py b/feedgetter.py index aa463b7d..e3469132 100755 --- a/feedgetter.py +++ b/feedgetter.py @@ -27,14 +27,15 @@ __license__ = "GPLv3" import os.path import traceback -import sqlite3 import threading import feedparser +import hashlib from BeautifulSoup import BeautifulSoup from datetime import datetime import utils +import mongodb feeds_list = [] list_of_threads = [] @@ -50,11 +51,8 @@ class FeedGetter(object): """ Initializes the base and variables. """ - # Create the base if not exists. - utils.create_base() - - # mutex to protect the SQLite base - self.locker = threading.Lock() + # MongoDB connections + self.articles = mongodb.Articles() def retrieve_feed(self): """ @@ -84,25 +82,12 @@ class FeedGetter(object): """Request the URL Executed in a thread. - SQLite objects created in a thread can only be used in that same thread ! """ - # Protect this part of code. - self.locker.acquire() - - self.conn = sqlite3.connect(utils.sqlite_base, isolation_level = None) - self.c = self.conn.cursor() - if utils.detect_url_errors([the_good_url]) == []: # if ressource is available add the articles in the base. - self.add_into_sqlite(the_good_url) - - self.conn.commit() - self.c.close() + self.add_into_database(the_good_url) - # Release this part of code. - self.locker.release() - - def add_into_sqlite(self, feed_link): + def add_into_database(self, feed_link): """ Add the articles of the feed 'a_feed' in the SQLite base. 
""" @@ -113,16 +98,23 @@ class FeedGetter(object): feed_image = a_feed.feed.image.href except: feed_image = "/img/feed-icon-28x28.png" - try: - self.c.execute('insert into feeds values (?,?,?,?,?)', (\ - utils.clear_string(a_feed.feed.title.encode('utf-8')), \ - a_feed.feed.link.encode('utf-8'), \ - feed_link, \ - feed_image, - "0")) - except sqlite3.IntegrityError: - # feed already in the base - pass + + sha1_hash = hashlib.sha1() + sha1_hash.update(feed_link.encode('utf-8')) + feed_id = sha1_hash.hexdigest() + + collection_dic = {"feed_id": feed_id, \ + "type": 0, \ + "feed_image": feed_image, \ + "feed_title": utils.clear_string(a_feed.feed.title.encode('utf-8')), \ + "feed_link": feed_link, \ + "site_link": a_feed.feed.link.encode('utf-8'), \ + "mail": False \ + } + + self.articles.add_collection(collection_dic) + + articles = [] for article in a_feed['entries']: description = "" try: @@ -142,37 +134,31 @@ class FeedGetter(object): except: post_date = datetime(*article.published_parsed[:6]) - try: - # try. Will only success if the article is not already in the data base - self.c.execute('insert into articles values (?, ?, ?, ?, ?, ?, ?)', (\ - post_date, \ - article_title, \ - article.link.encode('utf-8'), \ - description, \ - "0", \ - feed_link, \ - "0")) - result = self.c.execute("SELECT mail from feeds WHERE feed_site_link='" + \ - a_feed.feed.link.encode('utf-8') + "'").fetchall() - if result[0][0] == "1": - # if subscribed to the current feed - # send the article by e-mail - try: - threading.Thread(None, utils.send_mail, None, (utils.mail_from, utils.mail_to, \ - a_feed.feed.title.encode('utf-8'), \ - article_title, description) \ - ).start() - except Exception, e: - # SMTP acces denied, to many SMTP connections, etc. 
- top = traceback.extract_stack()[-1] - print ", ".join([type(e).__name__, os.path.basename(top[0]), str(top[1])]) - except sqlite3.IntegrityError: - # article already in the data base - pass - except Exception, e: - # Missing information (updated_parsed, ...) - top = traceback.extract_stack()[-1] - print ", ".join([type(e).__name__, os.path.basename(top[0]), str(top[1]), str(traceback.extract_stack()[-2][3])]) + + sha1_hash = hashlib.sha1() + sha1_hash.update(article.link.encode('utf-8')) + article_id = sha1_hash.hexdigest() + + article = {"article_id": article_id, \ + "type":1, \ + "article_date": post_date, \ + "article_link": article.link.encode('utf-8'), \ + "article_title": article_title, \ + "article_content": description, \ + "article_readed": False, \ + "article_like": False \ + } + + articles.append(article) + + self.articles.add_articles(articles, feed_id) + + # send new articles by e-mail if desired. + #threading.Thread(None, utils.send_mail, None, (utils.mail_from, utils.mail_to, \ + #a_feed.feed.title.encode('utf-8'), \ + #article_title, description) \ + #).start() + if __name__ == "__main__": diff --git a/img/hacker-news.png b/img/hacker-news.png Binary files differnew file mode 100644 index 00000000..ce92765d --- /dev/null +++ b/img/hacker-news.png diff --git a/mongodb.py b/mongodb.py new file mode 100644 index 00000000..d00b453e --- /dev/null +++ b/mongodb.py @@ -0,0 +1,253 @@ +#! /usr/bin/env python +# -*- coding: utf-8 -*- + +__author__ = "Cedric Bonhomme" +__version__ = "$Revision: 0.1 $" +__date__ = "$Date: 2012/03/03 $" +__revision__ = "$Date: 2012/03/03 $" +__copyright__ = "Copyright (c) Cedric Bonhomme" +__license__ = "GPLv3" + +import time +import pymongo + +from operator import itemgetter, attrgetter + +class Articles(object): + """ + """ + def __init__(self, url='localhost', port=27017): + """ + Instantiates the connection. 
+ """ + self.connection = pymongo.connection.Connection(url, port) + self.db = self.connection.pyaggr3g470r + + def add_collection(self, new_collection): + """ + Creates a new collection for a new feed. + """ + collection = self.db[new_collection["feed_id"]] + #collection.create_index([("feed_link", pymongo.ASCENDING)], {"unique":True, "sparse":True}) + collection.insert(new_collection) + + def add_articles(self, articles, feed_id): + """ + Add article(s) in a collection. + """ + collection = self.db[str(feed_id)] + + collection.create_index([("article_date", pymongo.DESCENDING)], \ + {"unique":False, "sparse":False}) + + for article in articles: + cursor = collection.find({"article_id":article["article_id"]}) + if cursor.count() == 0: + collection.insert(article) + + def delete_feed(self, feed_id): + """ + Delete a collection (feed with all articles). + """ + self.db.drop_collection(feed_id) + + def delete_article(self, feed_id, article_id): + """ + Delete an article. + """ + collection = self.db[str(feed_id)] + collection.find_and_modify(query={"article_id":article_id}, remove=True) + + def get_collection(self, feed_id): + """ + """ + return self.db[str(feed_id)].find().next() + + def get_all_articles(self): + """ + Return all articles from all collections. 
+ """ + articles = [] + collections = self.db.collection_names() + for collection_name in collections: + collection = self.db[collection_name] + articles.extend([article for article in collection.find({'type':1})]) + return articles + + def get_article(self, feed_id, article_id): + """ + """ + collection = self.db[str(feed_id)] + return collection.find({"article_id":article_id}).next() + + def get_all_collections(self, condition=None): + """ + """ + feeds = [] + collections = self.db.collection_names() + for collection_name in collections: + if collection_name != "system.indexes": + if condition is None: + cursor = self.db[collection_name].find({"type":0}) + else: + cursor = self.db[collection_name].find({"type":0, condition[0]:condition[1]}) + if cursor.count() != 0: + feeds.append(cursor.next()) + feeds.sort(key = lambda elem: elem['feed_title'].lower()) + return feeds + + def get_articles_from_collection(self, feed_id, condition=None): + """ + Return all the articles of a collection. + """ + collection = self.db[str(feed_id)] + if condition is None: + cursor = collection.find({"type":1}) + else: + cursor = collection.find({"type":1, condition[0]:condition[1]}) + return cursor.sort([("article_date", pymongo.DESCENDING)]) + + def print_articles_from_collection(self, collection_id): + """ + Print the articles of a collection. + """ + collection = self.db[str(collection_id)] + cursor = collection.find({"type":1}) + print "Article for the collection", collection_id + for d in cursor: + print d + print + + def nb_articles(self, feed_id=None): + """ + Return the number of articles. 
+ """ + if feed_id is not None: + collection = self.db[feed_id] + cursor = collection.find({'type':1}) + return cursor.count() + else: + nb_articles = 0 + for feed_id in self.db.collection_names(): + nb_articles += self.nb_articles(feed_id) + return nb_articles + + def nb_favorites(self, feed_id=None): + if feed_id is not None: + collection = self.db[feed_id] + cursor = collection.find({'type':1, 'article_like':True}) + return cursor.count() + else: + nb_favorites = 0 + for feed_id in self.db.collection_names(): + nb_favorites += self.nb_favorites(feed_id) + return nb_favorites + + def nb_mail_notifications(self): + """ + Return the number of subscribed feeds. + """ + nb_mail_notifications = 0 + for feed_id in self.db.collection_names(): + collection = self.db[feed_id] + cursor = collection.find({'type':0, 'mail':True}) + nb_mail_notifications += cursor.count() + return nb_mail_notifications + + def nb_unread_articles(self, feed_id=None): + if feed_id is not None: + collection = self.db[feed_id] + cursor = collection.find({'article_readed':False}) + return cursor.count() + else: + unread_articles = 0 + for feed_id in self.db.collection_names(): + unread_articles += self.nb_unread_articles(feed_id) + return unread_articles + + def like_article(self, like, feed_id, article_id): + """ + Like or unlike an article. 
+ """ + collection = self.db[str(feed_id)] + collection.update({"article_id": article_id}, {"$set": {"article_like": like}}) + + def mark_as_read(self, readed, feed_id=None, article_id=None): + """ + """ + if feed_id != None and article_id != None: + collection = self.db[str(feed_id)] + collection.update({"article_id": article_id, "article_readed":not readed}, {"$set": {"article_readed": readed}}) + elif feed_id != None and article_id == None: + collection = self.db[str(feed_id)] + collection.update({"type": 1, "article_readed":not readed}, {"$set": {"article_readed": readed}}, multi=True) + else: + for feed_id in self.db.collection_names(): + self.mark_as_read(readed, feed_id, None) + + def list_collections(self): + """ + List all collections (feed). + """ + collections = self.db.collection_names() + return collections + + # Functions on database + def drop_database(self): + """ + Drop all the database + """ + self.connection.drop_database('pyaggr3g470r') + + +if __name__ == "__main__": + # Point of entry in execution mode. 
+ articles = Articles() + + + # Create a collection for a stream + collection_dic = {"collection_id": 42,\ + "feed_image": "Image", \ + "feed_title": "Title", \ + "feed_link": "Link", \ + "site_title": "Site link", \ + "mail": True, \ + } + + #articles.add_collection(collection_dic) + + + + # Add an article in the newly created collection + article_dic1 = {"article_id": 51, \ + "article_date": "Today", \ + "article_link": "Link of the article", \ + "article_title": "The title", \ + "article_content": "The content of the article", \ + "article_readed": True, \ + "article_like": True \ + } + + article_dic2 = {"article_id": 52, \ + "article_date": "Yesterday", \ + "article_link": "Link", \ + "article_title": "Hello", \ + "article_content": "The content of the article", \ + "article_readed": True, \ + "article_like": True \ + } + + #articles.add_articles([article_dic1, article_dic2], 42) + + + # Print articles of the collection + #articles.print_articles_from_collection("http://esr.ibiblio.org/?feed=rss2") + + + print "All articles:" + #print articles.get_all_articles() + + + + # Drop the database + articles.drop_database()
\ No newline at end of file diff --git a/pyAggr3g470r.py b/pyAggr3g470r.py index 28e3ca46..ea708d18 100755 --- a/pyAggr3g470r.py +++ b/pyAggr3g470r.py @@ -2,7 +2,7 @@ #-*- coding: utf-8 -*- # pyAggr3g470r - A Web based news aggregator. -# Copyright (C) 2010 Cédric Bonhomme - http://cedricbonhomme.org/ +# Copyright (C) 2010-2012 Cédric Bonhomme - http://cedricbonhomme.org/ # # For more information : http://bitbucket.org/cedricbonhomme/pyaggr3g470r/ # @@ -22,7 +22,7 @@ __author__ = "Cedric Bonhomme" __version__ = "$Revision: 3.1 $" __date__ = "$Date: 2010/01/29 $" -__revision__ = "$Date: 2011/11/29 $" +__revision__ = "$Date: 2012/03/09 $" __copyright__ = "Copyright (c) Cedric Bonhomme" __license__ = "GPLv3" @@ -41,16 +41,15 @@ __license__ = "GPLv3" import os import re import time -import sqlite3 import cherrypy import calendar -import threading from collections import Counter import datetime import utils import export +import mongodb import feedgetter from qrcode.pyqrnative.PyQRNative import QRCode, QRErrorCorrectLevel, CodeOverflowException from qrcode import qr @@ -107,89 +106,99 @@ class Root: Root class. All pages of pyAggr3g470r are described in this class. """ + def __init__(self): + """ + """ + self.mongo = mongodb.Articles(utils.MONGODB_ADDRESS, utils.MONGODB_PORT) + def index(self): """ Main page containing the list of feeds and articles. 
""" + feeds = self.mongo.get_all_collections() + nb_unread_articles = self.mongo.nb_unread_articles() + nb_favorites = self.mongo.nb_favorites() + nb_mail_notifications = self.mongo.nb_mail_notifications() + # if there are unread articles, display the number in the tab of the browser - html = htmlheader((self.nb_unread_articles and \ - ['(' + str(self.nb_unread_articles) +') '] or \ + html = htmlheader((nb_unread_articles and \ + ['(' + str(nb_unread_articles) +') '] or \ [""])[0]) html += htmlnav html += self.create_right_menu() html += """<div class="left inner">\n""" - if self.feeds: + if feeds: html += '<a href="/management/"><img src="/img/management.png" title="Management" /></a>\n' html += '<a href="/history/"><img src="/img/history.png" title="History" /></a>\n' html += ' \n' html += """<a href="/favorites/"><img src="/img/heart-32x32.png" title="Your favorites (%s)" /></a>\n""" % \ - (self.nb_favorites,) + (nb_favorites,) html += """<a href="/notifications/"><img src="/img/email-follow.png" title="Active e-mail notifications (%s)" /></a>\n""" % \ - (self.nb_mail_notifications,) + (nb_mail_notifications,) html += ' ' - if self.nb_unread_articles != 0: + if nb_unread_articles != 0: html += '<a href="/mark_as_read/"><img src="/img/mark-as-read.png" title="Mark articles as read" /></a>\n' html += """<a href="/unread/"><img src="/img/unread.png" title="Unread article(s): %s" /></a>\n""" % \ - (self.nb_unread_articles,) + (nb_unread_articles,) html += '<a accesskey="F" href="/fetch/"><img src="/img/check-news.png" title="Check for news" /></a>\n' + # The main page display all the feeds. 
- for feed in self.feeds.values(): + for feed in feeds: html += """<h2><a name="%s"><a href="%s" rel="noreferrer" target="_blank">%s</a></a> <a href="%s" rel="noreferrer" target="_blank"><img src="%s" width="28" height="28" /></a></h2>\n""" % \ - (feed.feed_id, feed.feed_site_link, feed.feed_title, \ - feed.feed_link, feed.feed_image) + (feed["feed_id"], feed["feed_link"], feed["feed_title"], \ + feed["feed_link"], feed["feed_image"]) # The main page display only 10 articles by feeds. - for article in feed.articles.values()[:10]: - - if article.article_readed == "0": + for article in self.mongo.get_articles_from_collection(feed["feed_id"])[:10]: + if article["article_readed"] == False: # not readed articles are in bold not_read_begin, not_read_end = "<b>", "</b>" else: not_read_begin, not_read_end = "", "" # display a heart for faved articles - if article.like == "1": + if article["article_like"] == True: like = """ <img src="/img/heart.png" title="I like this article!" />""" else: like = "" # Descrition for the CSS ToolTips - article_content = utils.clear_string(article.article_description) + article_content = utils.clear_string(article["article_content"]) if article_content: description = " ".join(article_content.split(' ')[:55]) else: description = "No description." # Title of the article - article_title = article.article_title + article_title = article["article_title"] if len(article_title) >= 110: article_title = article_title[:110] + " ..." 
# a description line per article (date, title of the article and # CSS description tooltips on mouse over) - html += article.article_date + " - " + \ + html += str(article["article_date"]) + " - " + \ """<a class="tooltip" href="/article/%s:%s" rel="noreferrer" target="_blank">%s%s%s<span class="classic">%s</span></a>""" % \ - (feed.feed_id, article.article_id, not_read_begin, \ + (feed["feed_id"], article["article_id"], not_read_begin, \ article_title, not_read_end, description) + like + "<br />\n" html += "<br />\n" # some options for the current feed - html += """<a href="/articles/%s">All articles</a> """ % (feed.feed_id,) - html += """<a href="/feed/%s">Feed summary</a> """ % (feed.feed_id,) - if feed.nb_unread_articles != 0: - html += """ <a href="/mark_as_read/Feed_FromMainPage:%s">Mark all as read</a>""" % (feed.feed_id,) - html += """ <a href="/unread/%s" title="Unread article(s)">Unread article(s) (%s)</a>""" % (feed.feed_id, feed.nb_unread_articles) - if feed.mail == "0": - html += """<br />\n<a href="/mail_notification/1:%s" title="By e-mail">Stay tuned</a>""" % (feed.feed_id,) + html += """<a href="/articles/%s">All articles</a> """ % (feed["feed_id"],) + html += """<a href="/feed/%s">Feed summary</a> """ % (feed["feed_id"],) + if self.mongo.nb_unread_articles(feed["feed_id"]) != 0: + html += """ <a href="/mark_as_read/Feed_FromMainPage:%s">Mark all as read</a>""" % (feed["feed_id"],) + html += """ <a href="/unread/%s" title="Unread article(s)">Unread article(s) (%s)</a>""" % (feed["feed_id"], self.mongo.nb_unread_articles(feed["feed_id"])) + if feed["mail"] == "0": + html += """<br />\n<a href="/mail_notification/1:%s" title="By e-mail">Stay tuned</a>""" % (feed["feed_id"],) else: - html += """<br />\n<a href="/mail_notification/0:%s" title="By e-mail">Stop staying tuned</a>""" % (feed.feed_id,) + html += """<br />\n<a href="/mail_notification/0:%s" title="By e-mail">Stop staying tuned</a>""" % (feed["feed_id"],) html += """<h4><a 
href="/#top">Top</a></h4>""" html += "<hr />\n" html += htmlfooter @@ -203,7 +212,7 @@ class Root: Create the right menu. """ html = """<div class="right inner">\n""" - html += """<form method=get action="/q/"><input type="search" name="querystring" value="" placeholder="Search articles" maxlength=2048 autocomplete="on"></form>\n""" + html += """<form method=get action="/search/"><input type="search" name="query" value="" placeholder="Search articles" maxlength=2048 autocomplete="on"></form>\n""" html += "<hr />\n" # insert the list of feeds in the menu html += self.create_list_of_feeds() @@ -215,25 +224,32 @@ class Root: """ Create the list of feeds. """ - html = """<div class="nav_container">Your feeds (%s):<br />\n""" % len(self.feeds) - for feed in self.feeds.values(): - if feed.nb_unread_articles != 0: + feeds = self.mongo.get_all_collections() + html = """<div class="nav_container">Your feeds (%s):<br />\n""" % len(feeds) + for feed in feeds: + if self.mongo.nb_unread_articles(feed["feed_id"]) != 0: # not readed articles are in bold not_read_begin, not_read_end = "<b>", "</b>" else: not_read_begin, not_read_end = "", "" html += """<div><a href="/#%s">%s</a> (<a href="/unread/%s" title="Unread article(s)">%s%s%s</a> / %s)</div>""" % \ - (feed.feed_id, feed.feed_title, feed.feed_id, not_read_begin, \ - feed.nb_unread_articles, not_read_end, feed.nb_articles) + (feed["feed_id"], feed["feed_title"], feed["feed_id"], not_read_begin, \ + self.mongo.nb_unread_articles(feed["feed_id"]), not_read_end, self.mongo.nb_articles(feed["feed_id"])) return html + "</div>" def management(self, max_nb_articles=5): """ Management page. - Allows adding and deleting feeds. Export functions of the SQLite data base + Allows adding and deleting feeds. Export functions of the MongoDB data base and display some statistics. 
""" + feeds = self.mongo.get_all_collections() + nb_mail_notifications = self.mongo.nb_mail_notifications() + nb_favorites = self.mongo.nb_favorites() + nb_articles = self.mongo.nb_articles() + nb_unread_articles = self.mongo.nb_unread_articles() + html = htmlheader() html += htmlnav html += """<div class="left inner">\n""" @@ -241,27 +257,27 @@ class Root: # Form: add a feed html += """<form method=get action="/add_feed/"><input type="url" name="url" placeholder="URL of a site" maxlength=2048 autocomplete="off">\n<input type="submit" value="OK"></form>\n""" - if self.feeds: + if feeds: # Form: delete a feed html += "<h1>Delete Feeds</h1>\n" html += """<form method=get action="/remove_feed/"><select name="feed_id">\n""" - for feed in self.feeds.values(): - html += """\t<option value="%s">%s</option>\n""" % (feed.feed_id, feed.feed_title) + for feed in feeds: + html += """\t<option value="%s">%s</option>\n""" % (feed["feed_id"], feed["feed_title"]) html += """</select><input type="submit" value="OK"></form>\n""" html += """<p>Active e-mail notifications: <a href="/notifications/">%s</a></p>\n""" % \ - (self.nb_mail_notifications,) + (nb_mail_notifications,) html += """<p>You like <a href="/favorites/">%s</a> article(s).</p>\n""" % \ - (self.nb_favorites, ) + (nb_favorites, ) html += "<hr />\n" # Informations about the data base of articles html += """<p>%s article(s) are loaded from the database with <a href="/unread/">%s unread article(s)</a>.<br />\n""" % \ - (self.nb_articles, self.nb_unread_articles) - html += """Database: %s.\n<br />Size: %s bytes.<br />\n""" % \ - (os.path.abspath(utils.sqlite_base), os.path.getsize(utils.sqlite_base)) + (nb_articles, nb_unread_articles) + #html += """Database: %s.\n<br />Size: %s bytes.<br />\n""" % \ + #(os.path.abspath(utils.sqlite_base), os.path.getsize(utils.sqlite_base)) html += '<a href="/statistics/">Advanced statistics.</a></p>\n' html += """<form method=get action="/fetch/">\n<input type="submit" value="Fetch all 
feeds"></form>\n""" @@ -298,13 +314,14 @@ class Root: """ More advanced statistics. """ + articles = self.mongo.get_all_articles() html = htmlheader() html += htmlnav html += """<div class="left inner">\n""" # Some statistics (most frequent word) - if self.feeds: - self.top_words = utils.top_words(self.feeds, n=50, size=int(word_size)) + if articles: + top_words = utils.top_words(articles, n=50, size=int(word_size)) html += "<h1>Statistics</h1>\n<br />\n" # Tags cloud html += 'Minimum size of a word:' @@ -313,28 +330,29 @@ class Root: html += '<input type="submit" value="OK"></form>\n' html += '<br /><h3>Tag cloud</h3>\n' html += '<div style="width: 35%; overflow:hidden; text-align: justify">' + \ - utils.tag_cloud(self.top_words) + '</div>' + utils.tag_cloud(top_words) + '</div>' html += "<hr />\n" html += htmlfooter return html - statistics.exposed = True - def q(self, querystring=None): + + + def search(self, query=None): """ - Simply search for the string 'querystring' + Simply search for the string 'query' in the description of the article. 
""" - param, _, value = querystring.partition(':') + param, _, value = query.partition(':') wordre = re.compile(r'\b%s\b' % param, re.I) feed_id = None if param == "Feed": - feed_id, _, querystring = value.partition(':') + feed_id, _, query = value.partition(':') html = htmlheader() html += htmlnav html += """<div class="left inner">""" - html += """<h1>Articles containing the string <i>%s</i></h1><br />""" % (querystring,) + html += """<h1>Articles containing the string <i>%s</i></h1><br />""" % (query,) if feed_id is not None: for article in self.feeds[feed_id].articles.values(): @@ -353,32 +371,33 @@ class Root: (feed_id, article.article_id, article.article_title) + \ not_read_end + """<br />\n""" else: - for feed in self.feeds.values(): + feeds = self.mongo.get_all_collections() + for feed in feeds: new_feed_section = True - for article in feed.articles.values(): - article_content = utils.clear_string(article.article_description) + for article in self.mongo.get_articles_from_collection(feed["feed_id"]): + article_content = utils.clear_string(article["article_content"]) if not article_content: - utils.clear_string(article.article_title) + utils.clear_string(article["article_title"]) if wordre.findall(article_content) != []: if new_feed_section is True: new_feed_section = False html += """<h2><a href="/articles/%s" rel="noreferrer" target="_blank">%s</a><a href="%s" rel="noreferrer" target="_blank"><img src="%s" width="28" height="28" /></a></h2>\n""" % \ - (feed.feed_id, feed.feed_title, feed.feed_link, feed.feed_image) + (feed["feed_id"], feed["feed_title"], feed["feed_link"], feed["feed_image"]) - if article.article_readed == "0": + if article["article_readed"] == False: # not readed articles are in bold not_read_begin, not_read_end = "<b>", "</b>" else: not_read_begin, not_read_end = "", "" # display a heart for faved articles - if article.like == "1": + if article["article_like"] == True: like = """ <img src="/img/heart.png" title="I like this article!" 
/>""" else: like = "" # descrition for the CSS ToolTips - article_content = utils.clear_string(article.article_description) + article_content = utils.clear_string(article["article_content"]) if article_content: description = " ".join(article_content[:500].split(' ')[:-1]) else: @@ -386,15 +405,15 @@ class Root: # a description line per article (date, title of the article and # CSS description tooltips on mouse over) - html += article.article_date + " - " + \ + html += str(article["article_date"]) + " - " + \ """<a class="tooltip" href="/article/%s:%s" rel="noreferrer" target="_blank">%s%s%s<span class="classic">%s</span></a>""" % \ - (feed.feed_id, article.article_id, not_read_begin, \ - article.article_title[:150], not_read_end, description) + like + "<br />\n" + (feed["feed_id"], article["article_id"], not_read_begin, \ + article["article_title"][:150], not_read_end, description) + like + "<br />\n" html += "<hr />" html += htmlfooter return html - q.exposed = True + search.exposed = True def fetch(self): @@ -414,33 +433,35 @@ class Root: """ try: feed_id, article_id = param.split(':') - feed, article = self.feeds[feed_id], self.feeds[feed_id].articles[article_id] + feed = self.mongo.get_collection(feed_id) + articles = self.mongo.get_articles_from_collection(feed_id) + article = self.mongo.get_article(feed_id, article_id) except: return self.error_page("Bad URL. 
This article do not exists.") html = htmlheader() html += htmlnav html += """<div>""" - if article.article_readed == "0": + if article["article_readed"] == False: # if the current article is not yet readed, update the database - self.mark_as_read("Article:"+article.article_link) + self.mark_as_read("Article:"+article["article_id"]+":"+feed["feed_id"]) html += '\n<div style="width: 50%; overflow:hidden; text-align: justify; margin:0 auto">\n' # Title of the article html += """<h1><i>%s</i> from <a href="/feed/%s">%s</a></h1>\n<br />\n""" % \ - (article.article_title, feed_id, feed.feed_title) - if article.like == "1": + (article["article_title"], feed_id, feed["feed_title"]) + if article["article_like"] == True: html += """<a href="/like/0:%s:%s"><img src="/img/heart.png" title="I like this article!" /></a>""" % \ - (feed_id, article.article_id) + (feed_id, article["article_id"]) else: html += """<a href="/like/1:%s:%s"><img src="/img/heart_open.png" title="Click if you like this article." 
/></a>""" % \ - (feed_id, article.article_id) + (feed_id, article["article_id"]) html += """ <a href="/delete_article/%s:%s"><img src="/img/cross.png" title="Delete this article" /></a>""" % \ - (feed_id, article.article_id) + (feed_id, article["article_id"]) html += "<br /><br />" # Description (full content) of the article - description = article.article_description + description = article["article_content"] if description: p = re.compile(r'<code><') q = re.compile(r'></code>') @@ -465,74 +486,79 @@ class Root: f = qr.QRUrl(url = utils.clear_string(description)) f.make() except: - f = qr.QRUrl(url = article.article_link) + f = qr.QRUrl(url = article["article_link"]) f.make() f.save("./var/qrcode/"+article_id+".png") # Previous and following articles + articles_list = articles.distinct("article_id") try: - following = feed.articles.values()[feed.articles.keys().index(article_id) - 1] + following = articles[articles_list.index(article_id) - 1] html += """<div style="float:right;"><a href="/article/%s:%s" title="%s"><img src="/img/following-article.png" /></a></div>\n""" % \ - (feed_id, following.article_id, following.article_title) - except: - pass + (feed_id, following["article_id"], following["article_title"]) + except Exception, e: + print e try: - previous = feed.articles.values()[feed.articles.keys().index(article_id) + 1] + previous = articles[articles_list.index(article_id) + 1] except: - previous = feed.articles.values()[0] + previous = articles[0] finally: html += """<div style="float:left;"><a href="/article/%s:%s" title="%s"><img src="/img/previous-article.png" /></a></div>\n""" % \ - (feed_id, previous.article_id, previous.article_title) + (feed_id, previous["article_id"], previous["article_title"]) html += "\n</div>\n" # Footer menu html += "<hr />\n" - html += """\n<a href="/plain_text/%s:%s">Plain text</a>\n""" % (feed_id, article.article_id) - html += """ - <a href="/epub/%s:%s">Export to EPUB</a>\n""" % (feed_id, article.article_id) - html += 
"""<br />\n<a href="%s">Complete story</a>\n<br />\n""" % (article.article_link,) + html += """\n<a href="/plain_text/%s:%s">Plain text</a>\n""" % (feed_id, article["article_id"]) + html += """ - <a href="/epub/%s:%s">Export to EPUB</a>\n""" % (feed_id, article["article_id"]) + html += """<br />\n<a href="%s">Complete story</a>\n<br />\n""" % (article["article_link"],) # Share this article: html += "Share this article:<br />\n" # on Diaspora html += """<a href="javascript:(function(){f='https://%s/bookmarklet?url=%s&title=%s&notes=%s&v=1&';a=function(){if(!window.open(f+'noui=1&jump=doclose','diasporav1','location=yes,links=no,scrollbars=no,toolbar=no,width=620,height=250'))location.href=f+'jump=yes'};if(/Firefox/.test(navigator.userAgent)){setTimeout(a,0)}else{a()}})()">\n\t <img src="/img/diaspora.png" title="Share on Diaspora" /></a>\n""" % \ - (utils.DIASPORA_POD, article.article_link, article.article_title, "via pyAggr3g470r") + (utils.DIASPORA_POD, article["article_link"], article["article_title"], "via pyAggr3g470r") # on Identi.ca html += """\n\n<a href="http://identi.ca/index.php?action=newnotice&status_textarea=%s: %s" title="Share on Identi.ca" target="_blank"><img src="/img/identica.png" /></a>""" % \ - (article.article_title, article.article_link) + (article["article_title"], article["article_link"]) + # on Hacker News + html += """\n\n<a href='javascript:window.location="http://news.ycombinator.com/submitlink?u="+encodeURIComponent("%s")+"&t="+encodeURIComponent("%s")'><img src="/img/hacker-news.png" title="Share on Hacker News" /></a>""" % \ + (article["article_link"], article["article_title"]) + # on Pinboard html += """\n\n\t<a href="https://api.pinboard.in/v1/posts/add?url=%s&description=%s" rel="noreferrer" target="_blank">\n <img src="/img/pinboard.png" title="Share on Pinboard" /></a>""" % \ - (article.article_link, article.article_title) + (article["article_link"], article["article_title"]) # on Digg html += """\n\n\t<a 
href="http://digg.com/submit?url=%s&title=%s" rel="noreferrer" target="_blank">\n <img src="/img/digg.png" title="Share on Digg" /></a>""" % \ - (article.article_link, article.article_title) + (article["article_link"], article["article_title"]) # on reddit html += """\n\n\t<a href="http://reddit.com/submit?url=%s&title=%s" rel="noreferrer" target="_blank">\n <img src="/img/reddit.png" title="Share on reddit" /></a>""" % \ - (article.article_link, article.article_title) + (article["article_link"], article["article_title"]) # on Scoopeo html += """\n\n\t<a href="http://scoopeo.com/scoop/new?newurl=%s&title=%s" rel="noreferrer" target="_blank">\n <img src="/img/scoopeo.png" title="Share on Scoopeo" /></a>""" % \ - (article.article_link, article.article_title) + (article["article_link"], article["article_title"]) # on Blogmarks html += """\n\n\t<a href="http://blogmarks.net/my/new.php?url=%s&title=%s" rel="noreferrer" target="_blank">\n <img src="/img/blogmarks.png" title="Share on Blogmarks" /></a>""" % \ - (article.article_link, article.article_title) + (article["article_link"], article["article_title"]) # Google +1 button html += """\n\n<g:plusone size="standard" count="true" href="%s"></g:plusone>""" % \ - (article.article_link,) + (article["article_link"],) # QRCode (for smartphone) @@ -550,76 +576,77 @@ class Root: favourite articles for the current feed. """ try: - feed = self.feeds[feed_id] + feed = self.mongo.get_collection(feed_id) + articles = self.mongo.get_articles_from_collection(feed_id) except KeyError: return self.error_page("This feed do not exists.") html = htmlheader() html += htmlnav html += """<div class="left inner">""" - html += "<p>The feed <b>" + feed.feed_title + "</b> contains <b>" + str(feed.nb_articles) + "</b> articles. 
" - html += "Representing " + str((round(float(feed.nb_articles) / self.nb_articles, 4)) * 100) + " % of the total " - html += "(" + str(self.nb_articles) + ").</p>" - if feed.articles.values() != []: - html += "<p>" + (feed.nb_unread_articles == 0 and ["All articles are read"] or [str(feed.nb_unread_articles) + \ - " unread article" + (feed.nb_unread_articles == 1 and [""] or ["s"])[0]])[0] + ".</p>" - if feed.mail == "1": + html += "<p>The feed <b>" + feed["feed_title"] + "</b> contains <b>" + str(self.mongo.nb_articles(feed_id)) + "</b> articles. " + html += "Representing " + str((round(float(self.mongo.nb_articles(feed_id)) / 1000, 4)) * 100) + " % of the total " #hack + html += "(" + str(1000) + ").</p>" + if articles != []: + html += "<p>" + (self.mongo.nb_unread_articles(feed_id) == 0 and ["All articles are read"] or [str(self.mongo.nb_unread_articles(feed_id)) + \ + " unread article" + (self.mongo.nb_unread_articles(feed_id) == 1 and [""] or ["s"])[0]])[0] + ".</p>" + if feed["mail"] == True: html += """<p>You are receiving articles from this feed to the address: <a href="mail:%s">%s</a>. 
""" % \ (utils.mail_to, utils.mail_to) html += """<a href="/mail_notification/0:%s">Stop</a> receiving articles from this feed.</p>""" % \ - (feed.feed_id, ) + (feed[feed_id], ) - if feed.articles.values() != []: - last_article = utils.string_to_datetime(feed.articles.values()[0].article_date) - first_article = utils.string_to_datetime(feed.articles.values()[-1].article_date) + if articles != []: + last_article = utils.string_to_datetime(str(articles[0]["article_date"])) + first_article = utils.string_to_datetime(str(articles[self.mongo.nb_articles(feed_id)-2]["article_date"])) delta = last_article - first_article delta_today = datetime.datetime.fromordinal(datetime.date.today().toordinal()) - last_article html += "<p>The last article was posted " + str(abs(delta_today.days)) + " day(s) ago.</p>" if delta.days > 0: - html += """<p>Daily average: %s,""" % (str(round(float(feed.nb_articles)/abs(delta.days), 2)),) + html += """<p>Daily average: %s,""" % (str(round(float(self.mongo.nb_articles(feed_id))/abs(delta.days), 2)),) html += """ between the %s and the %s.</p>\n""" % \ - (feed.articles.values()[-1].article_date[:10], feed.articles.values()[0].article_date[:10]) + (str(articles[self.mongo.nb_articles(feed_id)-2]["article_date"])[:10], str(articles[0]["article_date"])[:10]) html += "<br /><h1>Recent articles</h1>" - for article in feed.articles.values()[:10]: - if article.article_readed == "0": + for article in articles[:10]: + if article["article_readed"] == False: # not readed articles are in bold not_read_begin, not_read_end = "<b>", "</b>" else: not_read_begin, not_read_end = "", "" # display a heart for faved articles - if article.like == "1": + if article["article_like"] == True: like = """ <img src="/img/heart.png" title="I like this article!" 
/>""" else: like = "" # Descrition for the CSS ToolTips - article_content = utils.clear_string(article.article_description) + article_content = utils.clear_string(article["article_content"]) if article_content: description = " ".join(article_content[:500].split(' ')[:-1]) else: description = "No description." # Title of the article - article_title = article.article_title + article_title = article["article_title"] if len(article_title) >= 110: article_title = article_title[:110] + " ..." # a description line per article (date, title of the article and # CSS description tooltips on mouse over) - html += article.article_date + " - " + \ + html += str(article["article_date"]) + " - " + \ """<a class="tooltip" href="/article/%s:%s" rel="noreferrer" target="_blank">%s%s%s<span class="classic">%s</span></a>""" % \ - (feed.feed_id, article.article_id, not_read_begin, \ + (feed["feed_id"], article["article_id"], not_read_begin, \ article_title, not_read_end, description) + like + "<br />\n" html += "<br />\n" - html += """<a href="/articles/%s">All articles</a> """ % (feed.feed_id,) + html += """<a href="/articles/%s">All articles</a> """ % (feed["feed_id"],) - favs = [article for article in feed.articles.values() if article.like == "1"] + favs = [article for article in articles if article["article_like"] == True] if len(favs) != 0: html += "<br /></br /><h1>Your favorites articles for this feed</h1>" for article in favs: - if article.like == "1": + if article["like"] == True: # descrition for the CSS ToolTips - article_content = utils.clear_string(article.article_description) + article_content = utils.clear_string(article["article_content"]) if article_content: description = " ".join(article_content[:500].split(' ')[:-1]) else: @@ -627,9 +654,9 @@ class Root: # a description line per article (date, title of the article and # CSS description tooltips on mouse over) - html += article.article_date + " - " + \ + html += str(article["article_date"]) + " - " + \ """<a 
class="tooltip" href="/article/%s:%s" rel="noreferrer" target="_blank">%s<span class="classic">%s</span></a><br />\n""" % \ - (feed.feed_id, article.article_id, article.article_title[:150], description) + (feed["feed_id"], article["article_id"], article["article_title"][:150], description) # This section enables the user to edit informations about @@ -642,25 +669,24 @@ class Root: '<input type="text" name="new_feed_name" value="" ' + \ 'placeholder="Enter a new name (then press Enter)." maxlength=2048 autocomplete="on" size="50" />' + \ """<input type="hidden" name="feed_url" value="%s" /></form>\n""" % \ - (feed.feed_link,) + (feed["feed_link"],) html += '\n\n<form method=post action="/change_feed_url/">' + \ '<input type="url" name="new_feed_url" value="" ' + \ 'placeholder="Enter a new URL in order to retrieve articles (then press Enter)." maxlength=2048 autocomplete="on" size="50" />' + \ """<input type="hidden" name="old_feed_url" value="%s" /></form>\n""" % \ - (feed.feed_link,) + (feed["feed_link"],) html += '\n\n<form method=post action="/change_feed_logo/">' + \ '<input type="url" name="new_feed_logo" value="" ' + \ 'placeholder="Enter the URL of the logo (then press Enter)." 
maxlength=2048 autocomplete="on" size="50" />' + \ """<input type="hidden" name="feed_url" value="%s" /></form>\n""" % \ - (feed.feed_link,) + (feed["feed_link"],) dic = {} - dic[feed.feed_id] = self.feeds[feed.feed_id] - top_words = utils.top_words(dic, n=50, size=int(word_size)) + top_words = utils.top_words(articles = self.mongo.get_articles_from_collection(feed_id), n=50, size=int(word_size)) html += "</br /><h1>Tag cloud</h1>\n<br />\n" # Tags cloud html += 'Minimum size of a word:' - html += """<form method=get action="/feed/%s">""" % (feed.feed_id,) + html += """<form method=get action="/feed/%s">""" % (feed["feed_id"],) html += """<input type="number" name="word_size" value="%s" min="2" max="15" step="1" size="2">""" % (word_size,) html += '<input type="submit" value="OK"></form>\n' html += '<div style="width: 35%; overflow:hidden; text-align: justify">' + \ @@ -679,34 +705,35 @@ class Root: This page displays all articles of a feed. """ try: - feed = self.feeds[feed_id] + feed = self.mongo.get_collection(feed_id) + articles = self.mongo.get_articles_from_collection(feed_id) except KeyError: return self.error_page("This feed do not exists.") html = htmlheader() html += htmlnav html += """<div class="right inner">\n""" html += """<a href="/mark_as_read/Feed:%s">Mark all articles from this feed as read</a>""" % (feed_id,) - html += """<br />\n<form method=get action="/q/%s"><input type="search" name="querystring" value="" placeholder="Search this feed" maxlength=2048 autocomplete="on"></form>\n""" % ("Feed:"+feed_id,) + html += """<br />\n<form method=get action="/search/%s"><input type="search" name="query" value="" placeholder="Search this feed" maxlength=2048 autocomplete="on"></form>\n""" % ("Feed:"+feed_id,) html += "<hr />\n" html += self.create_list_of_feeds() html += """</div> <div class="left inner">""" - html += """<h1>Articles of the feed <i>%s</i></h1><br />""" % (feed.feed_title) + html += """<h1>Articles of the feed <i>%s</i></h1><br />""" % 
(feed["feed_title"],) - for article in feed.articles.values(): + for article in articles: - if article.article_readed == "0": + if article["article_readed"] == False: # not readed articles are in bold not_read_begin, not_read_end = "<b>", "</b>" else: not_read_begin, not_read_end = "", "" - if article.like == "1": + if article["article_like"] == True: like = """ <img src="/img/heart.png" title="I like this article!" />""" else: like = "" # descrition for the CSS ToolTips - article_content = utils.clear_string(article.article_description) + article_content = utils.clear_string(article["article_content"]) if article_content: description = " ".join(article_content[:500].split(' ')[:-1]) else: @@ -714,10 +741,10 @@ class Root: # a description line per article (date, title of the article and # CSS description tooltips on mouse over) - html += article.article_date + " - " + \ + html += str(article["article_date"]) + " - " + \ """<a class="tooltip" href="/article/%s:%s" rel="noreferrer" target="_blank">%s%s%s<span class="classic">%s</span></a>""" % \ - (feed.feed_id, article.article_id, not_read_begin, \ - article.article_title[:150], not_read_end, description) + like + "<br />\n" + (feed_id, article["article_id"], not_read_begin, \ + article["article_title"][:150], not_read_end, description) + like + "<br />\n" html += """\n<h4><a href="/">All feeds</a></h4>""" html += "<hr />\n" @@ -731,52 +758,30 @@ class Root: """ This page displays all unread articles of a feed. 
""" + feeds = self.mongo.get_all_collections() html = htmlheader() html += htmlnav html += """<div class="left inner">""" - if self.nb_unread_articles != 0: + if self.mongo.nb_unread_articles() != 0: + + # List unread articles of all the database if feed_id == "": html += "<h1>Unread article(s)</h1>" html += """\n<br />\n<a href="/mark_as_read/">Mark articles as read</a>\n<hr />\n""" - for feed in self.feeds.values(): + for feed in feeds: new_feed_section = True nb_unread = 0 - for article in feed.articles.values(): - if article.article_readed == "0": - nb_unread += 1 - if new_feed_section is True: - new_feed_section = False - html += """<h2><a name="%s"><a href="%s" rel="noreferrer" target="_blank">%s</a></a><a href="%s" rel="noreferrer" target="_blank"><img src="%s" width="28" height="28" /></a></h2>\n""" % \ - (feed.feed_id, feed.feed_site_link, feed.feed_title, feed.feed_link, feed.feed_image) - - # descrition for the CSS ToolTips - article_content = utils.clear_string(article.article_description) - if article_content: - description = " ".join(article_content[:500].split(' ')[:-1]) - else: - description = "No description." 
- - # a description line per article (date, title of the article and - # CSS description tooltips on mouse over) - html += article.article_date + " - " + \ - """<a class="tooltip" href="/article/%s:%s" rel="noreferrer" target="_blank">%s<span class="classic">%s</span></a><br />\n""" % \ - (feed.feed_id, article.article_id, article.article_title[:150], description) - - if nb_unread == feed.nb_unread_articles: - html += """<br />\n<a href="/mark_as_read/Feed:%s">Mark all articles from this feed as read</a>\n""" % \ - (feed.feed_id,) - html += """<hr />\n<a href="/mark_as_read/">Mark articles as read</a>\n""" - else: - try: - feed = self.feeds[feed_id] - except: - self.error_page("This feed do not exists.") - html += """<h1>Unread article(s) of the feed <a href="/articles/%s">%s</a></h1> - <br />""" % (feed.feed_id, feed.feed_title) - for article in feed.articles.values(): - if article.article_readed == "0": + + # For all unread article of the current feed. + for article in self.mongo.get_articles_from_collection(feed["feed_id"], condition=("article_readed", False)): + nb_unread += 1 + if new_feed_section is True: + new_feed_section = False + html += """<h2><a name="%s"><a href="%s" rel="noreferrer" target="_blank">%s</a></a><a href="%s" rel="noreferrer" target="_blank"><img src="%s" width="28" height="28" /></a></h2>\n""" % \ + (feed["feed_id"], feed["site_link"], feed["feed_title"], feed["feed_link"], feed["feed_image"]) + # descrition for the CSS ToolTips - article_content = utils.clear_string(article.article_description) + article_content = utils.clear_string(article["article_content"]) if article_content: description = " ".join(article_content[:500].split(' ')[:-1]) else: @@ -784,11 +789,41 @@ class Root: # a description line per article (date, title of the article and # CSS description tooltips on mouse over) - html += article.article_date + " - " + \ + html += str(article["article_date"]) + " - " + \ """<a class="tooltip" href="/article/%s:%s" rel="noreferrer" 
target="_blank">%s<span class="classic">%s</span></a><br />\n""" % \ - (feed.feed_id, article.article_id, article.article_title[:150], description) + (feed["feed_id"], article["article_id"], article["article_title"][:150], description) + + if nb_unread == self.mongo.nb_unread_articles(feed["feed_id"]): + html += """<br />\n<a href="/mark_as_read/Feed:%s">Mark all articles from this feed as read</a>\n""" % \ + (feed["feed_id"],) + html += """<hr />\n<a href="/mark_as_read/">Mark articles as read</a>\n""" + + # List unread articles of a feed + else: + try: + feed = self.mongo.get_collection(feed_id) + except: + self.error_page("This feed do not exists.") + html += """<h1>Unread article(s) of the feed <a href="/articles/%s">%s</a></h1> + <br />""" % (feed_id, feed["feed_title"]) + + # For all unread article of the feed. + for article in self.mongo.get_articles_from_collection(feed_id, condition=("article_readed", False)): + # descrition for the CSS ToolTips + article_content = utils.clear_string(article["article_content"]) + if article_content: + description = " ".join(article_content[:500].split(' ')[:-1]) + else: + description = "No description." 
+ + # a description line per article (date, title of the article and + # CSS description tooltips on mouse over) + html += str(article["article_date"]) + " - " + \ + """<a class="tooltip" href="/article/%s:%s" rel="noreferrer" target="_blank">%s<span class="classic">%s</span></a><br />\n""" % \ + (feed_id, article["article_id"], article["article_title"][:150], description) - html += """<hr />\n<a href="/mark_as_read/Feed:%s">Mark all as read</a>""" % (feed.feed_id,) + html += """<hr />\n<a href="/mark_as_read/Feed:%s">Mark all as read</a>""" % (feed_id,) + # No unread article else: html += '<h1>No unread article(s)</h1>\n<br />\n<a href="/fetch/">Why not check for news?</a>' html += """\n<h4><a href="/">All feeds</a></h4>""" @@ -799,65 +834,66 @@ class Root: unread.exposed = True - def history(self, querystring="all", m=""): + def history(self, query="all", m=""): """ This page enables to browse articles chronologically. """ + feeds = self.mongo.get_all_collections() html = htmlheader() html += htmlnav html += """<div class="left inner">\n""" - # Get the date from the tag cloud - # Format: /history/?querystring=year:2011-month:06 to get the + # Get the date from the tag cloud + # Format: /history/?query=year:2011-month:06 to get the # list of articles of June, 2011. 
if m != "": - querystring = """year:%s-month:%s""" % tuple(m.split('-')) + query = """year:%s-month:%s""" % tuple(m.split('-')) - if querystring == "all": + if query == "all": html += "<h1>Search with tags cloud</h1>\n" html += "<h4>Choose a year</h4></br >\n" - if "year" in querystring: - the_year = querystring.split('-')[0].split(':')[1] - if "month" not in querystring: + if "year" in query: + the_year = query.split('-')[0].split(':')[1] + if "month" not in query: html += "<h1>Choose a month for " + the_year + "</h1></br >\n" - if "month" in querystring: - the_month = querystring.split('-')[1].split(':')[1] + if "month" in query: + the_month = query.split('-')[1].split(':')[1] html += "<h1>Articles of "+ calendar.month_name[int(the_month)] + \ ", "+ the_year +".</h1><br />\n" timeline = Counter() - for feed in self.feeds.values(): + for feed in feeds: new_feed_section = True - for article in feed.articles.values(): + for article in self.mongo.get_articles_from_collection(feed["feed_id"]): - if querystring == "all": - timeline[article.article_date.split(' ')[0].split('-')[0]] += 1 + if query == "all": + timeline[str(article["article_date"]).split(' ')[0].split('-')[0]] += 1 - elif querystring[:4] == "year": + elif query[:4] == "year": - if article.article_date.split(' ')[0].split('-')[0] == the_year: - timeline[article.article_date.split(' ')[0].split('-')[1]] += 1 + if str(article["article_date"]).split(' ')[0].split('-')[0] == the_year: + timeline[str(article["article_date"]).split(' ')[0].split('-')[1]] += 1 - if "month" in querystring: - if article.article_date.split(' ')[0].split('-')[1] == the_month: - if article.article_readed == "0": + if "month" in query: + if str(article["article_date"]).split(' ')[0].split('-')[1] == the_month: + if article["article_readed"] == False: # not readed articles are in bold not_read_begin, not_read_end = "<b>", "</b>" else: not_read_begin, not_read_end = "", "" - if article.like == "1": + if article["article_like"] == True: 
like = """ <img src="/img/heart.png" title="I like this article!" />""" else: like = "" # Descrition for the CSS ToolTips - article_content = utils.clear_string(article.article_description) + article_content = utils.clear_string(article["article_content"]) if article_content: description = " ".join(article_content[:500].split(' ')[:-1]) else: description = "No description." # Title of the article - article_title = article.article_title + article_title = article["article_title"] if len(article_title) >= 110: article_title = article_title[:110] + " ..." @@ -866,20 +902,20 @@ class Root: html += """<h2><a name="%s"><a href="%s" rel="noreferrer" target="_blank">%s</a></a><a href="%s" rel="noreferrer" target="_blank"><img src="%s" width="28" height="28" /></a></h2>\n""" % \ - (feed.feed_id, feed.feed_site_link, feed.feed_title, feed.feed_link, feed.feed_image) + (feed["feed_id"], feed["feed_link"], feed["feed_title"], feed["feed_link"], feed["feed_image"]) - html += article.article_date.split(' ')[0][-2:] + " (" + \ - article.article_date.split(' ')[1] + ") - " + \ + html += str(article["article_date"]).split(' ')[0][-2:] + " (" + \ + str(article["article_date"]).split(' ')[1] + ") - " + \ """<a class="tooltip" href="/article/%s:%s" rel="noreferrer" target="_blank">%s%s%s<span class="classic">%s</span></a>""" % \ - (feed.feed_id, article.article_id, not_read_begin, \ + (feed["feed_id"], article["article_id"], not_read_begin, \ article_title, not_read_end, description) + like + "<br />\n" - if querystring == "all": - query = "year" - elif "year" in querystring: - query = "year:" + the_year + "-month" - if "month" not in querystring: + if query == "all": + query_string = "year" + elif "year" in query: + query_string = "year:" + the_year + "-month" + if "month" not in query: html += '<div style="width: 35%; overflow:hidden; text-align: justify">' + \ - utils.tag_cloud([(elem, timeline[elem]) for elem in timeline.keys()], query) + '</div>' + utils.tag_cloud([(elem, 
timeline[elem]) for elem in timeline.keys()], query_string) + '</div>' html += '<br /><br /><h1>Search with a month+year picker</h1>\n' html += '<form>\n\t<input name="m" type="month">\n\t<input type="submit" value="Go">\n</form>' html += '<hr />' @@ -895,15 +931,16 @@ class Root: """ try: feed_id, article_id = target.split(':') - feed, article = self.feeds[feed_id], self.feeds[feed_id].articles[article_id] + feed = self.mongo.get_collection(feed_id) + article = self.mongo.get_article(feed_id, article_id) except: return self.error_page("Bad URL. This article do not exists.") html = htmlheader() html += htmlnav html += """<div class="left inner">""" html += """<h1><i>%s</i> from <a href="/articles/%s">%s</a></h1>\n<br />\n"""% \ - (article.article_title, feed_id, feed.feed_title) - description = utils.clear_string(article.article_description) + (article["article_title"], feed_id, feed["feed_title"]) + description = utils.clear_string(article["article_content"]) if description: html += description else: @@ -931,30 +968,19 @@ class Root: def mark_as_read(self, target=""): """ Mark one (or more) article(s) as read by setting the value of the field - 'article_readed' of the SQLite database to 1. + 'article_readed' of the MongoDB database to 'True'. """ param, _, identifiant = target.partition(':') - try: - LOCKER.acquire() - conn = sqlite3.connect(utils.sqlite_base, isolation_level = None) - c = conn.cursor() - # Mark all articles as read. - if param == "": - c.execute("UPDATE articles SET article_readed=1 WHERE article_readed='0'") - # Mark all articles from a feed as read. - elif param == "Feed" or param == "Feed_FromMainPage": - c.execute("UPDATE articles SET article_readed=1 WHERE article_readed='0' AND feed_link='" + \ - self.feeds[identifiant].feed_link + "'") - # Mark an article as read. 
- elif param == "Article": - c.execute("UPDATE articles SET article_readed=1 WHERE article_link='" + identifiant + "'") - conn.commit() - c.close() - except Exception: - self.error_page("Impossible to mark this article as read.") - finally: - LOCKER.release() + # Mark all articles as read. + if param == "": + self.mongo.mark_as_read(True, None, None) + # Mark all articles from a feed as read. + elif param == "Feed" or param == "Feed_FromMainPage": + self.mongo.mark_as_read(True, identifiant, None) + # Mark an article as read. + elif param == "Article": + self.mongo.mark_as_read(True, identifiant.split(':')[1], identifiant.split(':')[0]) if param == "" or param == "Feed_FromMainPage": return self.index() elif param == "Feed": @@ -970,11 +996,14 @@ class Root: html = htmlheader() html += htmlnav html += """<div class="left inner">""" - html += "<h1>You are receiving e-mails for the following feeds:</h1>\n" - for feed in self.feeds.values(): - if feed.mail == "1": + feeds = self.mongo.get_all_collections(condition=("mail",True)) + if feeds != []: + html += "<h1>You are receiving e-mails for the following feeds:</h1>\n" + for feed in feeds: html += """\t<a href="/articles/%s">%s</a> - <a href="/mail_notification/0:%s">Stop</a><br />\n""" % \ - (feed.feed_id, feed.feed_title, feed.feed_id) + (feed["feed_id"], feed["feed_title"], feed["feed_id"]) + else: + html += "<p>No active notifications.<p>\n" html += """<p>Notifications are sent to: <a href="mail:%s">%s</a></p>""" % \ (utils.mail_to, utils.mail_to) html += "\n<hr />\n" + htmlfooter @@ -992,15 +1021,7 @@ class Root: feed = self.feeds[feed_id] except: return self.error_page("Bad URL. 
This feed do not exists.") - conn = sqlite3.connect(utils.sqlite_base, isolation_level = None) - try: - c = conn.cursor() - c.execute("""UPDATE feeds SET mail=%s WHERE feed_site_link='%s'""" % (action, self.feeds[feed_id].feed_site_link)) - except: - return self.error_page("Error") - finally: - conn.commit() - c.close() + return self.index() mail_notification.exposed = True @@ -1011,19 +1032,11 @@ class Root: Mark or unmark an article as favorites. """ try: - action, feed_id, article_id = param.split(':') - article = self.feeds[feed_id].articles[article_id] + like, feed_id, article_id = param.split(':') + articles = self.mongo.get_article(feed_id, article_id) except: return self.error_page("Bad URL. This article do not exists.") - conn = sqlite3.connect(utils.sqlite_base, isolation_level = None) - try: - c = conn.cursor() - c.execute("""UPDATE articles SET like=%s WHERE article_link='%s'""" % (action, article.article_link)) - except Exception: - self.error_page("Impossible to like/dislike this article (database error).") - finally: - conn.commit() - c.close() + self.mongo.like_article("1"==like, feed_id, article_id) return self.article(feed_id+":"+article_id) like.exposed = True @@ -1033,21 +1046,22 @@ class Root: """ List of favorites articles """ + feeds = self.mongo.get_all_collections() html = htmlheader() html += htmlnav html += """<div class="left inner">""" html += "<h1>Your favorites articles</h1>" - for feed in self.feeds.values(): + for feed in feeds: new_feed_section = True - for article in feed.articles.values(): - if article.like == "1": + for article in self.mongo.get_articles_from_collection(feed["feed_id"]): + if article["article_like"] == True: if new_feed_section is True: new_feed_section = False html += """<h2><a name="%s"><a href="%s" rel="noreferrer"target="_blank">%s</a></a><a href="%s" rel="noreferrer" target="_blank"><img src="%s" width="28" height="28" /></a></h2>\n""" % \ - (feed.feed_id, feed.feed_site_link, feed.feed_title, 
feed.feed_link, feed.feed_image) + (feed["feed_id"], feed["feed_link"], feed["feed_title"], feed["feed_link"], feed["feed_image"]) # descrition for the CSS ToolTips - article_content = utils.clear_string(article.article_description) + article_content = utils.clear_string(article["article_content"]) if article_content: description = " ".join(article_content[:500].split(' ')[:-1]) else: @@ -1055,9 +1069,9 @@ class Root: # a description line per article (date, title of the article and # CSS description tooltips on mouse over) - html += article.article_date + " - " + \ + html += str(article["article_date"]) + " - " + \ """<a class="tooltip" href="/article/%s:%s" rel="noreferrer" target="_blank">%s<span class="classic">%s</span></a><br />\n""" % \ - (feed.feed_id, article.article_id, article.article_title[:150], description) + (feed["feed_id"], article["article_id"], article["article_title"][:150], description) html += "<hr />\n" html += htmlfooter return html @@ -1094,17 +1108,18 @@ class Root: def remove_feed(self, feed_id): """ - Remove a feed from the file feed.lst and from the SQLite base. + Remove a feed from the file feed.lst and from the MongoDB database. """ html = htmlheader() html += htmlnav html += """<div class="left inner">""" - try: - utils.remove_feed(self.feeds[feed_id].feed_link) - html += """<p>All articles from the feed <i>%s</i> are now removed from the base.</p><br />""" % \ - (self.feeds[feed_id].feed_title,) - except: - return self.error_page("This feed do not exists.") + + feed = self.mongo.get_collection(feed_id) + self.mongo.delete_feed(feed_id) + utils.remove_feed(feed["feed_link"]) + + html += """<p>All articles from the feed <i>%s</i> are now removed from the base.</p><br />""" % \ + (feed["feed_title"],) html += """<a href="/management/">Back to the management page.</a><br />\n""" html += "<hr />\n" html += htmlfooter @@ -1165,9 +1180,8 @@ class Root: the data base. 
""" if max_nb_articles < -1 or max_nb_articles == 0: - max_nb_articles = 1 + max_nb_articles = 1 utils.MAX_NB_ARTICLES = int(max_nb_articles) - self.update() return self.management() set_max_articles.exposed = True @@ -1179,18 +1193,10 @@ class Root: """ try: feed_id, article_id = param.split(':') - article = self.feeds[feed_id].articles[article_id] + self.mongo.delete_article(feed_id, article_id) except: return self.error_page("Bad URL. This article do not exists.") - try: - conn = sqlite3.connect(utils.sqlite_base, isolation_level = None) - c = conn.cursor() - c.execute("DELETE FROM articles WHERE article_link='" + article.article_link +"'") - except Exception, e: - return e - finally: - conn.commit() - c.close() + return self.index() delete_article.exposed = True @@ -1208,7 +1214,7 @@ class Root: def export(self, export_method): """ - Export articles currently loaded from the SQLite database with + Export articles currently loaded from the MongoDB database with the appropriate function of the 'export' module. """ try: @@ -1253,70 +1259,9 @@ class Root: epub.exposed = True - # - # Monitoring functions - # - def update(self, path=None, event = None): - """ - Synchronizes transient objects (dictionary of feed and articles) with the database. - Called when a changes in the database is detected. - """ - self.feeds, \ - self.nb_articles, self.nb_unread_articles, \ - self.nb_favorites, self.nb_mail_notifications = utils.load_feed() - if self.feeds != {}: - print "Base (%s) loaded" % utils.sqlite_base - else: - print "Base (%s) empty!" % utils.sqlite_base - - def watch_base(self): - """Monitor a file. - - Detect the changes in base of feeds. - When a change is detected, reload the base. 
- """ - mon = gamin.WatchMonitor() - time.sleep(10) - mon.watch_file(utils.sqlite_base, self.update) - ret = mon.event_pending() - try: - print "Watching %s" % utils.sqlite_base - while True: - ret = mon.event_pending() - if ret > 0: - print "The base of feeds (%s) has changed.\nReloading..." % utils.sqlite_base - ret = mon.handle_one_event() - time.sleep(1) - except KeyboardInterrupt: - pass - print "Stop watching", sqlite_base - mon.stop_watch(sqlite_base) - del mon - - def watch_base_classic(self): - """ - Monitor the base of feeds if the module gamin is not installed. - """ - time.sleep(10) - old_time = os.path.getmtime(utils.sqlite_base) - try: - print "Watching %s" % utils.sqlite_base - while True: - time.sleep(1) - # simple test (date of last modification: getmtime) - if os.path.getmtime(utils.sqlite_base) != old_time: - print "The base of feeds (%s) has changed.\nReloading..." % utils.sqlite_base - self.update() - old_time = os.path.getmtime(utils.sqlite_base) - except KeyboardInterrupt: - pass - print "Stop watching", utils.sqlite_base - - if __name__ == '__main__': # Point of entry in execution mode print "Launching pyAggr3g470r..." - LOCKER = threading.Lock() root = Root() root.favicon_ico = cherrypy.tools.staticfile.handler(filename=os.path.join(utils.path + "/img/favicon.png")) @@ -1324,22 +1269,4 @@ if __name__ == '__main__': cherrypy.config.update({'error_page.404': error_page_404}) _cp_config = {'request.error_response': handle_error} - if not os.path.isfile(utils.sqlite_base): - # create the SQLite base if not exists - print "Creating data base..." - utils.create_base() - # load the informations from base in memory - print "Loading informations from data base..." - root.update() - # launch the available base monitoring method (gamin or classic) - try: - import gamin - thread_watch_base = threading.Thread(None, root.watch_base, None, ()) - except: - print "The gamin module is not installed." 
- print "The base of feeds will be monitored with the simple method." - thread_watch_base = threading.Thread(None, root.watch_base_classic, None, ()) - thread_watch_base.setDaemon(True) - thread_watch_base.start() - - cherrypy.quickstart(root, "/" ,config=utils.path + "/cfg/cherrypy.cfg") + cherrypy.quickstart(root, "/" ,config=utils.path + "/cfg/cherrypy.cfg")
\ No newline at end of file diff --git a/sqlite2mongo.py b/sqlite2mongo.py new file mode 100644 index 00000000..52cd36fe --- /dev/null +++ b/sqlite2mongo.py @@ -0,0 +1,75 @@ +#! /usr/bin/env python +# -*- coding: utf-8 -*- + +import hashlib +import sqlite3 + +import mongodb + +SQLITE_BASE = "./var/feed.db" + + +def sqlite2mongo(): + """ + Load feeds and articles in a dictionary. + """ + mongo = mongodb.Articles() + list_of_feeds = [] + list_of_articles = [] + + try: + conn = sqlite3.connect(SQLITE_BASE, isolation_level = None) + c = conn.cursor() + list_of_feeds = c.execute("SELECT * FROM feeds").fetchall() + except: + pass + + if list_of_feeds != []: + # Walk through the list of feeds + for feed in list_of_feeds: + try: + list_of_articles = c.execute(\ + "SELECT * FROM articles WHERE feed_link='" + \ + feed[2] + "'").fetchall() + except: + continue + sha1_hash = hashlib.sha1() + sha1_hash.update(feed[2].encode('utf-8')) + feed_id = sha1_hash.hexdigest() + + new_collection = {"feed_id" : feed_id.encode('utf-8'), \ + "type": 0, \ + "feed_image" : feed[3].encode('utf-8'), \ + "feed_title" : feed[0].encode('utf-8'), \ + "feed_link" : feed[2].encode('utf-8'), \ + "site_link" : feed[1].encode('utf-8'), \ + "mail" : feed[4]=="1"} + + mongo.add_collection(new_collection) + + if list_of_articles != []: + # Walk through the list of articles for the current feed. 
+ articles = [] + for article in list_of_articles: + sha1_hash = hashlib.sha1() + sha1_hash.update(article[2].encode('utf-8')) + article_id = sha1_hash.hexdigest() + + article = {"article_id": article_id.encode('utf-8'), \ + "type":1, \ + "article_date": article[0].encode('utf-8'), \ + "article_link": article[2].encode('utf-8'), \ + "article_title": article[1].encode('utf-8'), \ + "article_content": article[3].encode('utf-8'), \ + "article_readed": article[4]=="1", \ + "article_like": article[6]=="1" \ + } + + articles.append(article) + + mongo.add_articles(articles, feed_id) + + c.close() + +if __name__ == "__main__": + sqlite2mongo()
\ No newline at end of file @@ -43,8 +43,6 @@ import calendar import unicodedata import htmlentitydefs -import articles - import smtplib from email.mime.multipart import MIMEMultipart from email.mime.text import MIMEText @@ -72,15 +70,22 @@ try: except: config.read("./cfg/pyAggr3g470r.cfg-sample") path = os.path.abspath(".") -sqlite_base = os.path.abspath(config.get('global', 'sqlitebase')) + +MONGODB_ADDRESS = config.get('MongoDB', 'address') +MONGODB_PORT = int(config.get('MongoDB', 'port')) +MONGODB_USER = config.get('MongoDB', 'user') +MONGODB_PASSWORD = config.get('MongoDB', 'password') + MAX_NB_ARTICLES = int(config.get('global', 'max_nb_articles')) -DIASPORA_POD = config.get('misc', 'diaspora_pod') + mail_from = config.get('mail','mail_from') mail_to = config.get('mail','mail_to') smtp_server = config.get('mail','smtp') username = config.get('mail','username') password = config.get('mail','password') +DIASPORA_POD = config.get('misc', 'diaspora_pod') + # regular expression to chech URL url_finders = [ \ re.compile("([0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}|(((news|telnet|nttp|file|http|ftp|https)://)|(www|ftp)[-A-Za-z0-9]*\\.)[-A-Za-z0-9\\.]+)(:[0-9]*)?/[-A-Za-z0-9_\\$\\.\\+\\!\\*\\(\\),;:@&=\\?/~\\#\\%]*[^]'\\.}>\\),\\\"]"), \ @@ -165,16 +170,15 @@ def normalize_filename(name): file_name = strip_accents(file_name, "utf-8") return os.path.normpath(file_name) -def top_words(feeds, n=10, size=5): +def top_words(articles, n=10, size=5): """ Return the n most frequent words in a list. 
""" words = Counter() wordre = re.compile(r'\b\w{%s,}\b' % size, re.I) - for feed in feeds.values(): - for article in feed.articles.values(): - for word in wordre.findall(clear_string(article.article_description)): - words[word.lower()] += 1 + for article in articles: + for word in wordre.findall(clear_string(article["article_content"])): + words[word.lower()] += 1 return words.most_common(n) def tag_cloud(tags, query="word_count"): @@ -184,15 +188,15 @@ def tag_cloud(tags, query="word_count"): tags.sort(key=operator.itemgetter(0)) if query == "word_count": # tags cloud from the management page - return ' '.join([('<font size=%d><a href="/q/?querystring=%s" title="Count: %s">%s</a></font>\n' % \ + return ' '.join([('<font size=%d><a href="/search/?query=%s" title="Count: %s">%s</a></font>\n' % \ (min(1 + count * 7 / max([tag[1] for tag in tags]), 7), word, count, word)) \ for (word, count) in tags]) if query == "year": # tags cloud for the history - return ' '.join([('<font size=%d><a href="/history/?querystring=%s:%s" title="Count: %s">%s</a></font>\n' % \ + return ' '.join([('<font size=%d><a href="/history/?query=%s:%s" title="Count: %s">%s</a></font>\n' % \ (min(1 + count * 7 / max([tag[1] for tag in tags]), 7), query, word, count, word)) \ for (word, count) in tags]) - return ' '.join([('<font size=%d><a href="/history/?querystring=%s:%s" title="Count: %s">%s</a></font>\n' % \ + return ' '.join([('<font size=%d><a href="/history/?query=%s:%s" title="Count: %s">%s</a></font>\n' % \ (min(1 + count * 7 / max([tag[1] for tag in tags]), 7), query, word, count, calendar.month_name[int(word)])) \ for (word, count) in tags]) @@ -327,16 +331,6 @@ def remove_feed(feed_url): feeds.append(line.replace("\n", "")) with open("./var/feed.lst", "w") as f: f.write("\n".join(feeds) + "\n") - # Remove articles from this feed from the SQLite base. 
- try: - conn = sqlite3.connect(sqlite_base, isolation_level = None) - c = conn.cursor() - c.execute("DELETE FROM feeds WHERE feed_link='" + feed_url +"'") - c.execute("DELETE FROM articles WHERE feed_link='" + feed_url +"'") - conn.commit() - c.close() - except: - pass def search_feed(url): """ @@ -354,128 +348,4 @@ def search_feed(url): if url not in feed_link['href']: return urlparse.urljoin(url, feed_link['href']) return feed_link['href'] - return None - -def create_base(): - """ - Create the base if not exists. - """ - sqlite3.register_adapter(str, lambda s : s.decode('utf-8')) - conn = sqlite3.connect(sqlite_base, isolation_level = None) - c = conn.cursor() - # table of feeds - c.execute('''create table if not exists feeds - (feed_title text, feed_site_link text, \ - feed_link text PRIMARY KEY, feed_image_link text, - mail text)''') - # table of articles - c.execute('''create table if not exists articles - (article_date text, article_title text, \ - article_link text PRIMARY KEY, article_description text, \ - article_readed text, feed_link text, like text)''') - conn.commit() - c.close() - -def drop_base(): - """ - Delete all articles from the database. - """ - sqlite3.register_adapter(str, lambda s : s.decode('utf-8')) - conn = sqlite3.connect(sqlite_base, isolation_level = None) - c = conn.cursor() - c.execute('''DROP TABLE IF EXISTS feeds''') - c.execute('''DROP TABLE IF EXISTS articles''') - conn.commit() - c.close() - -def load_feed(): - """ - Load feeds and articles in a dictionary. - """ - LOCKER.acquire() - list_of_feeds = [] - list_of_articles = [] - try: - conn = sqlite3.connect(sqlite_base, isolation_level = None) - c = conn.cursor() - list_of_feeds = c.execute("SELECT * FROM feeds").fetchall() - except: - pass - - nb_articles = 0 - nb_unread_articles = 0 - nb_mail_notifications = 0 - nb_favorites = 0 - - # Contains the list of Feed object. 
- feeds = OrderedDict() - - if list_of_feeds != []: - # Case-insensitive sorting - tupleList = [(x[0].lower(), x) for x in list_of_feeds] - tupleList.sort(key=operator.itemgetter(0)) - - # Walk through the list of feeds - for feed in [x[1] for x in tupleList]: - try: - list_of_articles = c.execute(\ - "SELECT * FROM articles WHERE feed_link='" + \ - feed[2] + "'").fetchall() - except: - LOCKER.release() - continue - sha1_hash = hashlib.sha1() - sha1_hash.update(feed[2].encode('utf-8')) - feed_id = sha1_hash.hexdigest() - - # Current Feed object - feed_object = articles.Feed() - feed_object.feed_id = feed_id.encode('utf-8') - feed_object.feed_title = feed[0].encode('utf-8') - feed_object.feed_image = feed[3].encode('utf-8') - feed_object.feed_link = feed[2].encode('utf-8') - feed_object.feed_site_link = feed[1].encode('utf-8') - feed_object.mail = feed[4] - - if list_of_articles != []: - list_of_articles.sort(lambda x,y: compare(y[0], x[0])) - if MAX_NB_ARTICLES != -1: - list_of_articles = list_of_articles[:MAX_NB_ARTICLES] - # Walk through the list of articles for the current feed. 
- for article in list_of_articles: - sha1_hash = hashlib.sha1() - sha1_hash.update(article[2].encode('utf-8')) - article_id = sha1_hash.hexdigest() - - # Current Article object - article_object = articles.Article() - article_object.article_id = article_id.encode('utf-8') - article_object.article_date = article[0].encode('utf-8') - article_object.article_title = unescape(article[1]).encode('utf-8') - article_object.article_link = article[2].encode('utf-8') - article_object.article_description = unescape(article[3]).encode('utf-8') - article_object.article_readed = article[4] - article_object.like = article[6] - - feed_object.articles[article_id] = article_object - - # update the number of favorites articles - nb_favorites = nb_favorites + int(article[6]) - - - # informations about a feed - feed_object.nb_articles = len(feed_object.articles) - feed_object.nb_unread_articles = len([article for article in feed_object.articles.values() \ - if article.article_readed=="0"]) - - feeds[feed_id] = feed_object - - nb_articles += feed_object.nb_articles - nb_unread_articles += feed_object.nb_unread_articles - nb_mail_notifications += int(feed_object.mail) - - c.close() - LOCKER.release() - return (feeds, nb_articles, nb_unread_articles, nb_favorites, nb_mail_notifications) - LOCKER.release() - return (feeds, nb_articles, nb_unread_articles, nb_favorites, nb_mail_notifications) + return None
\ No newline at end of file |