#! /usr/local/bin/python
#-*- coding: utf-8 -*-

"""
Web interface of pyAggr3g470r, served with CherryPy.

The Root class exposes one page per public method. Feeds and articles are
loaded in memory by Root.update() and the SQLite base is modified directly
by the pages that mark articles as read, as favorites, or toggle e-mail
notifications.

NOTE(review): this module is written for Python 2 (byte strings produced
by .encode('utf-8') are concatenated with plain string literals).

NOTE(review): most page templates below look like their HTML markup has
been lost: several '%' format strings have fewer placeholders than
arguments and the "bold" markers are empty. The literals are kept exactly
as found (line breaks written as '\\n'); restore them from a pristine copy
before relying on the rendered pages.
"""

__author__ = "Cedric Bonhomme"
__version__ = "$Revision: 1.4 $"
__date__ = "$Date: 2010/06/10 $"
__copyright__ = "Copyright (c) 2010 Cedric Bonhomme"
__license__ = "GPLv3"

import os
import time
import sqlite3
import cherrypy
import operator
import threading

from cherrypy.lib.static import serve_file

import utils
import feedgetter

bindhost = "0.0.0.0"

cherrypy.config.update({'server.socket_port': 12556,
                        'server.socket_host': bindhost})

# Serializes the writes done by Root.mark_as_read(). Defined at module level
# so that the class also works when this file is imported and not only when
# it is run as a script.
LOCKER = threading.Lock()

# Files served as-is, relative to utils.path.
_STATIC_FILES = (
    'css/style.css',
    'css/img/feed-icon-28x28.png',
    'css/img/delicious.png',
    'css/img/digg.png',
    'css/img/reddit.png',
    'css/img/scoopeo.png',
    'css/img/blogmarks.png',
    'css/img/buzz.png',
    'css/img/heart.png',
    'css/img/heart_open.png',
    'css/img/email.png',
    'var/histogram.png',
)

# CherryPy application configuration: one static-file entry per URL.
path = dict(('/' + name, {'tools.staticfile.on': True,
                          'tools.staticfile.filename': utils.path + name})
            for name in _STATIC_FILES)

htmlheader = '\n' + \
    '' + \
    '\n\tpyAggr3g470r - RSS Feed \nReader\n' + \
    '\t' + \
    '\n\t\n' + \
    '\n'

htmlfooter = '\n\nThis software is under GPLv3 license. You are welcome to copy, modify or' + \
    ' redistribute the source code according to the' + \
    ' GPLv3 license.\n\n\n' + \
    '\n'

htmlnav = '\n\n\npyAggr3g470r - RSS Feed Reader\n\n\n' + \
    'pyAggr3g470r (source code)'


class Root:
    """
    CherryPy application object: every method flagged 'exposed' is a page.

    The pages read three attributes set by update(): self.articles (lists of
    articles indexed by feed id), self.feeds (feed records indexed by feed
    id) and self.nb_articles.

    As used in this file, an article record holds: [0] its id, [2] its
    title, [3] its link, [4] its description, [5] "0" when unread,
    [6] its language and [7] "1" when marked as favorite. A feed record
    holds: [0] its number of articles, [1] its number of unread articles,
    [3] its title, [4] the link of the feed, [5] the link of the site and
    [6] "1" when e-mail notifications are active.
    """

    def _unread_markers(self, unread):
        """
        Return the (begin, end) strings wrapped around an entry.

        NOTE(review): both cases are empty strings in this copy of the
        file; the two branches are kept so the markup can be restored.
        """
        if unread:
            # not readed articles are in bold
            return "", ""
        return "", ""

    def _count_favorites(self):
        """
        Return the number of articles marked as favorites.
        """
        return sum([len([article for article in self.articles[feed_id]
                         if article[7] == "1"])
                    for feed_id in self.feeds.keys()])

    def _count_notifications(self):
        """
        Return the number of feeds with active e-mail notifications.
        """
        return len([feed for feed in self.feeds.values() if feed[6] == "1"])

    def _searchable_text(self, article):
        """
        Return the text of an article without HTML tags, falling back on
        its title when the description is empty.
        """
        content = utils.remove_html_tags(article[4].encode('utf-8'))
        if not content:
            content = utils.remove_html_tags(article[2].encode('utf-8'))
        return content

    def index(self):
        """
        Main page containing the list of feeds and articles.
        """
        html = htmlheader
        html += htmlnav
        html += self.create_right_menu()
        html += "\n\n"

        if self.articles:
            html += "\n" % (self._count_favorites(),)
            html += "\n" % (self._count_notifications(),)

        for rss_feed_id in self.articles.keys():
            feed = self.feeds[rss_feed_id]
            html += "\n\n%s\n\n\n" % \
                (rss_feed_id,
                 feed[5].encode('utf-8'),
                 feed[3].encode('utf-8'),
                 feed[4].encode('utf-8'),
                 feed[2].encode('utf-8'))

            # The main page display only 10 articles by feeds.
            for article in self.articles[rss_feed_id][:10]:
                not_read_begin, not_read_end = \
                    self._unread_markers(article[5] == "0")

                if article[7] == "1":
                    like = " "
                else:
                    like = ""

                html += article[1].encode('utf-8') + \
                    " - " + not_read_begin + \
                    "%s" % \
                    (rss_feed_id, article[0].encode('utf-8'),
                     article[2].encode('utf-8')) + \
                    not_read_end + like + \
                    "\n\n"
            html += "\n\n"

            html += "All articles   " % (rss_feed_id,)
            html += "  Mark all as read" % (rss_feed_id,)
            if feed[1] != 0:
                html += "     Unread article(s) (%s)" % \
                    (rss_feed_id, feed[1])
            if feed[6] == "0":
                html += "\n\nStay tuned" % (rss_feed_id,)
            else:
                html += "\n\nStop staying tuned" % (rss_feed_id,)
            html += "\n\nTop\n\n"
            html += "\n\n"
        html += htmlfooter
        return html

    index.exposed = True

    def create_right_menu(self):
        """
        Create the right menu.
        """
        html = "\n\n"
        html += "Management\n\n"
        html += "Fetch all feeds\n\n"
        html += "Mark articles as read\n"
        html += "\n\n"
        html += "\n\n"
        html += self.create_list_of_feeds()
        html += "\n\n"
        return html

    def create_list_of_feeds(self):
        """
        Create the list of feeds.
        """
        html = "Your feeds (%s):\n\n" % len(self.articles.keys())
        for rss_feed_id in self.articles.keys():
            feed = self.feeds[rss_feed_id]
            not_read_begin, not_read_end = self._unread_markers(feed[1] != 0)
            html += "%s (%s%s%s / %s)\n\n" % \
                (rss_feed_id.encode('utf-8'),
                 feed[3].encode('utf-8'),
                 rss_feed_id, not_read_begin,
                 feed[1], not_read_end,
                 feed[0])
        return html

    def management(self, word_size=6):
        """
        Management of articles.

        'word_size' is the minimum size of the words counted for the
        statistics; it is converted with int() before use.
        """
        html = htmlheader
        html += htmlnav
        html += "\n\n"
        html += "\n\nAdd Feeds\n\n\n"
        html += "\n\n\n\n"

        if self.articles:
            html += "\n\nDelete Feeds\n\n\n"
            html += "\n\n"
            html += "\n\nActive e-mail notifications: %s\n\n\n" % \
                (self._count_notifications(),)
            html += "\n\nYou like %s article(s).\n\n\n" % \
                (self._count_favorites(),)

        html += "\n\n"

        html += "\n\nThe database contains a total of %s article(s) with %s unread article(s).\n" % \
            (self.nb_articles, sum([feed[1] for feed in self.feeds.values()]))
        html += "Database: %s.\n\nSize: %s bytes.\n\n\n" % \
            (os.path.abspath(utils.sqlite_base),
             os.path.getsize(utils.sqlite_base))

        html += "\n\n\n\n"
        html += "\n\n\n\n"

        html += "\n\nExport articles\n\n"
        html += "\n\n"
        html += "\n\n"

        if self.articles:
            self.top_words = utils.top_words(self.articles, n=50,
                                             size=int(word_size))
            utils.create_histogram(self.top_words[:10])
            html += "\n\nStatistics\n\n\n\n\n"

            # Number of articles by language; only computed when the
            # language detection is available.
            language_counts = None
            if "oice" not in utils.IMPORT_ERROR:
                nb_french = 0
                nb_english = 0
                for rss_feed_id in self.articles.keys():
                    for article in self.articles[rss_feed_id]:
                        if article[6] == 'french':
                            nb_french += 1
                        elif article[6] == 'english':
                            nb_english += 1
                language_counts = {
                    'english': nb_english,
                    'french': nb_french,
                    'other': self.nb_articles - nb_french - nb_english,
                }

            html += "Minimum size of a word: "
            html += "\n\n"
            html += "\n"
            # NOTE(review): this literal looks like two statements fused
            # when the markup was lost; kept as found.
            html += '" html += "\n\n\n'
            html += "\n\nTag cloud\n\n\n"
            html += '\n' + \
                utils.tag_cloud(self.top_words) + '\n'
            html += "\n"

            html += "\n\nWords count\n\n\n"
            html += "\n    \n"
            for word, frequency in sorted(self.top_words,
                                          key=operator.itemgetter(1),
                                          reverse=True)[:10]:
                html += "\t\n  1. %s: %s\n  2. \n" % \
                    (word, word, frequency)
            html += "\n\n"

            html += "\n\nLanguages\n\n\n"
            if language_counts is None:
                html += "Install the module "
                html += "oice.langdet"
                html += "\n"
            else:
                html += "\n    \n"
                for language in ['english', 'french', 'other']:
                    html += "\t\n  • %s articles in %s\n  • \n" % \
                        (language_counts[language], language, language)
                html += "\n\n\n"

            html += "\n\n\n\n"

        html += "\n\n"
        html += htmlfooter
        return html

    management.exposed = True

    def q(self, querystring=None):
        """
        Search for a feed. Simply search for the string 'querystring'
        in the description of the article.

        A query of the form 'Feed:<feed id>:<string>' restricts the search
        to one feed. The comparison is not case sensitive.
        """
        if querystring is None:
            return self.error_page("Bad URL")

        param, _, value = querystring.partition(':')
        feed_id = None
        if param == "Feed":
            feed_id, _, querystring = value.partition(':')
            if feed_id not in self.articles:
                return self.error_page("This feed do not exists.")

        needle = querystring.lower()

        html = htmlheader
        html += htmlnav
        html += "\n"

        html += "\n\nArticles containing the string %s\n\n\n" % (querystring,)

        if feed_id is not None:
            for article in self.articles[feed_id]:
                if needle in self._searchable_text(article).lower():
                    not_read_begin, not_read_end = \
                        self._unread_markers(article[5] == "0")

                    html += article[1].encode('utf-8') + \
                        " - " + not_read_begin + \
                        "%s" % \
                        (feed_id, article[0].encode('utf-8'),
                         article[2].encode('utf-8')) + \
                        not_read_end
        else:
            for rss_feed_id in self.articles.keys():
                for article in self.articles[rss_feed_id]:
                    if needle in self._searchable_text(article).lower():
                        not_read_begin, not_read_end = \
                            self._unread_markers(article[5] == "0")

                        html += article[1].encode('utf-8') + \
                            " - " + not_read_begin + \
                            "%s" % \
                            (rss_feed_id, article[0].encode('utf-8'),
                             article[2].encode('utf-8')) + \
                            not_read_end + " from %s\n\n" % \
                            (self.feeds[rss_feed_id][5].encode('utf-8'),
                             self.feeds[rss_feed_id][3].encode('utf-8'))
        html += "\n"
        html += htmlfooter
        return html

    q.exposed = True

    def fetch(self):
        """
        Fetch all feeds
        """
        feed_getter = feedgetter.FeedGetter()
        feed_getter.retrieve_feed()
        # The in-memory data is refreshed by the thread watching the base.
        return self.index()

    fetch.exposed = True

    def description(self, param):
        """
        Display the description of an article in a new Web page.

        'param' has the form '<feed id>:<article id>'. An unread article
        is marked as read when it is displayed.
        """
        try:
            feed_id, article_id = param.split(':')
        except ValueError:
            return self.error_page("Bad URL")
        try:
            articles_list = self.articles[feed_id]
        except KeyError:
            return self.error_page("This feed do not exists.")
        html = htmlheader
        html += htmlnav
        html += "\n"

        for article in articles_list:
            if article_id == article[0]:

                if article[5] == "0":
                    # update the database
                    self.mark_as_read("Article:" + article[3])

                html += "\n\n%s from %s\n\n\n\n\n" % \
                    (article[2].encode('utf-8'), feed_id,
                     self.feeds[feed_id][3].encode('utf-8'))
                if article[7] == "1":
                    html += "" % (feed_id, article_id)
                else:
                    html += "" % (feed_id, article_id)
                html += "\n\n"
                description = article[4].encode('utf-8')
                if description:
                    html += description
                else:
                    html += "No description available."
                html += "\n\n\n"
                html += "This article seems to be written in %s.\n" % \
                    (article[6], article[6])
                html += "\n\nPlain text\n" % \
                    (feed_id, article_id)
                html += "\n\nComplete story\n\n\n" % \
                    (article[3].encode('utf-8'),)
                # Share this article:
                # on Buzz
                html += "\n    " % \
                    (article[3].encode('utf-8'), article[2].encode('utf-8'))
                # on delicious
                html += "\n    " % \
                    (article[3].encode('utf-8'), article[2].encode('utf-8'))
                # on Digg
                html += "\n    " % \
                    (article[3].encode('utf-8'), article[2].encode('utf-8'))
                # on reddit
                html += "\n    " % \
                    (article[3].encode('utf-8'), article[2].encode('utf-8'))
                # on Scoopeo
                html += "\n    " % \
                    (article[3].encode('utf-8'), article[2].encode('utf-8'))
                # on Blogmarks
                html += "\n " % \
                    (article[3].encode('utf-8'), article[2].encode('utf-8'))
                html += "\n\n" % \
                    (article[3].encode('utf-8'),)
                break
        html += "\n\n" + htmlfooter
        return html

    description.exposed = True

    def all_articles(self, feed_id):
        """
        Display all articles of a feed.
        """
        try:
            articles_list = self.articles[feed_id]
        except KeyError:
            return self.error_page("This feed do not exists.")
        html = htmlheader
        html += htmlnav
        html += "\n\n"
        html += "Mark all articles from this feed as read" % (feed_id,)
        html += "\n\n\n\n" % (feed_id,)
        html += "\n\n"
        html += self.create_list_of_feeds()
        html += "\n"

        html += "\n\nArticles of the feed %s\n\n\n" % \
            (self.feeds[feed_id][3].encode('utf-8'))

        for article in articles_list:
            not_read_begin, not_read_end = \
                self._unread_markers(article[5] == "0")

            if article[7] == "1":
                like = " "
            else:
                like = ""

            html += article[1].encode('utf-8') + \
                " - " + not_read_begin + \
                "%s" % \
                (feed_id, article[0].encode('utf-8'),
                 utils.remove_html_tags(article[2].encode('utf-8'))) + \
                not_read_end + like + \
                "\n\n"

        html += "\n\n\nAll feeds\n\n"
        html += "\n\n"
        html += htmlfooter
        return html

    all_articles.exposed = True

    def unread(self, feed_id):
        """
        Display all unread articles of a feed.

        When 'feed_id' is "All", the unread articles of every feed are
        listed.
        """
        html = htmlheader
        html += htmlnav
        html += "\n"
        if feed_id == "All":
            html += "\n\nUnread article(s)\n\n"
            for rss_feed_id in self.feeds.keys():
                for article in self.articles[rss_feed_id]:
                    if article[5] == "0":
                        html += article[1].encode('utf-8') + \
                            " - %s from %s\n\n" % \
                            (rss_feed_id, article[0].encode('utf-8'),
                             article[2].encode('utf-8'),
                             self.feeds[rss_feed_id][5].encode('utf-8'),
                             self.feeds[rss_feed_id][3].encode('utf-8'))
            html += "\n\nMark articles as read\n"
        else:
            try:
                articles_list = self.articles[feed_id]
            except KeyError:
                return self.error_page("This feed do not exists.")
            html += "\n\nUnread article(s) of the feed %s\n\n\n" % \
                (feed_id, self.feeds[feed_id][3].encode('utf-8'))
            for article in articles_list:
                if article[5] == "0":
                    html += article[1].encode('utf-8') + \
                        " - %s" % \
                        (feed_id, article[0].encode('utf-8'),
                         article[2].encode('utf-8')) + \
                        "\n\n"

            html += "\n\nMark all as read" % (feed_id,)
        html += "\n\n\nAll feeds\n\n"
        html += "\n\n"
        html += htmlfooter
        return html

    unread.exposed = True

    def language(self, lang):
        """
        Display articles by language.

        Only 'english', 'french' and 'other' are accepted.
        """
        if lang not in ['english', 'french', 'other']:
            return self.error_page('This language is not supported.')
        html = htmlheader
        html += htmlnav
        html += "\n"
        html += "\n\nArticle(s) written in %s\n\n\n\n\n" % (lang,)
        if "oice" not in utils.IMPORT_ERROR:
            for rss_feed_id in self.articles.keys():
                for article in self.articles[rss_feed_id]:
                    if article[6] == lang:
                        html += article[1].encode('utf-8') + \
                            " - %s from %s\n\n" % \
                            (rss_feed_id, article[0].encode('utf-8'),
                             article[2].encode('utf-8'),
                             self.feeds[rss_feed_id][5].encode('utf-8'),
                             self.feeds[rss_feed_id][3].encode('utf-8'))
        else:
            html += "Install the module "
            html += "oice.langdet"
        html += "\n\n"
        html += htmlfooter
        return html

    language.exposed = True

    def plain_text(self, target):
        """
        Display an article in plain text (without HTML tags).

        'target' has the form '<feed id>:<article id>'.
        """
        try:
            feed_id, article_id = target.split(':')
        except ValueError:
            return self.error_page("This article do not exists.")
        try:
            articles_list = self.articles[feed_id]
        except KeyError:
            return self.error_page("This feed do not exists.")
        html = htmlheader
        html += htmlnav
        html += "\n"
        for article in articles_list:
            if article_id == article[0]:
                html += "\n\n%s from %s\n\n\n\n\n" % \
                    (article[2].encode('utf-8'), feed_id,
                     self.feeds[feed_id][3].encode('utf-8'))
                description = utils.remove_html_tags(article[4].encode('utf-8'))
                if description:
                    html += description
                else:
                    html += "No description available."
        html += "\n\n\n" + htmlfooter
        return html

    plain_text.exposed = True

    def error_page(self, message):
        """
        Display a message (bad feed id, bad article id, etc.)
        """
        html = htmlheader
        html += htmlnav
        html += "\n"
        html += "%s" % message
        html += "\n\n\n" + htmlfooter
        return html

    error_page.exposed = True

    def mark_as_read(self, target):
        """
        Mark one (or more) article(s) as read by setting the value of the
        field 'article_readed' of the SQLite database to 1.

        'target' is one of 'All', 'Feed:<feed id>',
        'Feed_FromMainPage:<feed id>' or 'Article:<article link>'.
        Returns the main page for 'All' and 'Feed_FromMainPage', the page
        of the feed for 'Feed', an error page when the feed is unknown or
        the database cannot be updated, and None otherwise.
        """
        param, _, identifiant = target.partition(':')
        feed_targets = ("Feed", "Feed_FromMainPage")

        if param in feed_targets and identifiant not in self.feeds:
            return self.error_page("This feed do not exists.")

        LOCKER.acquire()
        try:
            conn = sqlite3.connect(utils.sqlite_base, isolation_level=None)
            try:
                c = conn.cursor()
                # Mark all articles as read.
                if param == "All":
                    c.execute("UPDATE articles SET article_readed=1")
                # Mark all articles from a feed as read.
                elif param in feed_targets:
                    c.execute("UPDATE articles SET article_readed=1 "
                              "WHERE feed_link=?",
                              (self.feeds[identifiant][4],))
                # Mark an article as read.
                elif param == "Article":
                    # The link comes from the request: never build the
                    # statement by concatenation.
                    c.execute("UPDATE articles SET article_readed=1 "
                              "WHERE article_link=?",
                              (identifiant,))
                conn.commit()
                c.close()
            finally:
                conn.close()
        except sqlite3.Error:
            return self.error_page("Impossible to mark this article as read (database error).")
        finally:
            LOCKER.release()

        if param == "All" or param == "Feed_FromMainPage":
            return self.index()
        elif param == "Feed":
            return self.all_articles(identifiant)

    mark_as_read.exposed = True

    def list_notification(self):
        """
        List all active e-mail notifications.
        """
        html = htmlheader
        html += htmlnav
        html += "\n"
        html += "\n\nYou are receiving e-mails for the following feeds:\n\n\n"
        for rss_feed_id in self.feeds.keys():
            if self.feeds[rss_feed_id][6] == "1":
                html += "\t%s - Stop\n\n" % \
                    (rss_feed_id, self.feeds[rss_feed_id][3].encode('utf-8'),
                     rss_feed_id)
        html += "\n\nNotifications are sent to: %s\n\n" % \
            (utils.mail_to, utils.mail_to)
        html += "\n\n\n" + htmlfooter
        return html

    list_notification.exposed = True

    def mail_notification(self, param):
        """
        Enable or disable to notifications of news for a feed.

        'param' has the form '<action>:<feed id>'; notifications are
        enabled when the action is 'start' and disabled for any other
        action.
        """
        try:
            action, feed_id = param.split(':')
        except ValueError:
            return self.error_page("Bad URL")
        if feed_id not in self.feeds:
            return self.error_page("This feed do not exists.")

        if action == "start":
            mail = 1
        else:
            mail = 0

        try:
            conn = sqlite3.connect(utils.sqlite_base, isolation_level=None)
            try:
                c = conn.cursor()
                c.execute("UPDATE feeds SET mail=? WHERE feed_site_link=?",
                          (mail, self.feeds[feed_id][5]))
                conn.commit()
                c.close()
            finally:
                conn.close()
        except sqlite3.Error:
            return self.error_page("Error")
        return self.index()

    mail_notification.exposed = True

    def like(self, param):
        """
        Mark or unmark an article as favorites.

        'param' has the form '<action>:<feed id>:<article id>' where the
        action is 'yes' or 'no'; any other action leaves the base
        untouched.
        """
        try:
            action, feed_id, article_id = param.split(':')
        except ValueError:
            return self.error_page("Bad URL")
        try:
            articles_list = self.articles[feed_id]
        except KeyError:
            return self.error_page("This feed do not exists.")

        flags = {"yes": 1, "no": 0}
        for article in articles_list:
            if article_id == article[0]:
                try:
                    conn = sqlite3.connect(utils.sqlite_base,
                                           isolation_level=None)
                    try:
                        c = conn.cursor()
                        if action in flags:
                            c.execute("UPDATE articles SET like=? "
                                      "WHERE article_link=?",
                                      (flags[action], article[3]))
                        conn.commit()
                        c.close()
                    finally:
                        conn.close()
                except sqlite3.Error:
                    return self.error_page("Impossible to like/dislike this article (database error).")
                break
        return self.description(feed_id + ":" + article_id)

    like.exposed = True

    def list_favorites(self):
        """
        List of favorites articles
        """
        html = htmlheader
        html += htmlnav
        html += "\n"
        html += "\n\nYour favorites articles\n\n"
        for rss_feed_id in self.feeds.keys():
            for article in self.articles[rss_feed_id]:
                if article[7] == "1":
                    html += article[1].encode('utf-8') + \
                        " - %s from %s\n\n" % \
                        (rss_feed_id, article[0].encode('utf-8'),
                         article[2].encode('utf-8'),
                         self.feeds[rss_feed_id][5].encode('utf-8'),
                         self.feeds[rss_feed_id][3].encode('utf-8'))
        html += "\n\n"
        html += htmlfooter
        return html

    list_favorites.exposed = True

    def add_feed(self, url):
        """
        Add a new feed with the URL of a page.
        """
        html = htmlheader
        html += htmlnav
        html += "\n"
        # search the feed in the HTML page with BeautifulSoup
        feed_url = utils.search_feed(url)
        if feed_url is None:
            # No feed behind this URL: nothing has been added.
            html += "\n\nImpossible to find a feed at this URL.\n\n"
        else:
            result = utils.add_feed(feed_url)
            # if the feed is not already in the file feed.lst
            if result is False:
                html += "\n\nYou are already following this feed!\n\n"
            else:
                html += "\n\nFeed added. You can now fetch your feeds.\n\n"
        html += "\n"
        html += "Back to the management page.\n\n"
        html += "\n\n"
        html += htmlfooter
        return html

    add_feed.exposed = True

    def remove_feed(self, url):
        """
        Remove a feed from the file fee.lst.

        'url' is the identifier of the feed in self.feeds.
        """
        if url not in self.feeds:
            return self.error_page("This feed do not exists.")
        html = htmlheader
        html += htmlnav
        html += "\n"
        utils.remove_feed(self.feeds[url][4])
        html += "\n\nAll articles from this feed are now removed from the base.\n\n\n"
        html += "Back to the management page.\n\n"
        html += "\n\n"
        html += htmlfooter
        return html

    remove_feed.exposed = True

    def export(self, export_method):
        """
        Export articles stored in the SQLite database in text files.

        'export_method' is 'export_HTML' or 'export_TXT'. One folder by
        feed is created under utils.path + 'var/export/'. An article that
        can not be written is skipped.
        """
        if export_method not in ("export_HTML", "export_TXT"):
            return self.error_page("Bad URL")

        for rss_feed_id in self.feeds.keys():
            folder = utils.path + "var/export/" + self.feeds[rss_feed_id][3]
            folder = folder.replace(' ', '_')
            try:
                os.makedirs(folder)
            except OSError:
                return self.error_page(utils.path + "var/export/" + " already exists.\nYou should delete this folder.")

            for article in self.articles[rss_feed_id]:
                if export_method == "export_HTML":
                    name = folder + "/" + article[1] + ".html"
                    content = htmlheader
                    content += "\n\n" + article[2].encode('utf-8') + "\n\n\n"
                    content += article[4].encode('utf-8')
                    content += "\n\n"
                    content += htmlfooter
                else:
                    name = folder + "/" + article[1] + ".txt"
                    content = "Title: " + article[2].encode('utf-8') + "\n\n\n"
                    content += utils.remove_html_tags(article[4].encode('utf-8'))

                try:
                    f = open(name.replace(' ', '_'), "w")
                    try:
                        f.write(content)
                    finally:
                        f.close()
                except IOError:
                    # Best effort: keep exporting the other articles.
                    pass
        return self.management()

    export.exposed = True

    def update(self, path=None, event=None):
        """
        Synchronizes transient objects with the database,
        computes the list of most frequent words and generates the histogram.
        Called when an article is marked as read or when new articles are
        fetched.

        The two parameters are not used; they let the method be given as
        callback to the gamin monitor.
        """
        self.articles, self.feeds = utils.load_feed()
        self.nb_articles = sum([feed[0] for feed in self.feeds.values()])
        if self.articles != {}:
            self.top_words = utils.top_words(self.articles, 10, size=6)
            utils.create_histogram(self.top_words)
            print("Base (%s) loaded" % utils.sqlite_base)
        else:
            print("Base (%s) empty!" % utils.sqlite_base)

    def watch_base(self):
        """Monitor a file.

        Detect the changes in base of feeds.
        When a change is detected, reload the base.

        Relies on the 'gamin' module being imported at module level,
        which is done in the entry point below.
        """
        mon = gamin.WatchMonitor()
        time.sleep(10)
        mon.watch_file(utils.sqlite_base, self.update)
        ret = mon.event_pending()
        try:
            print("Watching %s" % utils.sqlite_base)
            while True:
                ret = mon.event_pending()
                if ret > 0:
                    print("The base of feeds (%s) has changed.\nReloading..." % utils.sqlite_base)
                    ret = mon.handle_one_event()
                time.sleep(1)
        except KeyboardInterrupt:
            pass
        print("Stop watching %s" % utils.sqlite_base)
        mon.stop_watch(utils.sqlite_base)
        del mon

    def watch_base_classic(self):
        """
        Monitor the base of feeds if the module gamin is not installed.

        The size of the base is checked every two seconds; the base is
        reloaded when the size has changed.
        """
        time.sleep(10)
        old_size = 0
        try:
            print("Watching %s" % utils.sqlite_base)
            while True:
                time.sleep(2)
                # very simple test
                if os.path.getsize(utils.sqlite_base) != old_size:
                    print("The base of feeds (%s) has changed.\nReloading..." % utils.sqlite_base)
                    self.update()
                    old_size = os.path.getsize(utils.sqlite_base)
        except KeyboardInterrupt:
            pass
        print("Stop watching %s" % utils.sqlite_base)


if __name__ == '__main__':
    # Point of entry in execution mode
    root = Root()
    if not os.path.isfile(utils.sqlite_base):
        # create the SQLite base if not exists
        utils.create_base()
    # load the informations from base in memory
    root.update()
    # launch the available base monitoring method (gamin or classic)
    try:
        import gamin
        thread_watch_base = threading.Thread(None, root.watch_base, None, ())
    except ImportError:
        print("The gamin module is not installed.")
        print("The base of feeds will be monitored with the simple method.")
        thread_watch_base = threading.Thread(None, root.watch_base_classic, None, ())
    thread_watch_base.setDaemon(True)
    thread_watch_base.start()
    cherrypy.quickstart(root, config=path)