#! /usr/local/bin/python
# -*- coding: utf-8 -*-

# pyAggr3g470r - a small CherryPy web interface for an SQLite-backed RSS reader.
#
# NOTE(review): the HTML fragments of this file were corrupted in the
# repository history (markup stripped from the string literals).  The markup
# below is a minimal, valid reconstruction that preserves the surviving
# visible text and the surviving format-argument tuples.

__author__ = "Cedric Bonhomme"
__version__ = "$Revision: 0.8 $"
__date__ = "$Date: 2010/02/23 $"
__copyright__ = "Copyright (c) 2010 Cedric Bonhomme"
__license__ = "GPLv3"

import re
import os
import pylab
import sqlite3
import hashlib
import cherrypy
import ConfigParser
from datetime import datetime
from string import punctuation
from collections import defaultdict
from cherrypy.lib.static import serve_file

import feedgetter

# Read the installation path from the configuration file.
config = ConfigParser.RawConfigParser()
config.read("./cfg/pyAggr3g470r.cfg")
base_path = config.get('global', 'path')

# Listen on every interface, port 12556.
bindhost = "0.0.0.0"
cherrypy.config.update({'server.socket_port': 12556,
                        'server.socket_host': bindhost})

# CherryPy application configuration (static files served from disk).
# This dict is passed to cherrypy.quickstart() at the bottom of the file.
path = {
    '/css/style.css': {
        'tools.staticfile.on': True,
        'tools.staticfile.filename': base_path + 'css/style.css'},
    '/var/histogram.png': {
        'tools.staticfile.on': True,
        'tools.staticfile.filename': base_path + 'var/histogram.png'},
}

# HTML fragments shared by every page.
htmlheader = """<html>
<head>
<title>pyAggr3g470r - RSS Feed Reader</title>
<link rel="stylesheet" type="text/css" href="/css/style.css" />
</head>
<body>
"""

htmlfooter = """<hr />
This software is under GPLv3 license. You are welcome to copy, modify or
redistribute the source code according to the GPLv3 license.
</body>
</html>
"""

htmlnav = """<h1><a href="/">pyAggr3g470r - RSS Feed Reader</a></h1>
pyAggr3g470r (source code)
"""


class Root:
    """CherryPy root object: every exposed method is one page of the reader.

    Article lists are cached on the instance in ``self.dic`` /
    ``self.dic_info`` (see load_feed() for the exact layout).
    """

    def _ensure_loaded(self):
        """Load the feed cache if this instance has not loaded it yet.

        Robustness fix: the original crashed with AttributeError when a
        page other than the main page was requested first.
        """
        if not hasattr(self, 'dic'):
            self.dic, self.dic_info = self.load_feed()

    def _article_summary(self, article):
        """Return one HTML line for 'article': its date, then its title
        (bold while unread) linking to the description page."""
        if article[7] == "0":
            # not read articles are displayed in bold
            not_read_begin, not_read_end = "<b>", "</b>"
        else:
            not_read_begin, not_read_end = "", ""
        return article[1].encode('utf-8') + " - " + not_read_begin + \
            """<a href="/description/%s">%s</a>""" % \
            (article[0].encode('utf-8'), article[2].encode('utf-8')) + \
            not_read_end

    def index(self):
        """
        Main page containing the list of feeds and articles.
        """
        self.dic, self.dic_info = self.load_feed()
        html = htmlheader
        html += htmlnav
        html += """<div class="right inner">\n"""
        html += """<a href="/fetch/">Fetch all feeds</a><br />\n"""
        html += """<a href="/mark_as_read/All:">Mark all articles as read</a><br />\n"""
        html += """<a href="/management/">Management of feed</a>\n"""
        html += """</div>\n"""
        html += """<div class="left inner">\n"""
        html += """Your feeds (%s):<br />\n""" % len(self.dic.keys())
        for rss_feed_id in self.dic.keys():
            # feed title, then (number of unread / total number of articles)
            html += """<a href="#%s">%s</a> (%s / %s)<br />\n""" % \
                (rss_feed_id,
                 self.dic[rss_feed_id][0][5].encode('utf-8'),
                 self.dic_info[rss_feed_id][1],
                 self.dic_info[rss_feed_id][0])
        html += """</div>\n<div class="left inner">\n"""
        for rss_feed_id in self.dic.keys():
            # section heading: feed title linking to the feed site
            html += '<h2><a name="%s" href="%s">' % \
                (rss_feed_id, self.dic[rss_feed_id][0][6].encode('utf-8')) + \
                self.dic[rss_feed_id][0][5].encode('utf-8') + "</a></h2>\n"
            # The main page displays only 10 articles by feed.
            for article in self.dic[rss_feed_id][:10]:
                html += self._article_summary(article) + "<br />\n"
            html += "<br />\n"
            html += """<a href="/all_articles/%s">All articles</a>""" % (rss_feed_id,)
            html += """ <a href="/mark_as_read/Feed_FromMainPage:%s">Mark all as read</a>""" % (rss_feed_id,)
            if self.dic_info[rss_feed_id][1] != 0:
                html += """ <a href="/unread/%s">Unread article(s) (%s)</a>""" % \
                    (rss_feed_id, self.dic_info[rss_feed_id][1])
            html += """<br />\n<a href="#top">Top</a>\n<hr />\n"""
        html += "</div>\n"
        html += htmlfooter
        return html

    def management(self):
        """
        Management page: feed administration, database information and
        statistics (most frequent words + histogram).
        """
        self.dic, self.dic_info = self.load_feed()
        html = htmlheader
        html += htmlnav
        html += """<div class="left inner">\n"""
        # NOTE(review): the add/delete feed form markup was lost in this
        # file's history; only the section headings survive.
        html += "<h1>Add Feeds</h1>\n"
        html += "<h1>Delete Feeds</h1>\n"
        html += """<br />\nThe database contains a total of %s articles with %s unread articles.<br />\n""" % \
            (sum([feed[0] for feed in self.dic_info.values()]),
             sum([feed[1] for feed in self.dic_info.values()]))
        html += """Database: %s.\n<br />Size: %s bytes.<br />\n""" % \
            (os.path.abspath("./var/feed.db"),
             os.path.getsize("./var/feed.db"))
        html += """<a href="/fetch/">Fetch all feeds</a><br />\n"""
        html += "<h1>Statistics</h1>\n"
        N = 10
        # Concatenate the text of every article, HTML tags removed.
        article_content = ""
        for rss_feed_id in self.dic.keys():
            for article in self.dic[rss_feed_id]:
                article_content += remove_html_tags(article[4].encode('utf-8'))
        # Count words of at least five characters, punctuation stripped.
        words_gen = (word.strip(punctuation).lower()
                     for word in article_content.split()
                     if len(word) >= 5)
        words = defaultdict(int)
        for word in words_gen:
            words[word] += 1
        # Most frequent first; ties broken alphabetically.
        # (portable replacement for the Py2-only iteritems() +
        # tuple-unpacking lambda of the original)
        top_words = sorted(words.items(),
                           key=lambda item: (-item[1], item[0]))[:N]
        html += "<ol>\n"
        for word, frequency in top_words:
            html += """\t<li><a href="/q/?querystring=%s">%s</a>: %s</li>\n""" % \
                (word, word, frequency)
        html += "</ol>\n"
        create_histogram(top_words)
        html += """<img src="/var/histogram.png" />\n"""
        html += "</div>\n"
        html += htmlfooter
        return html

    def q(self, querystring=None):
        """
        Search for a feed. Simply search for the string 'querystring' in
        the description of the article.

        The search is restricted to one feed with the syntax
        'Feed:<feed-id>:<string>'.
        """
        self._ensure_loaded()
        if querystring is None:
            querystring = ""
        param, _, value = querystring.partition(':')
        feed_id = None
        if param == "Feed":
            feed_id, _, querystring = value.partition(':')
        html = htmlheader
        html += htmlnav
        html += """<div class="left inner">"""
        html += """<h1>Articles containing the string %s</h1>\n""" % (querystring,)
        if feed_id is not None:
            # BUG FIX: the original iterated self.dic[rss_feed_id] here,
            # with 'rss_feed_id' undefined (NameError).
            feed_ids = [feed_id] if feed_id in self.dic else []
            show_source = False
        else:
            feed_ids = self.dic.keys()
            show_source = True       # global search: show the source feed
        for rss_feed_id in feed_ids:
            for article in self.dic[rss_feed_id]:
                article_content = remove_html_tags(
                    article[4].encode('utf-8') + article[2].encode('utf-8'))
                if querystring.lower() in article_content.lower():
                    html += self._article_summary(article)
                    if show_source:
                        html += """ from <a href="%s">%s</a>""" % \
                            (article[6].encode('utf-8'),
                             article[5].encode('utf-8'))
                    html += "<br />\n"
        html += "</div>"
        html += htmlfooter
        return html

    def fetch(self):
        """
        Fetch all feeds, then render the refreshed main page.
        """
        feed_getter = feedgetter.FeedGetter()
        feed_getter.retrieve_feed()
        return self.index()

    def description(self, article_id):
        """
        Display the description of an article in a new Web page and mark
        the article as read.
        """
        self._ensure_loaded()
        html = htmlheader
        html += htmlnav
        html += """<div class="left inner">"""
        for rss_feed_id in self.dic.keys():
            for article in self.dic[rss_feed_id]:
                if article_id == article[0]:
                    if article[7] == "0":
                        # update the database: the article is now read
                        self.mark_as_read("Article:" + article[3])
                    html += """<h1>%s</h1>\n<h2>from <a href="/all_articles/%s">%s</a></h2>\n""" % \
                        (article[2].encode('utf-8'), rss_feed_id,
                         article[5].encode('utf-8'))
                    description = article[4].encode('utf-8')
                    if description:
                        html += description
                    else:
                        html += "No description available."
                    html += """<br />\n<a href="%s">Complete story</a>\n""" % \
                        (article[3].encode('utf-8'),)
        html += "</div>\n" + htmlfooter
        return html

    def all_articles(self, feed_id):
        """
        Display all articles of the feed 'feed_id'.
        """
        self._ensure_loaded()
        html = htmlheader
        html += htmlnav
        html += """<div class="right inner">\n"""
        html += """<a href="/mark_as_read/Feed:%s">Mark all articles from this feed as read</a>""" % (feed_id,)
        html += """<br />\n<a href="/unread/%s">Unread article(s)</a>\n""" % (feed_id,)
        html += "</div>\n"
        html += """<div class="left inner">\nYour feeds (%s):<br />\n""" % len(self.dic.keys())
        for rss_feed_id in self.dic.keys():
            html += """<a href="/all_articles/%s">%s</a> (%s / %s)<br />\n""" % \
                (rss_feed_id,
                 self.dic[rss_feed_id][0][5].encode('utf-8'),
                 self.dic_info[rss_feed_id][1],
                 self.dic_info[rss_feed_id][0])
        html += """</div>"""
        html += """<div class="left inner">\n<h1>Articles of the feed %s</h1>\n""" % \
            (self.dic[feed_id][0][5].encode('utf-8'))
        for article in self.dic[feed_id]:
            html += self._article_summary(article) + "<br />\n"
        html += """\n<a href="/">All feeds</a>\n"""
        html += "</div>\n"
        html += htmlfooter
        return html

    def unread(self, feed_id):
        """
        Display all unread articles of the feed 'feed_id'.
        """
        self._ensure_loaded()
        html = htmlheader
        html += htmlnav
        html += """<div class="left inner">"""
        html += """<h1>Unread article(s) of the feed <a href="/all_articles/%s">%s</a></h1>\n""" % \
            (feed_id, self.dic[feed_id][0][5].encode('utf-8'))
        for article in self.dic[feed_id]:
            if article[7] == "0":
                html += article[1].encode('utf-8') + \
                    """ - <a href="/description/%s">%s</a>""" % \
                    (article[0].encode('utf-8'),
                     article[2].encode('utf-8')) + \
                    "<br />\n"
        html += """<br />\n<a href="/mark_as_read/Feed:%s">Mark all as read</a>""" % (feed_id,)
        html += """\n<a href="/">All feeds</a>\n"""
        html += "</div>\n"
        html += htmlfooter
        return html

    def load_feed(self):
        """
        Load the articles of the SQLite database into two dictionaries.

        Returns (dic, dic_info) where:
          dic[feed_id] = list of [article_id, article_date, article_title,
                                  article_link, article_description,
                                  feed_title, feed_link, article_readed],
                         sorted by date, most recent first;
          dic_info[feed_id] = (nb_article, nb_article_unread).
        """
        list_of_articles = None
        try:
            conn = sqlite3.connect("./var/feed.db", isolation_level=None)
            c = conn.cursor()
            list_of_articles = c.execute("SELECT * FROM rss_feed").fetchall()
            c.close()
            conn.close()   # BUG FIX: the connection was never closed
        except sqlite3.Error:
            # best effort: without a database the reader shows empty pages
            pass
        dic, dic_info = {}, {}
        if list_of_articles is None:
            return (dic, dic_info)
        for article in list_of_articles:
            # Identifiers are SHA-256 digests.  The hash object is
            # deliberately reused, so an article id also depends on its
            # feed; changing this would break existing /description/ links.
            sha256_hash = hashlib.sha256()
            sha256_hash.update(article[5].encode('utf-8'))
            feed_id = sha256_hash.hexdigest()
            sha256_hash.update(article[2].encode('utf-8'))
            article_id = sha256_hash.hexdigest()
            article_list = [article_id, article[0], article[1],
                            article[2], article[3], article[4],
                            article[5], article[6]]
            dic.setdefault(feed_id, []).append(article_list)
        # sort articles by date for each feed (most recent first)
        for feeds in dic.keys():
            dic[feeds].sort(lambda x, y: compare(y[1], x[1]))
        for rss_feed_id in dic.keys():
            nb_unread = len([article for article in dic[rss_feed_id]
                             if article[7] == "0"])
            dic_info[rss_feed_id] = (len(dic[rss_feed_id]), nb_unread)
        return (dic, dic_info)

    def mark_as_read(self, target):
        """
        Mark one (or more) article(s) as read by setting the field
        'article_readed' of the SQLite database to 1.

        'target' is one of:
          "All:"                     -> every article;
          "Feed:<feed-id>"           -> every article of one feed;
          "Feed_FromMainPage:<id>"   -> same, then redisplay the main page;
          "Article:<article-link>"   -> a single article.
        """
        param, _, identifiant = target.partition(':')
        self._ensure_loaded()
        try:
            conn = sqlite3.connect("./var/feed.db", isolation_level=None)
            c = conn.cursor()
            # SECURITY FIX: the original built the SQL text by string
            # concatenation, which allowed SQL injection; use
            # parameterized queries instead.
            if param == "All":
                # Mark all articles as read.
                c.execute("UPDATE rss_feed SET article_readed=1")
            elif param == "Feed" or param == "Feed_FromMainPage":
                # Mark all articles from a feed as read.
                c.execute("UPDATE rss_feed SET article_readed=1 "
                          "WHERE feed_site_link=?",
                          (self.dic[identifiant][0][6],))
            elif param == "Article":
                # Mark an article as read.
                c.execute("UPDATE rss_feed SET article_readed=1 "
                          "WHERE article_link=?", (identifiant,))
            conn.commit()
            c.close()
            conn.close()
        except (sqlite3.Error, KeyError):
            # best effort: a failed update simply leaves articles unread
            pass
        self.dic, self.dic_info = self.load_feed()
        if param == "All" or param == "Feed_FromMainPage":
            return self.index()
        elif param == "Feed":
            return self.all_articles(identifiant)

    index.exposed = True
    management.exposed = True
    fetch.exposed = True
    q.exposed = True
    description.exposed = True
    all_articles.exposed = True
    mark_as_read.exposed = True
    unread.exposed = True


def remove_html_tags(data):
    """
    Remove HTML tags for the search and the word statistics.
    """
    p = re.compile(r'<[^<]*?/?>')
    return p.sub('', data)


def create_histogram(words, file_name="./var/histogram.png"):
    """
    Create a histogram (PNG file) of the most frequent words.

    'words' is a sequence of (word, count) pairs, most frequent first.
    BUG FIX: the abscissa was hard coded to 10 bars, which crashed when
    fewer than 10 distinct words existed; it now follows len(words).
    """
    if not words:
        # nothing to plot (empty database)
        return
    ind = pylab.arange(len(words))   # abscissa
    width = 0.35                     # bars width
    count = [int(elem[1]) for elem in words]
    max_count = max(count)           # maximal weight
    pylab.bar(ind, count, width, color='r')
    pylab.ylabel("Count")
    pylab.title("Most frequent words")
    pylab.xticks(ind + (width / 2), range(1, len(words) + 1))
    pylab.xlim(-width, len(ind))
    # changing the ordinate scale according to the max.
    if max_count <= 100:
        step = 5
    elif max_count <= 200:
        step = 10
    elif max_count <= 600:
        step = 25
    elif max_count <= 800:
        step = 50
    else:
        # BUG FIX: counts above 800 previously left the scale unset
        step = 100
    pylab.ylim(0, max_count + step)
    pylab.yticks(pylab.arange(0, max_count + step, step))
    pylab.savefig(file_name, dpi=80)
    pylab.close()


def compare(stringtime1, stringtime2):
    """
    Compare two dates in the format 'yyyy-mm-dd hh:mm:ss'.

    Returns -1, 0 or 1 (cmp-style), suitable for sorting articles.
    """
    time_format = "%Y-%m-%d %H:%M:%S"
    datetime1 = datetime.strptime(stringtime1, time_format)
    datetime2 = datetime.strptime(stringtime2, time_format)
    if datetime1 < datetime2:
        return -1
    elif datetime1 > datetime2:
        return 1
    return 0


if __name__ == '__main__':
    # Point of entry in execution mode
    root = Root()
    cherrypy.quickstart(root, config=path)