From 3ee7e79c6e9f2569bebc34ad02c74b2e541fb2d7 Mon Sep 17 00:00:00 2001 From: cedricbonhomme Date: Wed, 24 Feb 2010 09:08:06 +0100 Subject: Added utils.py and lot of improvements. --- pyAggr3g470r.py | 189 +++++++++++--------------------------------------------- 1 file changed, 35 insertions(+), 154 deletions(-) (limited to 'pyAggr3g470r.py') diff --git a/pyAggr3g470r.py b/pyAggr3g470r.py index d72a6008..87be077b 100644 --- a/pyAggr3g470r.py +++ b/pyAggr3g470r.py @@ -7,19 +7,13 @@ __date__ = "$Date: 2010/02/23 $" __copyright__ = "Copyright (c) 2010 Cedric Bonhomme" __license__ = "GPLv3" -import re import os -import pylab -import sqlite3 -import hashlib import cherrypy import ConfigParser -from datetime import datetime -from string import punctuation -from collections import defaultdict from cherrypy.lib.static import serve_file +import utils import feedgetter config = ConfigParser.RawConfigParser() @@ -41,10 +35,10 @@ htmlheader = """pyAggr3g470r - RSS Feed Reader """ htmlfooter = """

This software is under GPLv3 license. You are welcome to copy, modify or - redistribute the source code according to the GPLv3 license.

- """ + redistribute the source code according to the GPLv3 license.

\n + \n""" -htmlnav = """

pyAggr3g470r - RSS Feed Reader

\n

pyAggr3g470r - RSS Feed Reader

\n pyAggr3g470r (source code) """ @@ -55,7 +49,7 @@ class Root: """ Main page containing the list of feeds and articles. """ - self.dic, self.dic_info = self.load_feed() + self.dic, self.dic_info = utils.load_feed() html = htmlheader html += htmlnav html += """
\n""" @@ -113,10 +107,13 @@ class Root: html += htmlfooter return html + index.exposed = True + + def management(self): """ """ - self.dic, self.dic_info = self.load_feed() + self.dic, self.dic_info = utils.load_feed() html = htmlheader html += htmlnav html += """
\n""" @@ -147,35 +144,24 @@ class Root: html += "
\n" html += "

Statistics

\n" - N = 10 - words = {} - article_content = "" - for rss_feed_id in self.dic.keys(): - for article in self.dic[rss_feed_id]: - article_content += remove_html_tags(article[4].encode('utf-8')) - - words_gen = (word.strip(punctuation).lower() \ - for word in article_content.split() \ - if len(word) >= 5) - words = defaultdict(int) - for word in words_gen: - words[word] += 1 + top_words = utils.top_words(self.dic, 10) - top_words = sorted(words.iteritems(), - key=lambda(word, count): (-count, word))[:N] html += "\n
" html += "
    \n" for word, frequency in top_words: html += """\t
  1. %s: %s
  2. \n""" % \ (word, word, frequency) html += "
\n
" - create_histogram(top_words) + utils.create_histogram(top_words) html += """
""" html += "
\n" html += htmlfooter return html + management.exposed = True + + def q(self, querystring=None): """ Search for a feed. Simply search for the string 'querystring' @@ -193,7 +179,7 @@ class Root: if feed_id is not None: for article in self.dic[rss_feed_id]: - article_content = remove_html_tags(article[4].encode('utf-8') + article[2].encode('utf-8')) + article_content = utils.remove_html_tags(article[4].encode('utf-8') + article[2].encode('utf-8')) if querystring.lower() in article_content.lower(): if article[7] == "0": # not readed articles are in bold @@ -211,7 +197,7 @@ class Root: else: for rss_feed_id in self.dic.keys(): for article in self.dic[rss_feed_id]: - article_content = remove_html_tags(article[4].encode('utf-8') + article[2].encode('utf-8')) + article_content = utils.remove_html_tags(article[4].encode('utf-8') + article[2].encode('utf-8')) if querystring.lower() in article_content.lower(): if article[7] == "0": # not readed articles are in bold @@ -231,6 +217,9 @@ class Root: html += htmlfooter return html + q.exposed = True + + def fetch(self): """ Fetch all feeds @@ -239,6 +228,9 @@ class Root: feed_getter.retrieve_feed() return self.index() + fetch.exposed = True + + def description(self, article_id): """ Display the description of an article in a new Web page. @@ -260,10 +252,16 @@ class Root: html += description else: html += "No description available." - html += """
\nComplete story\n""" % (article[3].encode('utf-8'),) + html += """
\nComplete story\n
\n""" % \ + (article[3].encode('utf-8'),) + html += """Delicious""" % \ + (article[3].encode('utf-8'),) html += "
\n" + htmlfooter return html + description.exposed = True + + def all_articles(self, feed_id): """ Display all articles of a feed ('feed_title'). @@ -309,6 +307,9 @@ class Root: html += htmlfooter return html + all_articles.exposed = True + + def unread(self, feed_id): """ Display all unread articles of a feed ('feed_title'). @@ -334,53 +335,8 @@ class Root: html += htmlfooter return html - def load_feed(self): - """ - Load feeds in a dictionary. - """ - list_of_articles = None - try: - conn = sqlite3.connect("./var/feed.db", isolation_level = None) - c = conn.cursor() - list_of_articles = c.execute("SELECT * FROM rss_feed").fetchall() - c.close() - except: - pass - - # The key of dic is the id of the feed: - # dic[feed_id] = (article_id, article_date, article_title, - # article_link, article_description, feed_title, - # feed_link, article_readed) - # dic_info[feed_id] = (nb_article, nb_article_unreaded) - dic, dic_info = {}, {} - if list_of_articles is not None: - for article in list_of_articles: - sha256_hash = hashlib.sha256() - sha256_hash.update(article[5].encode('utf-8')) - feed_id = sha256_hash.hexdigest() - sha256_hash.update(article[2].encode('utf-8')) - article_id = sha256_hash.hexdigest() - - article_list = [article_id, article[0], article[1], \ - article[2], article[3], article[4], article[5], article[6]] - - if feed_id not in dic: - dic[feed_id] = [article_list] - else: - dic[feed_id].append(article_list) - - # sort articles by date for each feeds - for feeds in dic.keys(): - dic[feeds].sort(lambda x,y: compare(y[1], x[1])) - - for rss_feed_id in dic.keys(): - dic_info[rss_feed_id] = (len(dic[rss_feed_id]), \ - len([article for article in dic[rss_feed_id] \ - if article[7]=="0"]) \ - ) + unread.exposed = True - return (dic, dic_info) - return (dic, dic_info) def mark_as_read(self, target): """ @@ -405,91 +361,16 @@ class Root: except Exception, e: pass - self.dic, self.dic_info = self.load_feed() + self.dic, self.dic_info = utils.load_feed() if param == "All" or param == "Feed_FromMainPage": return self.index() elif param == "Feed": return self.all_articles(identifiant) - index.exposed = True - management.exposed = True - fetch.exposed = True - q.exposed = True - description.exposed = True - all_articles.exposed = True mark_as_read.exposed = True - unread.exposed = True -def remove_html_tags(data): - """ - Remove HTML tags for the search. - """ - p = re.compile(r'<[^<]*?/?>') - return p.sub('', data) - -def create_histogram(words, file_name="./var/histogram.png"): - """ - Create a histogram. - """ - length = 10 - ind = pylab.arange(length) # abscissa - width = 0.35 # bars width - - w = [elem[0] for elem in words] - count = [int(elem[1]) for elem in words] - - max_count = max(count) # maximal weight - - p = pylab.bar(ind, count, width, color='r') - - pylab.ylabel("Count") - pylab.title("Most frequent words") - pylab.xticks(ind + (width / 2), range(1, len(w)+1)) - pylab.xlim(-width, len(ind)) - - # changing the ordinate scale according to the max. - if max_count <= 100: - pylab.ylim(0, max_count + 5) - pylab.yticks(pylab.arange(0, max_count + 5, 5)) - elif max_count <= 200: - pylab.ylim(0, max_count + 10) - pylab.yticks(pylab.arange(0, max_count + 10, 10)) - elif max_count <= 600: - pylab.ylim(0, max_count + 25) - pylab.yticks(pylab.arange(0, max_count + 25, 25)) - elif max_count <= 800: - pylab.ylim(0, max_count + 50) - pylab.yticks(pylab.arange(0, max_count + 50, 50)) - - pylab.savefig(file_name, dpi = 80) - pylab.close() - -def compare(stringtime1, stringtime2): - """ - Compare two dates in the format 'yyyy-mm-dd hh:mm:ss'. - """ - date1, time1 = stringtime1.split(' ') - date2, time2 = stringtime2.split(' ') - - year1, month1, day1 = date1.split('-') - year2, month2, day2 = date2.split('-') - - hour1, minute1, second1 = time1.split(':') - hour2, minute2, second2 = time2.split(':') - - datetime1 = datetime(year=int(year1), month=int(month1), day=int(day1), \ - hour=int(hour1), minute=int(minute1), second=int(second1)) - - datetime2 = datetime(year=int(year2), month=int(month2), day=int(day2), \ - hour=int(hour2), minute=int(minute2), second=int(second2)) - - if datetime1 < datetime2: - return -1 - elif datetime1 > datetime2: - return 1 - else: - return 0 + if __name__ == '__main__': -- cgit