From d71823201e196bd92c76c2e44db983e852921baa Mon Sep 17 00:00:00 2001 From: cedricbonhomme Date: Mon, 1 Mar 2010 09:23:16 +0100 Subject: Improvements of the detection of words (faster). --- feedgetter.py | 4 ++-- pyAggr3g470r.py | 64 ++++++++++++++++++++++++++++----------------------------- utils.py | 2 +- 3 files changed, 34 insertions(+), 36 deletions(-) diff --git a/feedgetter.py b/feedgetter.py index c66d7a0f..432d603e 100644 --- a/feedgetter.py +++ b/feedgetter.py @@ -4,8 +4,8 @@ from __future__ import with_statement __author__ = "Cedric Bonhomme" -__version__ = "$Revision: 0.6 $" -__date__ = "$Date: 2010/02/05 $" +__version__ = "$Revision: 0.7 $" +__date__ = "$Date: 2010/03/01 $" __copyright__ = "Copyright (c) 2010 Cedric Bonhomme" __license__ = "GPLv3" diff --git a/pyAggr3g470r.py b/pyAggr3g470r.py index e1251664..60087c2a 100644 --- a/pyAggr3g470r.py +++ b/pyAggr3g470r.py @@ -2,8 +2,8 @@ #-*- coding: utf-8 -*- __author__ = "Cedric Bonhomme" -__version__ = "$Revision: 0.8 $" -__date__ = "$Date: 2010/02/24 $" +__version__ = "$Revision: 0.9 $" +__date__ = "$Date: 2010/03/01 $" __copyright__ = "Copyright (c) 2010 Cedric Bonhomme" __license__ = "GPLv3" @@ -50,7 +50,7 @@ htmlheader = '\n' htmlfooter = '

This software is under GPLv3 license. You are welcome to copy, modify or' + \ - 'redistribute the source code according to the GPLv3 license.

\n' + \ + ' redistribute the source code according to the GPLv3 license.

\n' + \ '\n' htmlnav = '\n

pyAggr3g470r - RSS Feed Reader

\n\n""" html += "
\n" - + nb_articles = sum([feed[0] for feed in self.feeds.values()]) html += """

The database contains a total of %s article(s) with %s unread article(s).
""" % \ - (sum([feed[0] for feed in self.feeds.values()]), - sum([feed[1] for feed in self.feeds.values()])) + (nb_articles, sum([feed[1] for feed in self.feeds.values()])) html += """Database: %s.\n
Size: %s bytes.

\n""" % \ (os.path.abspath("./var/feed.db"), os.path.getsize("./var/feed.db")) @@ -165,38 +164,36 @@ class Root: html += "
\n" if self.articles: html += "

Statistics

\n" + top_words = utils.top_words(self.articles, 10) + utils.create_histogram(top_words) + + nb_french = 0 + nb_english = 0 + for rss_feed_id in self.articles.keys(): + for article in self.articles[rss_feed_id]: + if article[6] == 'french': + nb_french += 1 + elif article[6] == 'english': + nb_english += 1 + nb_other = nb_articles - nb_french - nb_english html += "\n\n
" + html += "

Words count

\n" html += "
    \n" for word, frequency in top_words: html += """\t
  1. %s: %s
  2. \n""" % \ (word, word, frequency) - html += "
\n
" - utils.create_histogram(top_words) + html += "\n" + html += "

Languages

\n" + html += "
    \n" + for language in ['english', 'french', 'other']: + html += """\t
  • %s articles in %s
  • \n""" % \ + (locals()["nb_"+language], language, language) + html += "
\n
" html += """
""" - nb_french = 0 - nb_english = 0 - nb_other = 0 - for rss_feed_id in self.articles.keys(): - for article in self.articles[rss_feed_id]: - if article[6] == 'french': - nb_french += 1 - elif article[6] == 'english': - nb_english += 1 - else: - nb_other +=1 - - html += "

Languages

\n" - html += "\n" html += "
\n" - html += htmlfooter return html @@ -296,7 +293,8 @@ class Root: else: html += "No description available." html += "
\n" - html += """This article is written in %s.""" % (article[6],) + html += """This article seems to be written in %s.""" % \ + (article[6], article[6]) html += """
Complete story\n
\n""" % \ (article[3].encode('utf-8'),) # Share this article: @@ -416,15 +414,15 @@ class Root: unread.exposed = True + def language(self, lang): """ + Display articles by language. """ html = htmlheader html += htmlnav html += """
""" - - html += """

Article(s) written in %s

""" % (lang,) - + html += """

Article(s) written in %s

\n
\n""" % (lang,) for rss_feed_id in self.articles.keys(): for article in self.articles[rss_feed_id]: if article[6] == lang: @@ -434,13 +432,13 @@ class Root: (article[0].encode('utf-8'), article[2].encode('utf-8'), \ self.feeds[rss_feed_id][5].encode('utf-8'), \ self.feeds[rss_feed_id][3].encode('utf-8')) - html += "
\n" html += htmlfooter return html language.exposed = True + def mark_as_read(self, target): """ Mark one (or more) article(s) as read by setting the value of the field diff --git a/utils.py b/utils.py index e3e209b2..c6bbcb0c 100644 --- a/utils.py +++ b/utils.py @@ -164,7 +164,7 @@ def load_feed(): language = detect_language(remove_html_tags(article[3][:80]).encode('utf-8') + \ remove_html_tags(article[1]).encode('utf-8')) else: - language = "other" + language = detect_language(remove_html_tags(article[1]).encode('utf-8')) article_list = [article_id, article[0], article[1], \ article[2], article[3], article[4], language] -- cgit