diff options
author | cedricbonhomme <devnull@localhost> | 2010-09-08 23:32:05 +0200 |
---|---|---|
committer | cedricbonhomme <devnull@localhost> | 2010-09-08 23:32:05 +0200 |
commit | 2a5e7b5e92cc1f5015b029055e85806c10f85308 (patch) | |
tree | fd0d304eeb4914ba32f49b8260cdd61cc84c6d97 | |
parent | Improvement of the description of articles page. (diff) | |
download | newspipe-2a5e7b5e92cc1f5015b029055e85806c10f85308.tar.gz newspipe-2a5e7b5e92cc1f5015b029055e85806c10f85308.tar.bz2 newspipe-2a5e7b5e92cc1f5015b029055e85806c10f85308.zip |
Articles are now stored in the Python blist high-performance data structure. If the blist module is not present, plain lists are used.
-rwxr-xr-x | pyAggr3g470r.py | 13 | ||||
-rwxr-xr-x | utils.py | 17 | ||||
-rwxr-xr-x | var/feed.lst | 1 |
3 files changed, 15 insertions, 16 deletions
diff --git a/pyAggr3g470r.py b/pyAggr3g470r.py index 7666c819..b5169268 100755 --- a/pyAggr3g470r.py +++ b/pyAggr3g470r.py @@ -234,15 +234,10 @@ class Root: self.top_words = utils.top_words(self.articles, n=50, size=int(word_size)) html += "<h1>Statistics</h1>\n<br />\n" if "oice" not in utils.IMPORT_ERROR: - nb_french = 0 - nb_english = 0 + counter = Counter() for rss_feed_id in self.articles.keys(): for article in self.articles[rss_feed_id]: - if article[6] == 'french': - nb_french += 1 - elif article[6] == 'english': - nb_english += 1 - nb_other = self.nb_articles - nb_french - nb_english + counter[article[6]] += 1 html += "Minimum size of a word: " html += """<form method=get action="/management/"><select name="word_size">\n""" @@ -262,9 +257,9 @@ class Root: html += """<a href="http://pypi.python.org/pypi/oice.langdet/">oice.langdet</a>""" else: html += "<ul>\n" - for language in ['english', 'french', 'other']: + for language in ['english', 'french']: html += """\t<li>%s articles in <a href="/language/%s">%s</a></li>\n""" % \ - (locals()["nb_"+language], language, language) + (counter[language], language, language) html += "</ul>\n<br />" html += "<hr />\n" @@ -17,6 +17,12 @@ import operator import urlparse import calendar +try: + # for high performance on list + from blist import * +except: + pass + import smtplib from email.mime.text import MIMEText @@ -294,6 +300,7 @@ def load_feed(): feed_id = sha1_hash.hexdigest() if list_of_articles != []: + list_of_articles.sort(lambda x,y: compare(y[0], x[0])) for article in list_of_articles: sha1_hash.update(article[2].encode('utf-8')) article_id = sha1_hash.hexdigest() @@ -311,15 +318,13 @@ def load_feed(): article[2], article[3], article[4], language, article[6]] if feed_id not in articles: - articles[feed_id] = [article_list] + try: + articles[feed_id] = blist([article_list]) + except Exception: + articles[feed_id] = [article_list] else: articles[feed_id].append(article_list) - - # sort articles by date for each 
feeds - for rss_feed_id in articles.keys(): - articles[rss_feed_id].sort(lambda x,y: compare(y[1], x[1])) - feeds[feed_id] = (len(articles[feed_id]), \ len([article for article in articles[feed_id] \ if article[5]=="0"]), \ diff --git a/var/feed.lst b/var/feed.lst index 8e94d3a0..9e7f4ee7 100755 --- a/var/feed.lst +++ b/var/feed.lst @@ -27,5 +27,4 @@ http://www.maitre-eolas.fr/feed/atom http://linuxfr.org/backend/journaux/rss20.rss http://www.le-tigre.net/spip.php?page=backend http://www.schneier.com/blog/index.rdf -http://feeds.bbci.co.uk/news/rss.xml http://www.handcrafted-games.com/index.php?feed/atom |