aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorcedricbonhomme <devnull@localhost>2010-09-08 23:32:05 +0200
committercedricbonhomme <devnull@localhost>2010-09-08 23:32:05 +0200
commit2a5e7b5e92cc1f5015b029055e85806c10f85308 (patch)
treefd0d304eeb4914ba32f49b8260cdd61cc84c6d97
parentImprovement of the description of articles page. (diff)
downloadnewspipe-2a5e7b5e92cc1f5015b029055e85806c10f85308.tar.gz
newspipe-2a5e7b5e92cc1f5015b029055e85806c10f85308.tar.bz2
newspipe-2a5e7b5e92cc1f5015b029055e85806c10f85308.zip
Articles are now stored in the Python blist high-performance data structure. If the blist module is not present, simple lists are used.
-rwxr-xr-xpyAggr3g470r.py13
-rwxr-xr-xutils.py17
-rwxr-xr-xvar/feed.lst1
3 files changed, 15 insertions, 16 deletions
diff --git a/pyAggr3g470r.py b/pyAggr3g470r.py
index 7666c819..b5169268 100755
--- a/pyAggr3g470r.py
+++ b/pyAggr3g470r.py
@@ -234,15 +234,10 @@ class Root:
self.top_words = utils.top_words(self.articles, n=50, size=int(word_size))
html += "<h1>Statistics</h1>\n<br />\n"
if "oice" not in utils.IMPORT_ERROR:
- nb_french = 0
- nb_english = 0
+ counter = Counter()
for rss_feed_id in self.articles.keys():
for article in self.articles[rss_feed_id]:
- if article[6] == 'french':
- nb_french += 1
- elif article[6] == 'english':
- nb_english += 1
- nb_other = self.nb_articles - nb_french - nb_english
+ counter[article[6]] += 1
html += "Minimum size of a word: "
html += """<form method=get action="/management/"><select name="word_size">\n"""
@@ -262,9 +257,9 @@ class Root:
html += """<a href="http://pypi.python.org/pypi/oice.langdet/">oice.langdet</a>"""
else:
html += "<ul>\n"
- for language in ['english', 'french', 'other']:
+ for language in ['english', 'french']:
html += """\t<li>%s articles in <a href="/language/%s">%s</a></li>\n""" % \
- (locals()["nb_"+language], language, language)
+ (counter[language], language, language)
html += "</ul>\n<br />"
html += "<hr />\n"
diff --git a/utils.py b/utils.py
index b27f56a5..7f6fd2b0 100755
--- a/utils.py
+++ b/utils.py
@@ -17,6 +17,12 @@ import operator
import urlparse
import calendar
+try:
+ # for high performance on list
+ from blist import *
+except:
+ pass
+
import smtplib
from email.mime.text import MIMEText
@@ -294,6 +300,7 @@ def load_feed():
feed_id = sha1_hash.hexdigest()
if list_of_articles != []:
+ list_of_articles.sort(lambda x,y: compare(y[0], x[0]))
for article in list_of_articles:
sha1_hash.update(article[2].encode('utf-8'))
article_id = sha1_hash.hexdigest()
@@ -311,15 +318,13 @@ def load_feed():
article[2], article[3], article[4], language, article[6]]
if feed_id not in articles:
- articles[feed_id] = [article_list]
+ try:
+ articles[feed_id] = blist([article_list])
+ except Exception:
+ articles[feed_id] = [article_list]
else:
articles[feed_id].append(article_list)
-
- # sort articles by date for each feeds
- for rss_feed_id in articles.keys():
- articles[rss_feed_id].sort(lambda x,y: compare(y[1], x[1]))
-
feeds[feed_id] = (len(articles[feed_id]), \
len([article for article in articles[feed_id] \
if article[5]=="0"]), \
diff --git a/var/feed.lst b/var/feed.lst
index 8e94d3a0..9e7f4ee7 100755
--- a/var/feed.lst
+++ b/var/feed.lst
@@ -27,5 +27,4 @@ http://www.maitre-eolas.fr/feed/atom
http://linuxfr.org/backend/journaux/rss20.rss
http://www.le-tigre.net/spip.php?page=backend
http://www.schneier.com/blog/index.rdf
-http://feeds.bbci.co.uk/news/rss.xml
http://www.handcrafted-games.com/index.php?feed/atom
bgstack15