Improvements to the unread articles page and optimization of the top_words() function.

author: cedricbonhomme <devnull@localhost> 2010-10-18 09:01:28 +0200
committer: cedricbonhomme <devnull@localhost> 2010-10-18 09:01:28 +0200
commit: 5c60263e78be593e6df4ce83fd5bed85207b614f (patch)
tree: 3197222dd788d23b4fe1873d06665bd7ab392082 /utils.py
parent: Updated README. (diff)
download: newspipe-5c60263e78be593e6df4ce83fd5bed85207b614f.tar.gz
newspipe-5c60263e78be593e6df4ce83fd5bed85207b614f.tar.bz2
newspipe-5c60263e78be593e6df4ce83fd5bed85207b614f.zip
1 file changed, 4 insertions, 6 deletions
diff --git a/utils.py b/utils.py
index 1ff2a57e..18deff58 100755
--- a/utils.py
+++ b/utils.py
@@ -161,15 +161,13 @@ def top_words(dic_articles, n=10, size=5):
     """
     Return the n most frequent words in a list.
     """
-    words_gen = []
+    words = Counter()
     for rss_feed_id in dic_articles.keys():
         for article in dic_articles[rss_feed_id]:
-            words_gen.extend([word.strip(punctuation).lower() \
+            for good_word in [word.strip(punctuation).lower() \
                             for word in clear_string(article[4].encode('utf-8')).split() \
-                            if len(word.strip(punctuation)) >= size])
-    words = Counter()
-    for word in words_gen:
-        words[word] += 1
+                            if len(word.strip(punctuation)) >= size]:
+                words[good_word] += 1
     return words.most_common(n)
 
 def tag_cloud(tags, query="word_count"):
author	cedricbonhomme <devnull@localhost>	2010-10-18 09:01:28 +0200
committer	cedricbonhomme <devnull@localhost>	2010-10-18 09:01:28 +0200
commit	5c60263e78be593e6df4ce83fd5bed85207b614f (patch)
tree	3197222dd788d23b4fe1873d06665bd7ab392082 /utils.py
parent	Updated README. (diff)
download	newspipe-5c60263e78be593e6df4ce83fd5bed85207b614f.tar.gz newspipe-5c60263e78be593e6df4ce83fd5bed85207b614f.tar.bz2 newspipe-5c60263e78be593e6df4ce83fd5bed85207b614f.zip