diff options
author | cedricbonhomme <devnull@localhost> | 2010-10-18 09:01:28 +0200 |
---|---|---|
committer | cedricbonhomme <devnull@localhost> | 2010-10-18 09:01:28 +0200 |
commit | 5c60263e78be593e6df4ce83fd5bed85207b614f (patch) | |
tree | 3197222dd788d23b4fe1873d06665bd7ab392082 /utils.py | |
parent | Updated README. (diff) | |
download | newspipe-5c60263e78be593e6df4ce83fd5bed85207b614f.tar.gz newspipe-5c60263e78be593e6df4ce83fd5bed85207b614f.tar.bz2 newspipe-5c60263e78be593e6df4ce83fd5bed85207b614f.zip |
Improved the page of unread articles and optimized the top_words() function.
Diffstat (limited to 'utils.py')
-rwxr-xr-x | utils.py | 10 |
1 file changed, 4 insertions, 6 deletions
```diff
@@ -161,15 +161,13 @@ def top_words(dic_articles, n=10, size=5):
     """
     Return the n most frequent words in a list.
     """
-    words_gen = []
+    words = Counter()
     for rss_feed_id in dic_articles.keys():
         for article in dic_articles[rss_feed_id]:
-            words_gen.extend([word.strip(punctuation).lower() \
+            for good_word in [word.strip(punctuation).lower() \
                             for word in clear_string(article[4].encode('utf-8')).split() \
-                            if len(word.strip(punctuation)) >= size])
-    words = Counter()
-    for word in words_gen:
-        words[word] += 1
+                            if len(word.strip(punctuation)) >= size]:
+                words[good_word] += 1
     return words.most_common(n)
 
 def tag_cloud(tags, query="word_count"):
```