Minor performance optimization in top_words().

author: Cédric Bonhomme <kimble.mandel@gmail.com> 2012-11-28 12:56:14 +0100
committer: Cédric Bonhomme <kimble.mandel@gmail.com> 2012-11-28 12:56:14 +0100
commit: 8bbfeef1a3d3c52a8968ebc32f3139bd14ab9a6d (patch)
tree: 1900a87194415e1b00fcc6a62c5328574744622c /source/utils.py
parent: Ignore stop words when calculating top words. (diff)
download: newspipe-8bbfeef1a3d3c52a8968ebc32f3139bd14ab9a6d.tar.gz
newspipe-8bbfeef1a3d3c52a8968ebc32f3139bd14ab9a6d.tar.bz2
newspipe-8bbfeef1a3d3c52a8968ebc32f3139bd14ab9a6d.zip
1 file changed, 2 insertions, 3 deletions
diff --git a/source/utils.py b/source/utils.py
index 475f3c06..b1392b0e 100755
--- a/source/utils.py
+++ b/source/utils.py
@@ -160,9 +160,8 @@ def top_words(articles, n=10, size=5):
     words = Counter()
     wordre = re.compile(r'\b\w{%s,}\b' % size, re.I)
     for article in articles:
-        for word in wordre.findall(clear_string(article["article_content"])):
-            if word.lower() not in stop_words:
-                words[word.lower()] += 1
+        for word in [elem.lower() for elem in wordre.findall(clear_string(article["article_content"])) if elem.lower() not in stop_words]:
+            words[word] += 1
     return words.most_common(n)
 
 def tag_cloud(tags, query="word_count"):
author	Cédric Bonhomme <kimble.mandel@gmail.com>	2012-11-28 12:56:14 +0100
committer	Cédric Bonhomme <kimble.mandel@gmail.com>	2012-11-28 12:56:14 +0100
commit	8bbfeef1a3d3c52a8968ebc32f3139bd14ab9a6d (patch)
tree	1900a87194415e1b00fcc6a62c5328574744622c /source/utils.py
parent	Ignore stop words when calculating top words. (diff)
download	newspipe-8bbfeef1a3d3c52a8968ebc32f3139bd14ab9a6d.tar.gz newspipe-8bbfeef1a3d3c52a8968ebc32f3139bd14ab9a6d.tar.bz2 newspipe-8bbfeef1a3d3c52a8968ebc32f3139bd14ab9a6d.zip