about | summary | refs | log | tree | commit | diff
path: root/source
diff options
context:
space:
mode:
author: Cédric Bonhomme <kimble.mandel@gmail.com> 2012-11-28 12:56:14 +0100
committer: Cédric Bonhomme <kimble.mandel@gmail.com> 2012-11-28 12:56:14 +0100
commit: 8bbfeef1a3d3c52a8968ebc32f3139bd14ab9a6d (patch)
tree: 1900a87194415e1b00fcc6a62c5328574744622c /source
parent: Ignore stop words when calculating top words. (diff)
download: newspipe-8bbfeef1a3d3c52a8968ebc32f3139bd14ab9a6d.tar.gz
newspipe-8bbfeef1a3d3c52a8968ebc32f3139bd14ab9a6d.tar.bz2
newspipe-8bbfeef1a3d3c52a8968ebc32f3139bd14ab9a6d.zip
Little performance optimization.
Diffstat (limited to 'source')
-rwxr-xr-x source/utils.py 5
1 file changed, 2 insertions, 3 deletions
diff --git a/source/utils.py b/source/utils.py
index 475f3c06..b1392b0e 100755
--- a/source/utils.py
+++ b/source/utils.py
@@ -160,9 +160,8 @@ def top_words(articles, n=10, size=5):
words = Counter()
wordre = re.compile(r'\b\w{%s,}\b' % size, re.I)
for article in articles:
- for word in wordre.findall(clear_string(article["article_content"])):
- if word.lower() not in stop_words:
- words[word.lower()] += 1
+ for word in [elem.lower() for elem in wordre.findall(clear_string(article["article_content"])) if elem.lower() not in stop_words]:
+ words[word] += 1
return words.most_common(n)
def tag_cloud(tags, query="word_count"):
bgstack15