diff options
author | Cédric Bonhomme <kimble.mandel@gmail.com> | 2012-11-28 12:56:14 +0100 |
---|---|---|
committer | Cédric Bonhomme <kimble.mandel@gmail.com> | 2012-11-28 12:56:14 +0100 |
commit | 8bbfeef1a3d3c52a8968ebc32f3139bd14ab9a6d (patch) | |
tree | 1900a87194415e1b00fcc6a62c5328574744622c /source | |
parent | Ignore stop words when calculating top words. (diff) | |
download | newspipe-8bbfeef1a3d3c52a8968ebc32f3139bd14ab9a6d.tar.gz newspipe-8bbfeef1a3d3c52a8968ebc32f3139bd14ab9a6d.tar.bz2 newspipe-8bbfeef1a3d3c52a8968ebc32f3139bd14ab9a6d.zip |
Little performance optimization.
Diffstat (limited to 'source')
-rwxr-xr-x | source/utils.py | 5 |
1 file changed, 2 insertions, 3 deletions
diff --git a/source/utils.py b/source/utils.py
index 475f3c06..b1392b0e 100755
--- a/source/utils.py
+++ b/source/utils.py
@@ -160,9 +160,8 @@ def top_words(articles, n=10, size=5):
     words = Counter()
     wordre = re.compile(r'\b\w{%s,}\b' % size, re.I)
     for article in articles:
-        for word in wordre.findall(clear_string(article["article_content"])):
-            if word.lower() not in stop_words:
-                words[word.lower()] += 1
+        for word in [elem.lower() for elem in wordre.findall(clear_string(article["article_content"])) if elem.lower() not in stop_words]:
+            words[word] += 1
     return words.most_common(n)
 
 def tag_cloud(tags, query="word_count"):