diff options
author | cedricbonhomme <devnull@localhost> | 2010-07-05 22:15:34 +0200 |
---|---|---|
committer | cedricbonhomme <devnull@localhost> | 2010-07-05 22:15:34 +0200 |
commit | b3bcad5bd36b5bb646d86cfcb6a6d920beaca758 (patch) | |
tree | 9dc9b3c6141d8fb6f640e58a43ab678c36b877c4 /utils.py | |
parent | Better regular expression to remove HTML tags, special characters and consecut... (diff) | |
download | newspipe-b3bcad5bd36b5bb646d86cfcb6a6d920beaca758.tar.gz newspipe-b3bcad5bd36b5bb646d86cfcb6a6d920beaca758.tar.bz2 newspipe-b3bcad5bd36b5bb646d86cfcb6a6d920beaca758.zip |
Use a collections.Counter object (new in Python 2.7) instead of a collections.defaultdict.
Diffstat (limited to 'utils.py')
-rwxr-xr-x | utils.py | 4 |
1 file changed, 2 insertions, 2 deletions
@@ -28,7 +28,7 @@ from BeautifulSoup import BeautifulSoup from datetime import datetime from string import punctuation -from collections import defaultdict +from collections import Counter from collections import OrderedDict from StringIO import StringIO @@ -97,7 +97,7 @@ def top_words(dic_articles, n=10, size=5): words_gen = [word for word in articles_content.split() if len(word) > size] words_gen = [word.strip(punctuation).lower() for word in words_gen] - words = defaultdict(int) + words = Counter() for word in words_gen: words[word] += 1 top_words = sorted(words.iteritems(), |