diff options
author | cedricbonhomme <devnull@localhost> | 2010-12-03 08:23:04 +0100 |
---|---|---|
committer | cedricbonhomme <devnull@localhost> | 2010-12-03 08:23:04 +0100 |
commit | fd1e4060ebfe5f8932aefe855e96a05cbc5626da (patch) | |
tree | e2b08d3659cf8607f9d4eda36ef95b7d87cffec5 /utils.py | |
parent | Updated code for new data structure. #5 (diff) | |
download | newspipe-fd1e4060ebfe5f8932aefe855e96a05cbc5626da.tar.gz newspipe-fd1e4060ebfe5f8932aefe855e96a05cbc5626da.tar.bz2 newspipe-fd1e4060ebfe5f8932aefe855e96a05cbc5626da.zip |
Updated code for new data structure. #6
Diffstat (limited to 'utils.py')
-rwxr-xr-x | utils.py | 8 |
1 files changed, 4 insertions, 4 deletions
```diff
@@ -155,15 +155,15 @@ def normalize_filename(name):
     file_name = strip_accents(file_name, "utf-8")
     return os.path.normpath(file_name)
 
-def top_words(dic_articles, n=10, size=5):
+def top_words(feeds, n=10, size=5):
     """
     Return the n most frequent words in a list.
     """
     words = Counter()
     wordre = re.compile(r'\b\w{%s,}\b' % size, re.I)
-    for rss_feed_id in dic_articles.keys():
-        for article in dic_articles[rss_feed_id]:
-            for word in wordre.findall(clear_string(article[4].encode('utf-8'))):
+    for feed in feeds.values():
+        for article in feed.articles.values():
+            for word in wordre.findall(clear_string(article.article_description.encode('utf-8'))):
                 words[word.lower()] += 1
     return words.most_common(n)
 
```