diff options
author | cedricbonhomme <devnull@localhost> | 2010-04-16 14:47:01 +0200 |
---|---|---|
committer | cedricbonhomme <devnull@localhost> | 2010-04-16 14:47:01 +0200 |
commit | 496f3a624d56dacc28a3ee9be152ba4a891014a6 (patch) | |
tree | ad42ddb8275da8e28c2e3bdfd42ccee75116a187 | |
parent | Minor improvement. (diff) | |
download | newspipe-496f3a624d56dacc28a3ee9be152ba4a891014a6.tar.gz newspipe-496f3a624d56dacc28a3ee9be152ba4a891014a6.tar.bz2 newspipe-496f3a624d56dacc28a3ee9be152ba4a891014a6.zip |
Added tags cloud.
-rwxr-xr-x | pyAggr3g470r.py | 12 | ||||
-rwxr-xr-x | utils.py | 10 |
2 files changed, 18 insertions, 4 deletions
diff --git a/pyAggr3g470r.py b/pyAggr3g470r.py index 26b44a63..8c33f7dc 100755 --- a/pyAggr3g470r.py +++ b/pyAggr3g470r.py @@ -211,7 +211,7 @@ class Root: html += "<hr />\n" if self.articles: - html += "<h1>Statistics</h1>\n" + html += "<h1>Statistics</h1>\n<br />\n" if "oice" not in utils.IMPORT_ERROR: nb_french = 0 nb_english = 0 @@ -223,7 +223,13 @@ class Root: nb_english += 1 nb_other = self.nb_articles - nb_french - nb_english - html += "<table border=0>\n<tr><td>" + html += "<table border=0>\n" + html += '<tr><td colspan="2">' + html += "<h3>Tag cloud</h3>\n" + html += '<div style="width: 35%; overflow:hidden; text-align: justify">' + \ + utils.tag_cloud(utils.top_words(self.articles, 50)) + '</div>' + html += "<td></tr>" + html += "<tr><td>" html += "<h3>Words count</h3>\n" html += "<ol>\n" for word, frequency in self.top_words: @@ -241,7 +247,7 @@ class Root: html += """\t<li>%s articles in <a href="/language/%s">%s</a></li>\n""" % \ (locals()["nb_"+language], language, language) html += "</ul>\n</td>\n<td>" - html += """<img src="/var/histogram.png" /></td></tr></table>""" + html += """<img src="/var/histogram.png" /></td></tr></table>\n<br />\n""" html += "<hr />\n" html += htmlfooter @@ -85,7 +85,7 @@ def top_words(dic_articles, n=10): articles_content += remove_html_tags(article[4].encode('utf-8')) words_gen = (word.strip(punctuation).lower() \ for word in articles_content.split() \ - if len(word) >= 5) + if len(word) >= 6) words = defaultdict(int) for word in words_gen: words[word] += 1 @@ -93,6 +93,14 @@ def top_words(dic_articles, n=10): key=lambda(word, count): (-count, word))[:n] return top_words +def tag_cloud(tags): + """ + Generates a tags cloud. + """ + tags.sort(lambda x,y: cmp(x[0], y[0])) + return ' '.join([('<font size="%d"><a href="/q/?querystring=%s">%s</a></font>' % \ + (min(1+p*7/max([tag[1] for tag in tags]), 7), x, x)) for (x, p) in tags]) + def create_histogram(words, file_name="./var/histogram.png"): """ Create a histogram. |