Added tag cloud.

author: cedricbonhomme <devnull@localhost> 2010-04-16 14:47:01 +0200
committer: cedricbonhomme <devnull@localhost> 2010-04-16 14:47:01 +0200
commit: 496f3a624d56dacc28a3ee9be152ba4a891014a6 (patch)
tree: ad42ddb8275da8e28c2e3bdfd42ccee75116a187
parent: Minor improvement. (diff)
download: newspipe-496f3a624d56dacc28a3ee9be152ba4a891014a6.tar.gz
newspipe-496f3a624d56dacc28a3ee9be152ba4a891014a6.tar.bz2
newspipe-496f3a624d56dacc28a3ee9be152ba4a891014a6.zip
2 files changed, 18 insertions, 4 deletions
diff --git a/pyAggr3g470r.py b/pyAggr3g470r.py
index 26b44a63..8c33f7dc 100755
--- a/pyAggr3g470r.py
+++ b/pyAggr3g470r.py
@@ -211,7 +211,7 @@ class Root:
 
         html += "<hr />\n"
         if self.articles:
-            html += "<h1>Statistics</h1>\n"
+            html += "<h1>Statistics</h1>\n<br />\n"
             if "oice" not in utils.IMPORT_ERROR:
                 nb_french = 0
                 nb_english = 0
@@ -223,7 +223,13 @@ class Root:
                             nb_english += 1
                 nb_other = self.nb_articles - nb_french - nb_english
 
-            html += "<table border=0>\n<tr><td>"
+            html += "<table border=0>\n"
+            html += '<tr><td colspan="2">'
+            html += "<h3>Tag cloud</h3>\n"
+            html += '<div style="width: 35%; overflow:hidden; text-align: justify">' + \
+                        utils.tag_cloud(utils.top_words(self.articles, 50)) + '</div>'
+            html += "<td></tr>"
+            html += "<tr><td>"
             html += "<h3>Words count</h3>\n"
             html += "<ol>\n"
             for word, frequency in self.top_words:
@@ -241,7 +247,7 @@ class Root:
                     html += """\t<li>%s articles in <a href="/language/%s">%s</a></li>\n""" % \
                                     (locals()["nb_"+language], language, language)
                 html += "</ul>\n</td>\n<td>"
-            html += """<img src="/var/histogram.png" /></td></tr></table>"""
+            html += """<img src="/var/histogram.png" /></td></tr></table>\n<br />\n"""
 
             html += "<hr />\n"
         html += htmlfooter
diff --git a/utils.py b/utils.py
index ffdbc082..5a262412 100755
--- a/utils.py
+++ b/utils.py
@@ -85,7 +85,7 @@ def top_words(dic_articles, n=10):
             articles_content += remove_html_tags(article[4].encode('utf-8'))
     words_gen = (word.strip(punctuation).lower() \
                         for word in articles_content.split() \
-                                if len(word) >= 5)
+                                if len(word) >= 6)
     words = defaultdict(int)
     for word in words_gen:
         words[word] += 1
@@ -93,6 +93,14 @@ def top_words(dic_articles, n=10):
                 key=lambda(word, count): (-count, word))[:n]
     return top_words
 
+def tag_cloud(tags):
+    """
+    Generates a tags cloud.
+    """
+    tags.sort(lambda x,y: cmp(x[0], y[0]))
+    return ' '.join([('<font size="%d"><a href="/q/?querystring=%s">%s</a></font>' % \
+                    (min(1+p*7/max([tag[1] for tag in tags]), 7), x, x)) for (x, p) in tags])
+
 def create_histogram(words, file_name="./var/histogram.png"):
     """
     Create a histogram.
author	cedricbonhomme <devnull@localhost>	2010-04-16 14:47:01 +0200
committer	cedricbonhomme <devnull@localhost>	2010-04-16 14:47:01 +0200
commit	496f3a624d56dacc28a3ee9be152ba4a891014a6 (patch)
tree	ad42ddb8275da8e28c2e3bdfd42ccee75116a187
parent	Minor improvement. (diff)
download	newspipe-496f3a624d56dacc28a3ee9be152ba4a891014a6.tar.gz newspipe-496f3a624d56dacc28a3ee9be152ba4a891014a6.tar.bz2 newspipe-496f3a624d56dacc28a3ee9be152ba4a891014a6.zip