about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rwxr-xr-x pyAggr3g470r.py 12
-rwxr-xr-x utils.py 10
2 files changed, 18 insertions, 4 deletions
diff --git a/pyAggr3g470r.py b/pyAggr3g470r.py
index 26b44a63..8c33f7dc 100755
--- a/pyAggr3g470r.py
+++ b/pyAggr3g470r.py
@@ -211,7 +211,7 @@ class Root:
html += "<hr />\n"
if self.articles:
- html += "<h1>Statistics</h1>\n"
+ html += "<h1>Statistics</h1>\n<br />\n"
if "oice" not in utils.IMPORT_ERROR:
nb_french = 0
nb_english = 0
@@ -223,7 +223,13 @@ class Root:
nb_english += 1
nb_other = self.nb_articles - nb_french - nb_english
- html += "<table border=0>\n<tr><td>"
+ html += "<table border=0>\n"
+ html += '<tr><td colspan="2">'
+ html += "<h3>Tag cloud</h3>\n"
+ html += '<div style="width: 35%; overflow:hidden; text-align: justify">' + \
+ utils.tag_cloud(utils.top_words(self.articles, 50)) + '</div>'
+ html += "<td></tr>"
+ html += "<tr><td>"
html += "<h3>Words count</h3>\n"
html += "<ol>\n"
for word, frequency in self.top_words:
@@ -241,7 +247,7 @@ class Root:
html += """\t<li>%s articles in <a href="/language/%s">%s</a></li>\n""" % \
(locals()["nb_"+language], language, language)
html += "</ul>\n</td>\n<td>"
- html += """<img src="/var/histogram.png" /></td></tr></table>"""
+ html += """<img src="/var/histogram.png" /></td></tr></table>\n<br />\n"""
html += "<hr />\n"
html += htmlfooter
diff --git a/utils.py b/utils.py
index ffdbc082..5a262412 100755
--- a/utils.py
+++ b/utils.py
@@ -85,7 +85,7 @@ def top_words(dic_articles, n=10):
articles_content += remove_html_tags(article[4].encode('utf-8'))
words_gen = (word.strip(punctuation).lower() \
for word in articles_content.split() \
- if len(word) >= 5)
+ if len(word) >= 6)
words = defaultdict(int)
for word in words_gen:
words[word] += 1
@@ -93,6 +93,14 @@ def top_words(dic_articles, n=10):
key=lambda(word, count): (-count, word))[:n]
return top_words
+def tag_cloud(tags):
+ """
+ Generates a tags cloud.
+ """
+ tags.sort(lambda x,y: cmp(x[0], y[0]))
+ return ' '.join([('<font size="%d"><a href="/q/?querystring=%s">%s</a></font>' % \
+ (min(1+p*7/max([tag[1] for tag in tags]), 7), x, x)) for (x, p) in tags])
+
def create_histogram(words, file_name="./var/histogram.png"):
"""
Create a histogram.
bgstack15