diff options
Diffstat (limited to 'source')
26 files changed, 1169 insertions, 900 deletions
diff --git a/source/cfg/cherrypy.cfg b/source/cfg/cherrypy.cfg index 6e5374fa..8e721e7b 100644 --- a/source/cfg/cherrypy.cfg +++ b/source/cfg/cherrypy.cfg @@ -1,10 +1,12 @@ [global] -log.error_file = "var/error.log" -log.access_file = "var/access.log" +server.socket_host: "0.0.0.0" +server.socket_port: 12556 server.environment = "production" engine.autoreload_on = True engine.autoreload_frequency = 5 engine.timeout_monitor.on = False +log.error_file = "var/error.log" +log.access_file = "var/access.log" [/] tools.staticdir.root = os.getcwd() @@ -21,4 +23,4 @@ tools.staticdir.match = "(?i)^.+\.css$" [/images] tools.staticdir.on = True tools.staticdir.dir = "img" -tools.staticdir.match = "(?i)^.+\.png$" +tools.staticdir.match = "(?i)^.+\.png$"
\ No newline at end of file diff --git a/source/clusters.py b/source/clusters.py deleted file mode 100755 index e53fac9b..00000000 --- a/source/clusters.py +++ /dev/null @@ -1,157 +0,0 @@ -#! /usr/bin/env python
-#-*- coding: utf-8 -*-
-
-import math
-import random
-
-from math import sqrt
-from PIL import Image, ImageDraw
-
-def readfile(filename):
- lines=[line for line in file(filename)]
-
- # First line is the column titles
- colnames=lines[0].strip().split('\t')[1:]
- rownames=[]
- data=[]
- for line in lines[1:]:
- p=line.strip().split('\t')
- # First column in each row is the rowname
- rownames.append(p[0])
- # The data for this row is the remainder of the row
- data.append([float(x) for x in p[1:]])
- return rownames,colnames,data
-
-def pearson(v1,v2):
- # Simple sums
- sum1=sum(v1)
- sum2=sum(v2)
-
- # Sums of the squares
- sum1Sq=sum([pow(v,2) for v in v1])
- sum2Sq=sum([pow(v,2) for v in v2])
-
- # Sum of the products
- pSum=sum([v1[i]*v2[i] for i in range(len(v1))])
-
- # Calculate r (Pearson score)
- num=pSum-(sum1*sum2/len(v1))
- den=sqrt((sum1Sq-pow(sum1,2)/len(v1))*(sum2Sq-pow(sum2,2)/len(v1)))
- if den==0: return 0
-
- return 1.0-num/den
-
-def tanimoto(v1, v2):
- c1, c2, shr = 0, 0, 0
- for i in range(len(v1)):
- if v1[i] != 0:
- c1 += 1 # in v1
- if v2[i] != 0:
- c2 += 1 # in v2
- if v1[i] != 0 and v2[i] != 0:
- shr += 1 # in both
- return 1.0 - (float(shr) / (c1 + c2 - shr))
-
-def euclidian(v1, v2):
- d = 0.0
- for i in range(len(v1)):
- d += (v1[i] - v2[i])**2
- return math.sqrt(d)
-
-def kcluster(rows,distance=pearson,k=4):
- # Determine the minimum and maximum values for each point
- ranges=[(min([row[i] for row in rows]),max([row[i] for row in rows]))
- for i in range(len(rows[0]))]
-
- # Create k randomly placed centroids
- clusters=[[random.random()*(ranges[i][1]-ranges[i][0])+ranges[i][0]
- for i in range(len(rows[0]))] for j in range(k)]
-
- lastmatches=None
- for t in range(100):
- print 'Iteration %d' % t
- bestmatches=[[] for i in range(k)]
-
- # Find which centroid is the closest for each row
- for j in range(len(rows)):
- row=rows[j]
- bestmatch=0
- for i in range(k):
- d=distance(clusters[i],row)
- if d<distance(clusters[bestmatch],row): bestmatch=i
- bestmatches[bestmatch].append(j)
-
- # If the results are the same as last time, this is complete
- if bestmatches==lastmatches: break
- lastmatches=bestmatches
-
- # Move the centroids to the average of their members
- for i in range(k):
- avgs=[0.0]*len(rows[0])
- if len(bestmatches[i])>0:
- for rowid in bestmatches[i]:
- for m in range(len(rows[rowid])):
- avgs[m]+=rows[rowid][m]
- for j in range(len(avgs)):
- avgs[j]/=len(bestmatches[i])
- clusters[i]=avgs
-
- return bestmatches
-
-def scaledown(data,distance=pearson,rate=0.01):
- n=len(data)
-
- # The real distances between every pair of items
- realdist=[[distance(data[i],data[j]) for j in range(n)]
- for i in range(0,n)]
-
- # Randomly initialize the starting points of the locations in 2D
- loc=[[random.random(),random.random()] for i in range(n)]
- fakedist=[[0.0 for j in range(n)] for i in range(n)]
-
- lasterror=None
- for m in range(0,1000):
- # Find projected distances
- for i in range(n):
- for j in range(n):
- fakedist[i][j]=sqrt(sum([pow(loc[i][x]-loc[j][x],2)
- for x in range(len(loc[i]))]))
-
- # Move points
- grad=[[0.0,0.0] for i in range(n)]
-
- totalerror=0
- for k in range(n):
- for j in range(n):
- if j==k: continue
- # The error is percent difference between the distances
- errorterm=(fakedist[j][k]-realdist[j][k])/realdist[j][k]
-
- # Each point needs to be moved away from or towards the other
- # point in proportion to how much error it has
- grad[k][0]+=((loc[k][0]-loc[j][0])/fakedist[j][k])*errorterm
- grad[k][1]+=((loc[k][1]-loc[j][1])/fakedist[j][k])*errorterm
-
- # Keep track of the total error
- totalerror+=abs(errorterm)
-
-
- # If the answer got worse by moving the points, we are done
- if lasterror and lasterror<totalerror: break
- lasterror=totalerror
-
- # Move each of the points by the learning rate times the gradient
- for k in range(n):
- loc[k][0]-=rate*grad[k][0]
- loc[k][1]-=rate*grad[k][1]
-
- return loc
-
-def draw2d(data,labels,jpeg='mds2d.jpg'):
- img=Image.new('RGB',(2000,2000),(255,255,255))
- draw=ImageDraw.Draw(img)
- for i in range(len(data)):
- x=(data[i][0]+0.5)*1000
- y=(data[i][1]+0.5)*1000
- draw.text((x,y),labels[i],(0,0,0))
- img.save(jpeg,'JPEG')
diff --git a/source/css/style.css b/source/css/style.css index 51a02c29..86036357 100755 --- a/source/css/style.css +++ b/source/css/style.css @@ -1,16 +1,12 @@ html, body { margin: 0px 0px 0px 5px; padding: 0px 0px 0px 0px; - width: 100%; height: 100%; overflow-x: hidden; background-color: white; color: black; -} - -body { text-align: justify; - font: 400 0.85em Cambria, Georgia, "Trebuchet MS", Verdana, sans-serif; + font: normal small 'Gill Sans','Gill Sans MT',Verdana,sans-serif; } img { @@ -45,28 +41,6 @@ h1 a, h2 a, h3 a { text-decoration: none; } -ol.lower { - list-style-type: lower-alpha; -} - -ol.upper { - list-style-type: upper-alpha; -} - -ol.roman { - list-style-type: lower-roman; -} - -ul { - margin: 0px 0px 0px 5px; - padding: 0em; -} - -li { - margin: 0px 0px 0px 5px; - padding: 0em; -} - a:link, a:visited { color: #003399; text-decoration:none @@ -76,16 +50,6 @@ a:hover { color: blue; } -dt { - margin: 0.5em 0em 0em 0em; - padding: 0em; - font-weight: bold; -} - -dd { - margin: 0em; -} - hr { color: white; border-top: dotted black; @@ -93,19 +57,35 @@ hr { margin: 1em 0em; } - -#heading { - position: absolute; - left: 0px; - top: 0px; - width: 100%; - z-index: 1; +/* Menu */ +.menu_container { + position:fixed; + margin:0px; + padding:0px; + z-index:4; } /* Navigation bars */ -.nav_container { position:fixed; top:145px; right:5px; margin:0px; padding:0px; white-space:nowrap; z-index:11; clear:both;} -.nav_container.horizontal { position:absolute; white-space:normal; z-index:25; width:*; } -.nav_container.horizontal div { float:right; padding-right:10px; } +.nav_container { + position:fixed; + top:112px; + right:5px; + margin:0px; + padding:0px; + white-space:nowrap; + z-index:3; + clear:both; +} +.nav_container.horizontal { + position:absolute; + white-space:normal; + z-index:4; + width:*; +} +.nav_container.horizontal div { + float:right; + padding-right:10px; +} #nav { position: absolute; @@ -191,7 +171,6 @@ blockquote.right { .clear { font-size: 1px; - line-height: 1px; height: 0px; clear: both; } @@ -223,34 +202,33 @@ blockquote.right { margin-right: -0.1em; } - /* CSS ToolTips */ - .tooltip { - color: #FFF; - outline: none; - text-decoration: none; - position: relative; - } - - .tooltip span { - color: #FFF; - margin-left: -999em; - position: absolute; - } - - .tooltip:hover span { - border-radius: 5px 5px; -moz-border-radius: 5px; -webkit-border-radius: 5px; - box-shadow: 5px 5px 5px rgba(0, 0, 0, 0.1); -webkit-box-shadow: 5px 5px rgba(0, 0, 0, 0.1); -moz-box-shadow: 5px 5px rgba(0, 0, 0, 0.1); - font-family: Calibri, Tahoma, Geneva, sans-serif; - position: absolute; left: 1em; top: 2em; z-index: 99; - margin-left: 0; width: 250px; - } - .classic { - padding: 0.8em 1em; - background: rgba(0, 0, 0, 0.85); - border: 5px 5px; - } - - * html a:hover { - background: transparent; - } +.tooltip { + color: #FFF; + outline: none; + text-decoration: none; + position: relative; +} + +.tooltip span { + color: #FFF; + margin-left: -999em; + position: absolute; +} + +.tooltip:hover span { + border-radius: 5px 5px; -moz-border-radius: 5px; -webkit-border-radius: 5px; + box-shadow: 5px 5px 5px rgba(0, 0, 0, 0.1); -webkit-box-shadow: 5px 5px rgba(0, 0, 0, 0.1); -moz-box-shadow: 5px 5px rgba(0, 0, 0, 0.1); + font-family: Calibri, Tahoma, Geneva, sans-serif; + position: absolute; left: 1em; top: 2em; z-index: 99; + margin-left: 0; width: 250px; +} +.classic { + padding: 0.8em 1em; + background: rgba(0, 0, 0, 0.85); + border: 5px 5px; +} + +* html a:hover { + background: transparent; +}
\ No newline at end of file diff --git a/source/generatefeedvector.py b/source/generatefeedvector.py deleted file mode 100755 index 3c33efa5..00000000 --- a/source/generatefeedvector.py +++ /dev/null @@ -1,65 +0,0 @@ -# -*- coding: utf-8 -*- -import feedparser -import re - -import conf -import mongodb - -# Returns title and dictionary of word counts for an RSS feed -def getwordcounts(feed_id): - wc={} - # Loop over all the entries - for article in mongo.get_articles_from_collection(feed_id): - summary = article["article_content"] - - # Extract a list of words - words = getwords(feed["feed_title"] + ' ' + summary) - for word in words: - wc.setdefault(word,0) - wc[word] += 1 - return feed["feed_title"], wc - -def getwords(html): - # Remove all the HTML tags - txt=re.compile(r'<[^>]+>').sub('',html) - - # Split words by all non-alpha characters - words=re.compile(r'[^A-Z^a-z]+').split(txt) - - # Convert to lowercase - return [word.lower() for word in words if word!=''] - - -apcount={} -wordcounts={} -mongo = mongodb.Articles(conf.MONGODB_ADDRESS, conf.MONGODB_PORT, \ - conf.MONGODB_DBNAME, conf.MONGODB_USER, conf.MONGODB_PASSWORD) -feeds = mongo.get_all_feeds() -for feed in feeds: - try: - title,wc=getwordcounts(feed["feed_id"]) - wordcounts[title]=wc - for word,count in list(wc.items()): - apcount.setdefault(word,0) - if count>1: - apcount[word]+=1 - except: - print('Failed to parse feed %s' % feed["feed_title"]) - -wordlist=[] -for w,bc in list(apcount.items()): - frac=float(bc)/len(feeds) - if frac>0.1 and frac<0.5: - wordlist.append(w) - -out=open('blogdata1.txt','w') -out.write('Blog') -for word in wordlist: out.write('\t%s' % word) -out.write('\n') -for blog,wc in list(wordcounts.items()): - print(blog) - out.write(blog) - for word in wordlist: - if word in wc: out.write('\t%d' % wc[word]) - else: out.write('\t0') - out.write('\n') diff --git a/source/mongodb.py b/source/mongodb.py index 68ccf5bc..b9e6686e 100644 --- a/source/mongodb.py +++ b/source/mongodb.py @@ -20,9 +20,9 @@ # along with this program. If not, see <http://www.gnu.org/licenses/> __author__ = "Cedric Bonhomme" -__version__ = "$Revision: 0.3 $" +__version__ = "$Revision: 0.4 $" __date__ = "$Date: 2012/03/03 $" -__revision__ = "$Date: 2012/05/01 $" +__revision__ = "$Date: 2012/12/02 $" __copyright__ = "Copyright (c) Cedric Bonhomme" __license__ = "GPLv3" @@ -82,7 +82,8 @@ class Articles(object): def get_all_feeds(self, condition=None): """ - Return all feeds object. + Return all feeds object. The returned list + is sorted by alphabetically (by feed name). """ feeds = [] collections = self.db.collection_names() @@ -97,35 +98,53 @@ class Articles(object): feeds.sort(key = lambda elem: elem['feed_title'].lower()) return feeds - def get_all_articles(self): - """ - Return articles of all feeds object (articles of all MongoDB articles collections). - All articles collections are of type 1. - """ - articles = [] - collections = self.db.collection_names() - for collection_name in collections: - collection = self.db[collection_name] - articles.extend(collection.find({'type':1})) - return articles + def get_articles(self, feed_id=None, article_id=None, condition=None, limit=1000000000): + """ + Return one or several articles. + The parameter "condition" is an optional requirement, for example: + get_articles(feed_id, condition=("article_readed", False)) will + return all unread articles of the feed 'feed_id'. + """ + if feed_id == None and article_id == None: + # Return all articles. + articles = [] + collections = self.db.collection_names() + for collection_name in collections: + collection = self.db[collection_name] + if condition is None: + articles.extend(collection.find({"type":1}, limit=limit)) + else: + articles.extend(collection.find({"type":1, condition[0]:condition[1]}, limit=limit)) + return articles - def get_article(self, feed_id, article_id): - """ - Get an article of a specified feed. - """ - collection = self.db[str(feed_id)] - return next(collection.find({"article_id":article_id})) + elif feed_id != None and article_id == None: + # Return all the articles of a collection. + collection = self.db[str(feed_id)] + if condition is None: + cursor = collection.find({"type":1}, limit=limit) + else: + cursor = collection.find({"type":1, condition[0]:condition[1]}, limit=limit) + return cursor.sort([("article_date", pymongo.DESCENDING)]) + + elif feed_id != None and article_id != None: + # Return a precise article. + collection = self.db[str(feed_id)] + return next(collection.find({"article_id":article_id})) - def get_articles_from_collection(self, feed_id, condition=None, limit=1000000000): + def get_favorites(self, feed_id=None): """ - Return all the articles of a collection. + Return favorites articles. """ - collection = self.db[str(feed_id)] - if condition is None: - cursor = collection.find({"type":1}, limit=limit) + if feed_id is not None: + # only for a feed + collection = self.db[feed_id] + cursor = collection.find({'type':1, 'article_like':True}) + return cursor.sort([("article_date", pymongo.DESCENDING)]) else: - cursor = collection.find({"type":1, condition[0]:condition[1]}, limit=limit) - return cursor.sort([("article_date", pymongo.DESCENDING)]) + favorites = [] + for feed_id in self.db.collection_names(): + favorites += self.get_favorites(feed_id) + return favorites def nb_articles(self, feed_id=None): """ @@ -142,32 +161,32 @@ class Articles(object): nb_articles += self.nb_articles(feed_id) return nb_articles - def get_favorites(self, feed_id=None): + def nb_unread_articles(self, feed_id=None): """ - Return favorites articles. + Return the number of unread articles of a feed + or of all the database. """ if feed_id is not None: - # only for a feed - collection = self.db[feed_id] - cursor = collection.find({'type':1, 'article_like':True}) - return cursor - + return self.get_articles(feed_id=feed_id, condition=("article_readed", False)).count() + else: + return len(self.get_articles(condition=("article_readed", False))) + + def like_article(self, like, feed_id, article_id): + """ + Like or unlike an article. + """ + collection = self.db[str(feed_id)] + collection.update({"article_id": article_id}, {"$set": {"article_like": like}}) + def nb_favorites(self, feed_id=None): """ Return the number of favorites articles of a feed or of all the database. """ if feed_id is not None: - # only for a feed - collection = self.db[feed_id] - cursor = collection.find({'type':1, 'article_like':True}) - return cursor.count() + return self.get_favorites(feed_id).count() else: - # for all feeds - nb_favorites = 0 - for feed_id in self.db.collection_names(): - nb_favorites += self.nb_favorites(feed_id) - return nb_favorites + return len(self.get_favorites()) def nb_mail_notifications(self): """ @@ -180,28 +199,6 @@ class Articles(object): nb_mail_notifications += cursor.count() return nb_mail_notifications - def nb_unread_articles(self, feed_id=None): - """ - Return the number of unread articles of a feed - or of all the database. - """ - if feed_id is not None: - collection = self.db[feed_id] - cursor = collection.find({'article_readed':False}) - return cursor.count() - else: - unread_articles = 0 - for feed_id in self.db.collection_names(): - unread_articles += self.nb_unread_articles(feed_id) - return unread_articles - - def like_article(self, like, feed_id, article_id): - """ - Like or unlike an article. - """ - collection = self.db[str(feed_id)] - collection.update({"article_id": article_id}, {"$set": {"article_like": like}}) - def mark_as_read(self, readed, feed_id=None, article_id=None): """ """ @@ -222,13 +219,6 @@ class Articles(object): collection = self.db[str(feed_id)] collection.update({"type": 0, "feed_id":feed_id}, {"$set": changes}, multi=True) - def list_collections(self): - """ - List all collections (feed). - """ - collections = self.db.collection_names() - return collections - # Functions on database def drop_database(self): """ diff --git a/source/pyAggr3g470r.py b/source/pyAggr3g470r.py index 9a16d437..9bc53b3e 100755 --- a/source/pyAggr3g470r.py +++ b/source/pyAggr3g470r.py @@ -22,13 +22,13 @@ __author__ = "Cedric Bonhomme" __version__ = "$Revision: 3.6 $" __date__ = "$Date: 2010/01/29 $" -__revision__ = "$Date: 2012/11/8 $" +__revision__ = "$Date: 2012/12/04 $" __copyright__ = "Copyright (c) Cedric Bonhomme" __license__ = "GPLv3" # # This file contains the "Root" class which describes -# all pages of pyAggr3g470r. These pages are: +# all pages (views) of pyAggr3g470r. These pages are: # - main page; # - management; # - history; @@ -36,13 +36,19 @@ __license__ = "GPLv3" # - notifications; # - unread; # - feed summary. +# Templates are described in ./templates with the Mako +# template library. # import os import re -import cherrypy import calendar +import cherrypy +from mako.template import Template +from mako.lookup import TemplateLookup +lookup = TemplateLookup(directories=['templates']) + from collections import Counter import datetime @@ -56,7 +62,7 @@ from auth import AuthController, require, member_of, name_is #from qrcode import qr -def error_page_404(status, message, traceback, version): +def error_404(status, message, traceback, version): """ Display an error if the page does not exist. """ @@ -97,9 +103,7 @@ htmlfooter = '<p>This software is under GPLv3 license. You are welcome to copy, htmlnav = '<body>\n<h1><div class="right innerlogo"><a href="/"><img src="/img/tuxrss.png"' + \ """ title="What's new today?"/></a>""" + \ - '</div><a name="top"><a href="/">pyAggr3g470r - News aggregator</a></a></h1>\n<a' + \ - ' href="http://bitbucket.org/cedricbonhomme/pyaggr3g470r/" rel="noreferrer" target="_blank">' + \ - 'pyAggr3g470r (source code)</a>' + '</div><a name="top"><a href="/">pyAggr3g470r - News aggregator</a></a></h1>\n' class RestrictedArea(object): """ @@ -140,126 +144,14 @@ class pyAggr3g470r(object): nb_unread_articles = self.mongo.nb_unread_articles() nb_favorites = self.mongo.nb_favorites() nb_mail_notifications = self.mongo.nb_mail_notifications() - - # if there are unread articles, display the number in the tab of the browser - html = htmlheader((nb_unread_articles and \ - ['(' + str(nb_unread_articles) +')'] or \ - [""])[0]) - html += htmlnav - html += self.create_right_menu() - html += """<div class="left inner">\n""" - - if feeds: - html += '<a href="/management/"><img src="/img/management.png" title="Management" /></a>\n' - html += '<a href="/history/"><img src="/img/history.png" title="History" /></a>\n' - html += ' \n' - - html += """<a href="/favorites/"><img src="/img/heart-32x32.png" title="Your favorites (%s)" /></a>\n""" % \ - (nb_favorites,) - - html += """<a href="/notifications/"><img src="/img/email-follow.png" title="Active e-mail notifications (%s)" /></a>\n""" % \ - (nb_mail_notifications,) - - html += ' ' - if nb_unread_articles != 0: - html += '<a href="/mark_as_read/"><img src="/img/mark-as-read.png" title="Mark articles as read" /></a>\n' - html += """<a href="/unread/"><img src="/img/unread.png" title="Unread article(s): %s" /></a>\n""" % \ - (nb_unread_articles,) - html += '<a accesskey="F" href="/fetch/"><img src="/img/check-news.png" title="Check for news" /></a>\n' - - - # The main page display all the feeds. - for feed in feeds: - html += """<h2><a name="%s"><a href="%s" rel="noreferrer" - target="_blank">%s</a></a> - <a href="%s" rel="noreferrer" - target="_blank"><img src="%s" width="28" height="28" /></a></h2>\n""" % \ - (feed["feed_id"], feed["site_link"], feed["feed_title"], \ - feed["feed_link"], feed["feed_image"]) - - # The main page display only 10 articles by feeds. - for article in self.mongo.get_articles_from_collection(feed["feed_id"], limit=10): - if article["article_readed"] == False: - # not readed articles are in bold - not_read_begin, not_read_end = "<b>", "</b>" - else: - not_read_begin, not_read_end = "", "" - - # display a heart for faved articles - if article["article_like"] == True: - like = """ <img src="/img/heart.png" title="I like this article!" />""" - else: - like = "" - - # Descrition for the CSS ToolTips - article_content = utils.clear_string(article["article_content"]) - if article_content: - description = " ".join(article_content.split(' ')[:55]) - else: - description = "No description." - # Title of the article - article_title = article["article_title"] - if len(article_title) >= 80: - article_title = article_title[:80] + " ..." - - # a description line per article (date, title of the article and - # CSS description tooltips on mouse over) - html += article["article_date"].strftime('%Y-%m-%d %H:%M') + " - " + \ - """<a class="tooltip" href="/article/%s:%s" rel="noreferrer" target="_blank">%s%s%s<span class="classic">%s</span></a>""" % \ - (feed["feed_id"], article["article_id"], not_read_begin, \ - article_title, not_read_end, description) + like + "<br />\n" - html += "<br />\n" - - # some options for the current feed - html += """<a href="/articles/%s">All articles</a> """ % (feed["feed_id"],) - html += """<a href="/feed/%s">Feed summary</a> """ % (feed["feed_id"],) - if self.mongo.nb_unread_articles(feed["feed_id"]) != 0: - html += """ <a href="/mark_as_read/Feed_FromMainPage:%s">Mark all as read</a>""" % (feed["feed_id"],) - html += """ <a href="/unread/%s" title="Unread article(s)">Unread article(s) (%s)</a>""" % (feed["feed_id"], self.mongo.nb_unread_articles(feed["feed_id"])) - if feed["mail"] == "0": - html += """<br />\n<a href="/mail_notification/1:%s" title="By e-mail">Stay tuned</a>""" % (feed["feed_id"],) - else: - html += """<br />\n<a href="/mail_notification/0:%s" title="By e-mail">Stop staying tuned</a>""" % (feed["feed_id"],) - html += """<h4><a href="/#top">Top</a></h4>""" - html += "<hr />\n" - html += htmlfooter - return html + tmpl = lookup.get_template("index.html") + return tmpl.render(feeds=feeds, nb_feeds=len(feeds), mongo=self.mongo, \ + nb_favorites=nb_favorites, nb_unread_articles=nb_unread_articles, \ + nb_mail_notifications=nb_mail_notifications, header_text=nb_unread_articles) index.exposed = True @require() - def create_right_menu(self): - """ - Create the right menu. - """ - html = """<div class="right inner">\n""" - html += """<form method=get action="/search/"><input type="search" name="query" value="" placeholder="Search articles" maxlength=2048 autocomplete="on"></form>\n""" - html += "<hr />\n" - # insert the list of feeds in the menu - html += self.create_list_of_feeds() - html += "</div>\n" - - return html - - @require() - def create_list_of_feeds(self): - """ - Create the list of feeds. - """ - feeds = self.mongo.get_all_feeds() - html = """<div class="nav_container">Your feeds (%s):<br />\n""" % len(feeds) - for feed in feeds: - if self.mongo.nb_unread_articles(feed["feed_id"]) != 0: - # not readed articles are in bold - not_read_begin, not_read_end = "<b>", "</b>" - else: - not_read_begin, not_read_end = "", "" - html += """<div><a href="/#%s">%s</a> (<a href="/unread/%s" title="Unread article(s)">%s%s%s</a> / %s)</div>""" % \ - (feed["feed_id"], feed["feed_title"], feed["feed_id"], not_read_begin, \ - self.mongo.nb_unread_articles(feed["feed_id"]), not_read_end, self.mongo.nb_articles(feed["feed_id"])) - return html + "</div>" - - @require() def management(self): """ Management page. @@ -271,51 +163,10 @@ class pyAggr3g470r(object): nb_favorites = self.mongo.nb_favorites() nb_articles = self.mongo.nb_articles() nb_unread_articles = self.mongo.nb_unread_articles() - - html = htmlheader() - html += htmlnav - html += """<div class="left inner">\n""" - html += "<h1>Add Feeds</h1>\n" - # Form: add a feed - html += """<form method=get action="/add_feed/"><input type="url" name="url" placeholder="URL of a site" maxlength=2048 autocomplete="off">\n<input type="submit" value="OK"></form>\n""" - - if feeds: - # Form: delete a feed - html += "<h1>Delete Feeds</h1>\n" - html += """<form method=get action="/remove_feed/"><select name="feed_id">\n""" - for feed in feeds: - html += """\t<option value="%s">%s</option>\n""" % (feed["feed_id"], feed["feed_title"]) - html += """</select><input type="submit" value="OK"></form>\n""" - - html += """<p>Active e-mail notifications: <a href="/notifications/">%s</a></p>\n""" % \ - (nb_mail_notifications,) - html += """<p>You like <a href="/favorites/">%s</a> article(s).</p>\n""" % \ - (nb_favorites, ) - - html += "<hr />\n" - - # Informations about the data base of articles - html += """<p>%s article(s) are stored in the database with - <a href="/unread/">%s unread article(s)</a>.<br />\n""" % \ - (nb_articles, nb_unread_articles) - #html += """Database: %s.\n<br />Size: %s bytes.<br />\n""" % \ - #(os.path.abspath(utils.sqlite_base), os.path.getsize(utils.sqlite_base)) - html += '<a href="/statistics/">Advanced statistics.</a></p>\n' - - html += """<form method=get action="/fetch/">\n<input type="submit" value="Fetch all feeds"></form>\n""" - html += """<form method=get action="/drop_base">\n<input type="submit" value="Delete all articles"></form>\n""" - - # Export functions - html += "<h1>Export articles</h1>\n\n" - html += """<form method=get action="/export/"><select name="export_method">\n""" - html += """\t<option value="export_html" selected='selected'>HTML (simple Webzine)</option>\n""" - html += """\t<option value="export_epub">ePub</option>\n""" - html += """\t<option value="export_pdf">PDF</option>\n""" - html += """\t<option value="export_txt">Text</option>\n""" - html += """</select>\n\t<input type="submit" value="Export">\n</form>\n""" - html += "<hr />" - html += htmlfooter - return html + tmpl = lookup.get_template("management.html") + return tmpl.render(feeds=feeds, nb_mail_notifications=nb_mail_notifications, \ + nb_favorites=nb_favorites, nb_articles=nb_articles, \ + nb_unread_articles=nb_unread_articles) management.exposed = True @@ -324,26 +175,11 @@ class pyAggr3g470r(object): """ More advanced statistics. """ - articles = self.mongo.get_all_articles() - html = htmlheader() - html += htmlnav - html += """<div class="left inner">\n""" - - # Some statistics (most frequent word) - if articles: - top_words = utils.top_words(articles, n=50, size=int(word_size)) - html += "<h1>Statistics</h1>\n" - html += "<h3>Tag cloud</h3>\n" - # Tags cloud - html += '<form method=get action="/statistics/">\n' - html += "Minimum size of a word:\n" - html += """<input type="number" name="word_size" value="%s" min="2" max="15" step="1" size="2"></form>\n""" % (word_size) - html += '<div style="width: 35%; overflow:hidden; text-align: justify">' + \ - utils.tag_cloud(top_words) + '</div>' - html += "<hr />\n" - - html += htmlfooter - return html + articles = self.mongo.get_articles() + top_words = utils.top_words(articles, n=50, size=int(word_size)) + tag_cloud = utils.tag_cloud(top_words) + tmpl = lookup.get_template("statistics.html") + return tmpl.render(articles=articles, word_size=word_size, tag_cloud=tag_cloud) statistics.exposed = True @@ -358,69 +194,10 @@ class pyAggr3g470r(object): feed_id = None if param == "Feed": feed_id, _, query = value.partition(':') - html = htmlheader() - html += htmlnav - html += """<div class="left inner">""" - html += """<h1>Articles containing the string <i>%s</i></h1><br />""" % (query,) - - if feed_id is not None: - for article in self.mongo.get_articles_from_collection(feed_id): - article_content = utils.clear_string(article.article_description) - if not article_content: - utils.clear_string(article.article_title) - if wordre.findall(article_content) != []: - if article.article_readed == "0": - # not readed articles are in bold - not_read_begin, not_read_end = "<b>", "</b>" - else: - not_read_begin, not_read_end = "", "" - - html += article.article_date + " - " + not_read_begin + \ - """<a href="/article/%s:%s" rel="noreferrer" target="_blank">%s</a>""" % \ - (feed_id, article.article_id, article.article_title) + \ - not_read_end + """<br />\n""" - else: - feeds = self.mongo.get_all_feeds() - for feed in feeds: - new_feed_section = True - for article in self.mongo.get_articles_from_collection(feed["feed_id"]): - article_content = utils.clear_string(article["article_content"]) - if not article_content: - utils.clear_string(article["article_title"]) - if wordre.findall(article_content) != []: - if new_feed_section is True: - new_feed_section = False - html += """<h2><a href="/articles/%s" rel="noreferrer" target="_blank">%s</a><a href="%s" rel="noreferrer" target="_blank"><img src="%s" width="28" height="28" /></a></h2>\n""" % \ - (feed["feed_id"], feed["feed_title"], feed["feed_link"], feed["feed_image"]) - - if article["article_readed"] == False: - # not readed articles are in bold - not_read_begin, not_read_end = "<b>", "</b>" - else: - not_read_begin, not_read_end = "", "" - - # display a heart for faved articles - if article["article_like"] == True: - like = """ <img src="/img/heart.png" title="I like this article!" />""" - else: - like = "" - - # descrition for the CSS ToolTips - article_content = utils.clear_string(article["article_content"]) - if article_content: - description = " ".join(article_content[:500].split(' ')[:-1]) - else: - description = "No description." - - # a description line per article (date, title of the article and - # CSS description tooltips on mouse over) - html += article["article_date"].strftime('%Y-%m-%d %H:%M') + " - " + \ - """<a class="tooltip" href="/article/%s:%s" rel="noreferrer" target="_blank">%s%s%s<span class="classic">%s</span></a>""" % \ - (feed["feed_id"], article["article_id"], not_read_begin, \ - article["article_title"][:150], not_read_end, description) + like + "<br />\n" - html += "<hr />" - html += htmlfooter - return html + feeds = self.mongo.get_all_feeds() + tmpl = lookup.get_template("search.html") + return tmpl.render(feeds=feeds, feed_id=feed_id, query=query, \ + wordre=wordre, mongo=self.mongo) search.exposed = True @@ -443,44 +220,25 @@ class pyAggr3g470r(object): try: feed_id, article_id = param.split(':') feed = self.mongo.get_feed(feed_id) - articles = self.mongo.get_articles_from_collection(feed_id) - article = self.mongo.get_article(feed_id, article_id) + articles = self.mongo.get_articles(feed_id) + article = self.mongo.get_articles(feed_id, article_id) except: - return self.error_page("Bad URL. This article do not exists.") - html = htmlheader(article["article_title"]) - html += htmlnav - html += """<div>""" + return self.error("Bad URL. This article do not exists.") if article["article_readed"] == False: # if the current article is not yet readed, update the database self.mark_as_read("Article:"+article["article_id"]+":"+feed["feed_id"]) - html += '\n<div style="width: 50%; overflow:hidden; text-align: justify; margin:0 auto">\n' - # Title of the article - html += """<h1><i>%s</i> from <a href="/feed/%s">%s</a></h1>\n<br />\n""" % \ - (article["article_title"], feed_id, feed["feed_title"]) - if article["article_like"] == True: - html += """<a href="/like/0:%s:%s"><img src="/img/heart.png" title="I like this article!" /></a>""" % \ - (feed_id, article["article_id"]) - else: - html += """<a href="/like/1:%s:%s"><img src="/img/heart_open.png" title="Click if you like this article." /></a>""" % \ - (feed_id, article["article_id"]) - html += """ <a href="/delete_article/%s:%s"><img src="/img/cross.png" title="Delete this article" /></a>""" % \ - (feed_id, article["article_id"]) - html += "<br /><br />" - # Description (full content) of the article description = article["article_content"] if description: p = re.compile(r'<code><') q = re.compile(r'></code>') - description = p.sub('<code><', description) description = q.sub('></code>', description) - - html += description + "\n<br /><br /><br />" + description = description + "\n<br /><br /><br />" else: - html += "No description available.\n<br /><br /><br />" + description += "No description available.\n<br /><br /><br />" """ # Generation of the QR Code for the current article try: @@ -500,8 +258,8 @@ class pyAggr3g470r(object): # Previous and following articles previous, following = None, None - liste = self.mongo.get_articles_from_collection(feed_id) - for current_article in self.mongo.get_articles_from_collection(feed_id): + liste = self.mongo.get_articles(feed_id) + for current_article in self.mongo.get_articles(feed_id): next(articles) if current_article["article_id"] == article_id: break @@ -513,70 +271,9 @@ class pyAggr3g470r(object): except StopIteration: previous = liste[0] - html += """<div style="float:right;"><a href="/article/%s:%s" title="%s"><img src="/img/following-article.png" /></a></div>\n""" % \ - (feed_id, following["article_id"], following["article_title"]) - html += """<div style="float:left;"><a href="/article/%s:%s" title="%s"><img src="/img/previous-article.png" /></a></div>\n""" % \ - (feed_id, previous["article_id"], previous["article_title"]) - - html += "\n</div>\n" - - # Footer menu - html += "<hr />\n" - html += """\n<a href="/plain_text/%s:%s">Plain text</a>\n""" % (feed_id, article["article_id"]) - html += """ - <a href="/epub/%s:%s">Export to EPUB</a>\n""" % (feed_id, article["article_id"]) - html += """<br />\n<a href="%s">Complete story</a>\n<br />\n""" % (article["article_link"],) - - # Share this article: - html += "Share this article:<br />\n" - # on Diaspora - html += """<a href="javascript:(function(){f='https://%s/bookmarklet?url=%s&title=%s&notes=%s&v=1&';a=function(){if(!window.open(f+'noui=1&jump=doclose','diasporav1','location=yes,links=no,scrollbars=no,toolbar=no,width=620,height=250'))location.href=f+'jump=yes'};if(/Firefox/.test(navigator.userAgent)){setTimeout(a,0)}else{a()}})()">\n\t - <img src="/img/diaspora.png" title="Share on Diaspora" /></a>\n""" % \ - (conf.DIASPORA_POD, article["article_link"], article["article_title"], "via pyAggr3g470r") - - # on Identi.ca - html += """\n\n<a href="http://identi.ca/index.php?action=newnotice&status_textarea=%s: %s" title="Share on Identi.ca" target="_blank"><img src="/img/identica.png" /></a>""" % \ - (article["article_title"], article["article_link"]) - - # on Hacker News - html += """\n\n<a href='javascript:window.location="http://news.ycombinator.com/submitlink?u="+encodeURIComponent("%s")+"&t="+encodeURIComponent("%s")'><img src="/img/hacker-news.png" title="Share on Hacker News" /></a>""" % \ - (article["article_link"], article["article_title"]) - - # on Pinboard - html += """\n\n\t<a href="https://api.pinboard.in/v1/posts/add?url=%s&description=%s" - rel="noreferrer" target="_blank">\n - <img src="/img/pinboard.png" title="Share on Pinboard" /></a>""" % \ - (article["article_link"], article["article_title"]) - - # on Digg - html += """\n\n\t<a href="http://digg.com/submit?url=%s&title=%s" - rel="noreferrer" target="_blank">\n - <img src="/img/digg.png" title="Share on Digg" /></a>""" % \ - (article["article_link"], article["article_title"]) - # on reddit - html += """\n\n\t<a href="http://reddit.com/submit?url=%s&title=%s" - rel="noreferrer" target="_blank">\n - <img src="/img/reddit.png" title="Share on reddit" /></a>""" % \ - (article["article_link"], article["article_title"]) - # on Scoopeo - html += """\n\n\t<a href="http://scoopeo.com/scoop/new?newurl=%s&title=%s" - rel="noreferrer" target="_blank">\n - <img src="/img/scoopeo.png" title="Share on Scoopeo" /></a>""" % \ - (article["article_link"], article["article_title"]) - # on Blogmarks - html += """\n\n\t<a href="http://blogmarks.net/my/new.php?url=%s&title=%s" - rel="noreferrer" target="_blank">\n - <img src="/img/blogmarks.png" title="Share on Blogmarks" /></a>""" % \ - (article["article_link"], article["article_title"]) - - # Google +1 button - html += """\n\n<g:plusone size="standard" count="true" href="%s"></g:plusone>""" % \ - (article["article_link"],) - - - # QRCode (for smartphone) - html += """<br />\n<a href="/var/qrcode/%s.png"><img src="/var/qrcode/%s.png" title="Share with your smartphone" width="500" height="500" /></a>""" % (article_id, article_id) - html += "<hr />\n" + htmlfooter - return html + tmpl = lookup.get_template("article.html") + return tmpl.render(header_text=article["article_title"], article=article, previous=previous, following=following, \ + diaspora=conf.DIASPORA_POD, feed=feed, description=description) article.exposed = True @@ -589,12 +286,12 @@ class pyAggr3g470r(object): """ try: feed = self.mongo.get_feed(feed_id) - articles = self.mongo.get_articles_from_collection(feed_id, limit=10) + articles = self.mongo.get_articles(feed_id, limit=10) nb_articles_feed = self.mongo.nb_articles(feed_id) nb_articles_total = self.mongo.nb_articles() nb_unread_articles_feed = self.mongo.nb_unread_articles(feed_id) except KeyError: - return self.error_page("This feed do not exists.") + return self.error("This feed do not exists.") html = htmlheader() html += htmlnav html += """<div class="left inner">""" @@ -696,7 +393,7 @@ class pyAggr3g470r(object): (feed["feed_id"],) dic = {} - top_words = utils.top_words(articles = self.mongo.get_articles_from_collection(feed_id), n=50, size=int(word_size)) + top_words = utils.top_words(articles = self.mongo.get_articles(feed_id), n=50, size=int(word_size)) html += "</br />\n<h1>Tag cloud</h1>\n" # Tags cloud html += """<form method=get action="/feed/%s">\n""" % (feed["feed_id"],) @@ -719,50 +416,11 @@ class pyAggr3g470r(object): """ try: feed = self.mongo.get_feed(feed_id) - articles = self.mongo.get_articles_from_collection(feed_id) + articles = self.mongo.get_articles(feed_id) except KeyError: - return self.error_page("This feed do not exists.") - html = htmlheader() - html += htmlnav - html += """<div class="right inner">\n""" - html += """<a href="/mark_as_read/Feed:%s">Mark all articles from this feed as read</a>""" % (feed_id,) - html += """<br />\n<form method=get action="/search/%s"><input type="search" name="query" value="" placeholder="Search this feed" maxlength=2048 autocomplete="on"></form>\n""" % ("Feed:"+feed_id,) - html += "<hr />\n" - html += self.create_list_of_feeds() - html += """</div> <div class="left inner">""" - html += """<h1>Articles of the feed <i><a href="/feed/%s">%s</a></i></h1><br />""" % (feed_id, feed["feed_title"]) - - for article in articles: - - if article["article_readed"] == False: - # not readed articles are in bold - not_read_begin, not_read_end = "<b>", "</b>" - else: - not_read_begin, not_read_end = "", "" - - if article["article_like"] == True: - like = """ <img src="/img/heart.png" title="I like this article!" />""" - else: - like = "" - - # descrition for the CSS ToolTips - article_content = utils.clear_string(article["article_content"]) - if article_content: - description = " ".join(article_content[:500].split(' ')[:-1]) - else: - description = "No description." - - # a description line per article (date, title of the article and - # CSS description tooltips on mouse over) - html += article["article_date"].strftime('%Y-%m-%d %H:%M') + " - " + \ - """<a class="tooltip" href="/article/%s:%s" rel="noreferrer" target="_blank">%s%s%s<span class="classic">%s</span></a>""" % \ - (feed_id, article["article_id"], not_read_begin, \ - article["article_title"][:150], not_read_end, description) + like + "<br />\n" - - html += """\n<h4><a href="/">All feeds</a></h4>""" - html += "<hr />\n" - html += htmlfooter - return html + return self.error("This feed do not exists.") + tmpl = lookup.get_template("articles.html") + return tmpl.render(articles=articles, feed=feed) articles.exposed = True @@ -786,7 +444,7 @@ class pyAggr3g470r(object): nb_unread = 0 # For all unread article of the current feed. - for article in self.mongo.get_articles_from_collection(feed["feed_id"], condition=("article_readed", False)): + for article in self.mongo.get_articles(feed["feed_id"], condition=("article_readed", False)): nb_unread += 1 if new_feed_section is True: new_feed_section = False @@ -816,12 +474,12 @@ class pyAggr3g470r(object): try: feed = self.mongo.get_feed(feed_id) except: - self.error_page("This feed do not exists.") + self.error("This feed do not exists.") html += """<h1>Unread article(s) of the feed <a href="/articles/%s">%s</a></h1> <br />""" % (feed_id, feed["feed_title"]) # For all unread article of the feed. - for article in self.mongo.get_articles_from_collection(feed_id, condition=("article_readed", False)): + for article in self.mongo.get_articles(feed_id, condition=("article_readed", False)): # descrition for the CSS ToolTips article_content = utils.clear_string(article["article_content"]) if article_content: @@ -877,7 +535,7 @@ class pyAggr3g470r(object): timeline = Counter() for feed in feeds: new_feed_section = True - for article in self.mongo.get_articles_from_collection(feed["feed_id"]): + for article in self.mongo.get_articles(feed["feed_id"]): if query == "all": timeline[str(article["article_date"]).split(' ')[0].split('-')[0]] += 1 @@ -944,37 +602,28 @@ class pyAggr3g470r(object): try: feed_id, article_id = target.split(':') feed = self.mongo.get_feed(feed_id) - article = self.mongo.get_article(feed_id, article_id) + article = self.mongo.get_articles(feed_id, article_id) except: - return self.error_page("Bad URL. This article do not exists.") - html = htmlheader() - html += htmlnav - html += """<div class="left inner">""" - html += """<h1><i>%s</i> from <a href="/articles/%s">%s</a></h1>\n<br />\n"""% \ - (article["article_title"], feed_id, feed["feed_title"]) + return self.error("Bad URL. This article do not exists.") description = utils.clear_string(article["article_content"]) - if description: - html += description - else: - html += "No description available." - html += "\n<hr />\n" + htmlfooter - return html + if not description: + description = "Unvailable" + tmpl = lookup.get_template("plain_text.html") + return tmpl.render(feed_title=feed["feed_title"], \ + article_title=article["article_title"], \ + description = description) plain_text.exposed = True @require() - def error_page(self, message): + def error(self, message): """ Display a message (bad feed id, bad article id, etc.) """ - html = htmlheader() - html += htmlnav - html += """<div class="left inner">""" - html += """%s""" % message - html += "\n<hr />\n" + htmlfooter - return html + tmpl = lookup.get_template("error.html") + return tmpl.render(message=message) - error_page.exposed = True + error.exposed = True @require() def mark_as_read(self, target=""): @@ -1002,21 +651,9 @@ class pyAggr3g470r(object): """ List all active e-mail notifications. """ - html = htmlheader() - html += htmlnav - html += """<div class="left inner">""" feeds = self.mongo.get_all_feeds(condition=("mail",True)) - if feeds != []: - html += "<h1>You are receiving e-mails for the following feeds:</h1>\n" - for feed in feeds: - html += """\t<a href="/articles/%s">%s</a> - <a href="/mail_notification/0:%s">Stop</a><br />\n""" % \ - (feed["feed_id"], feed["feed_title"], feed["feed_id"]) - else: - html += "<p>No active notifications.<p>\n" - html += """<p>Notifications are sent to: <a href="mail:%s">%s</a></p>""" % \ - (conf.mail_to, conf.mail_to) - html += "\n<hr />\n" + htmlfooter - return html + tmpl = lookup.get_template("notifications.html") + return tmpl.render(feeds=feeds, mail_to=conf.mail_to) notifications.exposed = True @@ -1028,8 +665,7 @@ class pyAggr3g470r(object): try: action, feed_id = param.split(':') except: - return self.error_page("Bad URL. This feed do not exists.") - + return self.error("Bad URL. This feed do not exists.") return self.index() mail_notification.exposed = True @@ -1041,9 +677,9 @@ class pyAggr3g470r(object): """ try: like, feed_id, article_id = param.split(':') - articles = self.mongo.get_article(feed_id, article_id) + articles = self.mongo.get_articles(feed_id, article_id) except: - return self.error_page("Bad URL. This article do not exists.") + return self.error("Bad URL. This article do not exists.") self.mongo.like_article("1"==like, feed_id, article_id) return self.article(feed_id+":"+article_id) @@ -1055,36 +691,33 @@ class pyAggr3g470r(object): List of favorites articles """ feeds = self.mongo.get_all_feeds() - html = htmlheader() - html += htmlnav - html += """<div class="left inner">""" - html += "<h1>Your favorites articles</h1>" + articles = {} for feed in feeds: - new_feed_section = True - for article in self.mongo.get_articles_from_collection(feed["feed_id"]): - if article["article_like"] == True: - if new_feed_section is True: - new_feed_section = False - html += """<h2><a name="%s"><a href="%s" rel="noreferrer"target="_blank">%s</a></a><a href="%s" rel="noreferrer" target="_blank"><img src="%s" width="28" height="28" /></a></h2>\n""" % \ - (feed["feed_id"], feed["site_link"], feed["feed_title"], feed["feed_link"], feed["feed_image"]) + articles[feed["feed_id"]] = self.mongo.get_favorites(feed["feed_id"]) + tmpl = lookup.get_template("favorites.html") + return tmpl.render(feeds=feeds, \ + articles=articles) - # descrition for the CSS ToolTips - article_content = utils.clear_string(article["article_content"]) - if article_content: - description = " ".join(article_content[:500].split(' ')[:-1]) - else: - description = "No description." + favorites.exposed = True - # a description line per article (date, title of the article and - # CSS description tooltips on mouse over) - html += article["article_date"].strftime('%Y-%m-%d %H:%M') + " - " + \ - """<a class="tooltip" href="/article/%s:%s" rel="noreferrer" target="_blank">%s<span class="classic">%s</span></a><br />\n""" % \ - (feed["feed_id"], article["article_id"], article["article_title"][:150], description) - html += "<hr />\n" - html += htmlfooter - return html + @require() + def inactives(self, nb_days=365): + """ + List of favorites articles + """ + feeds = self.mongo.get_all_feeds() + today = datetime.datetime.now() + inactives = [] + for feed in feeds: + more_recent_article = self.mongo.get_articles(feed["feed_id"], limit=1) + last_post = next(more_recent_article)["article_date"] + elapsed = today - last_post + if elapsed > datetime.timedelta(days=int(nb_days)): + inactives.append((feed, elapsed)) + tmpl = lookup.get_template("inactives.html") + return tmpl.render(inactives=inactives, nb_days=int(nb_days)) - favorites.exposed = True + inactives.exposed = True @require() def add_feed(self, url): @@ -1097,7 +730,7 @@ class pyAggr3g470r(object): # search the feed in the HTML page with BeautifulSoup feed_url = utils.search_feed(url) if feed_url is None: - return self.error_page("Impossible to find a feed at this URL.") + return self.error("Impossible to find a feed at this URL.") # if a feed exists else: result = utils.add_feed(feed_url) @@ -1118,20 +751,12 @@ class pyAggr3g470r(object): """ Remove a feed from the file feed.lst and from the MongoDB database. """ - html = htmlheader() - html += htmlnav - html += """<div class="left inner">""" - feed = self.mongo.get_feed(feed_id) self.mongo.delete_feed(feed_id) utils.remove_feed(feed["feed_link"]) - - html += """<p>All articles from the feed <i>%s</i> are now removed from the base.</p><br />""" % \ - (feed["feed_title"],) - html += """<a href="/management/">Back to the management page.</a><br />\n""" - html += "<hr />\n" - html += htmlfooter - return html + message = """All articles from the feed <i>%s</i> are now removed from the base.""" % (feed["feed_title"],) + tmpl = lookup.get_template("confirmation.html") + return tmpl.render(message=message) remove_feed.exposed = True @@ -1140,15 +765,10 @@ class pyAggr3g470r(object): """ Enables to change the URL of a feed already present in the database. """ - html = htmlheader() - html += htmlnav - html += """<div class="left inner">""" self.mongo.update_feed(feed_id, {"feed_link":new_feed_url}) utils.change_feed_url(old_feed_url, new_feed_url) - html += "<p>The URL of the feed has been changed.</p>" - html += "<hr />\n" - html += htmlfooter - return html + tmpl = lookup.get_template("confirmation.html") + return tmpl.render(message="The URL of the feed has been changed.") change_feed_url.exposed = True @@ -1157,14 +777,9 @@ class pyAggr3g470r(object): """ Enables to change the name of a feed. """ - html = htmlheader() - html += htmlnav - html += """<div class="left inner">""" self.mongo.update_feed(feed_id, {"feed_title":new_feed_name}) - html += "<p>The name of the feed has been changed.</p>" - html += "<hr />\n" - html += htmlfooter - return html + tmpl = lookup.get_template("confirmation.html") + return tmpl.render(message="The name of the feed has been changed.") change_feed_name.exposed = True @@ -1173,14 +788,9 @@ class pyAggr3g470r(object): """ Enables to change the name of a feed. """ - html = htmlheader() - html += htmlnav - html += """<div class="left inner">""" self.mongo.update_feed(feed_id, {"feed_image":new_feed_logo}) - html += "<p>The logo of the feed has been changed.</p>" - html += "<hr />\n" - html += htmlfooter - return html + tmpl = lookup.get_template("confirmation.html") + return tmpl.render(message="The logo of the feed has been changed.") change_feed_logo.exposed = True @@ -1193,7 +803,7 @@ class pyAggr3g470r(object): feed_id, article_id = param.split(':') self.mongo.delete_article(feed_id, article_id) except: - return self.error_page("Bad URL. This article do not exists.") + return self.error("Bad URL. This article do not exists.") return self.index() @@ -1220,7 +830,7 @@ class pyAggr3g470r(object): getattr(export, export_method)(self.mongo) except Exception as e: print(e) - return self.error_page(e) + return self.error(e) return self.management() export.exposed = True @@ -1233,18 +843,18 @@ class pyAggr3g470r(object): try: from epub import ez_epub except Exception as e: - return self.error_page(e) + return self.error(e) try: feed_id, article_id = param.split(':') except: - return self.error_page("Bad URL.") + return self.error("Bad URL.") try: feed_id, article_id = param.split(':') feed = self.mongo.get_feed(feed_id) - articles = self.mongo.get_articles_from_collection(feed_id) - article = self.mongo.get_article(feed_id, article_id) + articles = self.mongo.get_articles(feed_id) + article = self.mongo.get_articles(feed_id, article_id) except: - self.error_page("This article do not exists.") + self.error("This article do not exists.") try: folder = conf.path + "/var/export/epub/" os.makedirs(folder) @@ -1265,7 +875,5 @@ if __name__ == '__main__': # Point of entry in execution mode root = pyAggr3g470r() root.favicon_ico = cherrypy.tools.staticfile.handler(filename=os.path.join(conf.path + "/img/favicon.png")) - cherrypy.config.update({ 'server.socket_port': 12556, 'server.socket_host': "0.0.0.0"}) - cherrypy.config.update({'error_page.404': error_page_404}) - + cherrypy.config.update({'error_page.404': error_404}) cherrypy.quickstart(root, "/" ,config=conf.path + "/cfg/cherrypy.cfg") diff --git a/source/templates/article.html b/source/templates/article.html new file mode 100644 index 00000000..bd459c4e --- /dev/null +++ b/source/templates/article.html @@ -0,0 +1,58 @@ +## article.html +<%inherit file="base.html"/> +<div> + <div style="width: 50%; overflow:hidden; text-align: justify; margin:0 auto"> + <h1><i>${article["article_title"]}</i> from <a href="/feed/${feed['feed_id']}">${feed["feed_title"]}</a></h1> + <br /> + %if article["article_like"]: + <a href="/like/0:${feed['feed_id']}:${article['article_id']}"><img src="/img/heart.png" title="I like this article!" /></a> + %else: + <a href="/like/1:${feed['feed_id']}:${article['article_id']}"><img src="/img/heart_open.png" title="Click if you like this article." /></a> + %endif + <a href="/delete_article/${feed['feed_id']}:${article['article_id']}"><img src="/img/cross.png" title="Delete this article" /></a> + <br /><br /> + + ${description} + + <div style="float:right;"><a href="/article/${feed['feed_id']}:${following['article_id']}" title="${following['article_title']}"><img src="/img/following-article.png" /></a></div> + <div style="float:left;"><a href="/article/${feed['feed_id']}:${previous['article_id']}" title="${previous['article_title']}"><img src="/img/previous-article.png" /></a></div> + </div> + + <hr /> + <a href="/plain_text/${feed['feed_id']}:${article['article_id']}">Plain text</a> + - <a href="/epub/${feed['feed_id']}:${article['article_id']}">Export to EPUB</a> + <br /> + <a href="${article['article_link']}">Complete story</a> + <br /> + + Share this article:<br /> + <a href="javascript:(function(){f='https://${diaspora}/bookmarklet?url=${article['article_link']}&title=${article['article_title']}&notes=via pyAggr3g470r&v=1&';a=function(){if(!window.open(f+'noui=1&jump=doclose','diasporav1','location=yes,links=no,scrollbars=no,toolbar=no,width=620,height=250'))location.href=f+'jump=yes'};if(/Firefox/.test(navigator.userAgent)){setTimeout(a,0)}else{a()}})()"> + <img src="/img/diaspora.png" title="Share on Diaspora" /></a> + + <a href="http://identi.ca/index.php?action=newnotice&status_textarea=${article['article_title']}:${article['article_link']}" title="Share on Identi.ca" target="_blank"><img src="/img/identica.png" /></a> + + <a href="https://api.pinboard.in/v1/posts/add?url=${article['article_link']}&description=${article['article_title']}" + rel="noreferrer" target="_blank"> + <img src="/img/pinboard.png" title="Share on Pinboard" /></a> + + <a href="http://digg.com/submit?url=${article['article_link']}&title=${article['article_title']}" + rel="noreferrer" target="_blank"> + <img src="/img/digg.png" title="Share on Digg" /></a> + + <a href="http://reddit.com/submit?url=${article['article_link']}&title=${article['article_title']}" + rel="noreferrer" target="_blank"> + <img src="/img/reddit.png" title="Share on reddit" /></a> + + <a href="http://scoopeo.com/scoop/new?newurl=${article['article_link']}&title=${article['article_title']}" + rel="noreferrer" target="_blank"> + <img src="/img/scoopeo.png" title="Share on Scoopeo" /></a> + + <a href="http://blogmarks.net/my/new.php?url=${article['article_link']}&title=${article['article_title']}" + rel="noreferrer" target="_blank"> + <img src="/img/blogmarks.png" title="Share on Blogmarks" /></a> + + <g:plusone size="standard" count="true" href="${article['article_link']}"></g:plusone> + + + <br /> + <a href="/var/qrcode/${article['article_id']}.png"><img src="/var/qrcode/${article['article_id']}.png" title="Share with your smartphone" width="500" height="500" /></a>
\ No newline at end of file diff --git a/source/templates/articles.html b/source/templates/articles.html new file mode 100644 index 00000000..cbba2508 --- /dev/null +++ b/source/templates/articles.html @@ -0,0 +1,39 @@ +## articles.html +<%inherit file="base.html"/> +<% +import utils +%> +<div class="right inner"> + <a href="/mark_as_read/Feed:${feed['feed_id']}">Mark all articles from this feed as read</a> + <br /> + <form method=get action="/search/Feed${feed['feed_id']}"> + <input type="search" name="query" value="" placeholder="Search this feed" maxlength=2048 autocomplete="on"> + </form> + <hr /> +</div> + +<div class="left inner"> + <h1>Articles of the feed <i><a href="/feed/${feed['feed_id']}">${feed['feed_title']}</a></i></h1> + <br /> + %for article in articles: + <% + if article["article_readed"] == False: + not_read_begin, not_read_end = "<b>", "</b>" + else: + not_read_begin, not_read_end = "", "" + + if article["article_like"] == True: + like = """ <img src="/img/heart.png" title="I like this article!" />""" + else: + like = "" + + article_content = utils.clear_string(article["article_content"]) + if article_content: + description = " ".join(article_content[:500].split(' ')[:-1]) + else: + description = "No description." + %> + ${article["article_date"].strftime('%Y-%m-%d %H:%M')} - <a class="tooltip" href="/article/${feed['feed_id']}:${article['article_id']}" rel="noreferrer" target="_blank">${not_read_begin}${article["article_title"][:150]}${not_read_end}<span class="classic">${description}</span></a> + <br /> + %endfor + <h4><a href="/">All feeds</a></h4>
\ No newline at end of file diff --git a/source/templates/base.html b/source/templates/base.html new file mode 100644 index 00000000..c6b33a82 --- /dev/null +++ b/source/templates/base.html @@ -0,0 +1,28 @@ +## base.html +<!DOCTYPE html> +<html> +<head> + %if header_text is UNDEFINED: + <title>pyAggr3g470r</title> + %elif header_text == 0: + <title>pyAggr3g470r</title> + %else: + <title>${header_text} - pyAggr3g470r</title> + %endif + <link rel="stylesheet" type="text/css" href="/css/style.css" /> + <meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/> + <script type="text/javascript" src="https://apis.google.com/js/plusone.js"></script> +</head> +<body> + <div class="right innerlogo"> + <a href="/"><img src="/img/tuxrss.png" title="What's new today?"/></a> + </div> + <a name="top"><a href="/"><h1>pyAggr3g470r</h1></a></a> + ${self.body()} + <hr /> + <p>This software is under GPLv3 license. You are welcome to copy, modify or + redistribute the source code according to the <a href="http://www.gnu.org/licenses/gpl-3.0.txt">GPLv3</a> license.<br /> + <a href="https://bitbucket.org/cedricbonhomme/pyaggr3g470r/" rel="noreferrer" target="_blank">Source code</a> of pyAggr3g470r.</p> + </div> +</body> +</html>
\ No newline at end of file diff --git a/source/templates/confirmation.html b/source/templates/confirmation.html new file mode 100644 index 00000000..7f631d3c --- /dev/null +++ b/source/templates/confirmation.html @@ -0,0 +1,4 @@ +## confirmation.html +<%inherit file="base.html"/> +<div class="left inner"> +<p>${message}</p> diff --git a/source/templates/error.html b/source/templates/error.html new file mode 100644 index 00000000..dbdf66db --- /dev/null +++ b/source/templates/error.html @@ -0,0 +1,4 @@ +## error.html +<%inherit file="base.html"/> +<div class="left inner"> +${message}
\ No newline at end of file diff --git a/source/templates/favorites.html b/source/templates/favorites.html new file mode 100644 index 00000000..5eba8a3c --- /dev/null +++ b/source/templates/favorites.html @@ -0,0 +1,30 @@ +## favorites.html +<%inherit file="base.html"/> +<% +import utils +%> +<div class="left inner"> + <h1>Your favorites articles</h1> + %for feed in feeds: + <% + new_feed_section = True + %> + %for article in articles[feed["feed_id"]]: + <% + if new_feed_section: + new_feed_section = False + title = """<h2><a name="%s"><a href="%s" rel="noreferrer"target="_blank">%s</a></a><a href="%s" rel="noreferrer" target="_blank"><img src="%s" width="28" height="28" /></a></h2>\n""" % \ + (feed["feed_id"], feed["site_link"], feed["feed_title"], feed["feed_link"], feed["feed_image"]) + else: + title = "" + article_content = utils.clear_string(article["article_content"]) + if article_content: + description = " ".join(article_content[:500].split(' ')[:-1]) + else: + description = "No description." + %> + ${title} + + ${article["article_date"].strftime('%Y-%m-%d %H:%M')} - <a class="tooltip" href="/article/${feed['feed_id']}:${article['article_id']}" rel="noreferrer" target="_blank">${article["article_title"][:150]}<span class="classic">${description}</span></a><br /> + %endfor + %endfor
\ No newline at end of file diff --git a/source/templates/inactives.html b/source/templates/inactives.html new file mode 100644 index 00000000..57482b61 --- /dev/null +++ b/source/templates/inactives.html @@ -0,0 +1,15 @@ +## inactives.html +<%inherit file="base.html"/> +<div class="left inner"> + %if inactives != []: + <form method=get action="/inactives/"> + <h1>Feeds with no recent articles since <input type="number" name="nb_days" value="${nb_days}" min="0" max="1000000" step="1" size="4" style="text-align: center" /> days:</h1> + </form> + <ul> + %for item in inactives: + <li><a href="/feed/${item[0]["feed_id"]}">${item[0]["feed_title"]}</a> (${item[1].days} days)</li> + %endfor + </ul> + %else: + <p>No inactive feeds.<p> + %endif diff --git a/source/templates/index.html b/source/templates/index.html new file mode 100644 index 00000000..fea71154 --- /dev/null +++ b/source/templates/index.html @@ -0,0 +1,104 @@ +## index.html +<%inherit file="base.html"/> +<% +import utils +%> +<div class="right inner"> + <form method=get action="/search/"> + <input type="search" name="query" value="" placeholder="Search articles" maxlength=2048 autocomplete="on"> + </form> + <hr /> + <div class="nav_container">Your feeds (${nb_feeds}):<br /> + <% + html = "" + %> + %for feed in feeds: + <% + if mongo.nb_unread_articles(feed["feed_id"]) != 0: + # not readed articles are in bold + not_read_begin, not_read_end = "<b>", "</b>" + else: + not_read_begin, not_read_end = "", "" + html += """<div><a href="/#%s">%s</a> (<a href="/unread/%s" title="Unread article(s)">%s%s%s</a> / %s)</div>\n""" % \ + (feed["feed_id"], feed["feed_title"], feed["feed_id"], not_read_begin, \ + mongo.nb_unread_articles(feed["feed_id"]), not_read_end, mongo.nb_articles(feed["feed_id"])) + %> + %endfor + ${html} + </div> +</div> + +<div class="left inner"> + <div class="menu_container"> + %if feeds: + <a href="/management/"><img src="/img/management.png" title="Management" /></a> + <a href="/history/"><img src="/img/history.png" title="History" /></a> + + <a href="/favorites/"><img src="/img/heart-32x32.png" title="Your favorites (${nb_favorites})" /></a> + <a href="/notifications/"><img src="/img/email-follow.png" title="Active e-mail notifications (${nb_mail_notifications})" /></a> + + %if nb_unread_articles != 0: + <a href="/mark_as_read/"><img src="/img/mark-as-read.png" title="Mark articles as read" /></a> + <a href="/unread/"><img src="/img/unread.png" title="Unread article(s): ${nb_unread_articles}" /></a> + %endif + %endif + <a accesskey="F" href="/fetch/"><img src="/img/check-news.png" title="Check for news" /></a> + </div><br/> + <% + html = "" + %> + <% + for feed in feeds: + html += """<a name="%s"></a>\n""" % (feed["feed_id"],) + html += """<h2><a href="%s" rel="noreferrer" target="_blank">%s</a> + <a href="%s" rel="noreferrer" + target="_blank"><img src="%s" width="28" height="28" /></a></h2>\n<br />""" % \ + (feed["site_link"], feed["feed_title"], \ + feed["feed_link"], feed["feed_image"]) + + # The main page display only 10 articles by feeds. + for article in mongo.get_articles(feed["feed_id"], limit=10): + if article["article_readed"] == False: + # not readed articles are in bold + not_read_begin, not_read_end = "<b>", "</b>" + else: + not_read_begin, not_read_end = "", "" + + # display a heart for faved articles + if article["article_like"] == True: + like = """ <img src="/img/heart.png" title="I like this article!" />""" + else: + like = "" + + # Descrition for the CSS ToolTips + article_content = utils.clear_string(article["article_content"]) + if article_content: + description = " ".join(article_content.split(' ')[:55]) + else: + description = "No description." + # Title of the article + article_title = article["article_title"] + if len(article_title) >= 80: + article_title = article_title[:80] + " ..." + + # a description line per article (date, title of the article and + # CSS description tooltips on mouse over) + html += article["article_date"].strftime('%Y-%m-%d %H:%M') + " - " + \ + """<a class="tooltip" href="/article/%s:%s" rel="noreferrer" target="_blank">%s%s%s<span class="classic">%s</span></a>""" % \ + (feed["feed_id"], article["article_id"], not_read_begin, \ + article_title, not_read_end, description) + like + "<br />\n" + html += "<br />\n" + + # some options for the current feed + html += """<a href="/articles/%s">All articles</a> """ % (feed["feed_id"],) + html += """<a href="/feed/%s">Feed summary</a> """ % (feed["feed_id"],) + if mongo.nb_unread_articles(feed["feed_id"]) != 0: + html += """ <a href="/mark_as_read/Feed_FromMainPage:%s">Mark all as read</a>""" % (feed["feed_id"],) + html += """ <a href="/unread/%s" title="Unread article(s)">Unread article(s) (%s)</a>""" % (feed["feed_id"], mongo.nb_unread_articles(feed["feed_id"])) + if feed["mail"] == "0": + html += """<br />\n<a href="/mail_notification/1:%s" title="By e-mail">Stay tuned</a>""" % (feed["feed_id"],) + else: + html += """<br />\n<a href="/mail_notification/0:%s" title="By e-mail">Stop staying tuned</a>""" % (feed["feed_id"],) + html += """<h4><a href="/#top">Top</a></h4>\n""" + %> + ${html} diff --git a/source/templates/management.html b/source/templates/management.html new file mode 100644 index 00000000..5bc38439 --- /dev/null +++ b/source/templates/management.html @@ -0,0 +1,55 @@ +## management.html +<%inherit file="base.html"/> +<div class="left inner"> + <h1>Add Feeds</h1> + <form method=get action="/add_feed/"> + <input type="url" name="url" placeholder="URL of a site" maxlength=2048 autocomplete="off"> + <input type="submit" value="OK"> + </form> + + %if feeds: + <h1>Delete Feeds</h1> + <form method=get action="/remove_feed/"> + <select name="feed_id"> + %for feed in feeds: + <option value="${feed['feed_id']}">${feed['feed_title']}</option> + %endfor + </select> + <input type="submit" value="OK"> + </form> + + <hr /> + + <h1>Facts</h1> + <ul> + <li>active e-mail notifications: <a href="/notifications/">${nb_mail_notifications}</a>;</li> + <li>you like <a href="/favorites/">${nb_favorites}</a> article(s);</li> + <li><a href="/statistics/">tag clouds</a>;</li> + <li><a href="/inactives/">inactive feeds</a>.</li> + </ul> + %endif + + <hr /> + + <h1>Database</h1> + <p>${nb_articles} article(s) are stored in the database with <a href="/unread/">${nb_unread_articles} unread article(s)</a>. + + <form method=get action="/fetch/"> + <input type="submit" value="Fetch all feeds"> + </form> + <form method=get action="/drop_base"> + <input type="submit" value="Delete all articles"> + </form> + + <hr /> + + <h1>Export articles</h1> + <form method=get action="/export/"> + <select name="export_method"> + <option value="export_html" selected='selected'>HTML (simple Webzine)</option> + <option value="export_epub">ePub</option> + <option value="export_pdf">PDF</option> + <option value="export_txt">Text</option> + </select> + <input type="submit" value="Export"> + </form> diff --git a/source/templates/notifications.html b/source/templates/notifications.html new file mode 100644 index 00000000..35aa72a6 --- /dev/null +++ b/source/templates/notifications.html @@ -0,0 +1,14 @@ +## article.html +<%inherit file="base.html"/> +<div class="left inner"> + %if feeds != []: + <h1>You are receiving e-mails for the following feeds:</h1> + <ul> + %for feed in feeds: + <li><a href="/feed/${feed['feed_id']}">${feed['feed_title']}</a> - <a href="/mail_notification/0:${feed['feed_id']}">Stop</a></li> + %endfor + </ul> + %else: + <p>No active notifications.<p> + %endif + <p>Notifications are sent to: <a href="mail:${mail_to}">${mail_to}</a></p> diff --git a/source/templates/plain_text.html b/source/templates/plain_text.html new file mode 100644 index 00000000..44b7da91 --- /dev/null +++ b/source/templates/plain_text.html @@ -0,0 +1,5 @@ +## plain_text.html +<%inherit file="base.html"/> +<div class="left inner"> + <h1><i>${article_title}</i> from <a href="/articles/%s">${feed_title}</a></h1><br /> + ${description}
\ No newline at end of file diff --git a/source/templates/search.html b/source/templates/search.html new file mode 100644 index 00000000..e4a4ac9c --- /dev/null +++ b/source/templates/search.html @@ -0,0 +1,56 @@ +## search.html +<%inherit file="base.html"/> +<% +import re +import utils +%> +<div class="left inner"> +<h1>Articles containing the string <i>${query}</i></h1> +<br /> +<% + html = "" +%> +%if feed_id is None: + %for feed in feeds: + <% + new_feed_section = True + for article in mongo.get_articles(feed["feed_id"]): + article_content = utils.clear_string(article["article_content"]) + if not article_content: + utils.clear_string(article["article_title"]) + if wordre.findall(article_content) != []: + if new_feed_section is True: + new_feed_section = False + html += """<h2><a href="/articles/%s" rel="noreferrer" target="_blank">%s</a><a href="%s" rel="noreferrer" target="_blank"><img src="%s" width="28" height="28" /></a></h2>\n""" % \ + (feed["feed_id"], feed["feed_title"], feed["feed_link"], feed["feed_image"]) + + if article["article_readed"] == False: + # not readed articles are in bold + not_read_begin, not_read_end = "<b>", "</b>" + else: + not_read_begin, not_read_end = "", "" + + # display a heart for faved articles + if article["article_like"] == True: + like = """ <img src="/img/heart.png" title="I like this article!" />""" + else: + like = "" + + # descrition for the CSS ToolTips + article_content = utils.clear_string(article["article_content"]) + if article_content: + description = " ".join(article_content[:500].split(' ')[:-1]) + else: + description = "No description." + + # a description line per article (date, title of the article and + # CSS description tooltips on mouse over) + html += article["article_date"].strftime('%Y-%m-%d %H:%M') + " - " + \ + """<a class="tooltip" href="/article/%s:%s" rel="noreferrer" target="_blank">%s%s%s<span class="classic">%s</span></a>""" % \ + (feed["feed_id"], article["article_id"], not_read_begin, \ + article["article_title"][:150], not_read_end, description) + like + "<br />\n" + %> + %endfor +%endif + ${html} +
\ No newline at end of file diff --git a/source/templates/statistics.html b/source/templates/statistics.html new file mode 100644 index 00000000..5dfcbfa8 --- /dev/null +++ b/source/templates/statistics.html @@ -0,0 +1,14 @@ +## statistics.html +<%inherit file="base.html"/> +<div class="left inner"> + %if articles: + <h1>Statistics</h1> + <h3>Tag cloud</h3> + <form method=get action="/statistics/"> + Minimum size of a word: + <input type="number" name="word_size" value="${word_size}" min="2" max="15" step="1" size="2" /> + </form> + <div style="width: 35%; overflow:hidden; text-align: justify"> + ${tag_cloud} + </div> + %endif
\ No newline at end of file diff --git a/source/testclusters.py b/source/testclusters.py deleted file mode 100644 index 728e9c1b..00000000 --- a/source/testclusters.py +++ /dev/null @@ -1,24 +0,0 @@ -#! /usr/bin/env python -#-*- coding: utf-8 -*- - -import clusters - -K = 7 - -blognames,words,data = clusters.readfile("blogdata1.txt") - -coords = clusters.scaledown(data) - -print "Generating clusters..." -kclust = clusters.kcluster(data, k=K, distance=clusters.pearson) -print -print "Clusters:" -for i in range(K): - print "Cluster" + str(i) - print ", ".join([blognames[r] for r in kclust[i]]) - print - - - - -clusters.draw2d(coords,blognames,jpeg='mds2d.jpg') diff --git a/source/utils.py b/source/utils.py index 7681fea7..b1392b0e 100755 --- a/source/utils.py +++ b/source/utils.py @@ -36,6 +36,7 @@ __license__ = "GPLv3" import os import re +import glob import operator import urllib.parse import calendar @@ -139,15 +140,28 @@ def normalize_filename(name): file_name = strip_accents(file_name, "utf-8") return os.path.normpath(file_name) +def load_stop_words(): + """ + Load the stop words and return them in a list. + """ + stop_words_lists = glob.glob('./var/stop_words/*.txt') + stop_words = [] + + for stop_wods_list in stop_words_lists: + with open(stop_wods_list, "r") as stop_wods_file: + stop_words += stop_wods_file.read().split(";") + return stop_words + def top_words(articles, n=10, size=5): """ Return the n most frequent words in a list. """ + stop_words = load_stop_words() words = Counter() wordre = re.compile(r'\b\w{%s,}\b' % size, re.I) for article in articles: - for word in wordre.findall(clear_string(article["article_content"])): - words[word.lower()] += 1 + for word in [elem.lower() for elem in wordre.findall(clear_string(article["article_content"])) if elem.lower() not in stop_words]: + words[word] += 1 return words.most_common(n) def tag_cloud(tags, query="word_count"): diff --git a/source/var/english-stop-words.txt b/source/var/english-stop-words.txt new file mode 100644 index 00000000..497a1f96 --- /dev/null +++ b/source/var/english-stop-words.txt @@ -0,0 +1,311 @@ + + | An English stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | Many of the forms below are quite rare (e.g. "yourselves") but included for + | completeness. + + | PRONOUNS FORMS + | 1st person sing + +i | subject, always in upper case of course + +me | object +my | possessive adjective + | the possessive pronoun `mine' is best suppressed, because of the + | sense of coal-mine etc. +myself | reflexive + | 1st person plural +we | subject + +| us | object + | care is required here because US = United States. It is usually + | safe to remove it if it is in lower case. +our | possessive adjective +ours | possessive pronoun +ourselves | reflexive + | second person (archaic `thou' forms not included) +you | subject and object +your | possessive adjective +yours | possessive pronoun +yourself | reflexive (singular) +yourselves | reflexive (plural) + | third person singular +he | subject +him | object +his | possessive adjective and pronoun +himself | reflexive + +she | subject +her | object and possessive adjective +hers | possessive pronoun +herself | reflexive + +it | subject and object +its | possessive adjective +itself | reflexive + | third person plural +they | subject +them | object +their | possessive adjective +theirs | possessive pronoun +themselves | reflexive + | other forms (demonstratives, interrogatives) +what +which +who +whom +this +that +these +those + + | VERB FORMS (using F.R. Palmer's nomenclature) + | BE +am | 1st person, present +is | -s form (3rd person, present) +are | present +was | 1st person, past +were | past +be | infinitive +been | past participle +being | -ing form + | HAVE +have | simple +has | -s form +had | past +having | -ing form + | DO +do | simple +does | -s form +did | past +doing | -ing form + + | The forms below are, I believe, best omitted, because of the significant + | homonym forms: + + | He made a WILL + | old tin CAN + | merry month of MAY + | a smell of MUST + | fight the good fight with all thy MIGHT + + | would, could, should, ought might however be included + + | | AUXILIARIES + | | WILL + |will + +would + + | | SHALL + |shall + +should + + | | CAN + |can + +could + + | | MAY + |may + |might + | | MUST + |must + | | OUGHT + +ought + + | COMPOUND FORMS, increasingly encountered nowadays in 'formal' writing + | pronoun + verb + +i'm +you're +he's +she's +it's +we're +they're +i've +you've +we've +they've +i'd +you'd +he'd +she'd +we'd +they'd +i'll +you'll +he'll +she'll +we'll +they'll + + | verb + negation + +isn't +aren't +wasn't +weren't +hasn't +haven't +hadn't +doesn't +don't +didn't + + | auxiliary + negation + +won't +wouldn't +shan't +shouldn't +can't +cannot +couldn't +mustn't + + | miscellaneous forms + +let's +that's +who's +what's +here's +there's +when's +where's +why's +how's + + | rarer forms + + | daren't needn't + + | doubtful forms + + | oughtn't mightn't + + | ARTICLES +a +an +the + + | THE REST (Overlap among prepositions, conjunctions, adverbs etc is so + | high, that classification is pointless.) +and +but +if +or +because +as +until +while + +of +at +by +for +with +about +against +between +into +through +during +before +after +above +below +to +from +up +down +in +out +on +off +over +under + +again +further +then +once + +here +there +when +where +why +how + +all +any +both +each +few +more +most +other +some +such + +no +nor +not +only +own +same +so +than +too +very + + | Just for the record, the following words are among the commonest in English + + | one + | every + | least + | less + | many + | now + | ever + | never + | say + | says + | said + | also + | get + | go + | goes + | just + | made + | make + | put + | see + | seen + | whether + | like + | well + | back + | even + | still + | way + | take + | since + | another + | however + | two + | three + | four + | five + | first + | second + | new + | old + | high + | long
\ No newline at end of file diff --git a/source/var/french-stop-words.txt b/source/var/french-stop-words.txt new file mode 100644 index 00000000..08a2f5d7 --- /dev/null +++ b/source/var/french-stop-words.txt @@ -0,0 +1,176 @@ + + | A French stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + +au | a + le +aux | a + les +avec | with +ce | this +ces | these +dans | with +de | of +des | de + les +du | de + le +elle | she +en | `of them' etc +et | and +eux | them +il | he +je | I +la | the +le | the +leur | their +lui | him +ma | my (fem) +mais | but +me | me +même | same; as in moi-même (myself) etc +mes | me (pl) +moi | me +mon | my (masc) +ne | not +nos | our (pl) +notre | our +nous | we +on | one +ou | where +par | by +pas | not +pour | for +qu | que before vowel +que | that +qui | who +sa | his, her (fem) +se | oneself +ses | his (pl) +son | his, her (masc) +sur | on +ta | thy (fem) +te | thee +tes | thy (pl) +toi | thee +ton | thy (masc) +tu | thou +un | a +une | a +vos | your (pl) +votre | your +vous | you + + | single letter forms + +c | c' +d | d' +j | j' +l | l' +à | to, at +m | m' +n | n' +s | s' +t | t' +y | there + + | forms of être (not including the infinitive): +été +étée +étées +étés +étant +suis +es +est +sommes +êtes +sont +serai +seras +sera +serons +serez +seront +serais +serait +serions +seriez +seraient +étais +était +étions +étiez +étaient +fus +fut +fûmes +fûtes +furent +sois +soit +soyons +soyez +soient +fusse +fusses +fût +fussions +fussiez +fussent + + | forms of avoir (not including the infinitive): +ayant +eu +eue +eues +eus +ai +as +avons +avez +ont +aurai +auras +aura +aurons +aurez +auront +aurais +aurait +aurions +auriez +auraient +avais +avait +avions +aviez +avaient +eut +eûmes +eûtes +eurent +aie +aies +ait +ayons +ayez +aient +eusse +eusses +eût +eussions +eussiez +eussent + + | Later additions (from Jean-Christophe Deschamps) +ceci | this +celà | that +cet | this +cette | this +ici | here +ils | they +les | the (pl) +leurs | their (pl) +quel | which +quels | which +quelle | which +quelles | which +sans | without +soi | oneself
\ No newline at end of file diff --git a/source/var/generate-top-words-list.sh b/source/var/generate-top-words-list.sh new file mode 100755 index 00000000..2a87e147 --- /dev/null +++ b/source/var/generate-top-words-list.sh @@ -0,0 +1,8 @@ +#!/bin/sh + +if test $# != 2 ; then + echo No input files given 1>&2 + exit 1 +fi + +awk 'BEGIN{FS = " "} { if ($1 ~ /^[A-Za-z]/) {print $1}}' $1 | sort | tr '\n' ';' > $2
\ No newline at end of file diff --git a/source/var/stop_words/english-stop-words-list.txt b/source/var/stop_words/english-stop-words-list.txt new file mode 100644 index 00000000..caa26aaf --- /dev/null +++ b/source/var/stop_words/english-stop-words-list.txt @@ -0,0 +1 @@ +a;about;above;after;again;against;all;am;an;and;any;are;aren't;as;at;be;because;been;before;being;below;between;both;but;by;cannot;can't;could;couldn't;did;didn't;do;does;doesn't;doing;don't;down;during;each;few;for;from;further;had;hadn't;has;hasn't;have;haven't;having;he;he'd;he'll;her;here;here's;hers;herself;he's;him;himself;his;how;how's;i;i'd;if;i'll;i'm;in;into;is;isn't;it;its;it's;itself;i've;let's;me;more;most;mustn't;my;myself;no;nor;not;of;off;on;once;only;or;other;ought;our;ours;ourselves;out;over;own;same;shan't;she;she'd;she'll;she's;should;shouldn't;slashdot;so;some;such;than;that;that's;the;their;theirs;them;themselves;then;there;there's;these;they;they'd;they'll;they're;they've;this;those;through;to;too;under;until;up;very;was;wasn't;we;we'd;we'll;were;we're;weren't;we've;what;what's;when;when's;where;where's;which;while;who;whom;who's;why;why's;with;won't;would;wouldn't;writes;you;you'd;you'll;your;you're;yours;yourself;yourselves;you've; diff --git a/source/var/stop_words/french-stop-words-list.txt b/source/var/stop_words/french-stop-words-list.txt new file mode 100644 index 00000000..a6a36c79 --- /dev/null +++ b/source/var/stop_words/french-stop-words-list.txt @@ -0,0 +1 @@ +à;ai;aie;aient;aies;ait;as;au;aura;aurai;auraient;aurais;aurait;auras;aurez;auriez;aurions;aurons;auront;aux;avaient;avais;avait;avec;avez;aviez;avions;avons;ayant;ayez;ayons;c;ce;ceci;celà;ces;cet;cette;d;dans;de;des;du;elle;en;es;est;et;étaient;étais;était;étant;été;étée;étées;êtes;étés;étiez;étions;eu;eue;eues;eûmes;eurent;eus;eusse;eussent;eusses;eussiez;eussions;eut;eût;eûtes;eux;fûmes;furent;fus;fusse;fussent;fusses;fussiez;fussions;fut;fût;fûtes;ici;il;ils;j;je;l;la;le;les;leur;leurs;lui;m;ma;mais;me;même;mes;moi;mon;n;ne;nos;notre;nous;on;ont;ou;par;pas;pour;qu;que;quel;quelle;quelles;quels;qui;s;sa;sans;se;sera;serai;seraient;serais;serait;seras;serez;seriez;serions;serons;seront;ses;soi;soient;sois;soit;sommes;son;sont;soyez;soyons;suis;sur;t;ta;te;tes;toi;ton;tu;toujours;un;une;vos;votre;vous;y; |