Diffstat (limited to 'source')
-rwxr-xr-x  source/auth.py            269
-rw-r--r--  source/binarytree.py      177
-rw-r--r--  source/conf.py             57
-rw-r--r--  source/export.py          274
-rwxr-xr-x  source/feedgetter.py      231
-rwxr-xr-x  source/log.py              67
-rw-r--r--  source/mongodb.py         283
-rwxr-xr-x  source/pyAggr3g470r       143
-rwxr-xr-x  source/pyAggr3g470r.py    715
-rw-r--r--  source/search.py          129
-rw-r--r--  source/testbinarytree.py   45
-rwxr-xr-x  source/utils.py           317
12 files changed, 0 insertions, 2707 deletions
diff --git a/source/auth.py b/source/auth.py
deleted file mode 100755
index 82c3a440..00000000
--- a/source/auth.py
+++ /dev/null
@@ -1,269 +0,0 @@
-#! /usr/bin/env python
-#-*- coding: utf-8 -*-
-
-# pyAggr3g470r - A Web based news aggregator.
-# Copyright (C) 2010-2013 Cédric Bonhomme - http://cedricbonhomme.org/
-#
-# For more information : http://bitbucket.org/cedricbonhomme/pyaggr3g470r/
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>
-
-__author__ = "Cedric Bonhomme"
-__version__ = "$Revision: 0.3 $"
-__date__ = "$Date: 2012/10/12 $"
-__revision__ = "$Date: 2013/01/14 $"
-__copyright__ = "Copyright (c) Cedric Bonhomme"
-__license__ = "GPLv3"
-
-#
-# Form based authentication for CherryPy. Requires the
-# Session tool to be loaded.
-#
-
-import cherrypy
-import hashlib
-
-import log
-
-SESSION_KEY = '_cp_username'
-
-import csv
-class excel_french(csv.Dialect):
- delimiter = ';'
- quotechar = '"'
- doublequote = True
- skipinitialspace = False
- lineterminator = '\n'
- quoting = csv.QUOTE_MINIMAL
-
-csv.register_dialect('excel_french', excel_french)
-
-def change_username(username, new_username, password_file='./var/password'):
- """
-    Change the username.
- """
- users_list = []
- result = False
- with open(password_file, 'r') as csv_readfile_read:
- cr = csv.reader(csv_readfile_read, 'excel_french')
- users_list = [elem for elem in cr]
- with open(password_file, 'w') as csv_file_write:
- cw = csv.writer(csv_file_write, 'excel_french')
- for user in users_list:
- if user[0] == username:
- cw.writerow([new_username, user[1]])
- result = True
- else:
- cw.writerow(user)
- return result
-
-def change_password(username, new_password, password_file='./var/password'):
- """
- Change the password corresponding to username.
- """
- users_list = []
- result = False
- with open(password_file, 'r') as csv_readfile_read:
- cr = csv.reader(csv_readfile_read, 'excel_french')
- users_list = [elem for elem in cr]
- with open(password_file, 'w') as csv_file_write:
- cw = csv.writer(csv_file_write, 'excel_french')
- for user in users_list:
- if user[0] == username:
- m = hashlib.sha1()
- m.update(new_password.encode())
- cw.writerow([user[0], m.hexdigest()])
- result = True
- else:
- cw.writerow(user)
- return result
-
-def check_credentials(username, password, password_file='./var/password'):
- """
- Verifies credentials for username and password.
- Returns None on success or a string describing the error on failure.
- """
- USERS = {}
- cr = csv.reader(open(password_file, "r"), 'excel_french')
- for row in cr:
- USERS[row[0]] = row[1]
-
- m = hashlib.sha1()
- m.update(password.encode())
- if username in list(USERS.keys()) and USERS[username] == m.hexdigest():
- return None
- else:
- return "Incorrect username or password."
- # An example implementation which uses an ORM could be:
- # u = User.get(username)
- # if u is None:
- # return u"Username %s is unknown to me." % username
- # if u.password != md5.new(password).hexdigest():
- # return u"Incorrect password"
-
-def check_auth(*args, **kwargs):
- """
- A tool that looks in config for 'auth.require'. If found and it
- is not None, a login is required and the entry is evaluated as a list of
- conditions that the user must fulfill.
- """
- conditions = cherrypy.request.config.get('auth.require', None)
- if conditions is not None:
- username = cherrypy.session.get(SESSION_KEY)
- if username:
- cherrypy.request.login = username
- for condition in conditions:
- # A condition is just a callable that returns true or false
- if not condition():
- raise cherrypy.HTTPRedirect("/auth/login")
- else:
- raise cherrypy.HTTPRedirect("/auth/login")
-
-cherrypy.tools.auth = cherrypy.Tool('before_handler', check_auth)
-
-def require(*conditions):
- """
- A decorator that appends conditions to the auth.require config
- variable.
- """
- def decorate(f):
- if not hasattr(f, '_cp_config'):
- f._cp_config = dict()
- if 'auth.require' not in f._cp_config:
- f._cp_config['auth.require'] = []
- f._cp_config['auth.require'].extend(conditions)
- return f
- return decorate
-
-
-# Conditions are callables that return True
-# if the user fulfills the conditions they define, False otherwise
-#
-# They can access the current username as cherrypy.request.login
-#
-# Define those at will however suits the application.
-
-def member_of(groupname):
- def check():
- # replace with actual check if <username> is in <groupname>
- return cherrypy.request.login == 'joe' and groupname == 'admin'
- return check
-
-def name_is(reqd_username):
- return lambda: reqd_username == cherrypy.request.login
-
-# These might be handy
-
-def any_of(*conditions):
- """
- Returns True if any of the conditions match.
- """
- def check():
- for c in conditions:
- if c():
- return True
- return False
- return check
-
-# By default all conditions are required, but this might still be
-# needed if you want to use it inside of an any_of(...) condition
-def all_of(*conditions):
- """
- Returns True if all of the conditions match.
- """
- def check():
- for c in conditions:
- if not c():
- return False
- return True
- return check
-
-
-class AuthController(object):
- """
- This class provides login and logout actions.
- """
- def __init__(self):
- self.logger = log.Log()
- self.username = ""
-
- def on_login(self, username):
- """
- Called on successful login.
- """
- self.username = username
- self.logger.info(username + ' logged in.')
-
- def on_logout(self, username):
- """
- Called on logout.
- """
- self.logger.info(username + ' logged out.')
- self.username = ""
-
- def get_loginform(self, username, msg="Enter login information", from_page="/"):
- """
- Login page.
- """
- return """<!DOCTYPE html>\n<html>
- <head>
- <meta charset="utf-8" />
- <title>pyAggr3g470r</title>
- <link rel="stylesheet" href="/css/log.css" />
- </head>
- <body>
- <div>
- <div id="logform">
- <img src="/static/img/tuxrss.png" alt="pyAggr3g470r" />
- <form method="post" action="/auth/login">
- <input type="hidden" name="from_page" value="%(from_page)s" />
- %(msg)s<br />
- <input type="text" name="username" value="%(username)s" placeholder="Username" autofocus="autofocus" /><br />
- <input type="password" name="password" placeholder="Password" /><br />
- <input type="submit" value="Log in" />
- </form>
- </div><!-- end #main -->
- </div><!-- end #center -->
- </body>
-</html>""" % locals()
-
- @cherrypy.expose
- def login(self, username=None, password=None, from_page="/"):
- """
- Open a session for an authenticated user.
- """
- if username is None or password is None:
- return self.get_loginform("", from_page=from_page)
-
- error_msg = check_credentials(username, password)
- if error_msg:
- self.logger.info(error_msg)
- return self.get_loginform(username, error_msg, from_page)
- else:
- cherrypy.session[SESSION_KEY] = cherrypy.request.login = username
- self.on_login(username)
- raise cherrypy.HTTPRedirect(from_page or "/")
-
- @cherrypy.expose
- def logout(self, from_page="/"):
- """
-        Close a session.
- """
- sess = cherrypy.session
- username = sess.get(SESSION_KEY, None)
- sess[SESSION_KEY] = None
- if username:
- cherrypy.request.login = None
- self.on_logout(username)
- raise cherrypy.HTTPRedirect(from_page or "/") \ No newline at end of file
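
The module above follows the classic CherryPy form-based authentication recipe: check_auth() is registered as a 'before_handler' tool, and require() stacks condition callables into a handler's 'auth.require' config entry. A minimal sketch of how a page handler would opt in; the Root class and the secret() method are hypothetical names used only for illustration:

import cherrypy

import auth
from auth import require, name_is, member_of

class Root(object):
    # sessions and the auth tool must be enabled for check_auth() to run
    _cp_config = {'tools.sessions.on': True, 'tools.auth.on': True}

    def __init__(self):
        # mounts /auth/login and /auth/logout
        self.auth = auth.AuthController()

    @cherrypy.expose
    @require(name_is("joe"), member_of("admin"))
    def secret(self):
        # reached only when every condition returns True;
        # otherwise check_auth() redirects to /auth/login
        return "Hello %s" % cherrypy.request.login

cherrypy.quickstart(Root())
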
diff --git a/source/binarytree.py b/source/binarytree.py
deleted file mode 100644
index a9294251..00000000
--- a/source/binarytree.py
+++ /dev/null
@@ -1,177 +0,0 @@
-#! /usr/bin/env python
-#-*- coding: utf-8 -*-
-
-"""
-A binary ordered tree implementation.
-"""
-
-class Node(object):
- """
- Represents a node.
- """
- def __init__(self, data):
- """
- Initialization.
- """
- self.left = None
- self.right = None
- self.data = data
-
-class OrderedBinaryTree(object):
- """
-    Represents an ordered binary tree.
- """
- def __init__(self, root=None):
- """
- Initializes the root member.
- """
- self.root = root
-
- def addNode(self, data):
- """
- Creates a new node and returns it.
- """
- return Node(data)
-
- def insert(self, root, data):
- """
-        Inserts new data into the tree.
- """
- if root == None:
-            # if there isn't any data
- # adds it and returns
- return self.addNode(data)
- else:
-            # enters into the tree
- if data['article_date'] <= root.data['article_date']:
- # if the data is less than the stored one
-                # goes into the left sub-tree
- root.left = self.insert(root.left, data)
- else:
-                # processes the right sub-tree
- root.right = self.insert(root.right, data)
- return root
-
- def lookup(self, root, target):
- """
-        Looks for a value in the tree.
- """
- if root == None:
- return 0
- else:
- # if it has found it...
- if target == root.data:
- return 1
- else:
- if target['article_date'] < root.data['article_date']:
- # left side
- return self.lookup(root.left, target)
- else:
- # right side
- return self.lookup(root.right, target)
-
- def minValue(self, root):
- """
- Goes down into the left
- arm and returns the last value.
- """
- while(root.left != None):
- root = root.left
- return root.data
-
- def maxValue(self, root):
- """
- Goes down into the right
- arm and returns the last value.
- """
- while(root.right != None):
- root = root.right
- return root.data
-
- def maxDepth(self, root):
- """
- Return the maximum depth.
- """
- if root == None:
- return 0
- else:
- # computes the two depths
- ldepth = self.maxDepth(root.left)
- rdepth = self.maxDepth(root.right)
- # returns the appropriate depth
- return max(ldepth, rdepth) + 1
-
- def size(self, root):
- if root == None:
- return 0
- else:
- return self.size(root.left) + 1 + self.size(root.right)
-
- def pre_order_traversal(self, root, result=[]):
- """
- Depth-first. Pre-order traversal.
- """
- if root == None:
- pass
- else:
- result.append(root.data)
- self.pre_order_traversal(root.left, result)
- self.pre_order_traversal(root.right, result)
- return result
-
- def in_order_traversal(self, root, result=[]):
- """
- Depth-first. In-order traversal.
- """
- if root == None:
- pass
- else:
- self.in_order_traversal(root.left, result)
- result.append(root.data)
- self.in_order_traversal(root.right, result)
- return result
-
- def post_order_traversal(self, root, result=[]):
- """
- Depth-first. Post-order traversal.
- """
- if root == None:
- pass
- else:
- self.post_order_traversal(root.left, result)
- self.post_order_traversal(root.right, result)
- result.append(root.data)
- return result
-
- def __str__(self):
- """
- Pretty display.
- """
- return ", ".join([article["article_title"] for article in \
- self.in_order_traversal(self.root)])
-
-if __name__ == "__main__":
- # Point of entry in execution mode.
- # create the tree
- tree = OrderedBinaryTree()
- # add the root node
- root = tree.addNode(0)
- # ask the user to insert values
- for i in range(0, 5):
- data = int(input("insert the node value nr %d: " % i))
- # insert values
- tree.insert(root, data)
-
- tree.printTree(root)
- print()
- tree.printRevTree(root)
- print()
- data = int(input("Insert a value to find: "))
- if tree.lookup(root, data):
- print("found")
- else:
- print("not found")
-
- print(tree.minValue(root))
- print(tree.maxDepth(root))
- print(tree.size(root)) \ No newline at end of file
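
Both insert() and __str__ assume the stored data are article dictionaries carrying 'article_date' and 'article_title' keys (the comparison key and the display key). A minimal usage sketch with illustrative articles, not part of the deleted file:

from datetime import datetime

from binarytree import OrderedBinaryTree

tree = OrderedBinaryTree()
articles = [
    {"article_title": "First post", "article_date": datetime(2013, 1, 1)},
    {"article_title": "Second post", "article_date": datetime(2013, 3, 5)},
    {"article_title": "Older post", "article_date": datetime(2012, 11, 20)},
]
# the first insert creates the root; later inserts reuse and return it
tree.root = tree.insert(tree.root, articles[0])
for article in articles[1:]:
    tree.insert(tree.root, article)

print(tree)                    # titles joined in chronological (in-order) order
print(tree.maxDepth(tree.root), tree.size(tree.root))
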
diff --git a/source/conf.py b/source/conf.py
deleted file mode 100644
index 1b262927..00000000
--- a/source/conf.py
+++ /dev/null
@@ -1,57 +0,0 @@
-#! /usr/bin/env python
-#-*- coding: utf-8 -*-
-
-# pyAggr3g470r - A Web based news aggregator.
-# Copyright (C) 2010-2013 Cédric Bonhomme - http://cedricbonhomme.org/
-#
-# For more information : http://bitbucket.org/cedricbonhomme/pyaggr3g470r/
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>
-
-__author__ = "Cedric Bonhomme"
-__version__ = "$Revision: 0.2 $"
-__date__ = "$Date: 2012/04/22 $"
-__revision__ = "$Date: 2013/08/15 $"
-__copyright__ = "Copyright (c) Cedric Bonhomme"
-__license__ = "GPLv3"
-
-
-import os
-import configparser
-# load the configuration
-config = configparser.SafeConfigParser()
-try:
- config.read("./cfg/pyAggr3g470r.cfg")
-except:
- config.read("./cfg/pyAggr3g470r.cfg-sample")
-path = os.path.abspath(".")
-
-MONGODB_ADDRESS = config.get('MongoDB', 'address')
-MONGODB_PORT = int(config.get('MongoDB', 'port'))
-MONGODB_DBNAME = config.get('MongoDB', 'dbname')
-MONGODB_USER = config.get('MongoDB', 'user')
-MONGODB_PASSWORD = config.get('MongoDB', 'password')
-
-HTTP_PROXY = config.get('feedparser', 'http_proxy')
-USER_AGENT = config.get('feedparser', 'user_agent')
-FEED_LIST = config.get('feedparser', 'feed_list')
-
-MAIL_ENABLED = bool(int(config.get('mail','enabled')))
-mail_from = config.get('mail','mail_from')
-mail_to = config.get('mail','mail_to')
-smtp_server = config.get('mail','smtp')
-username = config.get('mail','username')
-password = config.get('mail','password')
-
-DIASPORA_POD = config.get('misc', 'diaspora_pod') \ No newline at end of file
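
conf.py expects ./cfg/pyAggr3g470r.cfg to be an INI file readable by configparser, with the sections and options read above. An illustrative sample, with placeholder values only:

[MongoDB]
address = localhost
port = 27017
dbname = pyaggr3g470r
user = admin
password = changeme

[feedparser]
http_proxy =
user_agent = pyAggr3g470r (https://bitbucket.org/cedricbonhomme/pyaggr3g470r)
feed_list = ./var/feed.lst

[mail]
enabled = 0
mail_from = pyaggr3g470r@example.org
mail_to = you@example.org
smtp = smtp.example.org
username = you
password = changeme

[misc]
diaspora_pod = joindiaspora.com
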
diff --git a/source/export.py b/source/export.py
deleted file mode 100644
index 5e06aea1..00000000
--- a/source/export.py
+++ /dev/null
@@ -1,274 +0,0 @@
-#! /usr/bin/env python
-#-*- coding: utf-8 -*-
-
-# pyAggr3g470r - A Web based news aggregator.
-# Copyright (C) 2010-2013 Cédric Bonhomme - http://cedricbonhomme.org/
-#
-# For more information : http://bitbucket.org/cedricbonhomme/pyaggr3g470r/
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>
-
-__author__ = "Cedric Bonhomme"
-__version__ = "$Revision: 0.4 $"
-__date__ = "$Date: 2011/10/24 $"
-__revision__ = "$Date: 2013/03/05 $"
-__copyright__ = "Copyright (c) Cedric Bonhomme"
-__license__ = "GPLv3"
-
-#
-# This file contains the export functions of pyAggr3g470r. The database
-# of articles can be exported in different formats:
-# - simple HTML webzine;
-# - text file;
-# - ePub file;
-# - PDF file.
-#
-
-import os
-import time
-
-import conf
-import utils
-
-def HTML_HEADER(title="pyAggr3g470r", css="./style.css"):
- return """<!DOCTYPE html>
-<html lang="en-US">
-<head>
-<title>%s</title>
-<meta charset="utf-8"/>
-<link rel="stylesheet" href="%s" />
-</head>
-<body>""" % (title, css)
-
-HTML_FOOTER = """<hr />
-<p>This archive has been generated with
-<a href="https://bitbucket.org/cedricbonhomme/pyaggr3g470r/">pyAggr3g470r</a>.
-A software under GPLv3 license.
-You are welcome to copy, modify or redistribute the source code according to the
-<a href="http://www.gnu.org/licenses/gpl-3.0.txt">GPLv3</a> license.</p>
-</body>
-</html>
-"""
-
-CSS = """body {
- font:normal medium 'Gill Sans','Gill Sans MT',Verdana,sans-serif;
- margin:1.20em auto;
- width:80%;
- line-height:1.75;
-}
-blockquote {
- font-size:small;
- line-height:2.153846;
- margin:2.153846em 0;
- padding:0;font-style:oblique;
- border-left:1px dotted;
- margin-left:2.153846em;
- padding-left:2.153846em;
-}
-blockquote p{
- margin:2.153846em 0;
-}
-p+br {
- display:none;
-}
-h1 {
-font-size:large;
-}
-h2,h3 {
- font-size:medium;
-}
-hr {
- border-style:dotted;
- height:1px;
- border-width: 1px 0 0 0;
- margin:1.45em 0 1.4em;
- padding:0;
-}
-a {
- text-decoration:none;
- color:#00008B;
-}
-#footer {
- clear:both;
- text-align:center;
- font-size:small;
-}
-img {
- border:0;
-}
-.horizontal,.simple li {
- margin:0;
- padding:0;
- list-style:none;
- display:inline
-}
-.simple li:before {
- content:"+ ";
-}
-.simple > li:first-child:before {
- content:"";
-}
-.author {
- text-decoration:none;
- display:block;
- float:right;
- margin-left:2em;
- font-size:small;
-}
-.content {
- margin:1.00em 1.00em;
-}"""
-
-def export_html(mongo_db):
- """
- Export the articles given in parameter in a simple Webzine.
- """
- nb_articles = format(mongo_db.nb_articles(), ",d")
- feeds = mongo_db.get_all_feeds()
- index = HTML_HEADER("News archive")
- index += "<h1>List of feeds</h1>\n"
- index += """<p>%s articles.</p>\n<ul>\n""" % (nb_articles,)
- for feed in feeds:
- # creates a folder for each stream
- feed_folder = conf.path + "/var/export/webzine/" + \
- utils.normalize_filename(feed["feed_id"])
- try:
- os.makedirs(feed_folder)
- except OSError:
-            # directories already exist (not a problem)
- pass
-
- index += """ <li><a href="%s">%s</a></li>\n""" % \
- (feed["feed_id"], feed["feed_title"])
-
- posts = HTML_HEADER(feed["feed_title"], "../style.css")
- posts += """<h1>Articles of the feed <a href="%s">%s</a></h1>\n""" % (feed["site_link"], feed["feed_title"])
- posts += """<p>%s articles.</p>\n""" % (format(mongo_db.nb_articles(feed["feed_id"]), ",d"),)
-
- for article in mongo_db.get_articles(feed_id=feed["feed_id"]):
-
- post_file_name = os.path.normpath(feed_folder + "/" + article["article_id"] + ".html")
- feed_index = os.path.normpath(feed_folder + "/index.html")
-
- posts += article["article_date"].ctime() + " - " + \
- """<a href="./%s.html">%s</a>""" % \
- (article["article_id"], article["article_title"][:150]) + "<br />\n"
-
- a_post = HTML_HEADER(article["article_title"], "../style.css")
- a_post += '<div style="width:60%; overflow:hidden; text-align:justify; margin:0 auto">\n'
- a_post += """<h1><a href="%s">%s</a></h1>\n<br />""" % \
- (article["article_link"], article["article_title"])
- a_post += article["article_content"]
- a_post += "</div>\n<hr />\n"
- a_post += """<br />\n<a href="%s">Complete story</a>\n<br />\n""" % (article["article_link"],)
- a_post += HTML_FOOTER
-
- with open(post_file_name, "w") as f:
- f.write(a_post)
-
- posts += HTML_FOOTER
- with open(feed_index, "w") as f:
- f.write(posts)
-
- index += "</ul>\n"
- index += "<p>" + time.strftime("Generated on %d %b %Y at %H:%M.") + "</p>\n"
- index += HTML_FOOTER
- with open(conf.path + "/var/export/webzine/" + "index.html", "w") as f:
- f.write(index)
- with open(conf.path + "/var/export/webzine/" + "style.css", "w") as f:
- f.write(CSS)
-
-def export_txt(mongo_db):
- """
- Export the articles given in parameter in text files.
- """
- feeds = mongo_db.get_all_feeds()
- for feed in feeds:
- # creates folder for each stream
- folder = conf.path + "/var/export/txt/" + \
- utils.normalize_filename(feed["feed_title"].strip().replace(':', '').lower())
- try:
- os.makedirs(folder)
- except OSError:
-            # directories already exist (not a problem)
- pass
-
- for article in mongo_db.get_articles(feed_id=feed["feed_id"]):
- name = article["article_date"].ctime().strip().replace(' ', '_')
- name = os.path.normpath(folder + "/" + name + ".txt")
-
- content = "Title: " + article["article_title"] + "\n\n\n"
- content += utils.clear_string(article["article_content"])
-
- with open(name, "w") as f:
- f.write(content)
-
-def export_epub(mongo_db):
- """
- Export the articles given in parameter in ePub files.
- """
- from epub import ez_epub
- feeds = mongo_db.get_all_feeds()
- for feed in feeds:
- # creates folder for each stream
- folder = conf.path + "/var/export/epub/" + \
- utils.normalize_filename(feed["feed_title"].strip().replace(':', '').lower().encode('utf-8'))
- try:
- os.makedirs(folder)
- except OSError:
-            # directories already exist (not a problem)
- pass
-
- for article in mongo_db.get_articles(feed_id=feed["feed_id"]):
- name = article["article_date"].ctime().strip().replace(' ', '_')
- name = os.path.normpath(folder + "/" + name + ".epub")
-
- section = ez_epub.Section()
- section.title = article["article_title"]
- section.paragraphs = [utils.clear_string(article["article_content"])]
- ez_epub.makeBook(article["article_title"], [feed["feed_title"]], [section], \
- name, lang='en-US', cover=None)
-
-def export_pdf(feeds):
- """
- Export the articles given in parameter in PDF files.
- """
- from xhtml2pdf import pisa
- import io as StringIO
- for feed in list(feeds.values()):
- # creates folder for each stream
- folder = utils.path + "/var/export/pdf/" + \
- utils.normalize_filename(feed.feed_title.strip().replace(':', '').lower())
- try:
- os.makedirs(folder)
- except OSError:
-            # directories already exist (not a problem)
- pass
-
- for article in list(feed.articles.values()):
- name = article.article_date.strip().replace(' ', '_')
- name = os.path.normpath(folder + "/" + name + ".pdf")
-
- content = HTML_HEADER(article.article_title)
- content += '\n<div style="width: 50%; overflow:hidden; text-align: justify; margin:0 auto">\n'
- content += """<h1><a href="%s">%s</a></h1><br />""" % \
- (article.article_link, article.article_title)
- content += article.article_description
- content += "</div>\n<hr />\n"
- content += HTML_FOOTER
-
- try:
-                pdf = pisa.CreatePDF(StringIO.StringIO(content), open(name, "wb"))
- except:
- pass
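
export_html(), export_txt() and export_epub() all take the MongoDB wrapper as input and write below conf.path + "/var/export/". A minimal driver sketch, assuming the connection parameters from conf.py and an existing var/export/ tree:

import conf
import mongodb
import export

# connect with the credentials read from ./cfg/pyAggr3g470r.cfg
db = mongodb.Articles(conf.MONGODB_ADDRESS, conf.MONGODB_PORT,
                      conf.MONGODB_DBNAME, conf.MONGODB_USER, conf.MONGODB_PASSWORD)

export.export_html(db)   # ./var/export/webzine/
export.export_txt(db)    # ./var/export/txt/
export.export_epub(db)   # ./var/export/epub/ (requires the epub module)
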
diff --git a/source/feedgetter.py b/source/feedgetter.py
deleted file mode 100755
index ce1cba1b..00000000
--- a/source/feedgetter.py
+++ /dev/null
@@ -1,231 +0,0 @@
-#! /usr/bin/env python
-#-*- coding: utf-8 -*-
-
-# pyAggr3g470r - A Web based news aggregator.
-# Copyright (C) 2010-2013 Cédric Bonhomme - http://cedricbonhomme.org/
-#
-# For more information : http://bitbucket.org/cedricbonhomme/pyaggr3g470r/
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>
-
-__author__ = "Cedric Bonhomme"
-__version__ = "$Revision: 1.8 $"
-__date__ = "$Date: 2010/09/02 $"
-__revision__ = "$Date: 2013/08/15 $"
-__copyright__ = "Copyright (c) Cedric Bonhomme"
-__license__ = "GPLv3"
-
-import hashlib
-import threading
-import urllib.request
-import feedparser
-from bs4 import BeautifulSoup
-from datetime import datetime
-from contextlib import contextmanager
-
-import conf
-import search
-import utils
-import mongodb
-
-import log
-pyaggr3g470r_log = log.Log()
-
-list_of_threads = []
-
-@contextmanager
-def opened_w_error(filename, mode="r"):
- try:
- f = open(filename, mode)
- except IOError as err:
- yield None, err
- else:
- try:
- yield f, None
- finally:
- f.close()
-
-class FeedGetter(object):
- """
- This class is in charge of retrieving feeds listed in ./var/feed.lst.
-    This class uses the feedparser module by Mark Pilgrim.
- For each feed a new thread is launched.
- """
- def __init__(self):
- """
- Initializes the database connection.
- """
- # MongoDB connections
- self.articles = mongodb.Articles(conf.MONGODB_ADDRESS, conf.MONGODB_PORT, \
- conf.MONGODB_DBNAME, conf.MONGODB_USER, conf.MONGODB_PASSWORD)
- if conf.HTTP_PROXY == "":
- self.proxy = urllib.request.ProxyHandler({})
- else:
- self.proxy = urllib.request.ProxyHandler({"http" : conf.HTTP_PROXY})
- feedparser.USER_AGENT = conf.USER_AGENT
-
- def retrieve_feed(self, feed_url=None, feed_original=None):
- """
- Parse the file 'feeds.lst' and launch a thread for each RSS feed.
- """
- if feed_url != None:
- self.process(feed_url, feed_original)
- else:
- with opened_w_error(conf.FEED_LIST) as (f, err):
- if err:
- pyaggr3g470r_log.error("List of feeds not found.")
- else:
- for a_feed in f:
- # test if the URL is well formed
- for url_regexp in utils.url_finders:
- if url_regexp.match(a_feed):
- the_good_url = url_regexp.match(a_feed).group(0).replace("\n", "")
- try:
- # launch a new thread for the RSS feed
- thread = threading.Thread(None, self.process, \
- None, (the_good_url,))
- thread.start()
- list_of_threads.append(thread)
- except:
- pass
- break
-
-        # wait for all threads to finish
- for th in list_of_threads:
- th.join()
-
- def process(self, the_good_url, feed_original=None):
- """Request the URL
-
- Executed in a thread.
- """
- if utils.open_url(the_good_url)[0] == True:
-            # if the resource is available, add the articles to the database.
- self.add_into_database(the_good_url, feed_original)
-
- def add_into_database(self, feed_link, feed_original=None):
- """
- Add the articles of the feed 'a_feed' in the database.
- """
- a_feed = feedparser.parse(feed_link, handlers = [self.proxy])
- if a_feed['entries'] == []:
- return
- try:
- feed_image = a_feed.feed.image.href
- except:
- feed_image = "/img/feed-icon-28x28.png"
-
- if feed_original != None:
- feed_link = feed_original
-
- sha1_hash = hashlib.sha1()
- sha1_hash.update(feed_link.encode('utf-8'))
- feed_id = sha1_hash.hexdigest()
-
- feed = self.articles.get_feed(feed_id)
- if None == feed:
- collection_dic = {"feed_id": feed_id, \
- "type": 0, \
- "feed_image": feed_image, \
- "feed_title": utils.clear_string(a_feed.feed.title), \
- "feed_link": feed_link, \
- "site_link": a_feed.feed.link, \
- "mail": False \
- }
- self.articles.add_collection(collection_dic)
- feed = self.articles.get_feed(feed_id)
-
- articles = []
- for article in a_feed['entries']:
- description = ""
- article_title = ""
- try:
- # article content
- description = article.content[0].value
- except AttributeError:
- try:
- # article description
- description = article.description
- except Exception:
- description = ""
- try:
- description = BeautifulSoup(description, "html.parser").decode()
- article_title = BeautifulSoup(article.title, "html.parser").decode()
- except Exception as E:
- pyaggr3g470r_log.error("Problem when sanitizing the content of the feed: " + feed_link)
- article_title = article.title
-
- try:
- post_date = datetime(*article.published_parsed[:6])
- except:
- post_date = datetime(*article.updated_parsed[:6])
-
- sha1_hash = hashlib.sha1()
- sha1_hash.update(article.link.encode('utf-8'))
- article_id = sha1_hash.hexdigest()
-
- article = {"article_id": article_id, \
- "type":1, \
- "article_date": post_date, \
- "article_link": article.link, \
- "article_title": article_title, \
- "article_content": description, \
- "article_readed": False, \
- "article_like": False \
- }
-
- articles.append(article)
-
- if self.articles.get_articles(feed_id, article_id) == []:
- # add the article to the Whoosh index
- try:
- search.add_to_index([article], feed)
- except:
- print("Whoosh error.")
- pyaggr3g470r_log.error("Whoosh error.")
- continue
-
- if conf.MAIL_ENABLED and feed["mail"]:
- # if subscribed to the feed
- threading.Thread(None, utils.send_mail, None, (conf.mail_from, conf.mail_to, \
- a_feed.feed.title, \
- article_title, description)).start()
- self.articles.add_articles(articles, feed_id)
-
-
-if __name__ == "__main__":
- # Point of entry in execution mode
- feed_getter = FeedGetter()
- # Retrieve all feeds
- feed_getter.retrieve_feed()
-
- # If you want to get all articles of a blog:
- """
- for i in range(1,86):
- feed_original = "http://esr.ibiblio.org/?feed=rss2"
- feed = feed_original + "&paged=" + str(i)
- print("Retrieving", feed, "...")
- feed_getter.retrieve_feed(feed, feed_original)
- """
- """
- for i in range(1,5):
- feed_original = "http://spaf.wordpress.com/feed/"
- feed = feed_original + "?paged=" + str(i)
- print("Retrieving", feed, "...")
- feed_getter.retrieve_feed(feed, feed_original)
- """
-
- # For a blogspot blog:
- #feed_getter.retrieve_feed("http://www.blogger.com/feeds/4195135246107166251/posts/default", "http://neopythonic.blogspot.com/feeds/posts/default")
- #feed_getter.retrieve_feed("http://www.blogger.com/feeds/8699431508730375743/posts/default", "http://python-history.blogspot.com/feeds/posts/default") \ No newline at end of file
diff --git a/source/log.py b/source/log.py
deleted file mode 100755
index 5db5d838..00000000
--- a/source/log.py
+++ /dev/null
@@ -1,67 +0,0 @@
-#! /usr/bin/env python
-#-*- coding: utf-8 -*-
-
-# pyAggr3g470r - A Web based news aggregator.
-# Copyright (C) 2010-2013 Cédric Bonhomme - http://cedricbonhomme.org/
-#
-# For more information : http://bitbucket.org/cedricbonhomme/pyaggr3g470r/
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>
-
-__author__ = "Cedric Bonhomme"
-__version__ = "$Revision: 0.1 $"
-__date__ = "$Date: 2012/10/12 $"
-__revision__ = "$Date: 2012/10/12 $"
-__copyright__ = "Copyright (c) Cedric Bonhomme"
-__license__ = "GPLv3"
-
-class Log(object):
- """
- Log events. Especially events relative to authentication.
- """
- def __init__(self):
- """
- Initialization of the logger.
- """
- import logging
- self.logger = logging.getLogger("pyaggr3g470r")
- hdlr = logging.FileHandler('./var/pyaggr3g470r.log')
-        formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
-        hdlr.setFormatter(formatter)
- self.logger.addHandler(hdlr)
- self.logger.setLevel(logging.INFO)
-
- def info(self, message):
- """
- Log notices.
- """
- self.logger.info(message)
-
- def warning(self, message):
- """
- Log warnings.
- """
- self.logger.warning(message)
-
- def error(self, message):
- """
- Log errors.
- """
-        self.logger.error(message)
-
- def critical(self, message):
- """
- Log critical errors.
- """
- self.logger.critical(message)
diff --git a/source/mongodb.py b/source/mongodb.py
deleted file mode 100644
index 04cd44fa..00000000
--- a/source/mongodb.py
+++ /dev/null
@@ -1,283 +0,0 @@
-#! /usr/bin/env python
-# -*- coding: utf-8 -*-
-
-# pyAggr3g470r - A Web based news aggregator.
-# Copyright (C) 2010-2013 Cédric Bonhomme - http://cedricbonhomme.org/
-#
-# For more information : http://bitbucket.org/cedricbonhomme/pyaggr3g470r/
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>
-
-__author__ = "Cedric Bonhomme"
-__version__ = "$Revision: 0.8 $"
-__date__ = "$Date: 2012/03/03 $"
-__revision__ = "$Date: 2013/06/25 $"
-__copyright__ = "Copyright (c) Cedric Bonhomme"
-__license__ = "GPLv3"
-
-import pymongo
-
-class Articles(object):
- """
-    This class is responsible for managing the MongoDB
-    database.
- """
- def __init__(self, url='localhost', port=27017, db_name="pyaggr3g470r", user="", password=""):
- """
- Instantiates the connection.
- """
- self.db_name = db_name
- self.connection = pymongo.connection.Connection(url, port)
- self.db = pymongo.database.Database(self.connection, self.db_name)
- if password != "":
- self.db.authenticate(user, password)
- collections = self.db.collection_names()
- for collection_name in collections:
- if collection_name != "system.indexes":
- self.db[collection_name].ensure_index([("article_date", pymongo.DESCENDING)], \
- name="date_index", unique=False, \
- background=True)
-
- def add_collection(self, new_collection):
- """
- Creates a new collection for a new feed.
- """
- collection = self.db[new_collection["feed_id"]]
- collection.insert(new_collection)
-
- def add_articles(self, articles, feed_id):
- """
- Add article(s) in a collection.
- """
- collection = self.db[str(feed_id)]
- for article in articles:
- cursor = collection.find({"article_id":article["article_id"]})
- if cursor.count() == 0:
- collection.insert(article)
-
- def delete_feed(self, feed_id):
- """
- Delete a collection (feed with all articles).
- """
- self.db.drop_collection(feed_id)
-
- def delete_article(self, feed_id, article_id):
- """
- Delete an article.
- """
- collection = self.db[str(feed_id)]
- collection.remove(spec_or_id={"article_id":article_id}, safe=True)
-
- def get_feed(self, feed_id):
- """
- Return information about a feed (collection).
- Return None if the collection does not exist.
- """
- try:
- return next(self.db[str(feed_id)].find())
- except:
- return None
-
- def get_all_feeds(self, condition=None):
- """
-        Return all feed objects. The returned list
-        is sorted alphabetically (by feed name).
- """
- feeds = []
- collections = self.db.collection_names()
- for collection_name in collections:
- if collection_name != "system.indexes":
- if condition is None:
- cursor = self.db[collection_name].find({"type":0})
- else:
- cursor = self.db[collection_name].find({"type":0, condition[0]:condition[1]})
- if cursor.count() != 0:
- feeds.append(next(cursor))
- feeds.sort(key = lambda elem: elem['feed_title'].lower())
- return feeds
-
- def get_articles(self, feed_id=None, article_id=None, condition=None, limit=1000000000):
- """
- Return one or several articles.
- The parameter "condition" is an optional requirement, for example:
- get_articles(feed_id, condition=("article_readed", False)) will
- return all unread articles of the feed 'feed_id'.
- """
- if feed_id == None and article_id == None:
- # Return all articles.
- articles = []
- collections = self.db.collection_names()
- for collection_name in collections:
- collection = self.db[collection_name]
- if condition is None:
- articles.extend(collection.find({"type":1}, limit=limit))
- else:
- articles.extend(collection.find({"type":1, condition[0]:condition[1]}, limit=limit))
- return articles
-
- elif feed_id != None and article_id == None:
- # Return all the articles of a collection.
- collection = self.db[str(feed_id)]
- if condition is None:
- cursor = collection.find({"type":1}, limit=limit)
- else:
- cursor = collection.find({"type":1, condition[0]:condition[1]}, limit=limit)
- return cursor.sort([("article_date", pymongo.DESCENDING)])
-
- elif feed_id != None and article_id != None:
- # Return a precise article.
- collection = self.db[str(feed_id)]
- try:
- return next(collection.find({"article_id":article_id}))
- except:
- return []
-
- def get_favorites(self, feed_id=None):
- """
-        Return favorite articles.
- """
- if feed_id is not None:
- # only for a feed
- collection = self.db[feed_id]
- cursor = collection.find({'type':1, 'article_like':True})
- return cursor.sort([("article_date", pymongo.DESCENDING)])
- else:
- favorites = []
- for feed_id in self.db.collection_names():
- favorites += self.get_favorites(feed_id)
- return favorites
-
- def nb_articles(self, feed_id=None):
- """
-        Return the number of articles of a feed
-        or of the whole database.
- """
- if feed_id is not None:
- collection = self.db[feed_id]
- cursor = collection.find({'type':1})
- return cursor.count()
- else:
- nb_articles = 0
- for feed_id in self.db.collection_names():
- nb_articles += self.nb_articles(feed_id)
- return nb_articles
-
- def nb_unread_articles(self, feed_id=None):
- """
-        Return the number of unread articles of a feed
-        or of the whole database.
- """
- if feed_id is not None:
- return self.get_articles(feed_id=feed_id, condition=("article_readed", False)).count()
- else:
- return len(self.get_articles(condition=("article_readed", False)))
-
- def like_article(self, like, feed_id, article_id):
- """
- Like or unlike an article.
- """
- collection = self.db[str(feed_id)]
- collection.update({"article_id": article_id}, {"$set": {"article_like": like}})
-
- def nb_favorites(self, feed_id=None):
- """
-        Return the number of favorite articles of a feed
-        or of the whole database.
- """
- if feed_id is not None:
- return self.get_favorites(feed_id).count()
- else:
- return len(self.get_favorites())
-
- def nb_mail_notifications(self):
- """
-        Return the number of feeds with e-mail notification enabled.
- """
- nb_mail_notifications = 0
- for feed_id in self.db.collection_names():
- collection = self.db[feed_id]
- cursor = collection.find({'type':0, 'mail':True})
- nb_mail_notifications += cursor.count()
- return nb_mail_notifications
-
- def mark_as_read(self, readed, feed_id=None, article_id=None):
- """
- Mark one or several articles as read.
- """
- if feed_id != None and article_id != None:
- collection = self.db[str(feed_id)]
- collection.update({"article_id": article_id, "article_readed":not readed}, {"$set": {"article_readed": readed}})
- elif feed_id != None and article_id == None:
- collection = self.db[str(feed_id)]
- collection.update({"type": 1, "article_readed":not readed}, {"$set": {"article_readed": readed}}, multi=True)
- else:
- for feed_id in self.db.collection_names():
- self.mark_as_read(readed, feed_id, None)
-
- def update_feed(self, feed_id, changes):
- """
- Update a feed.
- """
- collection = self.db[str(feed_id)]
- collection.update({"type": 0, "feed_id":feed_id}, {"$set": changes}, multi=True)
- if "feed_id" in changes.keys():
- self.db[str(feed_id)].rename(str(changes["feed_id"]))
-
- # Functions on database
- def drop_database(self):
- """
-        Drop the whole database.
- """
- self.connection.drop_database(self.db_name)
-
-
-if __name__ == "__main__":
- # Point of entry in execution mode.
- articles = Articles()
- # Create a collection for a stream
- collection_dic = {"collection_id": 42,\
- "feed_image": "Image", \
- "feed_title": "Title", \
- "feed_link": "Link", \
- "site_title": "Site link", \
- "mail": True, \
- }
- #articles.add_collection(collection_dic)
-
- # Add an article in the newly created collection
- article_dic1 = {"article_id": 51, \
- "article_date": "Today", \
- "article_link": "Link of the article", \
- "article_title": "The title", \
- "article_content": "The content of the article", \
- "article_readed": True, \
- "article_like": True \
- }
- article_dic2 = {"article_id": 52, \
- "article_date": "Yesterday", \
- "article_link": "Link", \
- "article_title": "Hello", \
- "article_content": "The content of the article", \
- "article_readed": True, \
- "article_like": True \
- }
-
- #articles.add_articles([article_dic1, article_dic2], 42)
-
- print("All articles:")
- #print articles.get_all_articles()
-
-
- # Drop the database
- #articles.drop_database()
diff --git a/source/pyAggr3g470r b/source/pyAggr3g470r
deleted file mode 100755
index 3755ad16..00000000
--- a/source/pyAggr3g470r
+++ /dev/null
@@ -1,143 +0,0 @@
-#! /usr/bin/env python
-#-*- coding: utf-8 -*-
-
-# pyAggr3g470r - A Web based news aggregator.
-# Copyright (C) 2010-2012 Cédric Bonhomme - http://cedricbonhomme.org/
-#
-# For more information : http://bitbucket.org/cedricbonhomme/pyaggr3g470r/
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>
-
-__author__ = "Cedric Bonhomme"
-__version__ = "$Revision: 0.2 $"
-__date__ = "$Date: 2011/06/20 $"
-__revision__ = "$Date: 2013/02/17 $"
-__copyright__ = "Copyright (c) Cedric Bonhomme"
-__license__ = "GPLv3"
-
-# This control file is inspired from Forban: http://www.foo.be/forban.
-
-import os
-import sys
-import time
-import subprocess
-import platform
-import signal
-
-PATH = os.path.abspath(".")
-SERVICE = "pyAggr3g470r"
-
-def service_start(python_command, servicename=None):
- """
-    Starts a new service with Popen and returns the process id.
- """
- if servicename is not None:
- service = servicename + ".py"
- proc = subprocess.Popen([python_command, "-tt", service], stderr=subprocess.STDOUT, stdout=subprocess.PIPE)
- time.sleep(0.15)
- return proc.pid
- return False
-
-def writepid(processname=None, pid=None):
- """
- Writes the pid of processname in a file.
- """
- pidpath = os.path.join(PATH, "var", processname + ".pid")
- if processname is not None and pid is not None:
- with open(pidpath, "w") as f:
- f.write(str(pid))
- return True
- return False
-
-def checkpid(servicename=None):
- pidpath = os.path.join(PATH,"var", servicename + ".pid")
- if os.path.exists(pidpath):
- return True
- else:
- return False
-
-def pidof(processname=None):
- pidpath = os.path.join(PATH, "var", processname + ".pid")
- if processname is not None and os.path.exists(pidpath):
- with open(pidpath) as f:
- pid = f.read()
- return pid
- return False
-
-def rmpid(processname=None):
- """
- Deletes the file which contains the PID.
- """
- pidpath = os.path.join(PATH, "var", processname + ".pid")
- if os.path.exists(pidpath):
- os.unlink(pidpath)
- return True
- else:
- return False
-
-def start(python_command):
- if not checkpid(servicename=SERVICE):
- pid = service_start(python_command, servicename =SERVICE)
- writepid(processname=SERVICE, pid=pid)
- print(SERVICE + " is starting with pid: " + pidof(processname=SERVICE))
- else:
- print(SERVICE + " could not be started (pid exists)")
- retval = False
-
-def stop():
- """
- Stop the process SERVICE.
- """
- print("Stopping " + SERVICE + "...")
- retval = True
- pid = pidof(processname=SERVICE)
- if pid:
- if platform.system() == "Windows":
- import win32api
- import win32con
- phandle = win32api.OpenProcess(win32con.PROCESS_TERMINATE, 0, int(pid))
- win32api.TerminateProcess(phandle, 0)
- win32api.CloseHandle(phandle)
- rmpid(processname=SERVICE)
- else:
- try:
- os.kill(int(pid), signal.SIGKILL)
- except OSError as e:
- print(SERVICE + " unsuccessfully stopped")
- retval = False
- finally:
- rmpid(processname=SERVICE)
- return retval
-
-def usage():
- print("pyAggr3g470r (start|stop|restart)")
- exit (1)
-
-if __name__ == "__main__":
- # Point of entry in execution mode.
- python_command = "python"
- if sys.version_info.major == 2:
-        # the web application requires Python 3, so use the python3.3 interpreter
- python_command = "python3.3"
- if len(sys.argv) == 1:
- usage()
- elif sys.argv[1] == "start":
- start(python_command)
- elif sys.argv[1] == "stop":
- stop()
- elif sys.argv[1] == "restart":
- stop()
- start(python_command)
- else:
- usage()
diff --git a/source/pyAggr3g470r.py b/source/pyAggr3g470r.py
deleted file mode 100755
index 922e7114..00000000
--- a/source/pyAggr3g470r.py
+++ /dev/null
@@ -1,715 +0,0 @@
-#! /usr/bin/env python
-#-*- coding: utf-8 -*-
-
-# pyAggr3g470r - A Web based news aggregator.
-# Copyright (C) 2010-2013 Cédric Bonhomme - http://cedricbonhomme.org/
-#
-# For more information : http://bitbucket.org/cedricbonhomme/pyaggr3g470r/
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>
-
-__author__ = "Cedric Bonhomme"
-__version__ = "$Revision: 4.1 $"
-__date__ = "$Date: 2010/01/29 $"
-__revision__ = "$Date: 2013/09/09 $"
-__copyright__ = "Copyright (c) Cedric Bonhomme"
-__license__ = "GPLv3"
-
-#
-# This file contains the "Root" class which describes
-# all pages (views) of pyAggr3g470r. These pages are:
-# - main page;
-# - management;
-# - history;
-# - favorites;
-# - notifications;
-# - unread;
-# - feed summary;
-# - inactives;
-# - languages.
-# Templates are described in ./templates with the Mako
-# template library.
-#
-
-import os
-import re
-import time
-import datetime
-
-from collections import defaultdict
-from whoosh.index import EmptyIndexError
-
-import cherrypy
-from mako.template import Template
-from mako.lookup import TemplateLookup
-lookup = TemplateLookup(directories=['templates'])
-
-import conf
-import utils
-import export
-import mongodb
-import search
-import feedgetter
-import auth
-
-def error_404(status, message, traceback, version):
- """
- Display an error if the page does not exist.
- """
- message = "<p>Error %s - This page does not exist.</p>" % status
- tmpl = lookup.get_template("error.html")
- return tmpl.render(message=message)
-
-def handle_error():
- """
- Handle different type of errors.
- """
-    message = "<p>Sorry, an error occurred.</p>"
- cherrypy.response.status = 500
- cherrypy.response.body = [message]
-
-class RestrictedArea(object):
- """
-    All methods in this controller (and subcontrollers) are
-    open only to members of the admin group.
- """
- _cp_config = {
- 'auth.auth.require': [auth.member_of('admin')]
- }
-
- @cherrypy.expose
- def index(self):
- message = "<p>This is the admin only area.</p>"
- tmpl = lookup.get_template("error.html")
- return tmpl.render(message=message)
-
-class pyAggr3g470r(object):
- """
- Main class.
- All pages of pyAggr3g470r are described in this class.
- """
- _cp_config = {'request.error_response': handle_error, \
- 'tools.sessions.on': True, \
- 'tools.auth.on': True}
-
- def __init__(self):
- """
- """
- self.auth = auth.AuthController()
- restricted = RestrictedArea()
-
- self.mongo = mongodb.Articles(conf.MONGODB_ADDRESS, conf.MONGODB_PORT, \
- conf.MONGODB_DBNAME, conf.MONGODB_USER, conf.MONGODB_PASSWORD)
- @auth.require()
- def index(self):
- """
- Main page containing the list of feeds and articles.
- """
- feeds = self.mongo.get_all_feeds()
- nb_unread_articles = self.mongo.nb_unread_articles()
- nb_favorites = self.mongo.nb_favorites()
- nb_mail_notifications = self.mongo.nb_mail_notifications()
- tmpl = lookup.get_template("index.html")
- return tmpl.render(feeds=feeds, nb_feeds=len(feeds), mongo=self.mongo, \
- nb_favorites=nb_favorites, nb_unread_articles=nb_unread_articles, \
- nb_mail_notifications=nb_mail_notifications, header_text=nb_unread_articles)
-
- index.exposed = True
-
- @auth.require()
- def management(self):
- """
- Management page.
-        Allows adding and deleting feeds, provides export functions for the
-        MongoDB database and displays some statistics.
- """
- feeds = self.mongo.get_all_feeds()
- nb_mail_notifications = self.mongo.nb_mail_notifications()
- nb_favorites = self.mongo.nb_favorites()
- nb_articles = format(self.mongo.nb_articles(), ",d")
- nb_unread_articles = format(self.mongo.nb_unread_articles(), ",d")
- nb_indexed_documents = format(search.nb_documents(), ",d")
- tmpl = lookup.get_template("management.html")
- return tmpl.render(feeds=feeds, nb_mail_notifications=nb_mail_notifications, \
- nb_favorites=nb_favorites, nb_articles=nb_articles, \
- nb_unread_articles=nb_unread_articles, \
- mail_notification_enabled=conf.MAIL_ENABLED, \
- nb_indexed_documents=nb_indexed_documents)
-
- management.exposed = True
-
- @auth.require()
- def statistics(self, word_size=6):
- """
- More advanced statistics.
- """
- articles = self.mongo.get_articles()
- top_words = utils.top_words(articles, n=50, size=int(word_size))
- tag_cloud = utils.tag_cloud(top_words)
- tmpl = lookup.get_template("statistics.html")
- return tmpl.render(articles=articles, word_size=word_size, tag_cloud=tag_cloud)
-
- statistics.exposed = True
-
- @auth.require()
- def search(self, query=None):
- """
- Simply search for the string 'query'
- in the description of the article.
- """
- param, _, value = query.partition(':')
- feed_id = None
- if param == "Feed":
- feed_id, _, query = value.partition(':')
- search_result = defaultdict(list)
- try:
- results = search.search(param)
- except EmptyIndexError as e:
- return self.error('<p>The database has not been <a href="/index_base">indexed</a>.</p>')
- for result in results:
- article = self.mongo.get_articles(result[0], result[1])
- if article != []:
- search_result[result[0]].append(article)
- sorted_search_result = {feed_id: sorted(articles, key=lambda t: t['article_date'], reverse=True) \
- for feed_id, articles in search_result.items()}
- tmpl = lookup.get_template("search.html")
- return tmpl.render(search_result=sorted_search_result, query=query, feed_id=feed_id, mongo=self.mongo)
-
- search.exposed = True
-
- @auth.require()
- def fetch(self, param=None):
- """
- Fetch all feeds.
- """
- feed_link = None
- if None != param:
- # Fetch only the feed specified in parameter
- feed_link = self.mongo.get_feed(param)["feed_link"]
- feed_getter = feedgetter.FeedGetter()
- feed_getter.retrieve_feed(feed_url=feed_link)
- return self.index()
-
- fetch.exposed = True
-
- @auth.require()
- def article(self, param, plain_text=0):
- """
- Display the article in parameter in a new Web page.
- """
- try:
- feed_id, article_id = param.split(':')
- article = self.mongo.get_articles(feed_id, article_id)
- if article == []:
-                return self.error("<p>This article does not exist.</p>")
- feed = self.mongo.get_feed(feed_id)
- articles = self.mongo.get_articles(feed_id)
- except:
-            return self.error("<p>Bad URL. This article does not exist.</p>")
-
- if article["article_readed"] == False:
-            # if the current article has not been read yet, update the database
- self.mark_as_read("Article:"+article["article_id"]+":"+feed["feed_id"])
-
- # Description (full content) of the article
- if plain_text == "1":
- description = "<p>" + utils.clear_string(article["article_content"]) + "</p>"
- else:
- description = article["article_content"]
- if description == "":
- description = "<p>No description available.</p>"
-
- # Generation of the QR Code for the current article
- utils.generate_qr_code(article)
-
- # Previous and following articles
- previous, following = None, None
- liste = self.mongo.get_articles(feed_id)
- for current_article in self.mongo.get_articles(feed_id):
- next(articles)
- if current_article["article_id"] == article_id:
- break
- following = current_article
- if following is None:
- following = liste[liste.count()-1]
- try:
- previous = next(articles)
- except StopIteration:
- previous = liste[0]
-
- tmpl = lookup.get_template("article.html")
- return tmpl.render(header_text=article["article_title"], article=article, previous=previous, following=following, \
- diaspora=conf.DIASPORA_POD, feed=feed, description=description, plain_text=plain_text)
-
- article.exposed = True
-
- @auth.require()
- def feed(self, feed_id, word_size=6):
- """
-        This page gives summary information about a feed (number of articles,
-        unread articles, average activity, tag cloud, e-mail notification and
-        favourite articles) for the current feed.
- """
- try:
- feed = self.mongo.get_feed(feed_id)
- if feed == None:
-                return self.error("<p>This feed does not exist.</p>")
- articles = self.mongo.get_articles(feed_id, limit=10)
- nb_articles_feed = self.mongo.nb_articles(feed_id)
- nb_articles_total = self.mongo.nb_articles()
- nb_unread_articles_feed = self.mongo.nb_unread_articles(feed_id)
- favorites = self.mongo.get_favorites(feed_id)
- nb_favorites = self.mongo.nb_favorites(feed_id)
- except KeyError:
-            return self.error("<p>This feed does not exist.</p>")
-
-
- if articles.count() != 0:
- today = datetime.datetime.now()
- last_article = articles[0]["article_date"]
- first_article = articles[self.mongo.nb_articles(feed_id)-2]["article_date"]
- delta = last_article - first_article
- elapsed = today - last_article
- average = round(nb_articles_feed / abs(delta.days), 2)
-
- top_words = utils.top_words(articles = self.mongo.get_articles(feed_id), n=50, size=int(word_size))
- tag_cloud = utils.tag_cloud(top_words)
-
- tmpl = lookup.get_template("feed.html")
- return tmpl.render(feed=feed, articles=articles, favorites=favorites, \
- nb_articles_feed=nb_articles_feed, nb_articles_total=nb_articles_total, nb_unread_articles_feed=nb_unread_articles_feed, \
- nb_favorites = nb_favorites, first_post_date=first_article, end_post_date=last_article, \
- average=average, delta=delta, elapsed=elapsed, \
- tag_cloud=tag_cloud, word_size=word_size, \
- mail_to=conf.mail_to, mail_notification_enabled=conf.MAIL_ENABLED)
-
- tmpl = lookup.get_template("feed.html")
- return tmpl.render(feed=feed, articles=[])
-
- feed.exposed = True
-
- @auth.require()
- def articles(self, feed_id):
- """
- This page displays all articles of a feed.
- """
- try:
- feed = self.mongo.get_feed(feed_id)
- articles = self.mongo.get_articles(feed_id)
- except KeyError:
-            return self.error("<p>This feed does not exist.</p>")
- tmpl = lookup.get_template("articles.html")
- return tmpl.render(articles=articles, feed=feed)
-
- articles.exposed = True
-
- @auth.require()
- def unread(self, feed_id=""):
- """
- This page displays all unread articles of a feed.
- """
- feeds = self.mongo.get_all_feeds()
- tmpl = lookup.get_template("unread.html")
- return tmpl.render(feeds=feeds, feed_id=feed_id, mongo=self.mongo)
- unread.exposed = True
-
- @auth.require()
- def history(self, query="all", m=""):
- """
-        This page enables browsing articles chronologically.
- """
- feeds = self.mongo.get_all_feeds()
- tmpl = lookup.get_template("history.html")
- return tmpl.render(feeds=feeds, mongo=self.mongo, query=query, m=m)
-
- history.exposed = True
-
- @auth.require()
- def error(self, message):
- """
- Display a message (bad feed id, bad article id, etc.)
- """
- tmpl = lookup.get_template("error.html")
- return tmpl.render(message=message)
-
- error.exposed = True
-
- @auth.require()
- def mark_as_read(self, target=""):
- """
- Mark one (or more) article(s) as read by setting the value of the field
- 'article_readed' of the MongoDB database to 'True'.
- """
- param, _, identifiant = target.partition(':')
-
- # Mark all articles as read.
- if param == "":
- self.mongo.mark_as_read(True, None, None)
- # Mark all articles from a feed as read.
- elif param == "Feed" or param == "Feed_FromMainPage":
- self.mongo.mark_as_read(True, identifiant, None)
- # Mark an article as read.
- elif param == "Article":
- self.mongo.mark_as_read(True, identifiant.split(':')[1], identifiant.split(':')[0])
- return self.index()
-
- mark_as_read.exposed = True
-
- @auth.require()
- def notifications(self):
- """
- List all active e-mail notifications.
- """
- feeds = self.mongo.get_all_feeds(condition=("mail",True))
- tmpl = lookup.get_template("notifications.html")
- return tmpl.render(feeds=feeds, mail_to=conf.mail_to, mail_notification_enabled=conf.MAIL_ENABLED)
-
- notifications.exposed = True
-
- @auth.require()
- def mail_notification(self, param):
- """
- Enable or disable e-mail notifications for a feed.
- """
- try:
- action, feed_id = param.split(':')
- new_value = 1 == int(action)
- self.mongo.update_feed(feed_id, {"mail":new_value})
- except:
- return self.error("<p>Bad URL. This feed do not exists.</p>")
- return self.index()
-
- mail_notification.exposed = True
-
- @auth.require()
- def like(self, param):
- """
- Mark or unmark an article as favorite.
- """
- try:
- like, feed_id, article_id = param.split(':')
- articles = self.mongo.get_articles(feed_id, article_id)
- except:
- return self.error("<p>Bad URL. This article do not exists.</p>")
- self.mongo.like_article("1"==like, feed_id, article_id)
- return self.article(feed_id+":"+article_id)
-
- like.exposed = True
-
- @auth.require()
- def subscriptions(self):
- """
- List all feed subscriptions.
- """
- feeds = self.mongo.get_all_feeds()
- tmpl = lookup.get_template("subscriptions.html")
- return tmpl.render(feeds=feeds)
-
- subscriptions.exposed = True
-
- @auth.require()
- def favorites(self):
- """
- List of favorite articles.
- """
- feeds = self.mongo.get_all_feeds()
- articles = {}
- for feed in feeds:
- articles[feed["feed_id"]] = self.mongo.get_favorites(feed["feed_id"])
- tmpl = lookup.get_template("favorites.html")
- return tmpl.render(feeds=feeds, \
- articles=articles)
-
- favorites.exposed = True
-
- @auth.require()
- def inactives(self, nb_days=365):
- """
- List the feeds that have been inactive for more than nb_days days.
- """
- feeds = self.mongo.get_all_feeds()
- today = datetime.datetime.now()
- inactives = []
- for feed in feeds:
- more_recent_article = self.mongo.get_articles(feed["feed_id"], limit=1)
- if more_recent_article.count() == 0:
- last_post = datetime.datetime.fromtimestamp(time.mktime(time.gmtime(0)))
- else:
- last_post = next(more_recent_article)["article_date"]
- elapsed = today - last_post
- if elapsed > datetime.timedelta(days=int(nb_days)):
- inactives.append((feed, elapsed))
- tmpl = lookup.get_template("inactives.html")
- return tmpl.render(inactives=inactives, nb_days=int(nb_days))
-
- inactives.exposed = True
-
- @auth.require()
- def languages(self):
- """
- Filter by languages.
- """
- try:
- from guess_language import guess_language_name
- except ImportError:
- tmpl = lookup.get_template("error.html")
- return tmpl.render(message='<p>Module <i><a href="https://bitbucket.org/spirit/guess_language/">guess_language</a></i> not installed.</p>')
- result = {}
- feeds = self.mongo.get_all_feeds()
- for feed in feeds:
- for article in self.mongo.get_articles(feed["feed_id"]):
- language = guess_language_name(utils.clear_string(article["article_content"]))
- result.setdefault(language, defaultdict(list))
- result[language][feed["feed_id"]].append(article)
- tmpl = lookup.get_template("languages.html")
- return tmpl.render(articles_sorted_by_languages=result, mongo=self.mongo)
-
- languages.exposed = True
-
- @auth.require()
- def add_feed(self, url):
- """
- Add a new feed with the URL of a page.
- """
- # search the feed in the HTML page with BeautifulSoup
- feed_url = utils.search_feed(url)
- if feed_url is None:
- return self.error("<p>Impossible to find a feed at this URL.</p>")
- # if a feed exists
- else:
- result = utils.add_feed(feed_url)
- # if the feed is not in the file feed.lst
- import hashlib
- sha1_hash = hashlib.sha1()
- sha1_hash.update(feed_url.encode('utf-8'))
- feed_id = sha1_hash.hexdigest()
- if result is False:
- message = """<p>You are already following <a href="/feed/%s">this feed</a>!</p>""" % (feed_id,)
- else:
- message = """<p><a href="/feed/%s">Feed added</a>. You can now <a href="/fetch/">fetch your feeds</a>.</p>""" % (feed_id,)
- tmpl = lookup.get_template("confirmation.html")
- return tmpl.render(message=message)
-
- add_feed.exposed = True
-
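-# Editor's illustrative note: the feed identifier used throughout the
-# application is the SHA-1 hex digest of the feed URL (see also
-# change_feed_url below). The URL here is only an example:
-#
-#   hashlib.sha1("http://example.org/rss.xml".encode("utf-8")).hexdigest()
-#   # -> a 40-character hexadecimal string used as feed_id
-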
- @auth.require()
- def remove_feed(self, feed_id):
- """
- Remove a feed from the file feed.lst and from the MongoDB database.
- """
- feed = self.mongo.get_feed(feed_id)
- self.mongo.delete_feed(feed_id)
- utils.remove_feed(feed["feed_link"])
- message = """<p>All articles from the feed <i>%s</i> are now removed from the base.</p>""" % (feed["feed_title"],)
- tmpl = lookup.get_template("confirmation.html")
- return tmpl.render(message=message)
-
- remove_feed.exposed = True
-
- @auth.require()
- def change_site_url(self, feed_id, old_site_url, new_site_url):
- """
- Change the URL of a site already present in the database.
- """
- try:
- self.mongo.update_feed(feed_id, {"site_link":new_site_url})
- tmpl = lookup.get_template("confirmation.html")
- return tmpl.render(message="<p>The URL of the site has been changed.</p>")
- except:
- return self.error("<p>Error when changing the URL of the site.</p>")
-
- change_site_url.exposed = True
-
- @auth.require()
- def change_feed_url(self, feed_id, old_feed_url, new_feed_url):
- """
- Change the URL of a feed already present in the database.
- """
- import hashlib
- sha1_hash = hashlib.sha1()
- sha1_hash.update(new_feed_url.encode('utf-8'))
- new_feed_id = sha1_hash.hexdigest()
- self.mongo.update_feed(feed_id, {"feed_id":new_feed_id,
- "feed_link":new_feed_url})
- result = utils.change_feed_url(old_feed_url, new_feed_url)
- if result:
- tmpl = lookup.get_template("confirmation.html")
- return tmpl.render(message="<p>The URL of the feed has been changed.</p>")
- else:
- return self.error("<p>Error when changing the URL of the feed.</p>")
-
- change_feed_url.exposed = True
-
- @auth.require()
- def change_feed_name(self, feed_id, new_feed_name):
- """
- Change the name of a feed.
- """
- try:
- self.mongo.update_feed(feed_id, {"feed_title":new_feed_name})
- tmpl = lookup.get_template("confirmation.html")
- return tmpl.render(message="<p>The name of the feed has been changed.</p>")
- except:
- return self.error("<p>Error when changing the name of the feed.</p>")
-
- change_feed_name.exposed = True
-
- @auth.require()
- def change_feed_logo(self, feed_id, new_feed_logo):
- """
- Change the logo of a feed.
- """
- try:
- self.mongo.update_feed(feed_id, {"feed_image":new_feed_logo})
- tmpl = lookup.get_template("confirmation.html")
- return tmpl.render(message="<p>The logo of the feed has been changed.</p>")
- except:
- return self.error("<p>Error when changing the logo of the feed.</p>")
-
- change_feed_logo.exposed = True
-
- @auth.require()
- def change_username(self, new_username):
- """
- Change the username of the current user.
- """
- result = auth.change_username(self.auth.username, new_username)
- if result:
- self.auth.username = new_username
- tmpl = lookup.get_template("confirmation.html")
- return tmpl.render(message="<p>Your username has been changed.</p>")
- else:
- return self.error("<p>Impossible to change the username.</p>")
-
- change_username.exposed = True
-
- @auth.require()
- def change_password(self, new_password):
- """
- Change the password of the current user.
- """
- result = auth.change_password(self.auth.username, new_password)
- if result:
- tmpl = lookup.get_template("confirmation.html")
- return tmpl.render(message="<p>Your password has been changed.</p>")
- else:
- return self.error("<p>Impossible to change the password.</p>")
-
- change_password.exposed = True
-
- @auth.require()
- def delete_article(self, param):
- """
- Delete an article.
- """
- try:
- feed_id, article_id = param.split(':')
- # Delete from the MongoDB database
- self.mongo.delete_article(feed_id, article_id)
- # Delete from the Whoosh index
- search.delete_article(feed_id, article_id)
- except:
- return self.error("<p>Bad URL. This article do not exists.</p>")
-
- return self.index()
-
- delete_article.exposed = True
-
- @auth.require()
- def logout(self):
- """
- Close the session.
- """
- return self.auth.logout()
-
- logout.exposed = True
-
- @auth.require()
- def drop_base(self):
- """
- Delete all articles.
- """
- self.mongo.drop_database()
- return self.index()
-
- drop_base.exposed = True
-
- @auth.require()
- def index_base(self):
- """
- Launches the indexing of the database.
- """
- search.create_index()
- return self.index()
-
- index_base.exposed = True
-
- @auth.require()
- def export(self, export_method):
- """
- Export articles currently loaded from the MongoDB database with
- the appropriate function of the 'export' module.
- """
- try:
- getattr(export, export_method)(self.mongo)
- except Exception as e:
- return self.error(e)
- tmpl = lookup.get_template("confirmation.html")
- return tmpl.render(message="<p>Export successfully terminated.<br />Check the folder: <b>" + conf.path + "/var/export/</b>.</p>")
-
- export.exposed = True
-
- @auth.require()
- def epub(self, param):
- """
- Export an article to EPUB.
- """
- try:
- from epub import ez_epub
- except Exception as e:
- return self.error(e)
- try:
- feed_id, article_id = param.split(':')
- except:
- return self.error("Bad URL.")
- try:
- feed = self.mongo.get_feed(feed_id)
- articles = self.mongo.get_articles(feed_id)
- article = self.mongo.get_articles(feed_id, article_id)
- except:
- self.error("<p>This article do not exists.</p>")
- try:
- folder = conf.path + "/var/export/epub/"
- os.makedirs(folder)
- except OSError:
- # directory already exists (not a problem)
- pass
- section = ez_epub.Section()
- section.title = article["article_title"]
- section.paragraphs = [utils.clear_string(article["article_content"])]
- ez_epub.makeBook(article["article_title"], [feed["feed_title"]], [section], \
- os.path.normpath(folder) + "article.epub", lang='en-US', cover=None)
- return self.article(param)
-
- epub.exposed = True
-
-
-if __name__ == '__main__':
- # Point of entry in execution mode
- root = pyAggr3g470r()
- root.favicon_ico = cherrypy.tools.staticfile.handler(filename=os.path.join(conf.path + "/static/img/favicon.png"))
- cherrypy.config.update({'error_page.404': error_404})
- cherrypy.quickstart(root, "/" ,config=conf.path + "/cfg/cherrypy.cfg")
diff --git a/source/search.py b/source/search.py
deleted file mode 100644
index a9248a09..00000000
--- a/source/search.py
+++ /dev/null
@@ -1,129 +0,0 @@
-#! /usr/bin/env python
-#-*- coding: utf-8 -*-
-
-# pyAggr3g470r - A Web based news aggregator.
-# Copyright (C) 2010-2013 Cédric Bonhomme - http://cedricbonhomme.org/
-#
-# For more information : https://bitbucket.org/cedricbonhomme/pyaggr3g470r/
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>
-
-__author__ = "Cedric Bonhomme"
-__version__ = "$Revision: 0.2 $"
-__date__ = "$Date: 2013/06/24 $"
-__revision__ = "$Date: 2013/06/25 $"
-__copyright__ = "Copyright (c) Cedric Bonhomme"
-__license__ = "GPLv3"
-
-import os
-
-from whoosh.index import create_in, open_dir
-from whoosh.index import EmptyIndexError
-from whoosh.fields import *
-from whoosh.query import *
-from whoosh.qparser import QueryParser
-from whoosh.writing import AsyncWriter
-
-import conf
-import utils
-import mongodb
-
-indexdir = "./var/indexdir"
-
-schema = Schema(title=TEXT(stored=True), \
- content=TEXT, \
- article_id=TEXT(stored=True), \
- feed_id=TEXT(stored=True))
-
-def create_index():
- """
- Creates the index.
- """
- mongo = mongodb.Articles(conf.MONGODB_ADDRESS, conf.MONGODB_PORT, \
- conf.MONGODB_DBNAME, conf.MONGODB_USER, conf.MONGODB_PASSWORD)
- feeds = mongo.get_all_feeds()
- if not os.path.exists(indexdir):
- os.mkdir(indexdir)
- ix = create_in(indexdir, schema)
- writer = ix.writer()
- for feed in feeds:
- for article in mongo.get_articles(feed["feed_id"]):
- writer.add_document(title=article["article_title"], \
- content=utils.clear_string(article["article_content"]), \
- article_id=article["article_id"] , \
- feed_id=feed["feed_id"])
- writer.commit()
-
-def add_to_index(articles, feed):
- """
- Add a list of articles to the index.
- Here an AsyncWriter is used because the function will
- be called in multiple threads by the feedgetter module.
- """
- try:
- ix = open_dir(indexdir)
- except (EmptyIndexError, OSError) as e:
- raise EmptyIndexError
- writer = AsyncWriter(ix)
- for article in articles:
- writer.add_document(title=article["article_title"], \
- content=utils.clear_string(article["article_content"]), \
- article_id=article["article_id"] , \
- feed_id=feed["feed_id"])
- writer.commit()
-
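-# Usage sketch (editor's illustration, not part of the original module): after
-# fetching new entries for a feed, a feedgetter thread can push them into the
-# Whoosh index in one batch. The names `new_articles` and `feed` are assumed
-# for the example.
-#
-#   try:
-#       add_to_index(new_articles, feed)
-#   except EmptyIndexError:
-#       create_index()   # no index yet: build it from the whole database
-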
-def delete_article(feed_id, article_id):
- """
- Delete an article from the index.
- """
- try:
- ix = open_dir(indexdir)
- except (EmptyIndexError, OSError) as e:
- raise EmptyIndexError
- writer = ix.writer()
- document = And([Term("feed_id", feed_id), Term("article_id", article_id)])
- writer.delete_by_query(document)
- writer.commit()
-
-def search(term):
- """
- Search for `term` in the index.
- Returns a list of (feed_id, article_id) tuples.
- """
- try:
- ix = open_dir(indexdir)
- except (EmptyIndexError, OSError) as e:
- raise EmptyIndexError
- with ix.searcher() as searcher:
- query = QueryParser("content", ix.schema).parse(term)
- results = searcher.search(query, limit=None)
- return [(article["feed_id"], article["article_id"]) for article in results]
-
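-# Usage sketch (editor's illustration): resolving search hits back to full
-# articles; `mongo` is assumed to be a mongodb.Articles instance created as in
-# create_index() above.
-#
-#   for feed_id, article_id in search("python"):
-#       article = mongo.get_articles(feed_id, article_id)
-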
-def nb_documents():
- """
- Return the number of undeleted documents.
- """
- try:
- ix = open_dir(indexdir)
- except (EmptyIndexError, OSError) as e:
- raise EmptyIndexError
- return ix.doc_count()
-
-if __name__ == "__main__":
- # Point of entry in execution mode.
- #create_index()
- print(nb_documents())
- results = search("Nothomb")
- for article in results:
- print(article)
diff --git a/source/testbinarytree.py b/source/testbinarytree.py
deleted file mode 100644
index 84670ca1..00000000
--- a/source/testbinarytree.py
+++ /dev/null
@@ -1,45 +0,0 @@
-#! /usr/bin/env python
-# -*- coding: utf-8 -*-
-
-import time
-import sys
-import resource
-# Increases Python's recursion limit and the size of the stack.
-resource.setrlimit(resource.RLIMIT_STACK, (2**29,-1))
-sys.setrecursionlimit(10**6)
-
-import mongodb
-import binarytree
-import conf
-
-print("Loading articles from the database...")
-database = mongodb.Articles(conf.MONGODB_ADDRESS, conf.MONGODB_PORT, \
- conf.MONGODB_DBNAME, conf.MONGODB_USER, \
- conf.MONGODB_PASSWORD)
-begin = time.time()
-articles = database.get_articles()
-end = time.time()
-print(("{} articles loaded in {} seconds.".format(len(articles), end-begin)))
-
-print("Generating the binary tree...")
-begin = time.time()
-root = binarytree.Node(articles[0])
-tree = binarytree.OrderedBinaryTree(root)
-# add the root node (first article of the list)
-#root = tree.addNode(articles[0])
-for article in articles[1:]:
- tree.insert(tree.root, article)
-end = time.time()
-print(("Generation done in {0:2f} seconds.".format(end-begin)))
-
-print("Maximum depth of the tree:")
-print(tree.maxDepth(tree.root))
-print("Oldest article:")
-oldest_article = tree.minValue(tree.root)
-print((oldest_article["article_date"].strftime('%Y-%m-%d %H:%M') + \
- " - " + oldest_article["article_title"]))
-print("Newest article:")
-newest_article = tree.maxValue(tree.root)
-print((newest_article["article_date"].strftime('%Y-%m-%d %H:%M') + \
- " - " + newest_article["article_title"]))
-#print(tree) \ No newline at end of file
diff --git a/source/utils.py b/source/utils.py
deleted file mode 100755
index d39e402f..00000000
--- a/source/utils.py
+++ /dev/null
@@ -1,317 +0,0 @@
-#! /usr/bin/env python
-#-*- coding: utf-8 -*-
-
-# pyAggr3g470r - A Web based news aggregator.
-# Copyright (C) 2010-2013 Cédric Bonhomme - http://cedricbonhomme.org/
-#
-# For more information : http://bitbucket.org/cedricbonhomme/pyaggr3g470r/
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>
-
-__author__ = "Cedric Bonhomme"
-__version__ = "$Revision: 1.5 $"
-__date__ = "$Date: 2010/12/07 $"
-__revision__ = "$Date: 2013/07/24 $"
-__copyright__ = "Copyright (c) Cedric Bonhomme"
-__license__ = "GPLv3"
-
-#
-# This file provides functions used for:
-# - the database management;
-# - generation of tag clouds;
-# - HTML processing;
-# - e-mail notifications.
-#
-
-import os
-import re
-import glob
-import operator
-import calendar
-import html.entities
-
-try:
- from qrcode.pyqrnative.PyQRNative import QRCode, QRErrorCorrectLevel, CodeOverflowException
- from qrcode import qr
-except:
- pass
-
-import smtplib
-from email.mime.multipart import MIMEMultipart
-from email.mime.text import MIMEText
-
-import urllib.request, urllib.error, urllib.parse
-import http.server
-from bs4 import BeautifulSoup
-
-from collections import Counter
-from contextlib import contextmanager
-
-import conf
-
-# regular expression to check URL
-url_finders = [ \
- re.compile("([0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}|(((news|telnet|nttp|file|http|ftp|https)://)|(www|ftp)[-A-Za-z0-9]*\\.)[-A-Za-z0-9\\.]+)(:[0-9]*)?/[-A-Za-z0-9_\\$\\.\\+\\!\\*\\(\\),;:@&=\\?/~\\#\\%]*[^]'\\.}>\\),\\\"]"), \
- re.compile("([0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}|(((news|telnet|nttp|file|http|ftp|https)://)|(www|ftp)[-A-Za-z0-9]*\\.)[-A-Za-z0-9\\.]+)(:[0-9]*)?"), \
- re.compile("(~/|/|\\./)([-A-Za-z0-9_\\$\\.\\+\\!\\*\\(\\),;:@&=\\?/~\\#\\%]|\\\\)+"), \
- re.compile("'\\<((mailto:)|)[-A-Za-z0-9\\.]+@[-A-Za-z0-9\\.]+") \
-]
-
-import log
-pyaggr3g470r_log = log.Log()
-
-@contextmanager
-def opened_w_error(filename, mode="r"):
- try:
- f = open(filename, mode)
- except IOError as err:
- yield None, err
- else:
- try:
- yield f, None
- finally:
- f.close()
-
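-# Usage sketch (editor's illustration): the context manager yields a
-# (file, error) pair instead of raising, as load_stop_words() does below.
-# The path is only an example.
-#
-#   with opened_w_error("./var/feed.lst") as (f, err):
-#       if err:
-#           print("could not open the file:", err)
-#       else:
-#           lines = f.readlines()
-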
-def open_url(url):
- """
- Open a URL with the proxy and the user agent
- specified in the configuration file.
- """
- if conf.HTTP_PROXY == "":
- proxy = {}
- else:
- proxy = {"http" : conf.HTTP_PROXY}
- try:
- # route the request through the configured proxy, if any
- if proxy:
- opener = urllib.request.build_opener(urllib.request.ProxyHandler(proxy))
- else:
- opener = urllib.request.build_opener()
- opener.addheaders = [('User-agent', conf.USER_AGENT)]
- return (True, opener.open(url))
- except urllib.error.HTTPError as e:
- # server couldn't fulfill the request
- error = (url, e.code, \
- http.server.BaseHTTPRequestHandler.responses[e.code][1])
- pyaggr3g470r_log.error(url + " " + str(e.code) + " " + \
- http.server.BaseHTTPRequestHandler.responses[e.code][1])
- return (False, error)
- except urllib.error.URLError as e:
- # failed to reach the server
- if type(e.reason) == str:
- error = (url, e.reason, e.reason)
- pyaggr3g470r_log.error(url + " " + e.reason)
- else:
- error = (url, e.reason.errno, e.reason.strerror)
- pyaggr3g470r_log.error(url + " " + str(e.reason.errno) + " " + \
- e.reason.strerror)
- return (False, error)
-
-def generate_qr_code(article):
- """
- Generate a QR Code for the article given as parameter.
- """
- try:
- os.makedirs("./var/qrcode/")
- except OSError:
- pass
- if not os.path.isfile("./var/qrcode/" + article["article_id"] + ".png"):
- # QR Code generation
- try:
- f = qr.QRUrl(url = article["article_link"])
- f.make()
- f.save("./var/qrcode/" + article["article_id"] + ".png")
- except:
- pass
-
-def clear_string(data):
- """
- Clear a string by removing HTML tags and replacing
- whitespace characters (newlines, tabs, ...) with spaces.
- """
- p = re.compile(b'<[^>]+>') # HTML tags
- q = re.compile(rb'\s') # whitespace characters
- return p.sub(b'', q.sub(b' ', bytes(data, "utf-8"))).decode("utf-8", "strict")
-
-def normalize_filename(name):
- """
- Normalize a file name.
- """
- file_name = re.sub("[,'!?|&]", "", name)
- file_name = re.sub("[\s.]", "_", file_name)
- file_name = file_name.strip('_')
- file_name = file_name.strip('.')
- return os.path.normpath(file_name)
-
-def load_stop_words():
- """
- Load the stop words and return them in a list.
- """
- stop_words_lists = glob.glob('./var/stop_words/*.txt')
- stop_words = []
-
- for stop_words_list in stop_words_lists:
- with opened_w_error(stop_words_list, "r") as (stop_words_file, err):
- if err:
- stop_words = []
- else:
- stop_words += stop_words_file.read().split(";")
- return stop_words
-
-def top_words(articles, n=10, size=5):
- """
- Return the n most frequent words in a list.
- """
- stop_words = load_stop_words()
- words = Counter()
- wordre = re.compile(r'\b\w{%s,}\b' % size, re.I)
- for article in articles:
- for word in [elem.lower() for elem in
- wordre.findall(clear_string(article["article_content"])) \
- if elem.lower() not in stop_words]:
- words[word] += 1
- return words.most_common(n)
-
-def tag_cloud(tags, query="word_count"):
- """
- Generate a tag cloud.
- """
- tags.sort(key=operator.itemgetter(0))
- if query == "word_count":
- # tags cloud from the management page
- return ' '.join([('<font size=%d><a href="/search/?query=%s" title="Count: %s">%s</a></font>\n' % \
- (min(1 + count * 7 / max([tag[1] for tag in tags]), 7), word, format(count, ',d'), word)) \
- for (word, count) in tags])
- if query == "year":
- # tags cloud for the history
- return ' '.join([('<font size=%d><a href="/history/?query=%s:%s" title="Count: %s">%s</a></font>\n' % \
- (min(1 + count * 7 / max([tag[1] for tag in tags]), 7), query, word, format(count, ',d'), word)) \
- for (word, count) in tags])
- return ' '.join([('<font size=%d><a href="/history/?query=%s:%s" title="Count: %s">%s</a></font>\n' % \
- (min(1 + count * 7 / max([tag[1] for tag in tags]), 7), query, word, format(count, ',d'), calendar.month_name[int(word)])) \
- for (word, count) in tags])
-
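-# Usage sketch (editor's illustration): this is how the feed page combines the
-# two helpers (see pyAggr3g470r.py); `articles` is an iterable of documents
-# with an "article_content" field.
-#
-#   words = top_words(articles, n=50, size=6)
-#   cloud_html = tag_cloud(words)   # query="word_count" by default
-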
-def send_mail(mfrom, mto, feed_title, article_title, description):
- """
- Send the article via mail.
- """
- # Create the body of the message (a plain-text and an HTML version).
- html = """<html>\n<head>\n<title>%s</title>\n</head>\n<body>\n%s\n</body>\n</html>""" % \
- (feed_title + ": " + article_title, description)
- text = clear_string(description)
-
- # Create message container - the correct MIME type is multipart/alternative.
- msg = MIMEMultipart('alternative')
- msg['Subject'] = '[pyAggr3g470r] ' + feed_title + ": " + article_title
- msg['From'] = mfrom
- msg['To'] = mto
-
- # Record the MIME types of both parts - text/plain and text/html.
- part1 = MIMEText(text, 'plain', 'utf-8')
- part2 = MIMEText(html, 'html', 'utf-8')
-
- # Attach parts into message container.
- # According to RFC 2046, the last part of a multipart message, in this case
- # the HTML message, is best and preferred.
- msg.attach(part1)
- msg.attach(part2)
-
- # Send the message via local SMTP server.
- try:
- s = smtplib.SMTP(conf.smtp_server)
- s.login(conf.username, conf.password)
- except Exception as e:
- print(e)
- else:
- s.send_message(msg)
- s.quit()
-
-def add_feed(feed_url):
- """
- Add the URL feed_url to the file feed.lst.
- """
- with opened_w_error(conf.FEED_LIST, "r") as (f, err):
- if err:
- return False
- else:
- lines = f.readlines()
- lines = list(map(str.strip, lines))
- if feed_url in lines:
- return False
- lines.append(feed_url)
- with open(conf.FEED_LIST, "w") as f:
- f.write("\n".join(lines))
- return True
-
-def change_feed_url(old_feed_url, new_feed_url):
- """
- Change the URL of the feed given as parameter.
- """
- # Replace the URL in the text file
- with opened_w_error(conf.FEED_LIST, "r") as (f, err):
- if err:
- return False
- else:
- lines = f.readlines()
- lines = list(map(str.strip, lines))
- try:
- lines[lines.index(old_feed_url)] = new_feed_url
- except:
- return False
- with opened_w_error(conf.FEED_LIST, "w") as (f, err):
- if err:
- return False
- else:
- f.write("\n".join(lines))
- return True
-
-def remove_feed(feed_url):
- """
- Remove a feed from the file feed.lst and from the database.
- """
- with opened_w_error(conf.FEED_LIST, "r") as (f, err):
- if err:
- return False
- else:
- lines = f.readlines()
- lines = list(map(str.strip, lines))
- try:
- del lines[lines.index(feed_url)]
- except:
- return False
- with opened_w_error(conf.FEED_LIST, "w") as (f, err):
- if err:
- return False
- else:
- f.write("\n".join(lines))
- return True
-
-def search_feed(url):
- """
- Search for a feed link in an HTML page.
- """
- soup, page = None, None
- try:
- result = open_url(url)
- if result[0]:
- page = result[1]
- else:
- return None
- soup = BeautifulSoup(page)
- except:
- return None
- feed_links = soup('link', type='application/atom+xml')
- feed_links.extend(soup('link', type='application/rss+xml'))
- for feed_link in feed_links:
- if url not in feed_link['href']:
- return urllib.parse.urljoin(url, feed_link['href'])
- return feed_link['href']
- return None
bgstack15