| field | value | date |
|---|---|---|
| author | Cédric Bonhomme <kimble.mandel+bitbucket@gmail.com> | 2014-06-09 19:05:32 +0200 |
| committer | Cédric Bonhomme <kimble.mandel+bitbucket@gmail.com> | 2014-06-09 19:05:32 +0200 |
| commit | 1b73a3a4a73f231f6c1c2a3258e561b1301ca6af (patch) | |
| tree | ea21bacea92a0a267f8286eb12f2d81cefa1a3c4 | |
| parent | A problem with urllib.quote() has been detected with the url: http://standblo... (diff) | |
| parent | fixing parsing for already quoted url as well (diff) | |
| download | newspipe-1b73a3a4a73f231f6c1c2a3258e561b1301ca6af.tar.gz newspipe-1b73a3a4a73f231f6c1c2a3258e561b1301ca6af.tar.bz2 newspipe-1b73a3a4a73f231f6c1c2a3258e561b1301ca6af.zip | |
Merged in jaesivsm/pyaggr3g470r (pull request #2)
making pyAggr3g470r runnable by Apache
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | bootstrap.py | 22 |
| -rw-r--r-- | conf.py | 12 |
| -rw-r--r-- | conf/conf.cfg-sample | 6 |
| -rwxr-xr-x | db_create.py | 4 |
| -rwxr-xr-x | fetch.py | 5 |
| -rw-r--r-- | pyaggr3g470r/__init__.py | 6 |
| -rw-r--r-- | pyaggr3g470r/crawler.py | 42 |
| -rw-r--r-- | pyaggr3g470r/emails.py | 23 |
| -rwxr-xr-x | pyaggr3g470r/log.py | 69 |
| -rwxr-xr-x | pyaggr3g470r/utils.py | 15 |
| -rw-r--r-- | pyaggr3g470r/views.py | 11 |
| -rwxr-xr-x | runserver.py | 10 |
12 files changed, 96 insertions, 129 deletions
```diff
diff --git a/bootstrap.py b/bootstrap.py
new file mode 100644
index 00000000..cadefcca
--- /dev/null
+++ b/bootstrap.py
@@ -0,0 +1,22 @@
+# required imports and code exection for basic functionning
+
+import sys
+if 'threading' in sys.modules:
+    raise Exception('threading module loaded before patching!')
+import gevent.monkey
+gevent.monkey.patch_thread()
+
+import conf
+import logging
+
+
+def set_logging(log_path, log_level=logging.INFO,
+                log_format='%(asctime)s %(levelname)s %(message)s'):
+    logger = logging.getLogger('pyaggr3g470r')
+    formater = logging.Formatter(log_format)
+    handler = logging.FileHandler(log_path)
+    handler.setFormatter(formater)
+    logger.addHandler(handler)
+    logger.setLevel(log_level)
+
+set_logging(conf.LOG_PATH)
diff --git a/conf.py b/conf.py
--- a/conf.py
+++ b/conf.py
@@ -6,7 +6,7 @@
 This file contain the variables used by the application.
 """
 
-import os, sys
+import os
 
 basedir = os.path.abspath(os.path.dirname(__file__))
 PATH = os.path.abspath(".")
@@ -31,11 +31,12 @@ if not ON_HEROKU:
     import ConfigParser as confparser
 
     # load the configuration
    config = confparser.SafeConfigParser()
-    config.read("./conf/conf.cfg")
+    config.read(os.path.join(basedir, "conf/conf.cfg"))
 
     PLATFORM_URL = config.get('misc', 'platform_url')
     RECAPTCHA_PUBLIC_KEY = config.get('misc', 'recaptcha_public_key')
     RECAPTCHA_PRIVATE_KEY = config.get('misc', 'recaptcha_private_key')
+    LOG_PATH = config.get('misc', 'log_path')
 
     WHOOSH_ENABLED = True
@@ -48,6 +49,7 @@ if not ON_HEROKU:
     WEBSERVER_DEBUG = int(config.get('webserver', 'debug')) == 1
     WEBSERVER_HOST = config.get('webserver', 'host')
     WEBSERVER_PORT = int(config.get('webserver', 'port'))
+    WEBSERVER_SECRET = config.get('webserver', 'secret')
 
     ADMIN_EMAIL = config.get('mail', 'admin_email')
     MAIL_ENABLED = int(config.get('mail', 'enabled')) == 1
@@ -57,14 +59,15 @@ if not ON_HEROKU:
     MAIL_SSL = int(config.get('mail', 'ssl')) == 1
     MAIL_USERNAME = config.get('mail', 'username')
     MAIL_PASSWORD = config.get('mail', 'password')
-    
+
     WEBZINE_ROOT = PATH + "/pyaggr3g470r/var/export/"
 
 else:
     PLATFORM_URL = os.environ.get('PLATFORM_URL', 'https://pyaggr3g470r.herokuapp.com/')
     RECAPTCHA_PUBLIC_KEY = os.environ.get('RECAPTCHA_PUBLIC_KEY', '')
     RECAPTCHA_PRIVATE_KEY = os.environ.get('RECAPTCHA_PRIVATE_KEY', '')
-    
+    LOG_PATH = os.environ.get('LOG_PATH', 'pyaggr3g470r.log')
+
     SQLALCHEMY_DATABASE_URI = os.environ['DATABASE_URL']
 
     HTTP_PROXY = ""
@@ -74,6 +77,7 @@ else:
     WEBSERVER_DEBUG = False
     WEBSERVER_HOST = '0.0.0.0'
     WEBSERVER_PORT = int(os.environ.get('PORT', 5000))
+    WEBSERVER_SECRET = os.environ.get('SECRET_KEY', None)
 
     MAIL_ENABLED = True
     ADMIN_EMAIL = os.environ.get('ADMIN_EMAIL', '')
diff --git a/conf/conf.cfg-sample b/conf/conf.cfg-sample
index a7a3d4cf..8f0b7aec 100644
--- a/conf/conf.cfg-sample
+++ b/conf/conf.cfg-sample
@@ -1,18 +1,20 @@
 [misc]
 platform_url = https://pyaggr3g470r.herokuapp.com/
-admin_platform_email = 
+admin_platform_email =
 recaptcha_public_key =
 recaptcha_private_key =
+log_path = ./pyaggr3g470r/var/pyaggr3g470r.log
 [database]
 uri = postgres://cedric:password@127.0.0.1:5432/pyAggr3g470r
 [feedparser]
-http_proxy = 
+http_proxy =
 user_agent = pyAggr3g470r (https://bitbucket.org/cedricbonhomme/pyaggr3g470r)
 resolve_article_url = 0
 [webserver]
 debug = 1
 host = 0.0.0.0
 port = 5000
+secret = a secret only you know
 [mail]
 enabled = 0
 admin_email = pyAggr3g470r@no-reply.com
diff --git a/db_create.py b/db_create.py
index f92108b6..03bbb7f0 100755
--- a/db_create.py
+++ b/db_create.py
@@ -8,6 +8,8 @@
 __revision__ = "$Date: 2014/04/12 $"
 __copyright__ = "Copyright (c) Cedric Bonhomme"
 __license__ = "AGPLv3"
+import bootstrap
+
 from pyaggr3g470r import db
 from pyaggr3g470r.models import User, Role
 from werkzeug import generate_password_hash
@@ -74,4 +76,4 @@ user1 = User(nickname="admin",
 user1.roles.extend([role_admin, role_user])
 
 db.session.add(user1)
-db.session.commit()
\ No newline at end of file
+db.session.commit()
diff --git a/fetch.py b/fetch.py
--- a/fetch.py
+++ b/fetch.py
@@ -7,10 +7,7 @@
 # */30 * * * * cd ~/.pyaggr3g470r/ ; python fetch.py
 # to fetch articles every 30 minutes.
 import sys
-if 'threading' in sys.modules:
-    raise Exception('threading module loaded before patching!')
-import gevent.monkey
-gevent.monkey.patch_thread()
+import bootstrap
 
 from pyaggr3g470r import crawler
 from pyaggr3g470r.models import User
diff --git a/pyaggr3g470r/__init__.py b/pyaggr3g470r/__init__.py
index 9dae02cb..5044d344 100644
--- a/pyaggr3g470r/__init__.py
+++ b/pyaggr3g470r/__init__.py
@@ -12,10 +12,12 @@ import conf
 
 # Create Flask application
 app = Flask(__name__)
-app.debug = True
+app.debug = conf.WEBSERVER_DEBUG
 
 # Create dummy secrey key so we can use sessions
-app.config['SECRET_KEY'] = os.urandom(12)
+app.config['SECRET_KEY'] = getattr(conf, 'WEBSERVER_SECRET', None)
+if not app.config['SECRET_KEY']:
+    app.config['SECRET_KEY'] = os.urandom(12)
 app.config['SQLALCHEMY_DATABASE_URI'] = conf.SQLALCHEMY_DATABASE_URI
 
 db = SQLAlchemy(app)
diff --git a/pyaggr3g470r/crawler.py b/pyaggr3g470r/crawler.py
index 8f88e2d5..b505ff2a 100644
--- a/pyaggr3g470r/crawler.py
+++ b/pyaggr3g470r/crawler.py
@@ -27,6 +27,7 @@ __copyright__ = "Copyright (c) Cedric Bonhomme"
 __license__ = "AGPLv3"
 
 import re
+import logging
 import feedparser
 import urllib2
 import requests
@@ -34,14 +35,13 @@ import dateutil.parser
 from bs4 import BeautifulSoup
 from datetime import datetime
 from sqlalchemy.exc import IntegrityError
-from requests.exceptions import *
+#from requests.exceptions import *
 
 import gevent.monkey
 gevent.monkey.patch_all()
 from gevent import Timeout
 from gevent.pool import Pool
 
-import log
 import utils
 import conf
 import emails
@@ -51,7 +51,7 @@ if not conf.ON_HEROKU:
     import search as fastsearch
 
 
-pyaggr3g470r_log = log.Log("feedgetter")
+logger = logging.getLogger(__name__)
 
 
 class TooLong(Exception):
@@ -59,7 +59,7 @@ class TooLong(Exception):
         """
         Log a when greenlet took to long to fetch a resource.
         """
-        pyaggr3g470r_log.warning("Greenlet took to long")
+        logger.warning("Greenlet took to long")
 
 
 class FeedGetter(object):
@@ -88,7 +88,7 @@ class FeedGetter(object):
         """
         Launch the processus.
         """
-        pyaggr3g470r_log.info("Starting to retrieve feeds.")
+        logger.info("Starting to retrieve feeds.")
 
         # 1 - Get the list of feeds to fetch
         user = User.query.filter(User.email == self.user.email).first()
@@ -113,7 +113,7 @@
         if not conf.ON_HEROKU and conf.MAIL_ENABLED:
             self.mail_notification(new_articles)
 
-        pyaggr3g470r_log.info("All articles retrieved. End of the processus.")
+        logger.info("All articles retrieved. End of the processus.")
 
     def retrieve_async(self, feeds):
         """
@@ -124,7 +124,7 @@
         """
         Fetch a feed.
         """
-        pyaggr3g470r_log.info("Fetching the feed:" + feed.title)
+        logger.info("Fetching the feed:" + feed.title)
         a_feed = feedparser.parse(feed.link, handlers=[self.proxy])
         if a_feed['entries'] == []:
             return
@@ -153,12 +153,12 @@
                                  proxies=self.proxies)
                 nice_url = r.url.encode("utf-8")
             except Timeout:
-                pyaggr3g470r_log.warning(
+                logger.warning(
                     "Timeout when getting the real URL of %s.",
                     article.link)
                 continue
             except Exception as error:
-                pyaggr3g470r_log.warning(
+                logger.warning(
                     "Unable to get the real URL of %s. Error: %s",
                     article.link, error)
                 continue
@@ -177,7 +177,7 @@
             try:
                 description = BeautifulSoup(description, "lxml").decode()
             except:
-                pyaggr3g470r_log.error("Problem when sanitizing the content of the article %s (%s)",
+                logger.error("Problem when sanitizing the content of the article %s (%s)",
                                        article_title, nice_url)
 
             post_date = None
@@ -219,7 +219,7 @@
         """
        Insert articles in the database.
         """
-        pyaggr3g470r_log.info("Database insertion...")
+        logger.info("Database insertion...")
         new_articles = []
 
         for feed, articles in elements:
@@ -230,8 +230,8 @@
                                                Article.feed_id == feed.id,
                                                Article.link == article.link).first()
                 if exist is not None:
-                    pyaggr3g470r_log.error("Article %s (%s) already in the database." %
-                                           (article.title, article.link))
+                    logger.debug("Article %r (%r) already in the database.",
+                                 article.title, article.link)
                     continue
                 if article.date is None:
                     article.date = datetime.now(dateutil.tz.tzlocal())
@@ -242,15 +242,16 @@
                     feed.articles.append(article)
                     #db.session.merge(article)
                     db.session.commit()
-                    pyaggr3g470r_log.info("New article %s (%s) added." % (article.title, article.link))
+                    logger.info("New article %r (%r) added.",
+                                article.title, article.link)
                 except IntegrityError:
-                    pyaggr3g470r_log.error("Article %s (%s) already in the database." %
-                                           (article.title, article.link))
+                    logger.debug("Article %r (%r) already in the database.",
+                                 article.title, article.link)
                     articles.remove(article)
                     db.session.rollback()
                     continue
                 except Exception as e:
-                    pyaggr3g470r_log.error("Error when inserting article in database: " + str(e))
+                    logger.error("Error when inserting article in database: " + str(e))
                     continue
         #db.session.close()
         return new_articles
@@ -259,7 +260,7 @@
         """
         Index new articles.
         """
-        pyaggr3g470r_log.info("Indexing new articles.")
+        logger.info("Indexing new articles.")
         for element in new_articles:
             article = Article.query.filter(Article.user_id == self.user.id,
                                            Article.link == element.link).first()
@@ -267,16 +268,15 @@
                 fastsearch.add_to_index(self.user.id, [article],
                                         article.source)
             except:
-                pyaggr3g470r_log.error("Problem during indexation.")
+                logger.exception("Problem during indexation:")
         return True
 
     def mail_notification(self, new_articles):
         """
         Mail notification.
         """
-        pyaggr3g470r_log.info("Starting mail notification.")
+        logger.info("Starting mail notification.")
         for element in new_articles:
             if element.source.email_notification:
                 emails.new_article_notification(self.user, element.source, element)
 
-        return True
diff --git a/pyaggr3g470r/emails.py b/pyaggr3g470r/emails.py
index 6f424c31..ae73c0e1 100644
--- a/pyaggr3g470r/emails.py
+++ b/pyaggr3g470r/emails.py
@@ -1,26 +1,27 @@
 #! /usr/bin/env python
 # -*- coding: utf-8 -*-
 
+import logging
 import smtplib
 from email.mime.multipart import MIMEMultipart
 from email.mime.text import MIMEText
 
 from postmark import PMMail
 
-import log
 import utils
 import conf
 from decorators import async
 
-pyaggr3g470r_log = log.Log("mail")
+logger = logging.getLogger(__name__)
+
 
 @async
 def send_async_email(mfrom, mto, msg):
     try:
         s = smtplib.SMTP(conf.MAIL_HOST)
         s.login(conf.MAIL_USERNAME, conf.MAIL_PASSWORD)
-    except Exception as e:
-        pyaggr3g470r_log.error(str(e))
+    except Exception:
+        logger.exception('send_async_email raised:')
     else:
         s.sendmail(mfrom, mto, msg.as_string())
         s.quit()
@@ -50,12 +51,12 @@ def send_email(mfrom, mto, feed, article):
     # the HTML message, is best and preferred.
     msg.attach(part1)
     msg.attach(part2)
-    
+
     try:
         s = smtplib.SMTP(conf.MAIL_HOST)
         s.login(conf.MAIL_USERNAME, conf.MAIL_PASSWORD)
-    except Exception as e:
-        pyaggr3g470r_log.error(str(e))
+    except Exception:
+        logger.exception("send_email raised:")
     else:
         s.sendmail(mfrom, mto, msg.as_string())
         s.quit()
@@ -81,9 +82,10 @@ def send_heroku(user=None, bcc="", subject="", plaintext=""):
         message.to = user.email
         message.send()
     except Exception as e:
-        pyaggr3g470r_log.error(str(e))
+        logger.exception("send_heroku raised:")
         raise e
 
+
 def information_message(subject, plaintext):
     """
     Send an information message to the users of the platform.
@@ -103,6 +105,7 @@ def information_message(subject, plaintext):
     else:
         pass
 
+
 def new_account_notification(user):
     """
     Account creation notification.
@@ -114,6 +117,7 @@ def new_account_notification(user):
     else:
         pass
 
+
 def new_account_activation(user):
     """
     Account activation notification.
@@ -125,8 +129,9 @@ def new_account_activation(user):
     else:
         pass
 
+
 def new_article_notification(user, feed, article):
     if conf.ON_HEROKU:
         pass
     else:
-        send_email(conf.ADMIN_EMAIL, user.email, feed, article)
\ No newline at end of file
+        send_email(conf.ADMIN_EMAIL, user.email, feed, article)
diff --git a/pyaggr3g470r/log.py b/pyaggr3g470r/log.py
deleted file mode 100755
index 22834e71..00000000
--- a/pyaggr3g470r/log.py
+++ /dev/null
@@ -1,69 +0,0 @@
-#! /usr/bin/env python
-#-*- coding: utf-8 -*-
-
-# pyAggr3g470r - A Web based news aggregator.
-# Copyright (C) 2010-2013 Cédric Bonhomme - http://cedricbonhomme.org/
-#
-# For more information : http://bitbucket.org/cedricbonhomme/pyaggr3g470r/
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>
-
-__author__ = "Cedric Bonhomme"
-__version__ = "$Revision: 0.2 $"
-__date__ = "$Date: 2012/10/12 $"
-__revision__ = "$Date: 2013/12/26 $"
-__copyright__ = "Copyright (c) Cedric Bonhomme"
-__license__ = "GPLv3"
-
-import logging
-
-class Log(object):
-    """
-    Log events. Especially events relative to authentication.
-    """
-    def __init__(self, module_name):
-        """
-        Initialization of the logger.
-        """
-        self.logger = logging.getLogger(module_name)
-        self.logger.propagate = False
-        hdlr = logging.FileHandler('./pyaggr3g470r/var/pyaggr3g470r.log')
-        formater = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
-        hdlr.setFormatter(formater)
-        self.logger.addHandler(hdlr)
-        self.logger.setLevel(logging.INFO)
-
-    def info(self, message):
-        """
-        Log notices.
-        """
-        self.logger.info(message)
-
-    def warning(self, message):
-        """
-        Log warnings.
-        """
-        self.logger.warning(message)
-
-    def error(self, message):
-        """
-        Log errors.
-        """
-        self.logger.warning(message)
-
-    def critical(self, message):
-        """
-        Log critical errors.
-        """
-        self.logger.critical(message)
diff --git a/pyaggr3g470r/utils.py b/pyaggr3g470r/utils.py
index 1880aaa0..50549587 100755
--- a/pyaggr3g470r/utils.py
+++ b/pyaggr3g470r/utils.py
@@ -38,6 +38,7 @@ import re
 import glob
 import opml
 import json
+import logging
 import datetime
 import operator
 import urllib
@@ -59,8 +60,7 @@ url_finders = [
     re.compile("'\\<((mailto:)|)[-A-Za-z0-9\\.]+@[-A-Za-z0-9\\.]+") \
     ]
 
-#import log
-#pyaggr3g470r_log = log.Log()
+logger = logging.getLogger(__name__)
 
 
 @contextmanager
@@ -76,15 +76,16 @@ def opened_w_error(filename, mode="r"):
         f.close()
 
 
-def import_opml(email, opml_file):
+def import_opml(email, opml_content):
     """
     Import new feeds from an OPML file.
     """
     user = User.query.filter(User.email == email).first()
     try:
-        subscriptions = opml.parse(opml_file)
-    except Exception as e:
-        raise e
+        subscriptions = opml.from_string(opml_content)
+    except:
+        logger.exception("Parsing OPML file failed:")
+        raise
 
     def read(subsubscription, nb=0):
         """
@@ -191,7 +192,7 @@ def clean_url(url):
     return urlunparse([
         parsed_url.scheme,
         parsed_url.netloc,
-        parsed_url.path,
+        urllib.quote(urllib.unquote(parsed_url.path)),
         parsed_url.params,
         urllib.urlencode(filtered, doseq=True),
         parsed_url.fragment
diff --git a/pyaggr3g470r/views.py b/pyaggr3g470r/views.py
index eda9c399..1dd651e5 100644
--- a/pyaggr3g470r/views.py
+++ b/pyaggr3g470r/views.py
@@ -152,6 +152,7 @@ def login():
         return redirect(url_for('home'))
     return render_template('login.html', form=form)
 
+
 @app.route('/logout/')
 @login_required
 def logout():
@@ -298,14 +299,14 @@ def article(article_id=None):
     if not article.readed:
         article.readed = True
         db.session.commit()
-    
+
     previous_article = article.previous_article()
     if previous_article is None:
         previous_article = article.source.articles[0]
     next_article = article.next_article()
     if next_article is None:
         next_article = article.source.articles[-1]
-    
+
     return render_template('article.html', head_title=utils.clear_string(article.title),
                            article=article, previous_article=previous_article,
                            next_article=next_article)
@@ -558,12 +559,10 @@ def management():
             if not allowed_file(data.filename):
                 flash(gettext('File not allowed.'), 'danger')
             else:
-                opml_path = os.path.join("./pyaggr3g470r/var/", data.filename)
-                data.save(opml_path)
                 try:
-                    nb = utils.import_opml(g.user.email, opml_path)
+                    nb = utils.import_opml(g.user.email, data.read())
                     flash(str(nb) + ' ' + gettext('feeds imported.'), "success")
-                except Exception as e:
+                except:
                     flash(gettext("Impossible to import the new feeds."), "danger")
     elif None != request.files.get('jsonfile', None):
         # Import an account
diff --git a/runserver.py b/runserver.py
index 92c02609..b4e138f3 100755
--- a/runserver.py
+++ b/runserver.py
@@ -19,8 +19,10 @@
 # You should have received a copy of the GNU Affero General Public License
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 
-import conf
-from pyaggr3g470r import app
+from bootstrap import conf
+from pyaggr3g470r import app as application
 
-app.run(host=conf.WEBSERVER_HOST, port=conf.WEBSERVER_PORT,
-        debug=conf.WEBSERVER_DEBUG)
+if __name__ == '__main__':
+    application.run(host=conf.WEBSERVER_HOST,
+                    port=conf.WEBSERVER_PORT,
+                    debug=conf.WEBSERVER_DEBUG)
```