From 16ec162838eb8ab891f5b04351bb202d84a2b834 Mon Sep 17 00:00:00 2001 From: François Schmidts Date: Mon, 9 Jun 2014 14:04:38 +0200 Subject: making pyagregator runnable by apache * adding bootstrap module for basic import * redoing logging (config, proper use of the logging module) * making secret part of config (random wouldn't work with apache since it uses different instances of python) * making server entry point not executing application if just imported * not writing file for opml when we can read it from memory --- pyaggr3g470r/__init__.py | 6 +++-- pyaggr3g470r/crawler.py | 42 ++++++++++++++--------------- pyaggr3g470r/emails.py | 23 +++++++++------- pyaggr3g470r/log.py | 69 ------------------------------------------------ pyaggr3g470r/utils.py | 13 ++++----- pyaggr3g470r/views.py | 11 ++++---- 6 files changed, 51 insertions(+), 113 deletions(-) delete mode 100755 pyaggr3g470r/log.py (limited to 'pyaggr3g470r') diff --git a/pyaggr3g470r/__init__.py b/pyaggr3g470r/__init__.py index 9dae02cb..5044d344 100644 --- a/pyaggr3g470r/__init__.py +++ b/pyaggr3g470r/__init__.py @@ -12,10 +12,12 @@ import conf # Create Flask application app = Flask(__name__) -app.debug = True +app.debug = conf.WEBSERVER_DEBUG # Create dummy secrey key so we can use sessions -app.config['SECRET_KEY'] = os.urandom(12) +app.config['SECRET_KEY'] = getattr(conf, 'WEBSERVER_SECRET', None) +if not app.config['SECRET_KEY']: + app.config['SECRET_KEY'] = os.urandom(12) app.config['SQLALCHEMY_DATABASE_URI'] = conf.SQLALCHEMY_DATABASE_URI db = SQLAlchemy(app) diff --git a/pyaggr3g470r/crawler.py b/pyaggr3g470r/crawler.py index 8f88e2d5..b505ff2a 100644 --- a/pyaggr3g470r/crawler.py +++ b/pyaggr3g470r/crawler.py @@ -27,6 +27,7 @@ __copyright__ = "Copyright (c) Cedric Bonhomme" __license__ = "AGPLv3" import re +import logging import feedparser import urllib2 import requests @@ -34,14 +35,13 @@ import dateutil.parser from bs4 import BeautifulSoup from datetime import datetime from sqlalchemy.exc import IntegrityError -from requests.exceptions import * +#from requests.exceptions import * import gevent.monkey gevent.monkey.patch_all() from gevent import Timeout from gevent.pool import Pool -import log import utils import conf import emails @@ -51,7 +51,7 @@ if not conf.ON_HEROKU: import search as fastsearch -pyaggr3g470r_log = log.Log("feedgetter") +logger = logging.getLogger(__name__) class TooLong(Exception): @@ -59,7 +59,7 @@ class TooLong(Exception): """ Log a when greenlet took to long to fetch a resource. """ - pyaggr3g470r_log.warning("Greenlet took to long") + logger.warning("Greenlet took to long") class FeedGetter(object): @@ -88,7 +88,7 @@ class FeedGetter(object): """ Launch the processus. """ - pyaggr3g470r_log.info("Starting to retrieve feeds.") + logger.info("Starting to retrieve feeds.") # 1 - Get the list of feeds to fetch user = User.query.filter(User.email == self.user.email).first() @@ -113,7 +113,7 @@ class FeedGetter(object): if not conf.ON_HEROKU and conf.MAIL_ENABLED: self.mail_notification(new_articles) - pyaggr3g470r_log.info("All articles retrieved. End of the processus.") + logger.info("All articles retrieved. End of the processus.") def retrieve_async(self, feeds): """ @@ -124,7 +124,7 @@ class FeedGetter(object): """ Fetch a feed. """ - pyaggr3g470r_log.info("Fetching the feed:" + feed.title) + logger.info("Fetching the feed:" + feed.title) a_feed = feedparser.parse(feed.link, handlers=[self.proxy]) if a_feed['entries'] == []: return @@ -153,12 +153,12 @@ class FeedGetter(object): proxies=self.proxies) nice_url = r.url.encode("utf-8") except Timeout: - pyaggr3g470r_log.warning( + logger.warning( "Timeout when getting the real URL of %s.", article.link) continue except Exception as error: - pyaggr3g470r_log.warning( + logger.warning( "Unable to get the real URL of %s. Error: %s", article.link, error) continue @@ -177,7 +177,7 @@ class FeedGetter(object): try: description = BeautifulSoup(description, "lxml").decode() except: - pyaggr3g470r_log.error("Problem when sanitizing the content of the article %s (%s)", + logger.error("Problem when sanitizing the content of the article %s (%s)", article_title, nice_url) post_date = None @@ -219,7 +219,7 @@ class FeedGetter(object): """ Insert articles in the database. """ - pyaggr3g470r_log.info("Database insertion...") + logger.info("Database insertion...") new_articles = [] for feed, articles in elements: @@ -230,8 +230,8 @@ class FeedGetter(object): Article.feed_id == feed.id, Article.link == article.link).first() if exist is not None: - pyaggr3g470r_log.error("Article %s (%s) already in the database." % - (article.title, article.link)) + logger.debug("Article %r (%r) already in the database.", + article.title, article.link) continue if article.date is None: article.date = datetime.now(dateutil.tz.tzlocal()) @@ -242,15 +242,16 @@ class FeedGetter(object): feed.articles.append(article) #db.session.merge(article) db.session.commit() - pyaggr3g470r_log.info("New article %s (%s) added." % (article.title, article.link)) + logger.info("New article %r (%r) added.", + article.title, article.link) except IntegrityError: - pyaggr3g470r_log.error("Article %s (%s) already in the database." % - (article.title, article.link)) + logger.debug("Article %r (%r) already in the database.", + article.title, article.link) articles.remove(article) db.session.rollback() continue except Exception as e: - pyaggr3g470r_log.error("Error when inserting article in database: " + str(e)) + logger.error("Error when inserting article in database: " + str(e)) continue #db.session.close() return new_articles @@ -259,7 +260,7 @@ class FeedGetter(object): """ Index new articles. """ - pyaggr3g470r_log.info("Indexing new articles.") + logger.info("Indexing new articles.") for element in new_articles: article = Article.query.filter(Article.user_id == self.user.id, Article.link == element.link).first() @@ -267,16 +268,15 @@ class FeedGetter(object): fastsearch.add_to_index(self.user.id, [article], article.source) except: - pyaggr3g470r_log.error("Problem during indexation.") + logger.exception("Problem during indexation:") return True def mail_notification(self, new_articles): """ Mail notification. """ - pyaggr3g470r_log.info("Starting mail notification.") + logger.info("Starting mail notification.") for element in new_articles: if element.source.email_notification: emails.new_article_notification(self.user, element.source, element) - return True diff --git a/pyaggr3g470r/emails.py b/pyaggr3g470r/emails.py index 6f424c31..ae73c0e1 100644 --- a/pyaggr3g470r/emails.py +++ b/pyaggr3g470r/emails.py @@ -1,26 +1,27 @@ #! /usr/bin/env python # -*- coding: utf-8 -*- +import logging import smtplib from email.mime.multipart import MIMEMultipart from email.mime.text import MIMEText from postmark import PMMail -import log import utils import conf from decorators import async -pyaggr3g470r_log = log.Log("mail") +logger = logging.getLogger(__name__) + @async def send_async_email(mfrom, mto, msg): try: s = smtplib.SMTP(conf.MAIL_HOST) s.login(conf.MAIL_USERNAME, conf.MAIL_PASSWORD) - except Exception as e: - pyaggr3g470r_log.error(str(e)) + except Exception: + logger.exception('send_async_email raised:') else: s.sendmail(mfrom, mto, msg.as_string()) s.quit() @@ -50,12 +51,12 @@ def send_email(mfrom, mto, feed, article): # the HTML message, is best and preferred. msg.attach(part1) msg.attach(part2) - + try: s = smtplib.SMTP(conf.MAIL_HOST) s.login(conf.MAIL_USERNAME, conf.MAIL_PASSWORD) - except Exception as e: - pyaggr3g470r_log.error(str(e)) + except Exception: + logger.exception("send_email raised:") else: s.sendmail(mfrom, mto, msg.as_string()) s.quit() @@ -81,9 +82,10 @@ def send_heroku(user=None, bcc="", subject="", plaintext=""): message.to = user.email message.send() except Exception as e: - pyaggr3g470r_log.error(str(e)) + logger.exception("send_heroku raised:") raise e + def information_message(subject, plaintext): """ Send an information message to the users of the platform. @@ -103,6 +105,7 @@ def information_message(subject, plaintext): else: pass + def new_account_notification(user): """ Account creation notification. @@ -114,6 +117,7 @@ def new_account_notification(user): else: pass + def new_account_activation(user): """ Account activation notification. @@ -125,8 +129,9 @@ def new_account_activation(user): else: pass + def new_article_notification(user, feed, article): if conf.ON_HEROKU: pass else: - send_email(conf.ADMIN_EMAIL, user.email, feed, article) \ No newline at end of file + send_email(conf.ADMIN_EMAIL, user.email, feed, article) diff --git a/pyaggr3g470r/log.py b/pyaggr3g470r/log.py deleted file mode 100755 index 22834e71..00000000 --- a/pyaggr3g470r/log.py +++ /dev/null @@ -1,69 +0,0 @@ -#! /usr/bin/env python -#-*- coding: utf-8 -*- - -# pyAggr3g470r - A Web based news aggregator. -# Copyright (C) 2010-2013 Cédric Bonhomme - http://cedricbonhomme.org/ -# -# For more information : http://bitbucket.org/cedricbonhomme/pyaggr3g470r/ -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see - -__author__ = "Cedric Bonhomme" -__version__ = "$Revision: 0.2 $" -__date__ = "$Date: 2012/10/12 $" -__revision__ = "$Date: 2013/12/26 $" -__copyright__ = "Copyright (c) Cedric Bonhomme" -__license__ = "GPLv3" - -import logging - -class Log(object): - """ - Log events. Especially events relative to authentication. - """ - def __init__(self, module_name): - """ - Initialization of the logger. - """ - self.logger = logging.getLogger(module_name) - self.logger.propagate = False - hdlr = logging.FileHandler('./pyaggr3g470r/var/pyaggr3g470r.log') - formater = logging.Formatter('%(asctime)s %(levelname)s %(message)s') - hdlr.setFormatter(formater) - self.logger.addHandler(hdlr) - self.logger.setLevel(logging.INFO) - - def info(self, message): - """ - Log notices. - """ - self.logger.info(message) - - def warning(self, message): - """ - Log warnings. - """ - self.logger.warning(message) - - def error(self, message): - """ - Log errors. - """ - self.logger.warning(message) - - def critical(self, message): - """ - Log critical errors. - """ - self.logger.critical(message) diff --git a/pyaggr3g470r/utils.py b/pyaggr3g470r/utils.py index 320c49ce..4acaafc8 100755 --- a/pyaggr3g470r/utils.py +++ b/pyaggr3g470r/utils.py @@ -38,6 +38,7 @@ import re import glob import opml import json +import logging import datetime import operator import urllib @@ -59,8 +60,7 @@ url_finders = [ re.compile("'\\<((mailto:)|)[-A-Za-z0-9\\.]+@[-A-Za-z0-9\\.]+") \ ] -#import log -#pyaggr3g470r_log = log.Log() +logger = logging.getLogger(__name__) @contextmanager @@ -76,15 +76,16 @@ def opened_w_error(filename, mode="r"): f.close() -def import_opml(email, opml_file): +def import_opml(email, opml_content): """ Import new feeds from an OPML file. """ user = User.query.filter(User.email == email).first() try: - subscriptions = opml.parse(opml_file) - except Exception as e: - raise e + subscriptions = opml.from_string(opml_content) + except: + logger.exception("Parsing OPML file failed:") + raise def read(subsubscription, nb=0): """ diff --git a/pyaggr3g470r/views.py b/pyaggr3g470r/views.py index eda9c399..1dd651e5 100644 --- a/pyaggr3g470r/views.py +++ b/pyaggr3g470r/views.py @@ -152,6 +152,7 @@ def login(): return redirect(url_for('home')) return render_template('login.html', form=form) + @app.route('/logout/') @login_required def logout(): @@ -298,14 +299,14 @@ def article(article_id=None): if not article.readed: article.readed = True db.session.commit() - + previous_article = article.previous_article() if previous_article is None: previous_article = article.source.articles[0] next_article = article.next_article() if next_article is None: next_article = article.source.articles[-1] - + return render_template('article.html', head_title=utils.clear_string(article.title), article=article, previous_article=previous_article, next_article=next_article) @@ -558,12 +559,10 @@ def management(): if not allowed_file(data.filename): flash(gettext('File not allowed.'), 'danger') else: - opml_path = os.path.join("./pyaggr3g470r/var/", data.filename) - data.save(opml_path) try: - nb = utils.import_opml(g.user.email, opml_path) + nb = utils.import_opml(g.user.email, data.read()) flash(str(nb) + ' ' + gettext('feeds imported.'), "success") - except Exception as e: + except: flash(gettext("Impossible to import the new feeds."), "danger") elif None != request.files.get('jsonfile', None): # Import an account -- cgit