aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Cédric Bonhomme <kimble.mandel+bitbucket@gmail.com> 2014-06-09 19:05:32 +0200
committer Cédric Bonhomme <kimble.mandel+bitbucket@gmail.com> 2014-06-09 19:05:32 +0200
commit 1b73a3a4a73f231f6c1c2a3258e561b1301ca6af (patch)
tree ea21bacea92a0a267f8286eb12f2d81cefa1a3c4
parent A problem with urllib.quote() has been detected with the url: http://standblo... (diff)
parent fixing parsing for already quoted url as well (diff)
download newspipe-1b73a3a4a73f231f6c1c2a3258e561b1301ca6af.tar.gz
newspipe-1b73a3a4a73f231f6c1c2a3258e561b1301ca6af.tar.bz2
newspipe-1b73a3a4a73f231f6c1c2a3258e561b1301ca6af.zip
Merged in jaesivsm/pyaggr3g470r (pull request #2)
making pyagregator runnable by apache
-rw-r--r-- bootstrap.py 22
-rw-r--r-- conf.py 12
-rw-r--r-- conf/conf.cfg-sample 6
-rwxr-xr-x db_create.py 4
-rwxr-xr-x fetch.py 5
-rw-r--r-- pyaggr3g470r/__init__.py 6
-rw-r--r-- pyaggr3g470r/crawler.py 42
-rw-r--r-- pyaggr3g470r/emails.py 23
-rwxr-xr-x pyaggr3g470r/log.py 69
-rwxr-xr-x pyaggr3g470r/utils.py 15
-rw-r--r-- pyaggr3g470r/views.py 11
-rwxr-xr-x runserver.py 10
12 files changed, 96 insertions, 129 deletions
diff --git a/bootstrap.py b/bootstrap.py
new file mode 100644
index 00000000..cadefcca
--- /dev/null
+++ b/bootstrap.py
@@ -0,0 +1,22 @@
+# required imports and code execution for basic functioning
+
+import sys
+if 'threading' in sys.modules:
+ raise Exception('threading module loaded before patching!')
+import gevent.monkey
+gevent.monkey.patch_thread()
+
+import conf
+import logging
+
+
+def set_logging(log_path, log_level=logging.INFO,
+ log_format='%(asctime)s %(levelname)s %(message)s'):
+ logger = logging.getLogger('pyaggr3g470r')
+ formater = logging.Formatter(log_format)
+ handler = logging.FileHandler(log_path)
+ handler.setFormatter(formater)
+ logger.addHandler(handler)
+ logger.setLevel(log_level)
+
+set_logging(conf.LOG_PATH)
diff --git a/conf.py b/conf.py
index 2ad949a8..18ca9ef9 100644
--- a/conf.py
+++ b/conf.py
@@ -6,7 +6,7 @@
This file contain the variables used by the application.
"""
-import os, sys
+import os
basedir = os.path.abspath(os.path.dirname(__file__))
PATH = os.path.abspath(".")
@@ -31,11 +31,12 @@ if not ON_HEROKU:
import ConfigParser as confparser
# load the configuration
config = confparser.SafeConfigParser()
- config.read("./conf/conf.cfg")
+ config.read(os.path.join(basedir, "conf/conf.cfg"))
PLATFORM_URL = config.get('misc', 'platform_url')
RECAPTCHA_PUBLIC_KEY = config.get('misc', 'recaptcha_public_key')
RECAPTCHA_PRIVATE_KEY = config.get('misc', 'recaptcha_private_key')
+ LOG_PATH = config.get('misc', 'log_path')
WHOOSH_ENABLED = True
@@ -48,6 +49,7 @@ if not ON_HEROKU:
WEBSERVER_DEBUG = int(config.get('webserver', 'debug')) == 1
WEBSERVER_HOST = config.get('webserver', 'host')
WEBSERVER_PORT = int(config.get('webserver', 'port'))
+ WEBSERVER_SECRET = config.get('webserver', 'secret')
ADMIN_EMAIL = config.get('mail', 'admin_email')
MAIL_ENABLED = int(config.get('mail', 'enabled')) == 1
@@ -57,14 +59,15 @@ if not ON_HEROKU:
MAIL_SSL = int(config.get('mail', 'ssl')) == 1
MAIL_USERNAME = config.get('mail', 'username')
MAIL_PASSWORD = config.get('mail', 'password')
-
+
WEBZINE_ROOT = PATH + "/pyaggr3g470r/var/export/"
else:
PLATFORM_URL = os.environ.get('PLATFORM_URL', 'https://pyaggr3g470r.herokuapp.com/')
RECAPTCHA_PUBLIC_KEY = os.environ.get('RECAPTCHA_PUBLIC_KEY', '')
RECAPTCHA_PRIVATE_KEY = os.environ.get('RECAPTCHA_PRIVATE_KEY', '')
-
+ LOG_PATH = os.environ.get('LOG_PATH', 'pyaggr3g470r.log')
+
SQLALCHEMY_DATABASE_URI = os.environ['DATABASE_URL']
HTTP_PROXY = ""
@@ -74,6 +77,7 @@ else:
WEBSERVER_DEBUG = False
WEBSERVER_HOST = '0.0.0.0'
WEBSERVER_PORT = int(os.environ.get('PORT', 5000))
+ WEBSERVER_SECRET = os.environ.get('SECRET_KEY', None)
MAIL_ENABLED = True
ADMIN_EMAIL = os.environ.get('ADMIN_EMAIL', '')
diff --git a/conf/conf.cfg-sample b/conf/conf.cfg-sample
index a7a3d4cf..8f0b7aec 100644
--- a/conf/conf.cfg-sample
+++ b/conf/conf.cfg-sample
@@ -1,18 +1,20 @@
[misc]
platform_url = https://pyaggr3g470r.herokuapp.com/
-admin_platform_email =
+admin_platform_email =
recaptcha_public_key =
recaptcha_private_key =
+log_path = ./pyaggr3g470r/var/pyaggr3g470r.log
[database]
uri = postgres://cedric:password@127.0.0.1:5432/pyAggr3g470r
[feedparser]
-http_proxy =
+http_proxy =
user_agent = pyAggr3g470r (https://bitbucket.org/cedricbonhomme/pyaggr3g470r)
resolve_article_url = 0
[webserver]
debug = 1
host = 0.0.0.0
port = 5000
+secret = a secret only you know
[mail]
enabled = 0
admin_email = pyAggr3g470r@no-reply.com
diff --git a/db_create.py b/db_create.py
index f92108b6..03bbb7f0 100755
--- a/db_create.py
+++ b/db_create.py
@@ -8,6 +8,8 @@ __revision__ = "$Date: 2014/04/12 $"
__copyright__ = "Copyright (c) Cedric Bonhomme"
__license__ = "AGPLv3"
+import bootstrap
+
from pyaggr3g470r import db
from pyaggr3g470r.models import User, Role
from werkzeug import generate_password_hash
@@ -74,4 +76,4 @@ user1 = User(nickname="admin",
user1.roles.extend([role_admin, role_user])
db.session.add(user1)
-db.session.commit() \ No newline at end of file
+db.session.commit()
diff --git a/fetch.py b/fetch.py
index cc3068e0..8684b917 100755
--- a/fetch.py
+++ b/fetch.py
@@ -7,10 +7,7 @@
# */30 * * * * cd ~/.pyaggr3g470r/ ; python fetch.py
# to fetch articles every 30 minutes.
import sys
-if 'threading' in sys.modules:
- raise Exception('threading module loaded before patching!')
-import gevent.monkey
-gevent.monkey.patch_thread()
+import bootstrap
from pyaggr3g470r import crawler
from pyaggr3g470r.models import User
diff --git a/pyaggr3g470r/__init__.py b/pyaggr3g470r/__init__.py
index 9dae02cb..5044d344 100644
--- a/pyaggr3g470r/__init__.py
+++ b/pyaggr3g470r/__init__.py
@@ -12,10 +12,12 @@ import conf
# Create Flask application
app = Flask(__name__)
-app.debug = True
+app.debug = conf.WEBSERVER_DEBUG
# Create dummy secrey key so we can use sessions
-app.config['SECRET_KEY'] = os.urandom(12)
+app.config['SECRET_KEY'] = getattr(conf, 'WEBSERVER_SECRET', None)
+if not app.config['SECRET_KEY']:
+ app.config['SECRET_KEY'] = os.urandom(12)
app.config['SQLALCHEMY_DATABASE_URI'] = conf.SQLALCHEMY_DATABASE_URI
db = SQLAlchemy(app)
diff --git a/pyaggr3g470r/crawler.py b/pyaggr3g470r/crawler.py
index 8f88e2d5..b505ff2a 100644
--- a/pyaggr3g470r/crawler.py
+++ b/pyaggr3g470r/crawler.py
@@ -27,6 +27,7 @@ __copyright__ = "Copyright (c) Cedric Bonhomme"
__license__ = "AGPLv3"
import re
+import logging
import feedparser
import urllib2
import requests
@@ -34,14 +35,13 @@ import dateutil.parser
from bs4 import BeautifulSoup
from datetime import datetime
from sqlalchemy.exc import IntegrityError
-from requests.exceptions import *
+#from requests.exceptions import *
import gevent.monkey
gevent.monkey.patch_all()
from gevent import Timeout
from gevent.pool import Pool
-import log
import utils
import conf
import emails
@@ -51,7 +51,7 @@ if not conf.ON_HEROKU:
import search as fastsearch
-pyaggr3g470r_log = log.Log("feedgetter")
+logger = logging.getLogger(__name__)
class TooLong(Exception):
@@ -59,7 +59,7 @@ class TooLong(Exception):
"""
Log a when greenlet took to long to fetch a resource.
"""
- pyaggr3g470r_log.warning("Greenlet took to long")
+ logger.warning("Greenlet took to long")
class FeedGetter(object):
@@ -88,7 +88,7 @@ class FeedGetter(object):
"""
Launch the processus.
"""
- pyaggr3g470r_log.info("Starting to retrieve feeds.")
+ logger.info("Starting to retrieve feeds.")
# 1 - Get the list of feeds to fetch
user = User.query.filter(User.email == self.user.email).first()
@@ -113,7 +113,7 @@ class FeedGetter(object):
if not conf.ON_HEROKU and conf.MAIL_ENABLED:
self.mail_notification(new_articles)
- pyaggr3g470r_log.info("All articles retrieved. End of the processus.")
+ logger.info("All articles retrieved. End of the processus.")
def retrieve_async(self, feeds):
"""
@@ -124,7 +124,7 @@ class FeedGetter(object):
"""
Fetch a feed.
"""
- pyaggr3g470r_log.info("Fetching the feed:" + feed.title)
+ logger.info("Fetching the feed:" + feed.title)
a_feed = feedparser.parse(feed.link, handlers=[self.proxy])
if a_feed['entries'] == []:
return
@@ -153,12 +153,12 @@ class FeedGetter(object):
proxies=self.proxies)
nice_url = r.url.encode("utf-8")
except Timeout:
- pyaggr3g470r_log.warning(
+ logger.warning(
"Timeout when getting the real URL of %s.",
article.link)
continue
except Exception as error:
- pyaggr3g470r_log.warning(
+ logger.warning(
"Unable to get the real URL of %s. Error: %s",
article.link, error)
continue
@@ -177,7 +177,7 @@ class FeedGetter(object):
try:
description = BeautifulSoup(description, "lxml").decode()
except:
- pyaggr3g470r_log.error("Problem when sanitizing the content of the article %s (%s)",
+ logger.error("Problem when sanitizing the content of the article %s (%s)",
article_title, nice_url)
post_date = None
@@ -219,7 +219,7 @@ class FeedGetter(object):
"""
Insert articles in the database.
"""
- pyaggr3g470r_log.info("Database insertion...")
+ logger.info("Database insertion...")
new_articles = []
for feed, articles in elements:
@@ -230,8 +230,8 @@ class FeedGetter(object):
Article.feed_id == feed.id,
Article.link == article.link).first()
if exist is not None:
- pyaggr3g470r_log.error("Article %s (%s) already in the database." %
- (article.title, article.link))
+ logger.debug("Article %r (%r) already in the database.",
+ article.title, article.link)
continue
if article.date is None:
article.date = datetime.now(dateutil.tz.tzlocal())
@@ -242,15 +242,16 @@ class FeedGetter(object):
feed.articles.append(article)
#db.session.merge(article)
db.session.commit()
- pyaggr3g470r_log.info("New article %s (%s) added." % (article.title, article.link))
+ logger.info("New article %r (%r) added.",
+ article.title, article.link)
except IntegrityError:
- pyaggr3g470r_log.error("Article %s (%s) already in the database." %
- (article.title, article.link))
+ logger.debug("Article %r (%r) already in the database.",
+ article.title, article.link)
articles.remove(article)
db.session.rollback()
continue
except Exception as e:
- pyaggr3g470r_log.error("Error when inserting article in database: " + str(e))
+ logger.error("Error when inserting article in database: " + str(e))
continue
#db.session.close()
return new_articles
@@ -259,7 +260,7 @@ class FeedGetter(object):
"""
Index new articles.
"""
- pyaggr3g470r_log.info("Indexing new articles.")
+ logger.info("Indexing new articles.")
for element in new_articles:
article = Article.query.filter(Article.user_id == self.user.id,
Article.link == element.link).first()
@@ -267,16 +268,15 @@ class FeedGetter(object):
fastsearch.add_to_index(self.user.id, [article],
article.source)
except:
- pyaggr3g470r_log.error("Problem during indexation.")
+ logger.exception("Problem during indexation:")
return True
def mail_notification(self, new_articles):
"""
Mail notification.
"""
- pyaggr3g470r_log.info("Starting mail notification.")
+ logger.info("Starting mail notification.")
for element in new_articles:
if element.source.email_notification:
emails.new_article_notification(self.user, element.source, element)
-
return True
diff --git a/pyaggr3g470r/emails.py b/pyaggr3g470r/emails.py
index 6f424c31..ae73c0e1 100644
--- a/pyaggr3g470r/emails.py
+++ b/pyaggr3g470r/emails.py
@@ -1,26 +1,27 @@
#! /usr/bin/env python
# -*- coding: utf-8 -*-
+import logging
import smtplib
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from postmark import PMMail
-import log
import utils
import conf
from decorators import async
-pyaggr3g470r_log = log.Log("mail")
+logger = logging.getLogger(__name__)
+
@async
def send_async_email(mfrom, mto, msg):
try:
s = smtplib.SMTP(conf.MAIL_HOST)
s.login(conf.MAIL_USERNAME, conf.MAIL_PASSWORD)
- except Exception as e:
- pyaggr3g470r_log.error(str(e))
+ except Exception:
+ logger.exception('send_async_email raised:')
else:
s.sendmail(mfrom, mto, msg.as_string())
s.quit()
@@ -50,12 +51,12 @@ def send_email(mfrom, mto, feed, article):
# the HTML message, is best and preferred.
msg.attach(part1)
msg.attach(part2)
-
+
try:
s = smtplib.SMTP(conf.MAIL_HOST)
s.login(conf.MAIL_USERNAME, conf.MAIL_PASSWORD)
- except Exception as e:
- pyaggr3g470r_log.error(str(e))
+ except Exception:
+ logger.exception("send_email raised:")
else:
s.sendmail(mfrom, mto, msg.as_string())
s.quit()
@@ -81,9 +82,10 @@ def send_heroku(user=None, bcc="", subject="", plaintext=""):
message.to = user.email
message.send()
except Exception as e:
- pyaggr3g470r_log.error(str(e))
+ logger.exception("send_heroku raised:")
raise e
+
def information_message(subject, plaintext):
"""
Send an information message to the users of the platform.
@@ -103,6 +105,7 @@ def information_message(subject, plaintext):
else:
pass
+
def new_account_notification(user):
"""
Account creation notification.
@@ -114,6 +117,7 @@ def new_account_notification(user):
else:
pass
+
def new_account_activation(user):
"""
Account activation notification.
@@ -125,8 +129,9 @@ def new_account_activation(user):
else:
pass
+
def new_article_notification(user, feed, article):
if conf.ON_HEROKU:
pass
else:
- send_email(conf.ADMIN_EMAIL, user.email, feed, article) \ No newline at end of file
+ send_email(conf.ADMIN_EMAIL, user.email, feed, article)
diff --git a/pyaggr3g470r/log.py b/pyaggr3g470r/log.py
deleted file mode 100755
index 22834e71..00000000
--- a/pyaggr3g470r/log.py
+++ /dev/null
@@ -1,69 +0,0 @@
-#! /usr/bin/env python
-#-*- coding: utf-8 -*-
-
-# pyAggr3g470r - A Web based news aggregator.
-# Copyright (C) 2010-2013 Cédric Bonhomme - http://cedricbonhomme.org/
-#
-# For more information : http://bitbucket.org/cedricbonhomme/pyaggr3g470r/
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>
-
-__author__ = "Cedric Bonhomme"
-__version__ = "$Revision: 0.2 $"
-__date__ = "$Date: 2012/10/12 $"
-__revision__ = "$Date: 2013/12/26 $"
-__copyright__ = "Copyright (c) Cedric Bonhomme"
-__license__ = "GPLv3"
-
-import logging
-
-class Log(object):
- """
- Log events. Especially events relative to authentication.
- """
- def __init__(self, module_name):
- """
- Initialization of the logger.
- """
- self.logger = logging.getLogger(module_name)
- self.logger.propagate = False
- hdlr = logging.FileHandler('./pyaggr3g470r/var/pyaggr3g470r.log')
- formater = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
- hdlr.setFormatter(formater)
- self.logger.addHandler(hdlr)
- self.logger.setLevel(logging.INFO)
-
- def info(self, message):
- """
- Log notices.
- """
- self.logger.info(message)
-
- def warning(self, message):
- """
- Log warnings.
- """
- self.logger.warning(message)
-
- def error(self, message):
- """
- Log errors.
- """
- self.logger.warning(message)
-
- def critical(self, message):
- """
- Log critical errors.
- """
- self.logger.critical(message)
diff --git a/pyaggr3g470r/utils.py b/pyaggr3g470r/utils.py
index 1880aaa0..50549587 100755
--- a/pyaggr3g470r/utils.py
+++ b/pyaggr3g470r/utils.py
@@ -38,6 +38,7 @@ import re
import glob
import opml
import json
+import logging
import datetime
import operator
import urllib
@@ -59,8 +60,7 @@ url_finders = [
re.compile("'\\<((mailto:)|)[-A-Za-z0-9\\.]+@[-A-Za-z0-9\\.]+") \
]
-#import log
-#pyaggr3g470r_log = log.Log()
+logger = logging.getLogger(__name__)
@contextmanager
@@ -76,15 +76,16 @@ def opened_w_error(filename, mode="r"):
f.close()
-def import_opml(email, opml_file):
+def import_opml(email, opml_content):
"""
Import new feeds from an OPML file.
"""
user = User.query.filter(User.email == email).first()
try:
- subscriptions = opml.parse(opml_file)
- except Exception as e:
- raise e
+ subscriptions = opml.from_string(opml_content)
+ except:
+ logger.exception("Parsing OPML file failed:")
+ raise
def read(subsubscription, nb=0):
"""
@@ -191,7 +192,7 @@ def clean_url(url):
return urlunparse([
parsed_url.scheme,
parsed_url.netloc,
- parsed_url.path,
+ urllib.quote(urllib.unquote(parsed_url.path)),
parsed_url.params,
urllib.urlencode(filtered, doseq=True),
parsed_url.fragment
diff --git a/pyaggr3g470r/views.py b/pyaggr3g470r/views.py
index eda9c399..1dd651e5 100644
--- a/pyaggr3g470r/views.py
+++ b/pyaggr3g470r/views.py
@@ -152,6 +152,7 @@ def login():
return redirect(url_for('home'))
return render_template('login.html', form=form)
+
@app.route('/logout/')
@login_required
def logout():
@@ -298,14 +299,14 @@ def article(article_id=None):
if not article.readed:
article.readed = True
db.session.commit()
-
+
previous_article = article.previous_article()
if previous_article is None:
previous_article = article.source.articles[0]
next_article = article.next_article()
if next_article is None:
next_article = article.source.articles[-1]
-
+
return render_template('article.html', head_title=utils.clear_string(article.title),
article=article,
previous_article=previous_article, next_article=next_article)
@@ -558,12 +559,10 @@ def management():
if not allowed_file(data.filename):
flash(gettext('File not allowed.'), 'danger')
else:
- opml_path = os.path.join("./pyaggr3g470r/var/", data.filename)
- data.save(opml_path)
try:
- nb = utils.import_opml(g.user.email, opml_path)
+ nb = utils.import_opml(g.user.email, data.read())
flash(str(nb) + ' ' + gettext('feeds imported.'), "success")
- except Exception as e:
+ except:
flash(gettext("Impossible to import the new feeds."), "danger")
elif None != request.files.get('jsonfile', None):
# Import an account
diff --git a/runserver.py b/runserver.py
index 92c02609..b4e138f3 100755
--- a/runserver.py
+++ b/runserver.py
@@ -19,8 +19,10 @@
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
-import conf
-from pyaggr3g470r import app
+from bootstrap import conf
+from pyaggr3g470r import app as application
-app.run(host=conf.WEBSERVER_HOST, port=conf.WEBSERVER_PORT,
- debug=conf.WEBSERVER_DEBUG)
+if __name__ == '__main__':
+ application.run(host=conf.WEBSERVER_HOST,
+ port=conf.WEBSERVER_PORT,
+ debug=conf.WEBSERVER_DEBUG)
bgstack15