From 6ee91b3713a29df2dc6fc7d4f417ae4dbefa4972 Mon Sep 17 00:00:00 2001 From: Cédric Bonhomme Date: Thu, 23 Apr 2015 08:41:12 +0200 Subject: Automatically use the good Python executable for the classic crawler. --- conf.py | 18 ++++++++---------- conf/conf.cfg-sample | 17 ++++++++--------- pyaggr3g470r/utils.py | 3 ++- 3 files changed, 18 insertions(+), 20 deletions(-) diff --git a/conf.py b/conf.py index 5a549f3b..e71009bf 100644 --- a/conf.py +++ b/conf.py @@ -7,7 +7,7 @@ This file contain the variables used by the application. import os import logging -basedir = os.path.abspath(os.path.dirname(__file__)) +BASE_DIR = os.path.abspath(os.path.dirname(__file__)) PATH = os.path.abspath(".") # available languages @@ -22,8 +22,7 @@ TIME_ZONE = { } ON_HEROKU = int(os.environ.get('HEROKU', 0)) == 1 -DEFAULTS = {"python": "/usr/bin/python3.4", - "platform_url": "https://pyaggr3g470r.herokuapp.com/", +DEFAULTS = {"platform_url": "https://pyaggr3g470r.herokuapp.com/", "postmark_api_key": "", "recaptcha_public_key": "", "recaptcha_private_key": "", @@ -53,7 +52,7 @@ if not ON_HEROKU: import ConfigParser as confparser # load the configuration config = confparser.SafeConfigParser(defaults=DEFAULTS) - config.read(os.path.join(basedir, "conf/conf.cfg")) + config.read(os.path.join(BASE_DIR, "conf/conf.cfg")) else: class Config(object): def get(self, _, name): @@ -78,20 +77,19 @@ RECAPTCHA_PUBLIC_KEY = config.get('misc', 'recaptcha_public_key') RECAPTCHA_PRIVATE_KEY = config.get('misc', 'recaptcha_private_key') LOG_PATH = config.get('misc', 'log_path') -PYTHON = config.get('misc', 'python') NB_WORKER = config.getint('misc', 'nb_worker') WHOOSH_ENABLED = True SQLALCHEMY_DATABASE_URI = config.get('database', 'database_url') -HTTP_PROXY = config.get('feedparser', 'http_proxy') -USER_AGENT = config.get('feedparser', 'user_agent') -RESOLVE_ARTICLE_URL = config.getboolean('feedparser', +HTTP_PROXY = config.get('crawler', 'http_proxy') +USER_AGENT = config.get('crawler', 'user_agent') +RESOLVE_ARTICLE_URL = config.getboolean('crawler', 'resolve_article_url') -DEFAULT_MAX_ERROR = config.getint('feedparser', +DEFAULT_MAX_ERROR = config.getint('crawler', 'default_max_error') -CRAWLING_METHOD = config.get('feedparser', 'crawling_method') +CRAWLING_METHOD = config.get('crawler', 'crawling_method') LOG_LEVEL = {'debug': logging.DEBUG, 'info': logging.INFO, diff --git a/conf/conf.cfg-sample b/conf/conf.cfg-sample index 76a91323..b141e93c 100644 --- a/conf/conf.cfg-sample +++ b/conf/conf.cfg-sample @@ -1,24 +1,23 @@ +[webserver] +host = 0.0.0.0 +port = 5000 +secret = a secret only you know [misc] -platform_url = http://127.0.0.1:5000/ +platform_url = http://0.0.0.0:5000/ admin_email = recaptcha_public_key = recaptcha_private_key = log_path = ./pyaggr3g470r/var/pyaggr3g470r.log -python = python3.3 nb_worker = 5 log_level = info [database] database_url = postgres://pgsqluser:pgsqlpwd@127.0.0.1:5432/aggregator -[feedparser] +[crawler] +crawling_method = classic +default_max_error = 6 http_proxy = user_agent = pyAggr3g470r (https://bitbucket.org/cedricbonhomme/pyaggr3g470r) resolve_article_url = false -default_max_error = 6 -crawling_method = classic -[webserver] -host = 0.0.0.0 -port = 5000 -secret = a secret only you know [notification] email = pyAggr3g470r@no-reply.com host = smtp.googlemail.com diff --git a/pyaggr3g470r/utils.py b/pyaggr3g470r/utils.py index 628703d2..3d8bb483 100755 --- a/pyaggr3g470r/utils.py +++ b/pyaggr3g470r/utils.py @@ -35,6 +35,7 @@ __license__ = "AGPLv3" # import re +import sys import glob import opml import json @@ -88,7 +89,7 @@ def fetch(id, feed_id=None): Fetch the feeds in a new processus. The "asyncio" crawler is launched with the manager. """ - cmd = [conf.PYTHON, conf.basedir+'/manager.py', 'fetch_asyncio', str(id), + cmd = [sys.executable, conf.BASE_DIR+'/manager.py', 'fetch_asyncio', str(id), str(feed_id)] p = subprocess.Popen(cmd, stdout=subprocess.PIPE) -- cgit