From 3d091baa4292c8145564b345492fb60d6db72a52 Mon Sep 17 00:00:00 2001
From: Cédric Bonhomme
Date: Sun, 14 Feb 2016 12:24:27 +0100
Subject: article URL resolving has been removed (wasn't used)

---
 src/conf.py                  |  3 ---
 src/conf/conf.cfg-sample     |  2 --
 src/web/lib/article_utils.py | 10 ----------
 3 files changed, 15 deletions(-)

diff --git a/src/conf.py b/src/conf.py
index 5bf831a7..a9109fba 100644
--- a/src/conf.py
+++ b/src/conf.py
@@ -35,7 +35,6 @@ DEFAULTS = {"platform_url": "https://JARR.herokuapp.com/",
             "log_path": "jarr.log",
             "log_level": "info",
             "user_agent": "JARR (https://github.com/JARR-aggregator)",
-            "resolve_article_url": "false",
             "secret": "",
             "enabled": "false",
             "notification_email": "jarr@no-reply.com",
@@ -86,8 +85,6 @@ API_PASSWD = config.get('crawler', 'api_passwd')
 SQLALCHEMY_DATABASE_URI = config.get('database', 'database_url')
 
 USER_AGENT = config.get('crawler', 'user_agent')
-RESOLVE_ARTICLE_URL = config.getboolean('crawler',
-                                        'resolve_article_url')
 DEFAULT_MAX_ERROR = config.getint('crawler', 'default_max_error')
 ERROR_THRESHOLD = int(DEFAULT_MAX_ERROR / 2)

diff --git a/src/conf/conf.cfg-sample b/src/conf/conf.cfg-sample
index bbbf5996..ab8c4730 100644
--- a/src/conf/conf.cfg-sample
+++ b/src/conf/conf.cfg-sample
@@ -17,9 +17,7 @@ database_url = postgres://pgsqluser:pgsqlpwd@127.0.0.1:5432/aggregator
 [crawler]
 crawling_method = classic
 default_max_error = 6
-http_proxy =
 user_agent = JARR (https://github.com/JARR-aggregator/JARR)
-resolve_article_url = false
 api_login =
 api_passwd =
 [notification]

diff --git a/src/web/lib/article_utils.py b/src/web/lib/article_utils.py
index 176f6a98..46bb9461 100644
--- a/src/web/lib/article_utils.py
+++ b/src/web/lib/article_utils.py
@@ -1,5 +1,4 @@
 import logging
-import requests
 
 import dateutil.parser
 from datetime import datetime
@@ -53,15 +52,6 @@ def construct_article(entry, feed):
     content = get_article_content(entry)
     article_link = entry.get('link')
-    if conf.RESOLVE_ARTICLE_URL and article_link:
-        try:
-            # resolves URL behind proxies
-            # (like feedproxy.google.com)
-            response = requests.get(article_link, verify=False, timeout=5.0)
-            article_link = response.url
-        except Exception as error:
-            logger.warning("Unable to get the real URL of %s. Error: %s",
-                           article_link, error)
 
     return {'feed_id': feed['id'],
             'user_id': feed['user_id'],
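For anyone who still wants the removed behavior, it boiled down to following HTTP redirects to unwrap article links served through feed proxies such as feedproxy.google.com. A minimal standalone sketch follows, assuming only the `requests` library; the function name `resolve_article_url` and the fallback-to-original-link behavior are choices made here, and the removed code's `verify=False` is deliberately dropped in favor of default TLS certificate checking:

```python
import logging

import requests

logger = logging.getLogger(__name__)


def resolve_article_url(article_link, timeout=5.0):
    """Return the final URL behind a feed proxy, or the original on failure."""
    try:
        # requests follows redirects by default; response.url holds the
        # final URL after all hops (e.g. past feedproxy.google.com).
        response = requests.get(article_link, timeout=timeout)
        return response.url
    except Exception as error:
        logger.warning("Unable to get the real URL of %s. Error: %s",
                       article_link, error)
        return article_link
```

Note that this issues a full GET per article, which is why the feature was costly for large crawls; a HEAD request with `allow_redirects=True` would be a lighter alternative when the target servers support it.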