diff options
author | Cédric Bonhomme <cedric@cedricbonhomme.org> | 2016-02-14 12:24:27 +0100 |
---|---|---|
committer | Cédric Bonhomme <cedric@cedricbonhomme.org> | 2016-02-14 12:24:27 +0100 |
commit | 3d091baa4292c8145564b345492fb60d6db72a52 (patch) | |
tree | 0ca93b688810f1768c9db9a1dbff2acb0e2ee0a9 | |
parent | Updated CHANGELOG. (diff) | |
download | newspipe-3d091baa4292c8145564b345492fb60d6db72a52.tar.gz newspipe-3d091baa4292c8145564b345492fb60d6db72a52.tar.bz2 newspipe-3d091baa4292c8145564b345492fb60d6db72a52.zip |
article URL resolving has been removed (wasn't used)
-rw-r--r-- | CHANGELOG.rst | 3 | ||||
-rw-r--r-- | src/conf.py | 3 | ||||
-rw-r--r-- | src/conf/conf.cfg-sample | 2 | ||||
-rw-r--r-- | src/web/lib/article_utils.py | 10 |
4 files changed, 2 insertions, 16 deletions
diff --git a/CHANGELOG.rst b/CHANGELOG.rst index b6bc51a5..c18cbbcc 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -8,7 +8,8 @@ current * Redoing entierly the home page with react, JARR is going on toward a one page app; * Implementing categorie; * The classic crawler is now taking into account updated articles from feeds; - * Support of HTTP proxy has been removed. + * Support of HTTP proxy has been removed; + * article URL resolving has been removed (wasn't used). Improvements: * Code re-arangement: move all code to /src/ diff --git a/src/conf.py b/src/conf.py index 5bf831a7..a9109fba 100644 --- a/src/conf.py +++ b/src/conf.py @@ -35,7 +35,6 @@ DEFAULTS = {"platform_url": "https://JARR.herokuapp.com/", "log_path": "jarr.log", "log_level": "info", "user_agent": "JARR (https://github.com/JARR-aggregator)", - "resolve_article_url": "false", "secret": "", "enabled": "false", "notification_email": "jarr@no-reply.com", @@ -86,8 +85,6 @@ API_PASSWD = config.get('crawler', 'api_passwd') SQLALCHEMY_DATABASE_URI = config.get('database', 'database_url') USER_AGENT = config.get('crawler', 'user_agent') -RESOLVE_ARTICLE_URL = config.getboolean('crawler', - 'resolve_article_url') DEFAULT_MAX_ERROR = config.getint('crawler', 'default_max_error') ERROR_THRESHOLD = int(DEFAULT_MAX_ERROR / 2) diff --git a/src/conf/conf.cfg-sample b/src/conf/conf.cfg-sample index bbbf5996..ab8c4730 100644 --- a/src/conf/conf.cfg-sample +++ b/src/conf/conf.cfg-sample @@ -17,9 +17,7 @@ database_url = postgres://pgsqluser:pgsqlpwd@127.0.0.1:5432/aggregator [crawler] crawling_method = classic default_max_error = 6 -http_proxy = user_agent = JARR (https://github.com/JARR-aggregator/JARR) -resolve_article_url = false api_login = api_passwd = [notification] diff --git a/src/web/lib/article_utils.py b/src/web/lib/article_utils.py index 176f6a98..46bb9461 100644 --- a/src/web/lib/article_utils.py +++ b/src/web/lib/article_utils.py @@ -1,5 +1,4 @@ import logging -import requests import dateutil.parser from datetime import datetime @@ -53,15 +52,6 @@ def construct_article(entry, feed): content = get_article_content(entry) article_link = entry.get('link') - if conf.RESOLVE_ARTICLE_URL and article_link: - try: - # resolves URL behind proxies - # (like feedproxy.google.com) - response = requests.get(article_link, verify=False, timeout=5.0) - article_link = response.url - except Exception as error: - logger.warning("Unable to get the real URL of %s. Error: %s", - article_link, error) return {'feed_id': feed['id'], 'user_id': feed['user_id'], |