From 918790b5e51fb21343ec001a24b770ab188203f9 Mon Sep 17 00:00:00 2001 From: Cédric Bonhomme Date: Tue, 21 Jan 2014 07:51:49 +0100 Subject: Added an option to choose whether the URLs of articles behind proxies should be resolved. --- conf.py | 1 + conf/conf.cfg-sample | 1 + pyaggr3g470r/feedgetter.py | 21 +++++++++++---------- 3 files changed, 13 insertions(+), 10 deletions(-) diff --git a/conf.py b/conf.py index 3f406477..3b723f76 100644 --- a/conf.py +++ b/conf.py @@ -25,6 +25,7 @@ DATABASE_ADDRESS = config.get('database', 'address') HTTP_PROXY = config.get('feedparser', 'http_proxy') USER_AGENT = config.get('feedparser', 'user_agent') +RESOLVE_ARTICLE_URL = int(config.get('feedparser', 'resolve_article_url')) == 1 WEBSERVER_DEBUG = int(config.get('webserver', 'debug')) == 1 WEBSERVER_HOST = config.get('webserver', 'host') diff --git a/conf/conf.cfg-sample b/conf/conf.cfg-sample index 07027bfd..3ed7a1b1 100644 --- a/conf/conf.cfg-sample +++ b/conf/conf.cfg-sample @@ -7,6 +7,7 @@ password = root [feedparser] http_proxy = user_agent = pyAggr3g470r (https://bitbucket.org/cedricbonhomme/pyaggr3g470r) +resolve_article_url = 0 [webserver] debug = 1 host = 0.0.0.0 diff --git a/pyaggr3g470r/feedgetter.py b/pyaggr3g470r/feedgetter.py index 3b77509f..d29d3c9e 100644 --- a/pyaggr3g470r/feedgetter.py +++ b/pyaggr3g470r/feedgetter.py @@ -110,16 +110,17 @@ class FeedGetter(object): for article in a_feed['entries']: nice_url = article.link.encode("utf-8") - try: - # resolves URL behind proxies (like feedproxy.google.com) - r = requests.get(article.link, timeout=5.0, proxies=self.proxies) - nice_url = r.url.encode("utf-8") - except Timeout: - pyaggr3g470r_log.warning("Timeout when getting the real URL of %s." % (article.link,)) - continue - except Exception as e: - pyaggr3g470r_log.warning("Unable to get the real URL of %s. 
Error: %s" % (article.link, str(e))) - continue + if conf.RESOLVE_ARTICLE_URL: + try: + # resolves URL behind proxies (like feedproxy.google.com) + r = requests.get(article.link, timeout=5.0, proxies=self.proxies) + nice_url = r.url.encode("utf-8") + except Timeout: + pyaggr3g470r_log.warning("Timeout when getting the real URL of %s." % (article.link,)) + continue + except Exception as e: + pyaggr3g470r_log.warning("Unable to get the real URL of %s. Error: %s" % (article.link, str(e))) + continue # remove utm_* parameters parsed_url = urlparse(nice_url) qd = parse_qs(parsed_url.query, keep_blank_values=True) -- cgit