From 9f18772bf22c2bca731ab9e350e1cb283252eec5 Mon Sep 17 00:00:00 2001
From: Cédric Bonhomme
Date: Mon, 20 Jan 2014 07:48:26 +0100
Subject: Finally always try to get the real URL...

---
 pyaggr3g470r/feedgetter.py | 23 +++++++++++------------
 1 file changed, 11 insertions(+), 12 deletions(-)

(limited to 'pyaggr3g470r')

diff --git a/pyaggr3g470r/feedgetter.py b/pyaggr3g470r/feedgetter.py
index cc9f8aa4..3eb69938 100644
--- a/pyaggr3g470r/feedgetter.py
+++ b/pyaggr3g470r/feedgetter.py
@@ -110,18 +110,17 @@ class FeedGetter(object):
         for article in a_feed['entries']:
             nice_url = article.link.encode("utf-8")
-            if "feedproxy" in urlparse(nice_url).netloc:
-                try:
-                    # resolves URL behind proxies (like feedproxy.google.com)
-                    r = requests.get(article.link, timeout=5.0, proxies=self.proxies)
-                    nice_url = r.url.encode("utf-8")
-                except Timeout:
-                    pyaggr3g470r_log.warning("Timeout when getting the real URL of %s." % (article.link,))
-                    print "Time out"
-                    continue
-                except Exception as e:
-                    pyaggr3g470r_log.warning("Unable to get the real URL of %s. Error: %s" % (article.link, str(e)))
-                    continue
+            try:
+                # resolves URL behind proxies (like feedproxy.google.com)
+                r = requests.get(article.link, timeout=5.0, proxies=self.proxies)
+                nice_url = r.url.encode("utf-8")
+            except Timeout:
+                pyaggr3g470r_log.warning("Timeout when getting the real URL of %s." % (article.link,))
+                print "Time out"
+                continue
+            except Exception as e:
+                pyaggr3g470r_log.warning("Unable to get the real URL of %s. Error: %s" % (article.link, str(e)))
+                continue
             # remove utm_* parameters
             parsed_url = urlparse(nice_url)
             qd = parse_qs(parsed_url.query, keep_blank_values=True)
--
cgit