author | Cédric Bonhomme <cedric@cedricbonhomme.org> | 2014-01-19 11:30:44 +0100
committer | Cédric Bonhomme <cedric@cedricbonhomme.org> | 2014-01-19 11:30:44 +0100
commit | b733818e2ae53ecac926a764c575db4037c5a8af (patch)
tree | b1eeb3bdc796b2018b8cc53fc4b48ebcda3a6ef4 /pyaggr3g470r
parent | Replaced eye-open icon by info-sign. (diff)
Only get the URL with requests if the string 'feedproxy' is found in the netloc part of the original URL.
Diffstat (limited to 'pyaggr3g470r')
-rw-r--r-- | pyaggr3g470r/feedgetter.py | 21
1 file changed, 11 insertions, 10 deletions
```diff
diff --git a/pyaggr3g470r/feedgetter.py b/pyaggr3g470r/feedgetter.py
index 7bf87fea..da28663c 100644
--- a/pyaggr3g470r/feedgetter.py
+++ b/pyaggr3g470r/feedgetter.py
@@ -108,16 +108,17 @@ class FeedGetter(object):
         for article in a_feed['entries']:
 
             nice_url = article.link.encode("utf-8")
-            try:
-                # resolves URL behind proxies (like feedproxy.google.com)
-                r = requests.get(article.link, timeout=10.0, proxies=self.proxies)
-                nice_url = r.url.encode("utf-8")
-            except Timeout:
-                pyaggr3g470r_log.warning("Timeout when getting the real URL of %s." % (article.link,))
-                continue
-            except Exception as e:
-                pyaggr3g470r_log.warning("Unable to get the real URL of %s. Error: %s" % (article.link, str(e)))
-                continue
+            if "feedproxy" in urlparse(nice_url).netloc:
+                try:
+                    # resolves URL behind proxies (like feedproxy.google.com)
+                    r = requests.get(article.link, timeout=10.0, proxies=self.proxies)
+                    nice_url = r.url.encode("utf-8")
+                except Timeout:
+                    pyaggr3g470r_log.warning("Timeout when getting the real URL of %s." % (article.link,))
+                    continue
+                except Exception as e:
+                    pyaggr3g470r_log.warning("Unable to get the real URL of %s. Error: %s" % (article.link, str(e)))
+                    continue
             # remove utm_* parameters
             parsed_url = urlparse(nice_url)
             qd = parse_qs(parsed_url.query, keep_blank_values=True)
```
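For context, this is roughly what the new logic does in isolation. A minimal sketch, assuming the Python 2 `urlparse` module used by feedgetter.py (`urllib.parse` on Python 3); the `resolve_url` helper is hypothetical and not part of the repository, it only illustrates the feedproxy check introduced by this commit.

```python
# Standalone sketch of the behaviour introduced by this commit: an HTTP
# request is only made when the entry URL lives on a proxy host such as
# feedproxy.google.com; ordinary article links are returned untouched.
# The resolve_url helper is hypothetical and not part of feedgetter.py.
import requests
from requests.exceptions import Timeout

try:
    from urlparse import urlparse          # Python 2, as in the original code
except ImportError:
    from urllib.parse import urlparse      # Python 3


def resolve_url(link, proxies=None):
    """Return the final URL behind a feed proxy, or the link unchanged."""
    if "feedproxy" not in urlparse(link).netloc:
        return link                        # no request needed for normal hosts
    try:
        r = requests.get(link, timeout=10.0, proxies=proxies)
        return r.url                       # requests follows the redirects
    except Timeout:
        return link                        # keep the proxy URL on timeout
    except requests.RequestException:
        return link                        # or on any other network error


# A regular link is returned as-is, without any HTTP request being made.
print(resolve_url("https://example.org/2014/01/some-article"))
```

One deliberate simplification: in feedgetter.py a failed request skips the article entirely (`continue`), whereas the sketch falls back to the original link.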