From 18414fc58852c0e41091dfc6c1c13d67170f9b12 Mon Sep 17 00:00:00 2001
From: Cédric Bonhomme
Date: Wed, 21 Jan 2015 21:52:57 +0100
Subject: clean_url is now working with Python3

---
 pyaggr3g470r/crawler.py | 2 +-
 pyaggr3g470r/utils.py   | 5 ++---
 2 files changed, 3 insertions(+), 4 deletions(-)

(limited to 'pyaggr3g470r')

diff --git a/pyaggr3g470r/crawler.py b/pyaggr3g470r/crawler.py
index 96da898d..42430b5e 100644
--- a/pyaggr3g470r/crawler.py
+++ b/pyaggr3g470r/crawler.py
@@ -96,7 +96,7 @@ def fetch(user, feed):
                              article.link, error)
                 continue
             # remove utm_* parameters
-            #nice_url = utils.clean_url(nice_url)
+            nice_url = utils.clean_url(nice_url)

             description = ""
             article_title = article.get('title', '')
diff --git a/pyaggr3g470r/utils.py b/pyaggr3g470r/utils.py
index 1a3f55ff..c6264106 100755
--- a/pyaggr3g470r/utils.py
+++ b/pyaggr3g470r/utils.py
@@ -188,7 +188,6 @@ def clean_url(url):
     """
     Remove utm_* parameters
     """
-    return url
     parsed_url = urlparse(url)
     qd = parse_qs(parsed_url.query, keep_blank_values=True)
     filtered = dict((k, v) for k, v in qd.items()
@@ -196,9 +195,9 @@ def clean_url(url):
     return urlunparse([
         parsed_url.scheme,
         parsed_url.netloc,
-        urllib.quote(urllib.unquote(parsed_url.path)),
+        urllib.parse.quote(urllib.parse.unquote(parsed_url.path)),
         parsed_url.params,
-        urllib.urlencode(filtered, doseq=True),
+        urllib.parse.urlencode(filtered, doseq=True),
         parsed_url.fragment
     ]).rstrip('=')
--
cgit
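
Note: the patch above ports clean_url from the Python 2 urllib functions (quote, unquote, urlencode) to their Python 3 equivalents in urllib.parse, and re-enables the call in the crawler. For reference, here is a minimal standalone sketch of the function as it reads after this commit, with a small usage example. The flat imports, the __main__ block, and the example URL are illustrative assumptions, not the actual import layout of pyaggr3g470r/utils.py.

# Standalone sketch of the patched clean_url (Python 3). Imports are
# gathered here for illustration only; the example URL is made up.
from urllib.parse import (urlparse, parse_qs, urlunparse,
                          quote, unquote, urlencode)


def clean_url(url):
    """Remove utm_* parameters from a URL."""
    parsed_url = urlparse(url)
    qd = parse_qs(parsed_url.query, keep_blank_values=True)
    # Keep every query parameter that is not a utm_* tracking parameter.
    filtered = dict((k, v) for k, v in qd.items()
                    if not k.startswith('utm_'))
    return urlunparse([
        parsed_url.scheme,
        parsed_url.netloc,
        quote(unquote(parsed_url.path)),
        parsed_url.params,
        urlencode(filtered, doseq=True),
        parsed_url.fragment
    ]).rstrip('=')


if __name__ == '__main__':
    # Hypothetical example: utm_source is stripped, other parameters stay.
    print(clean_url('https://example.org/post?id=42&utm_source=feed'))
    # -> https://example.org/post?id=42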