diff options
author | Cédric Bonhomme <cedric@cedricbonhomme.org> | 2015-01-21 21:52:57 +0100 |
---|---|---|
committer | Cédric Bonhomme <cedric@cedricbonhomme.org> | 2015-01-21 21:52:57 +0100 |
commit | 18414fc58852c0e41091dfc6c1c13d67170f9b12 (patch) | |
tree | 62ac3286c8a8acc6c1914359237e0c932babbca7 | |
parent | Set the version of Python used by the crawler in the configuration file. (diff) | |
download | newspipe-18414fc58852c0e41091dfc6c1c13d67170f9b12.tar.gz newspipe-18414fc58852c0e41091dfc6c1c13d67170f9b12.tar.bz2 newspipe-18414fc58852c0e41091dfc6c1c13d67170f9b12.zip |
clean_url is now working with Python3
-rw-r--r-- | pyaggr3g470r/crawler.py | 2 | ||||
-rwxr-xr-x | pyaggr3g470r/utils.py | 5 |
2 files changed, 3 insertions, 4 deletions
```diff
diff --git a/pyaggr3g470r/crawler.py b/pyaggr3g470r/crawler.py
index 96da898d..42430b5e 100644
--- a/pyaggr3g470r/crawler.py
+++ b/pyaggr3g470r/crawler.py
@@ -96,7 +96,7 @@ def fetch(user, feed):
                             article.link, error)
                 continue
             # remove utm_* parameters
-            #nice_url = utils.clean_url(nice_url)
+            nice_url = utils.clean_url(nice_url)
 
             description = ""
             article_title = article.get('title', '')
diff --git a/pyaggr3g470r/utils.py b/pyaggr3g470r/utils.py
index 1a3f55ff..c6264106 100755
--- a/pyaggr3g470r/utils.py
+++ b/pyaggr3g470r/utils.py
@@ -188,7 +188,6 @@ def clean_url(url):
     """
     Remove utm_* parameters
     """
-    return url
     parsed_url = urlparse(url)
     qd = parse_qs(parsed_url.query, keep_blank_values=True)
     filtered = dict((k, v) for k, v in qd.items()
@@ -196,9 +195,9 @@ def clean_url(url):
     return urlunparse([
         parsed_url.scheme,
         parsed_url.netloc,
-        urllib.quote(urllib.unquote(parsed_url.path)),
+        urllib.parse.quote(urllib.parse.unquote(parsed_url.path)),
         parsed_url.params,
-        urllib.urlencode(filtered, doseq=True),
+        urllib.parse.urlencode(filtered, doseq=True),
         parsed_url.fragment
     ]).rstrip('=')
 
```