aboutsummaryrefslogtreecommitdiff
path: root/pyaggr3g470r/utils.py
diff options
context:
space:
mode:
author: Cédric Bonhomme <cedric@cedricbonhomme.org> 2014-04-12 16:47:52 +0200
committer: Cédric Bonhomme <cedric@cedricbonhomme.org> 2014-04-12 16:47:52 +0200
commit: e7056b3e9ce8d733348259d6e33dec36521f3984 (patch)
tree: aedc1304b523e34616ed495914f9956e70f9d23b /pyaggr3g470r/utils.py
parent: The /home page is now loading faster. (diff)
download: newspipe-e7056b3e9ce8d733348259d6e33dec36521f3984.tar.gz
newspipe-e7056b3e9ce8d733348259d6e33dec36521f3984.tar.bz2
newspipe-e7056b3e9ce8d733348259d6e33dec36521f3984.zip
Improvements of the feedgetter module.
Diffstat (limited to 'pyaggr3g470r/utils.py')
-rwxr-xr-x pyaggr3g470r/utils.py 20
1 files changed, 19 insertions, 1 deletions
diff --git a/pyaggr3g470r/utils.py b/pyaggr3g470r/utils.py
index ab58e689..1a8a8387 100755
--- a/pyaggr3g470r/utils.py
+++ b/pyaggr3g470r/utils.py
@@ -40,7 +40,8 @@ import glob
import opml
import operator
import calendar
-
+from urllib import urlencode
+from urlparse import urlparse, parse_qs, urlunparse
from BeautifulSoup import BeautifulSoup
from collections import Counter
@@ -117,6 +118,23 @@ def import_opml(email, opml_file):
db.session.commit()
return nb
+def clean_url(url):
+ """
+ Return `url` with its utm_* query parameters removed.
+
+ The query string is parsed with parse_qs, every parameter whose name
+ starts with 'utm_' is dropped, and the remaining parameters are
+ re-encoded with urlencode. The scheme, netloc, path, params and
+ fragment of the parsed URL are passed to urlunparse unchanged.
+
+ NOTE(review): the surviving parameters go through a dict and are
+ re-encoded, so their original order and exact escaping are presumably
+ not guaranteed to be preserved -- confirm if callers compare URLs.
+ """
+ parsed_url = urlparse(url)
+ # keep_blank_values=True so that parameters with an empty value are kept.
+ qd = parse_qs(parsed_url.query, keep_blank_values=True)
+ # Keep only the parameters whose name does not begin with 'utm_'.
+ filtered = dict((k, v) for k, v in qd.iteritems() if not k.startswith('utm_'))
+ # Rebuild the URL; only the query component differs from the parsed input.
+ nice_url = urlunparse([
+ parsed_url.scheme,
+ parsed_url.netloc,
+ parsed_url.path,
+ parsed_url.params,
+ # doseq=True because parse_qs maps each name to a list of values.
+ urlencode(filtered, doseq=True),
+ parsed_url.fragment
+ ])
+ return nice_url
+
def open_url(url):
"""
Open an URL with the proxy and the user-agent
bgstack15