aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: cedricbonhomme <devnull@localhost> 2010-07-06 09:54:43 +0200
committer: cedricbonhomme <devnull@localhost> 2010-07-06 09:54:43 +0200
commit: 64f56e2b4c75ee03f43bb253309fd61fe1f5c3a8 (patch)
tree: 4caa441a4469888952ea86017be09db781455638
parent: Use of most_common() method of collections.Counter() object (new in Python 2.7) (diff)
download: newspipe-64f56e2b4c75ee03f43bb253309fd61fe1f5c3a8.tar.gz
newspipe-64f56e2b4c75ee03f43bb253309fd61fe1f5c3a8.tar.bz2
newspipe-64f56e2b4c75ee03f43bb253309fd61fe1f5c3a8.zip
Regular expression improvement.
-rwxr-xr-x utils.py 4
1 file changed, 2 insertions, 2 deletions
diff --git a/utils.py b/utils.py
index 482a59a7..91f61d72 100755
--- a/utils.py
+++ b/utils.py
@@ -81,8 +81,8 @@ def clear_string(data):
"""
p = re.compile(r'<[^<]*?/?>')
q = re.compile(r'&#[0-9]+;')
- r = re.compile(r's+')
- return p.sub('', q.sub('', r.sub('', data)))
+ r = re.compile(r'\s')
+ return p.sub('', q.sub('', r.sub(' ', data)))
def top_words(dic_articles, n=10, size=5):
"""
bgstack15