diff options
author | cedricbonhomme <devnull@localhost> | 2010-07-06 09:54:43 +0200 |
---|---|---|
committer | cedricbonhomme <devnull@localhost> | 2010-07-06 09:54:43 +0200 |
commit | 64f56e2b4c75ee03f43bb253309fd61fe1f5c3a8 (patch) | |
tree | 4caa441a4469888952ea86017be09db781455638 /utils.py | |
parent | Use of most_common() method of collections.Counter() object (new in Python 2.7) (diff) | |
download | newspipe-64f56e2b4c75ee03f43bb253309fd61fe1f5c3a8.tar.gz newspipe-64f56e2b4c75ee03f43bb253309fd61fe1f5c3a8.tar.bz2 newspipe-64f56e2b4c75ee03f43bb253309fd61fe1f5c3a8.zip |
Regular expression improvement.
Diffstat (limited to 'utils.py')
-rwxr-xr-x | utils.py | 4 |
1 file changed, 2 insertions, 2 deletions
@@ -81,8 +81,8 @@ def clear_string(data):
     """
     p = re.compile(r'<[^<]*?/?>')
     q = re.compile(r'&#[0-9]+;')
-    r = re.compile(r's+')
-    return p.sub('', q.sub('', r.sub('', data)))
+    r = re.compile(r'\s')
+    return p.sub('', q.sub('', r.sub(' ', data)))
 
 def top_words(dic_articles, n=10, size=5):
     """