From 64f56e2b4c75ee03f43bb253309fd61fe1f5c3a8 Mon Sep 17 00:00:00 2001
From: cedricbonhomme
Date: Tue, 6 Jul 2010 09:54:43 +0200
Subject: Regular expression improvement.

---
 utils.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/utils.py b/utils.py
index 482a59a7..91f61d72 100755
--- a/utils.py
+++ b/utils.py
@@ -81,8 +81,8 @@ def clear_string(data):
     """
     p = re.compile(r'<[^<]*?/?>')
     q = re.compile(r'&#[0-9]+;')
-    r = re.compile(r's+')
-    return p.sub('', q.sub('', r.sub('', data)))
+    r = re.compile(r'\s')
+    return p.sub('', q.sub('', r.sub(' ', data)))
 
 def top_words(dic_articles, n=10, size=5):
     """
-- 
cgit