diff options
author | cedricbonhomme <devnull@localhost> | 2010-07-05 21:39:53 +0200 |
---|---|---|
committer | cedricbonhomme <devnull@localhost> | 2010-07-05 21:39:53 +0200 |
commit | db632991434cf688012e2af0d877cd5a5a5b71a6 (patch) | |
tree | b9b51a92ff1ceda4c82cc5370451cb0e665835a9 /feedgetter.py | |
parent | Removed useless __future__ import (diff) | |
download | newspipe-db632991434cf688012e2af0d877cd5a5a5b71a6.tar.gz newspipe-db632991434cf688012e2af0d877cd5a5a5b71a6.tar.bz2 newspipe-db632991434cf688012e2af0d877cd5a5a5b71a6.zip |
Better regular expression to remove HTML tags, special characters and consecutive white spaces.
Diffstat (limited to 'feedgetter.py')
-rwxr-xr-x | feedgetter.py | 4 |
1 files changed, 2 insertions, 2 deletions
diff --git a/feedgetter.py b/feedgetter.py index bea01d28..267246db 100755 --- a/feedgetter.py +++ b/feedgetter.py @@ -98,7 +98,7 @@ class FeedGetter(object): feed_image = "/css/img/feed-icon-28x28.png" try: self.c.execute('insert into feeds values (?,?,?,?,?)', (\ - utils.remove_html_tags(a_feed.feed.title.encode('utf-8')), \ + utils.clear_string(a_feed.feed.title.encode('utf-8')), \ a_feed.feed.link.encode('utf-8'), \ feed_link, \ feed_image, @@ -115,7 +115,7 @@ class FeedGetter(object): try: self.c.execute('insert into articles values (?,?,?,?,?,?,?)', (\ datetime(*article.updated_parsed[:6]), \ - utils.remove_html_tags(article.title.encode('utf-8')), \ + utils.clear_string(article.title.encode('utf-8')), \ article.link.encode('utf-8'), \ description, \ "0", \ |