Better regular expression to remove HTML tags, special characters and consecutive whitespace.

author: cedricbonhomme <devnull@localhost> 2010-07-05 21:39:53 +0200
committer: cedricbonhomme <devnull@localhost> 2010-07-05 21:39:53 +0200
commit: db632991434cf688012e2af0d877cd5a5a5b71a6 (patch)
tree: b9b51a92ff1ceda4c82cc5370451cb0e665835a9 /pyAggr3g470r.py
parent: Removed useless __future__ import (diff)
download: newspipe-db632991434cf688012e2af0d877cd5a5a5b71a6.tar.gz
newspipe-db632991434cf688012e2af0d877cd5a5a5b71a6.tar.bz2
newspipe-db632991434cf688012e2af0d877cd5a5a5b71a6.zip
1 files changed, 7 insertions, 7 deletions
diff --git a/pyAggr3g470r.py b/pyAggr3g470r.py
index abe1624c..cc649cf1 100755
--- a/pyAggr3g470r.py
+++ b/pyAggr3g470r.py
@@ -297,9 +297,9 @@ class Root:
 
         if feed_id is not None:
             for article in self.articles[rss_feed_id]:
-                article_content = utils.remove_html_tags(article[4].encode('utf-8'))
+                article_content = utils.clear_string(article[4].encode('utf-8'))
                 if not article_content:
-                    utils.remove_html_tags(article[2].encode('utf-8'))
+                    utils.clear_string(article[2].encode('utf-8'))
                 if querystring.lower() in article_content.lower():
                     if article[5] == "0":
                         # not readed articles are in bold
@@ -317,9 +317,9 @@ class Root:
         else:
             for rss_feed_id in self.articles.keys():
                 for article in self.articles[rss_feed_id]:
-                    article_content = utils.remove_html_tags(article[4].encode('utf-8'))
+                    article_content = utils.clear_string(article[4].encode('utf-8'))
                     if not article_content:
-                        utils.remove_html_tags(article[2].encode('utf-8'))
+                        utils.clear_string(article[2].encode('utf-8'))
                     if querystring.lower() in article_content.lower():
                         if article[5] == "0":
                             # not readed articles are in bold
@@ -479,7 +479,7 @@ class Root:
                     " - " + not_read_begin + \
                     """<a href="/description/%s:%s" rel="noreferrer" target="_blank">%s</a>""" % \
                             (feed_id, article[0].encode('utf-8'), \
-                            utils.remove_html_tags(article[2].encode('utf-8'))) + \
+                            utils.clear_string(article[2].encode('utf-8'))) + \
                     not_read_end + like + \
                     "<br />\n"
 
@@ -583,7 +583,7 @@ class Root:
                 html += """<h1><i>%s</i> from <a href="/all_articles/%s">%s</a></h1>\n<br />\n"""% \
                                     (article[2].encode('utf-8'), feed_id, \
                                     self.feeds[feed_id][3].encode('utf-8'))
-                description = utils.remove_html_tags(article[4].encode('utf-8'))
+                description = utils.clear_string(article[4].encode('utf-8'))
                 if description:
                     html += description
                 else:
@@ -828,7 +828,7 @@ class Root:
                         name = folder + "/" + article[1] + ".txt"
                         f = open(name.replace(' ', '_'), "w")
                         content = "Title: " + article[2].encode('utf-8') + "\n\n\n"
-                        content += utils.remove_html_tags(article[4].encode('utf-8'))
+                        content += utils.clear_string(article[4].encode('utf-8'))
                     f.write(content)
                 except IOError:
                     pass
author	cedricbonhomme <devnull@localhost>	2010-07-05 21:39:53 +0200
committer	cedricbonhomme <devnull@localhost>	2010-07-05 21:39:53 +0200
commit	db632991434cf688012e2af0d877cd5a5a5b71a6 (patch)
tree	b9b51a92ff1ceda4c82cc5370451cb0e665835a9 /pyAggr3g470r.py
parent	Removed useless __future__ import (diff)
download	newspipe-db632991434cf688012e2af0d877cd5a5a5b71a6.tar.gz newspipe-db632991434cf688012e2af0d877cd5a5a5b71a6.tar.bz2 newspipe-db632991434cf688012e2af0d877cd5a5a5b71a6.zip