From b4bb0655fe1baff718b51c2df73f40ea3f9b6862 Mon Sep 17 00:00:00 2001 From: cedricbonhomme Date: Wed, 1 Jun 2011 20:27:06 +0200 Subject: Minor improvements: 1) encoding of article description and article title, 2) HTML sanitization. --- feedgetter.py | 12 ++++++------ pyAggr3g470r.py | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/feedgetter.py b/feedgetter.py index f2d3f290..54183b08 100755 --- a/feedgetter.py +++ b/feedgetter.py @@ -122,21 +122,21 @@ class FeedGetter(object): description = "" try: # article content - description = article.content[0].value.encode('utf-8') + description = article.content[0].value except AttributeError: try: # article description - description = article.description.encode('utf-8') + description = article.description except Exception, e: description = "" description = str(BeautifulSoup(description)) - title = str(BeautifulSoup(article.title.encode('utf-8'))) + article_title = str(BeautifulSoup(article.title)) try: # try. Will only success if the article is not already in the data base self.c.execute('insert into articles values (?, ?, ?, ?, ?, ?, ?)', (\ datetime(*article.updated_parsed[:6]), \ - title, \ + article_title, \ article.link.encode('utf-8'), \ description, \ "0", \ @@ -150,7 +150,7 @@ class FeedGetter(object): try: threading.Thread(None, utils.send_mail, None, (utils.mail_from, utils.mail_to, \ a_feed.feed.title.encode('utf-8'), \ - utils.clear_string(article.title.encode('utf-8')), description) \ + article_title, description) \ ).start() except Exception, e: # SMTP acces denied, to many SMTP connections, etc. @@ -166,4 +166,4 @@ class FeedGetter(object): if __name__ == "__main__": # Point of entry in execution mode feed_getter = FeedGetter() - feed_getter.retrieve_feed() \ No newline at end of file + feed_getter.retrieve_feed() diff --git a/pyAggr3g470r.py b/pyAggr3g470r.py index 67beea86..3e26326b 100755 --- a/pyAggr3g470r.py +++ b/pyAggr3g470r.py @@ -180,7 +180,7 @@ class Root: # Descrition for the CSS ToolTips article_content = utils.clear_string(article.article_description) if article_content: - description = " ".join(article_content[:500].split(' ')[:-1]) + description = " ".join(article_content.split(' ')[:55]) description = str(BeautifulSoup(description)) else: description = "No description." -- cgit