From 3da3ae456ab543aa00cd193fbbce53c7198a82d7 Mon Sep 17 00:00:00 2001
From: Cédric Bonhomme <kimble.mandel@gmail.com>
Date: Tue, 2 Apr 2013 10:06:55 +0200
Subject: Catch BeautifulSoup failures when sanitizing the HTML content.

---
 source/feedgetter.py | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

(limited to 'source')

diff --git a/source/feedgetter.py b/source/feedgetter.py
index f3fa5c07..3e3b7aef 100755
--- a/source/feedgetter.py
+++ b/source/feedgetter.py
@@ -123,6 +123,7 @@ class FeedGetter(object):
         articles = []
         for article in a_feed['entries']:
             description = ""
+            article_title = ""
             try:
                 # article content
                 description = article.content[0].value
@@ -132,8 +133,14 @@ class FeedGetter(object):
                     description = article.description
                 except Exception:
                     description = ""
-            description = BeautifulSoup(description, "html.parser").decode()
-            article_title = BeautifulSoup(article.title, "html.parser").decode()
+            try:
+                description = BeautifulSoup(description, "html.parser").decode()
+                article_title = BeautifulSoup(article.title, "html.parser").decode()
+            except Exception as E:
+                print("Problem when retrieving " + feed_link)
+                print(E)
+                article_title = article.title
+
             try:
                 post_date = datetime(*article.published_parsed[:6])
             except:
@@ -187,4 +194,4 @@ if __name__ == "__main__":
 
     # For a blogspot blog:
     #feed_getter.retrieve_feed("http://www.blogger.com/feeds/4195135246107166251/posts/default", "http://neopythonic.blogspot.com/feeds/posts/default")
-    #feed_getter.retrieve_feed("http://www.blogger.com/feeds/8699431508730375743/posts/default", "http://python-history.blogspot.com/feeds/posts/default")
\ No newline at end of file
+    #feed_getter.retrieve_feed("http://www.blogger.com/feeds/8699431508730375743/posts/default", "http://python-history.blogspot.com/feeds/posts/default")
-- 
cgit