Use html.parser for BeautifulSoup: this parser makes no attempt to create a well-formed HTML document by adding a <body> tag and, unlike lxml, it doesn't even add an <html> tag, so the re-serialised article title and description are not wrapped in extra document tags.

author: Cédric Bonhomme <kimble.mandel@gmail.com> 2013-03-26 18:34:03 +0100
committer: Cédric Bonhomme <kimble.mandel@gmail.com> 2013-03-26 18:34:03 +0100
commit: 36c54ec0b66ba48e8c18cc2d7a16d652d296d1e1 (patch)
tree: e2b5f010f8f3f164a343f62917521d89425c69cf /source
parent: The /subscriptions page refers now to the appropriate feeds page. (diff)
download: newspipe-36c54ec0b66ba48e8c18cc2d7a16d652d296d1e1.tar.gz
newspipe-36c54ec0b66ba48e8c18cc2d7a16d652d296d1e1.tar.bz2
newspipe-36c54ec0b66ba48e8c18cc2d7a16d652d296d1e1.zip
1 file changed, 2 insertions, 2 deletions
diff --git a/source/feedgetter.py b/source/feedgetter.py
index 31a2d674..bdb46238 100755
--- a/source/feedgetter.py
+++ b/source/feedgetter.py
@@ -132,8 +132,8 @@ class FeedGetter(object):
                     description = article.description
                 except Exception:
                     description = ""
-            description = str(BeautifulSoup(description))
-            article_title = str(BeautifulSoup(article.title))
+            description = str(BeautifulSoup(description, "html.parser"))
+            article_title = str(BeautifulSoup(article.title, "html.parser"))
             try:
                 post_date = datetime(*article.published_parsed[:6])
             except:
author	Cédric Bonhomme <kimble.mandel@gmail.com>	2013-03-26 18:34:03 +0100
committer	Cédric Bonhomme <kimble.mandel@gmail.com>	2013-03-26 18:34:03 +0100
commit	36c54ec0b66ba48e8c18cc2d7a16d652d296d1e1 (patch)
tree	e2b5f010f8f3f164a343f62917521d89425c69cf /source
parent	The /subscriptions page refers now to the appropriate feeds page. (diff)
download	newspipe-36c54ec0b66ba48e8c18cc2d7a16d652d296d1e1.tar.gz newspipe-36c54ec0b66ba48e8c18cc2d7a16d652d296d1e1.tar.bz2 newspipe-36c54ec0b66ba48e8c18cc2d7a16d652d296d1e1.zip