aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Cédric Bonhomme <kimble.mandel@gmail.com> 2013-03-26 18:34:03 +0100
committer Cédric Bonhomme <kimble.mandel@gmail.com> 2013-03-26 18:34:03 +0100
commit 36c54ec0b66ba48e8c18cc2d7a16d652d296d1e1 (patch)
tree e2b5f010f8f3f164a343f62917521d89425c69cf
parent The /subscriptions page refers now to the appropriate feeds page. (diff)
download newspipe-36c54ec0b66ba48e8c18cc2d7a16d652d296d1e1.tar.gz
newspipe-36c54ec0b66ba48e8c18cc2d7a16d652d296d1e1.tar.bz2
newspipe-36c54ec0b66ba48e8c18cc2d7a16d652d296d1e1.zip
Now using html.parser for BeautifulSoup since this parser makes no attempt to create a well-formed HTML document by adding a <body> tag. Unlike lxml, it doesn’t even bother to add an <html> tag.
-rwxr-xr-x source/feedgetter.py 4
1 files changed, 2 insertions, 2 deletions
diff --git a/source/feedgetter.py b/source/feedgetter.py
index 31a2d674..bdb46238 100755
--- a/source/feedgetter.py
+++ b/source/feedgetter.py
@@ -132,8 +132,8 @@ class FeedGetter(object):
description = article.description
except Exception:
description = ""
- description = str(BeautifulSoup(description))
- article_title = str(BeautifulSoup(article.title))
+ description = str(BeautifulSoup(description, "html.parser"))
+ article_title = str(BeautifulSoup(article.title, "html.parser"))
try:
post_date = datetime(*article.published_parsed[:6])
except:
bgstack15