diff options
author | Cédric Bonhomme <kimble.mandel@gmail.com> | 2013-03-26 18:34:03 +0100 |
---|---|---|
committer | Cédric Bonhomme <kimble.mandel@gmail.com> | 2013-03-26 18:34:03 +0100 |
commit | 36c54ec0b66ba48e8c18cc2d7a16d652d296d1e1 (patch) | |
tree | e2b5f010f8f3f164a343f62917521d89425c69cf | |
parent | The /subscriptions page refers now to the appropriate feeds page. (diff) | |
download | newspipe-36c54ec0b66ba48e8c18cc2d7a16d652d296d1e1.tar.gz newspipe-36c54ec0b66ba48e8c18cc2d7a16d652d296d1e1.tar.bz2 newspipe-36c54ec0b66ba48e8c18cc2d7a16d652d296d1e1.zip |
Now using html.parser for BeautifulSoup, since this parser makes no attempt to create a well-formed HTML document: it does not add a <body> tag and, unlike lxml, does not even add an <html> tag.
-rwxr-xr-x | source/feedgetter.py | 4 |
1 files changed, 2 insertions, 2 deletions
diff --git a/source/feedgetter.py b/source/feedgetter.py
index 31a2d674..bdb46238 100755
--- a/source/feedgetter.py
+++ b/source/feedgetter.py
@@ -132,8 +132,8 @@ class FeedGetter(object):
                 description = article.description
             except Exception:
                 description = ""
-            description = str(BeautifulSoup(description))
-            article_title = str(BeautifulSoup(article.title))
+            description = str(BeautifulSoup(description, "html.parser"))
+            article_title = str(BeautifulSoup(article.title, "html.parser"))
             try:
                 post_date = datetime(*article.published_parsed[:6])
             except: