From 36c54ec0b66ba48e8c18cc2d7a16d652d296d1e1 Mon Sep 17 00:00:00 2001 From: Cédric Bonhomme Date: Tue, 26 Mar 2013 18:34:03 +0100 Subject: Now using html.parser for BeautifulSoup since this parser makes no attempt to create a well-formed HTML document by adding a <body> tag. Unlike lxml, it doesn’t even bother to add an <html> tag. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- source/feedgetter.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'source/feedgetter.py') diff --git a/source/feedgetter.py b/source/feedgetter.py index 31a2d674..bdb46238 100755 --- a/source/feedgetter.py +++ b/source/feedgetter.py @@ -132,8 +132,8 @@ class FeedGetter(object): description = article.description except Exception: description = "" - description = str(BeautifulSoup(description)) - article_title = str(BeautifulSoup(article.title)) + description = str(BeautifulSoup(description, "html.parser")) + article_title = str(BeautifulSoup(article.title, "html.parser")) try: post_date = datetime(*article.published_parsed[:6]) except: -- cgit