diff options
author | Cédric Bonhomme <cedric@cedricbonhomme.org> | 2015-07-06 15:26:29 +0200 |
---|---|---|
committer | Cédric Bonhomme <cedric@cedricbonhomme.org> | 2015-07-06 15:26:29 +0200 |
commit | a867c1243c80843f3736ee260b92d5b13ec510ec (patch) | |
tree | 18f4d724abb04d9a14ad7dd5c0b1aca8eb98b3ca /pyaggr3g470r | |
parent | Merged in jaesivsm/pyaggr3g470r (pull request #16) (diff) | |
download | newspipe-a867c1243c80843f3736ee260b92d5b13ec510ec.tar.gz newspipe-a867c1243c80843f3736ee260b92d5b13ec510ec.tar.bz2 newspipe-a867c1243c80843f3736ee260b92d5b13ec510ec.zip |
Minor fixes from a quick review. Needs deeper testing.
Diffstat (limited to 'pyaggr3g470r')
-rw-r--r-- | pyaggr3g470r/crawler.py | 6 | ||||
-rw-r--r-- | pyaggr3g470r/lib/article_utils.py | 26 |
2 files changed, 6 insertions, 26 deletions
diff --git a/pyaggr3g470r/crawler.py b/pyaggr3g470r/crawler.py index b70b4e70..4ebca1a3 100644 --- a/pyaggr3g470r/crawler.py +++ b/pyaggr3g470r/crawler.py @@ -64,7 +64,6 @@ def get(*args, **kwargs): data = feedparser.parse(args[0]) return data except Exception as e: - #print(e) raise e @asyncio.coroutine @@ -118,7 +117,8 @@ def insert_database(user, feed): new_articles = [] art_contr = ArticleController(user.id) for article in articles: - exist = art_contr.read(feed_id=feed.id, **extract_id(article)) + exist = art_contr.read(feed_id=feed.id, + **extract_id(article)).count() != 0 if exist: logger.debug("Article %r (%r) already in the database.", article.title, article.link) @@ -128,7 +128,7 @@ def insert_database(user, feed): new_articles.append(art_contr.create(**article)) logger.info("New article % (%r) added.", article.title, article.link) - except Exception: + except Exception as e: logger.exception("Error when inserting article in database:") continue return new_articles diff --git a/pyaggr3g470r/lib/article_utils.py b/pyaggr3g470r/lib/article_utils.py index 023be9a7..3c642167 100644 --- a/pyaggr3g470r/lib/article_utils.py +++ b/pyaggr3g470r/lib/article_utils.py @@ -52,25 +52,6 @@ def construct_article(entry, feed): elif entry.get('summary'): content = entry['summary'] - description = entry.get('description', '') - try: - description = entry.content[0].value - except Exception: - pass - - try: - soup = BeautifulSoup(description, "lxml") - # Prevents BeautifulSoup4 from adding extra <html><body> tags - # to the soup with the lxml parser. - if soup.html.body: - description = soup.html.body.decode_contents() - elif soup.html: - description = soup.html.decode_contents() - else: - description = soup.decode() - except Exception: - pass - article_link = entry.get('link') if conf.RESOLVE_ARTICLE_URL and article_link: try: @@ -82,13 +63,12 @@ def construct_article(entry, feed): logger.warning("Unable to get the real URL of %s. 
Error: %s", article_link, error) - return {'feed_id': feed['id'], - 'user_id': feed['user_id'], + return {'feed_id': feed.id, + 'user_id': feed.user_id, 'entry_id': extract_id(entry).get('entry_id', None), - 'link': entry.get('link', feed['site_link']), + 'link': entry.get('link', feed.site_link), 'title': entry.get('title', 'No title'), 'readed': False, 'like': False, - 'description': description, 'content': content, 'retrieved_date': now.isoformat(), 'date': (date or now).isoformat()} |