Diffstat (limited to 'pyaggr3g470r')
-rw-r--r--  pyaggr3g470r/crawler.py             6
-rw-r--r--  pyaggr3g470r/lib/article_utils.py  26
2 files changed, 6 insertions, 26 deletions
diff --git a/pyaggr3g470r/crawler.py b/pyaggr3g470r/crawler.py
index b70b4e70..4ebca1a3 100644
--- a/pyaggr3g470r/crawler.py
+++ b/pyaggr3g470r/crawler.py
@@ -64,7 +64,6 @@ def get(*args, **kwargs):
data = feedparser.parse(args[0])
return data
except Exception as e:
- #print(e)
raise e
@asyncio.coroutine
@@ -118,7 +117,8 @@ def insert_database(user, feed):
new_articles = []
art_contr = ArticleController(user.id)
for article in articles:
- exist = art_contr.read(feed_id=feed.id, **extract_id(article))
+ exist = art_contr.read(feed_id=feed.id,
+ **extract_id(article)).count() != 0
if exist:
logger.debug("Article %r (%r) already in the database.",
article.title, article.link)
@@ -128,7 +128,7 @@ def insert_database(user, feed):
new_articles.append(art_contr.create(**article))
logger.info("New article % (%r) added.",
article.title, article.link)
- except Exception:
+ except Exception as e:
logger.exception("Error when inserting article in database:")
continue
return new_articles
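Note on the hunk above: the old code assigned the raw result of ArticleController.read() to `exist` and tested its truthiness, while the new code counts matching rows. Assuming read() returns a SQLAlchemy-style query (which the added .count() call suggests), the query object itself is always truthy, so the explicit count is what makes the duplicate check meaningful. A minimal, self-contained sketch of that behaviour; the Article model below is a stand-in, not the project's real model:

# Illustration only: a Query object defines neither __bool__ nor __len__, so
# bool(query) is always True even when no rows match; .count() != 0 gives the
# real answer, which is the pattern introduced by this diff.
from sqlalchemy import Column, Integer, String, create_engine
from sqlalchemy.orm import Session, declarative_base

Base = declarative_base()

class Article(Base):
    __tablename__ = 'article'
    id = Column(Integer, primary_key=True)
    link = Column(String)

engine = create_engine('sqlite://')          # throwaway in-memory database
Base.metadata.create_all(engine)

with Session(engine) as session:
    query = session.query(Article).filter_by(link='missing')
    print(bool(query))         # True  -- the old `if exist:` style of check
    print(query.count() != 0)  # False -- the check used by the new code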
diff --git a/pyaggr3g470r/lib/article_utils.py b/pyaggr3g470r/lib/article_utils.py
index 023be9a7..3c642167 100644
--- a/pyaggr3g470r/lib/article_utils.py
+++ b/pyaggr3g470r/lib/article_utils.py
@@ -52,25 +52,6 @@ def construct_article(entry, feed):
elif entry.get('summary'):
content = entry['summary']
- description = entry.get('description', '')
- try:
- description = entry.content[0].value
- except Exception:
- pass
-
- try:
- soup = BeautifulSoup(description, "lxml")
- # Prevents BeautifulSoup4 from adding extra <html><body> tags
- # to the soup with the lxml parser.
- if soup.html.body:
- description = soup.html.body.decode_contents()
- elif soup.html:
- description = soup.html.decode_contents()
- else:
- description = soup.decode()
- except Exception:
- pass
-
article_link = entry.get('link')
if conf.RESOLVE_ARTICLE_URL and article_link:
try:
@@ -82,13 +63,12 @@ def construct_article(entry, feed):
logger.warning("Unable to get the real URL of %s. Error: %s",
article_link, error)
- return {'feed_id': feed['id'],
- 'user_id': feed['user_id'],
+ return {'feed_id': feed.id,
+ 'user_id': feed.user_id,
'entry_id': extract_id(entry).get('entry_id', None),
- 'link': entry.get('link', feed['site_link']),
+ 'link': entry.get('link', feed.site_link),
'title': entry.get('title', 'No title'),
'readed': False, 'like': False,
- 'description': description,
'content': content,
'retrieved_date': now.isoformat(),
'date': (date or now).isoformat()}
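Note on the hunk above: the dedicated description extraction (and its BeautifulSoup cleanup) is dropped, presumably because the content/summary handling earlier in construct_article() already covers it, and the feed is now read through attribute access, i.e. the function expects a Feed model object rather than a dict. A minimal sketch of the assumed calling convention; the SimpleNamespace feed and the entry dict below are hypothetical stand-ins, not the project's real objects:

from types import SimpleNamespace

# Stand-in for a Feed model row: the function now reads feed.id, feed.user_id
# and feed.site_link instead of feed['id'], feed['user_id'], feed['site_link'].
feed = SimpleNamespace(id=1, user_id=1, site_link='https://example.org/')

# Minimal feedparser-like entry; when 'link' is absent the article falls back
# to feed.site_link, as in the return statement of the diff above.
entry = {'title': 'Hello', 'summary': '<p>Hello world</p>'}

article = {'feed_id': feed.id,
           'user_id': feed.user_id,
           'link': entry.get('link', feed.site_link),
           'title': entry.get('title', 'No title'),
           'content': entry.get('summary', '')}
# No 'description' key any more: consumers of construct_article() should read
# 'content' instead.
print(article)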