aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Cédric Bonhomme <cedric@cedricbonhomme.org> 2013-12-24 13:56:31 +0100
committer Cédric Bonhomme <cedric@cedricbonhomme.org> 2013-12-24 13:56:31 +0100
commit 580f409ac95e2b9bb95736a6d92105747b64096e (patch)
tree 65591fd3e5f00f51bca457c03bbe3e1a8be2d83c
parent Updated README. (diff)
download newspipe-580f409ac95e2b9bb95736a6d92105747b64096e.tar.gz
newspipe-580f409ac95e2b9bb95736a6d92105747b64096e.tar.bz2
newspipe-580f409ac95e2b9bb95736a6d92105747b64096e.zip
Get the 'real' url.
-rw-r--r-- pyaggr3g470r/feedgetter.py 10
1 file changed, 8 insertions, 2 deletions
diff --git a/pyaggr3g470r/feedgetter.py b/pyaggr3g470r/feedgetter.py
index 49467761..c0d93c50 100644
--- a/pyaggr3g470r/feedgetter.py
+++ b/pyaggr3g470r/feedgetter.py
@@ -29,6 +29,8 @@ __license__ = "GPLv3"
import threading
import urllib2
import feedparser
+import requests
+from urlparse import urlparse
from BeautifulSoup import BeautifulSoup
from datetime import datetime
@@ -93,7 +95,11 @@ class FeedGetter(object):
articles = []
for article in a_feed['entries']:
- if models.Article.objects(link=article.link).first() != None:
+ r = requests.get(article.link)
+ parsed_url = urlparse(r.url)
+ real_url = parsed_url.scheme + '://' + parsed_url.netloc + parsed_url.path
+
+ if models.Article.objects(link=real_url).first() != None:
# if article already in the database continue with the next article
continue
@@ -121,7 +127,7 @@ class FeedGetter(object):
post_date = datetime(*article.updated_parsed[:6])
# save the article
- article = models.Article(post_date, article.link, article_title, description, False, False)
+ article = models.Article(post_date, real_url, article_title, description, False, False)
article.save()
articles.append(article)
bgstack15