diff options
author | Cédric Bonhomme <cedric@cedricbonhomme.org> | 2013-12-24 13:56:31 +0100 |
---|---|---|
committer | Cédric Bonhomme <cedric@cedricbonhomme.org> | 2013-12-24 13:56:31 +0100 |
commit | 580f409ac95e2b9bb95736a6d92105747b64096e (patch) | |
tree | 65591fd3e5f00f51bca457c03bbe3e1a8be2d83c | |
parent | Updated README. (diff) | |
download | newspipe-580f409ac95e2b9bb95736a6d92105747b64096e.tar.gz newspipe-580f409ac95e2b9bb95736a6d92105747b64096e.tar.bz2 newspipe-580f409ac95e2b9bb95736a6d92105747b64096e.zip |
Get the 'real' url.
-rw-r--r-- | pyaggr3g470r/feedgetter.py | 10 |
1 file changed, 8 insertions, 2 deletions
```diff
diff --git a/pyaggr3g470r/feedgetter.py b/pyaggr3g470r/feedgetter.py
index 49467761..c0d93c50 100644
--- a/pyaggr3g470r/feedgetter.py
+++ b/pyaggr3g470r/feedgetter.py
@@ -29,6 +29,8 @@ __license__ = "GPLv3"
 import threading
 import urllib2
 import feedparser
+import requests
+from urlparse import urlparse
 from BeautifulSoup import BeautifulSoup
 from datetime import datetime
 
@@ -93,7 +95,11 @@ class FeedGetter(object):
         articles = []
 
         for article in a_feed['entries']:
-            if models.Article.objects(link=article.link).first() != None:
+            r = requests.get(article.link)
+            parsed_url = urlparse(r.url)
+            real_url = parsed_url.scheme + '://' + parsed_url.netloc + parsed_url.path
+
+            if models.Article.objects(link=real_url).first() != None:
                 # if article already in the database continue with the next article
                 continue
 
@@ -121,7 +127,7 @@ class FeedGetter(object):
                 post_date = datetime(*article.updated_parsed[:6])
 
             # save the article
-            article = models.Article(post_date, article.link, article_title, description, False, False)
+            article = models.Article(post_date, real_url, article_title, description, False, False)
             article.save()
             articles.append(article)
 
```