diff options
author | Cédric Bonhomme <cedric@cedricbonhomme.org> | 2014-04-12 16:47:52 +0200 |
---|---|---|
committer | Cédric Bonhomme <cedric@cedricbonhomme.org> | 2014-04-12 16:47:52 +0200 |
commit | e7056b3e9ce8d733348259d6e33dec36521f3984 (patch) | |
tree | aedc1304b523e34616ed495914f9956e70f9d23b /pyaggr3g470r | |
parent | The /home page is now loading faster. (diff) | |
download | newspipe-e7056b3e9ce8d733348259d6e33dec36521f3984.tar.gz newspipe-e7056b3e9ce8d733348259d6e33dec36521f3984.tar.bz2 newspipe-e7056b3e9ce8d733348259d6e33dec36521f3984.zip |
Improvements of the feedgetter module.
Diffstat (limited to 'pyaggr3g470r')
-rw-r--r-- | pyaggr3g470r/feedgetter.py | 22 | ||||
-rw-r--r-- | pyaggr3g470r/templates/unread.html | 8 | ||||
-rwxr-xr-x | pyaggr3g470r/utils.py | 20 |
3 files changed, 29 insertions, 21 deletions
diff --git a/pyaggr3g470r/feedgetter.py b/pyaggr3g470r/feedgetter.py index 8cf71255..cf0cd44c 100644 --- a/pyaggr3g470r/feedgetter.py +++ b/pyaggr3g470r/feedgetter.py @@ -31,8 +31,6 @@ import requests import threading import feedparser from datetime import datetime -from urllib import urlencode -from urlparse import urlparse, parse_qs, urlunparse from BeautifulSoup import BeautifulSoup from requests.exceptions import Timeout @@ -137,20 +135,11 @@ class FeedGetter(object): pyaggr3g470r_log.warning("Unable to get the real URL of %s. Error: %s" % (article.link, str(e))) continue # remove utm_* parameters - parsed_url = urlparse(nice_url) - qd = parse_qs(parsed_url.query, keep_blank_values=True) - filtered = dict((k, v) for k, v in qd.iteritems() if not k.startswith('utm_')) - nice_url = urlunparse([ - parsed_url.scheme, - parsed_url.netloc, - parsed_url.path, - parsed_url.params, - urlencode(filtered, doseq=True), - parsed_url.fragment - ]) - - list_articles = Article.query.filter(Article.link == nice_url).all() - if list_articles != [] and len([article1 for article1 in list_articles if article1.source.subscriber.id == self.user.id]) != 0: + nice_url = utils.clean_url(nice_url) + + exist1 = Article.query.filter(Article.user_id == self.user.id, Article.link == nice_url).first() + exist2 = Article.query.filter(Article.user_id == self.user.id, Article.link == utils.clean_url(article.link.encode("utf-8"))).first() + if exist1 != None or exist2 != None: continue description = "" @@ -213,6 +202,7 @@ class FeedGetter(object): except Exception as e: pyaggr3g470r_log.error("Error when inserting article in database: " + str(e)) continue + db.session.close() return True diff --git a/pyaggr3g470r/templates/unread.html b/pyaggr3g470r/templates/unread.html index 1a586435..e0ffd2dd 100644 --- a/pyaggr3g470r/templates/unread.html +++ b/pyaggr3g470r/templates/unread.html @@ -13,10 +13,10 @@ <div class="row"> <div class="col-md-6 col-md-offset-3"> <h1>{{ feed.title|safe }}</h1> - <a href="/articles/{{ feed.oid }}/100"><i class="glyphicon glyphicon-th-list" title="More articles"></i></a> - <a href="/feed/{{ feed.oid }}"><i class="glyphicon glyphicon-info-sign" title="Details"></i></a> - <a href="/edit_feed/{{ feed.oid }}"><i class="glyphicon glyphicon-edit" title="Edit this feed"></i></a> - <a href="/mark_as_read/{{ feed.oid }}"><i class="glyphicon glyphicon-check" title="Mark all as read"></i></a> + <a href="/articles/{{ feed.id }}/100"><i class="glyphicon glyphicon-th-list" title="More articles"></i></a> + <a href="/feed/{{ feed.id }}"><i class="glyphicon glyphicon-info-sign" title="Details"></i></a> + <a href="/edit_feed/{{ feed.id }}"><i class="glyphicon glyphicon-edit" title="Edit this feed"></i></a> + <a href="/mark_as_read/{{ feed.id }}"><i class="glyphicon glyphicon-check" title="Mark all as read"></i></a> <h3>{{ feed.articles.all()|length }} unread articles.</h3> </div> </div> diff --git a/pyaggr3g470r/utils.py b/pyaggr3g470r/utils.py index ab58e689..1a8a8387 100755 --- a/pyaggr3g470r/utils.py +++ b/pyaggr3g470r/utils.py @@ -40,7 +40,8 @@ import glob import opml import operator import calendar - +from urllib import urlencode +from urlparse import urlparse, parse_qs, urlunparse from BeautifulSoup import BeautifulSoup from collections import Counter @@ -117,6 +118,23 @@ def import_opml(email, opml_file): db.session.commit() return nb +def clean_url(url): + """ + Remove utm_* parameters + """ + parsed_url = urlparse(url) + qd = parse_qs(parsed_url.query, keep_blank_values=True) + filtered = dict((k, v) for k, v in qd.iteritems() if not k.startswith('utm_')) + nice_url = urlunparse([ + parsed_url.scheme, + parsed_url.netloc, + parsed_url.path, + parsed_url.params, + urlencode(filtered, doseq=True), + parsed_url.fragment + ]) + return nice_url + def open_url(url): """ Open an URL with the proxy and the user-agent |