aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Cédric Bonhomme <cedric@cedricbonhomme.org> 2014-04-12 16:47:52 +0200
committer Cédric Bonhomme <cedric@cedricbonhomme.org> 2014-04-12 16:47:52 +0200
commit e7056b3e9ce8d733348259d6e33dec36521f3984 (patch)
tree aedc1304b523e34616ed495914f9956e70f9d23b
parent The /home page is now loading faster. (diff)
download newspipe-e7056b3e9ce8d733348259d6e33dec36521f3984.tar.gz
newspipe-e7056b3e9ce8d733348259d6e33dec36521f3984.tar.bz2
newspipe-e7056b3e9ce8d733348259d6e33dec36521f3984.zip
Improvements of the feedgetter module.
-rw-r--r-- pyaggr3g470r/feedgetter.py 22
-rw-r--r-- pyaggr3g470r/templates/unread.html 8
-rwxr-xr-x pyaggr3g470r/utils.py 20
3 files changed, 29 insertions, 21 deletions
diff --git a/pyaggr3g470r/feedgetter.py b/pyaggr3g470r/feedgetter.py
index 8cf71255..cf0cd44c 100644
--- a/pyaggr3g470r/feedgetter.py
+++ b/pyaggr3g470r/feedgetter.py
@@ -31,8 +31,6 @@ import requests
import threading
import feedparser
from datetime import datetime
-from urllib import urlencode
-from urlparse import urlparse, parse_qs, urlunparse
from BeautifulSoup import BeautifulSoup
from requests.exceptions import Timeout
@@ -137,20 +135,11 @@ class FeedGetter(object):
pyaggr3g470r_log.warning("Unable to get the real URL of %s. Error: %s" % (article.link, str(e)))
continue
# remove utm_* parameters
- parsed_url = urlparse(nice_url)
- qd = parse_qs(parsed_url.query, keep_blank_values=True)
- filtered = dict((k, v) for k, v in qd.iteritems() if not k.startswith('utm_'))
- nice_url = urlunparse([
- parsed_url.scheme,
- parsed_url.netloc,
- parsed_url.path,
- parsed_url.params,
- urlencode(filtered, doseq=True),
- parsed_url.fragment
- ])
-
- list_articles = Article.query.filter(Article.link == nice_url).all()
- if list_articles != [] and len([article1 for article1 in list_articles if article1.source.subscriber.id == self.user.id]) != 0:
+ nice_url = utils.clean_url(nice_url)
+
+ exist1 = Article.query.filter(Article.user_id == self.user.id, Article.link == nice_url).first()
+ exist2 = Article.query.filter(Article.user_id == self.user.id, Article.link == utils.clean_url(article.link.encode("utf-8"))).first()
+ if exist1 != None or exist2 != None:
continue
description = ""
@@ -213,6 +202,7 @@ class FeedGetter(object):
except Exception as e:
pyaggr3g470r_log.error("Error when inserting article in database: " + str(e))
continue
+ db.session.close()
return True
diff --git a/pyaggr3g470r/templates/unread.html b/pyaggr3g470r/templates/unread.html
index 1a586435..e0ffd2dd 100644
--- a/pyaggr3g470r/templates/unread.html
+++ b/pyaggr3g470r/templates/unread.html
@@ -13,10 +13,10 @@
<div class="row">
<div class="col-md-6 col-md-offset-3">
<h1>{{ feed.title|safe }}</h1>
- <a href="/articles/{{ feed.oid }}/100"><i class="glyphicon glyphicon-th-list" title="More articles"></i></a>
- <a href="/feed/{{ feed.oid }}"><i class="glyphicon glyphicon-info-sign" title="Details"></i></a>
- <a href="/edit_feed/{{ feed.oid }}"><i class="glyphicon glyphicon-edit" title="Edit this feed"></i></a>
- <a href="/mark_as_read/{{ feed.oid }}"><i class="glyphicon glyphicon-check" title="Mark all as read"></i></a>
+ <a href="/articles/{{ feed.id }}/100"><i class="glyphicon glyphicon-th-list" title="More articles"></i></a>
+ <a href="/feed/{{ feed.id }}"><i class="glyphicon glyphicon-info-sign" title="Details"></i></a>
+ <a href="/edit_feed/{{ feed.id }}"><i class="glyphicon glyphicon-edit" title="Edit this feed"></i></a>
+ <a href="/mark_as_read/{{ feed.id }}"><i class="glyphicon glyphicon-check" title="Mark all as read"></i></a>
<h3>{{ feed.articles.all()|length }} unread articles.</h3>
</div>
</div>
diff --git a/pyaggr3g470r/utils.py b/pyaggr3g470r/utils.py
index ab58e689..1a8a8387 100755
--- a/pyaggr3g470r/utils.py
+++ b/pyaggr3g470r/utils.py
@@ -40,7 +40,8 @@ import glob
import opml
import operator
import calendar
-
+from urllib import urlencode
+from urlparse import urlparse, parse_qs, urlunparse
from BeautifulSoup import BeautifulSoup
from collections import Counter
@@ -117,6 +118,23 @@ def import_opml(email, opml_file):
db.session.commit()
return nb
+def clean_url(url):
+ """
+ Remove utm_* parameters
+ """
+ parsed_url = urlparse(url)
+ qd = parse_qs(parsed_url.query, keep_blank_values=True)
+ filtered = dict((k, v) for k, v in qd.iteritems() if not k.startswith('utm_'))
+ nice_url = urlunparse([
+ parsed_url.scheme,
+ parsed_url.netloc,
+ parsed_url.path,
+ parsed_url.params,
+ urlencode(filtered, doseq=True),
+ parsed_url.fragment
+ ])
+ return nice_url
+
def open_url(url):
"""
Open an URL with the proxy and the user-agent
bgstack15