about summary refs log tree commit diff
diff options
context:
space:
mode:
author Cédric Bonhomme <kimble.mandel+bitbucket@gmail.com> 2015-04-22 11:06:27 +0200
committer Cédric Bonhomme <kimble.mandel+bitbucket@gmail.com> 2015-04-22 11:06:27 +0200
commit 4fa09afdb7465db6730cb69a9f99279afdb0cf87 (patch)
tree ec221bc4fc68389fb58672cd01b34bf1740c43b0
parent Updated NEWS.rst (diff)
parent impacting wosh suppression to controller (diff)
download newspipe-4fa09afdb7465db6730cb69a9f99279afdb0cf87.tar.gz
newspipe-4fa09afdb7465db6730cb69a9f99279afdb0cf87.tar.bz2
newspipe-4fa09afdb7465db6730cb69a9f99279afdb0cf87.zip
Merged in jaesivsm/pyaggr3g470r (pull request #11)
misc improvement in http crawler
-rwxr-xr-x manager.py 6
-rw-r--r-- pyaggr3g470r/controllers/abstract.py 4
-rw-r--r-- pyaggr3g470r/controllers/article.py 8
-rw-r--r-- pyaggr3g470r/controllers/feed.py 10
-rw-r--r-- pyaggr3g470r/lib/crawler.py 15
-rw-r--r-- pyaggr3g470r/models/feed.py 1
-rw-r--r-- pyaggr3g470r/templates/layout.html 9
-rwxr-xr-x pyaggr3g470r/utils.py 21
-rw-r--r-- pyaggr3g470r/views/api/article.py 3
-rw-r--r-- pyaggr3g470r/views/api/common.py 7
-rw-r--r-- pyaggr3g470r/views/api/feed.py 21
-rw-r--r-- pyaggr3g470r/views/article.py 2
-rw-r--r-- pyaggr3g470r/views/feed.py 2
-rw-r--r-- pyaggr3g470r/views/views.py 17
14 files changed, 74 insertions, 52 deletions
diff --git a/manager.py b/manager.py
index 020a0f4c..e2dc863b 100755
--- a/manager.py
+++ b/manager.py
@@ -27,11 +27,11 @@ def db_create():
pyaggr3g470r.models.db_create(db)
@manager.command
-def fetch(user, password, limit=100):
+def fetch(user, password, limit=100, retreive_all=False):
"Crawl the feeds with the client crawler."
from pyaggr3g470r.lib.crawler import CrawlerScheduler
scheduler = CrawlerScheduler(user, password)
- scheduler.run(limit=limit)
+ scheduler.run(limit=limit, retreive_all=retreive_all)
scheduler.wait()
@manager.command
@@ -61,4 +61,4 @@ def fetch_asyncio(user_id, feed_id):
feed_getter = crawler.retrieve_feed(user, feed_id)
if __name__ == '__main__':
- manager.run()
\ No newline at end of file
+ manager.run()
diff --git a/pyaggr3g470r/controllers/abstract.py b/pyaggr3g470r/controllers/abstract.py
index 9a9004af..95f9e211 100644
--- a/pyaggr3g470r/controllers/abstract.py
+++ b/pyaggr3g470r/controllers/abstract.py
@@ -70,7 +70,9 @@ class AbstractController(object):
def create(self, **attrs):
assert self._user_id_key in attrs or self.user_id is not None, \
"You must provide user_id one way or another"
- attrs[self._user_id_key] = self.user_id or attrs.get(self._user_id_key)
+
+ if self._user_id_key not in attrs:
+ attrs[self._user_id_key] = self.user_id
obj = self._db_cls(**attrs)
db.session.add(obj)
db.session.commit()
diff --git a/pyaggr3g470r/controllers/article.py b/pyaggr3g470r/controllers/article.py
index 0ec53a2f..bcd73e99 100644
--- a/pyaggr3g470r/controllers/article.py
+++ b/pyaggr3g470r/controllers/article.py
@@ -1,7 +1,6 @@
from sqlalchemy import func
from bootstrap import db
-import conf
from .abstract import AbstractController
from pyaggr3g470r.models import Article
@@ -15,13 +14,6 @@ class ArticleController(AbstractController):
self.update({'id': article.id}, {'readed': True})
return article
- def delete(self, obj_id):
- obj = super(ArticleController, self).delete(obj_id)
- if not conf.ON_HEROKU:
- import pyaggr3g470r.search as fastsearch
- fastsearch.delete_article(self.user_id, obj.feed_id, obj_id)
- return obj
-
def challenge(self, ids):
"""Will return each id that wasn't found in the database."""
for id_ in ids:
diff --git a/pyaggr3g470r/controllers/feed.py b/pyaggr3g470r/controllers/feed.py
index 8db279ae..82714e39 100644
--- a/pyaggr3g470r/controllers/feed.py
+++ b/pyaggr3g470r/controllers/feed.py
@@ -27,8 +27,9 @@ from .abstract import AbstractController
from pyaggr3g470r.models import Feed
logger = logging.getLogger(__name__)
-DEFAULT_MAX_ERROR = conf.DEFAULT_MAX_ERROR
DEFAULT_LIMIT = 5
+DEFAULT_REFRESH_RATE = 60
+DEFAULT_MAX_ERROR = conf.DEFAULT_MAX_ERROR
class FeedController(AbstractController):
@@ -42,11 +43,10 @@ class FeedController(AbstractController):
.order_by('Feed.last_retrieved')
.limit(limit)]
- def list_fetchable(self, max_error=DEFAULT_MAX_ERROR, limit=DEFAULT_LIMIT):
- from pyaggr3g470r.controllers import UserController
+ def list_fetchable(self, max_error=DEFAULT_MAX_ERROR, limit=DEFAULT_LIMIT,
+ refresh_rate=DEFAULT_REFRESH_RATE):
now = datetime.now()
- user = UserController(self.user_id).get(id=self.user_id)
- max_last = now - timedelta(minutes=user.refresh_rate or 60)
+ max_last = now - timedelta(minutes=refresh_rate)
feeds = self.list_late(max_last, max_error, limit)
if feeds:
self.update({'id__in': [feed.id for feed in feeds]},
diff --git a/pyaggr3g470r/lib/crawler.py b/pyaggr3g470r/lib/crawler.py
index 1cb61973..339c4b12 100644
--- a/pyaggr3g470r/lib/crawler.py
+++ b/pyaggr3g470r/lib/crawler.py
@@ -16,7 +16,6 @@ import time
import conf
import json
import logging
-import requests
import feedparser
import dateutil.parser
from hashlib import md5
@@ -97,6 +96,7 @@ class AbstractCrawler:
@classmethod
def get_counter_callback(cls):
cls.__counter__ += 1
+
def debump(*args, **kwargs):
cls.__counter__ -= 1
return debump
@@ -157,6 +157,7 @@ class PyAggUpdater(AbstractCrawler):
content = entry['summary']
return {'feed_id': self.feed['id'],
+ 'user_id': self.feed['user_id'],
'entry_id': extract_id(entry).get('entry_id', None),
'link': entry.get('link', self.feed['site_link']),
'title': entry.get('title', 'No title'),
@@ -176,11 +177,11 @@ class PyAggUpdater(AbstractCrawler):
for id_to_create in results:
entry = self.to_article(
self.entries[tuple(sorted(id_to_create.items()))])
- logger.warn('%r %r - creating %r - %r', self.feed['id'],
- self.feed['title'], entry['title'], id_to_create)
+ logger.warn('%r %r - creating %r for %r - %r', self.feed['id'],
+ self.feed['title'], entry['title'], entry['user_id'],
+ id_to_create)
self.query_pyagg('post', 'article', entry)
- now = datetime.now()
logger.debug('%r %r - updating feed etag %r last_mod %r',
self.feed['id'], self.feed['title'],
self.headers.get('etag', ''),
@@ -264,8 +265,10 @@ class FeedCrawler(AbstractCrawler):
ids, entries = [], {}
parsed_response = feedparser.parse(response.text)
for entry in parsed_response['entries']:
- entries[tuple(sorted(extract_id(entry).items()))] = entry
- ids.append(extract_id(entry))
+ entry_ids = extract_id(entry)
+ entry_ids['feed_id'] = self.feed['id']
+ entries[tuple(sorted(entry_ids.items()))] = entry
+ ids.append(entry_ids)
logger.debug('%r %r - found %d entries %r',
self.feed['id'], self.feed['title'], len(ids), ids)
future = self.query_pyagg('get', 'articles/challenge', {'ids': ids})
diff --git a/pyaggr3g470r/models/feed.py b/pyaggr3g470r/models/feed.py
index aff11460..e43045f1 100644
--- a/pyaggr3g470r/models/feed.py
+++ b/pyaggr3g470r/models/feed.py
@@ -63,6 +63,7 @@ class Feed(db.Model):
def dump(self):
return {"id": self.id,
+ "user_id": self.user_id,
"title": self.title,
"description": self.description,
"link": self.link,
diff --git a/pyaggr3g470r/templates/layout.html b/pyaggr3g470r/templates/layout.html
index fba29ae3..1be20f35 100644
--- a/pyaggr3g470r/templates/layout.html
+++ b/pyaggr3g470r/templates/layout.html
@@ -6,7 +6,7 @@
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<meta name="description" content="pyAggr3g470r is a web-based news aggregator." />
<meta name="author" content="" />
- <title>pyAggr3g470r{% if head_title %} - {{ head_title }}{% endif %}</title>
+ <title>pyAggr3g470r{% if head_titles %} - {{ ' - '.join(head_titles) }}{% endif %}</title>
<link rel="shortcut icon" href="{{ url_for('static', filename='img/favicon.png') }}" />
<!-- Bootstrap core CSS -->
<link href="{{ url_for('static', filename='css/bootstrap.css') }}" rel="stylesheet" media="screen" />
@@ -26,7 +26,12 @@
<span class="icon-bar"></span>
</button>
<a class="navbar-brand" href="{{ url_for("home") }}">pyAggr3g470r</a>
- <span class="navbar-brand">{% if head_title %} - {{ head_title }}{% endif %}</span>
+ {% if head_titles %}
+ {% for head_title in head_titles %}
+ <span class="navbar-brand"> - </span>
+ <span class="navbar-brand">{{ head_title }}</span>
+ {% endfor %}
+ {% endif %}
</div>
<!-- Collect the nav links, forms, and other content for toggling -->
diff --git a/pyaggr3g470r/utils.py b/pyaggr3g470r/utils.py
index ea8a87bf..628703d2 100755
--- a/pyaggr3g470r/utils.py
+++ b/pyaggr3g470r/utils.py
@@ -172,7 +172,7 @@ def import_json(email, json_content):
description="",
link=feed["link"],
site_link=feed["site_link"],
- created_date=datetime.datetime.\
+ created_date=datetime.datetime.
fromtimestamp(int(feed["created_date"])),
enabled=feed["enabled"])
user.feeds.append(new_feed)
@@ -191,10 +191,10 @@ def import_json(email, json_content):
title=article["title"],
content=article["content"],
readed=article["readed"],
- like=article["like"], \
- retrieved_date=datetime.datetime.\
+ like=article["like"],
+ retrieved_date=datetime.datetime.
fromtimestamp(int(article["retrieved_date"])),
- date=datetime.datetime.\
+ date=datetime.datetime.
fromtimestamp(int(article["date"])),
user_id=user.id,
feed_id=user_feed.id)
@@ -228,7 +228,7 @@ def open_url(url):
if conf.HTTP_PROXY == "":
proxy = {}
else:
- proxy = {"http" : conf.HTTP_PROXY}
+ proxy = {"http": conf.HTTP_PROXY}
opener = urllib.request.FancyURLopener(proxy)
try:
opener = urllib.request.build_opener()
@@ -236,27 +236,28 @@ def open_url(url):
return (True, opener.open(url))
except urllib.error.HTTPError as e:
# server couldn't fulfill the request
- error = (url, e.code, \
- http.server.BaseHTTPRequestHandler.responses[e.code][1])
+ error = (url, e.code,
+ http.server.BaseHTTPRequestHandler.responses[e.code][1])
return (False, error)
except urllib.error.URLError as e:
# failed to reach the server
if type(e.reason) == str:
error = (url, e.reason, e.reason)
- #pyaggr3g470r_log.error(url + " " + e.reason)
else:
error = (url, e.reason.errno, e.reason.strerror)
return (False, error)
+
def clear_string(data):
"""
Clear a string by removing HTML tags, HTML special caracters
and consecutive white spaces (more that one).
"""
- p = re.compile('<[^>]+>') # HTML tags
- q = re.compile('\s') # consecutive white spaces
+ p = re.compile('<[^>]+>') # HTML tags
+ q = re.compile('\s') # consecutive white spaces
return p.sub('', q.sub(' ', data))
+
def load_stop_words():
"""
Load the stop words and return them in a list.
diff --git a/pyaggr3g470r/views/api/article.py b/pyaggr3g470r/views/api/article.py
index c3ec2d34..516eef8f 100644
--- a/pyaggr3g470r/views/api/article.py
+++ b/pyaggr3g470r/views/api/article.py
@@ -11,7 +11,8 @@ from pyaggr3g470r.views.api.common import PyAggAbstractResource,\
PyAggResourceMulti
-ARTICLE_ATTRS = {'feed_id': {'type': str},
+ARTICLE_ATTRS = {'user_id': {'type': int},
+ 'feed_id': {'type': int},
'entry_id': {'type': str},
'link': {'type': str},
'title': {'type': str},
diff --git a/pyaggr3g470r/views/api/common.py b/pyaggr3g470r/views/api/common.py
index b8477d4b..ca344c04 100644
--- a/pyaggr3g470r/views/api/common.py
+++ b/pyaggr3g470r/views/api/common.py
@@ -51,7 +51,8 @@ def authenticate(func):
# authentication via HTTP only
auth = request.authorization
if auth is not None:
- user = User.query.filter(User.nickname == auth.username).first()
+ user = User.query.filter(
+ User.nickname == auth.username).first()
if user and user.check_password(auth.password) \
and user.activation_key == "":
g.user = user
@@ -61,6 +62,7 @@ def authenticate(func):
raise Unauthorized({'WWWAuthenticate': 'Basic realm="Login Required"'})
return wrapper
+
def to_response(func):
"""Will cast results of func as a result, and try to extract
a status_code for the Response object"""
@@ -158,7 +160,8 @@ class PyAggResourceMulti(PyAggAbstractResource):
return [res for res in self.controller.read().limit(limit)]
if not limit:
return [res for res in self.controller.read(**request.json).all()]
- return [res for res in self.controller.read(**request.json).limit(limit)]
+ return [res
+ for res in self.controller.read(**request.json).limit(limit)]
def post(self):
"""creating several objects. payload should be a list of dict.
diff --git a/pyaggr3g470r/views/api/feed.py b/pyaggr3g470r/views/api/feed.py
index 7d0e2862..ad185de9 100644
--- a/pyaggr3g470r/views/api/feed.py
+++ b/pyaggr3g470r/views/api/feed.py
@@ -3,8 +3,10 @@
from flask import g
-from pyaggr3g470r.controllers.feed import FeedController, \
- DEFAULT_MAX_ERROR, DEFAULT_LIMIT
+from pyaggr3g470r.controllers.feed import (FeedController,
+ DEFAULT_MAX_ERROR,
+ DEFAULT_LIMIT,
+ DEFAULT_REFRESH_RATE)
from pyaggr3g470r.views.api.common import PyAggAbstractResource, \
PyAggResourceNew, \
@@ -41,11 +43,20 @@ class FetchableFeedAPI(PyAggAbstractResource):
controller_cls = FeedController
to_date = ['date', 'last_retrieved']
attrs = {'max_error': {'type': int, 'default': DEFAULT_MAX_ERROR},
- 'limit': {'type': int, 'default': DEFAULT_LIMIT}}
+ 'limit': {'type': int, 'default': DEFAULT_LIMIT},
+ 'refresh_rate': {'type': int, 'default': DEFAULT_REFRESH_RATE},
+ 'retreive_all': {'type': bool, 'default': False}}
def get(self):
- return [feed for feed in self.controller.list_fetchable(
- **self.reqparse_args())]
+ args = self.reqparse_args()
+ if g.user.refresh_rate:
+ args['refresh_rate'] = g.user.refresh_rate
+
+ dont_filter_by_user = args.pop('retreive_all') and g.user.is_admin()
+
+ contr = self.controller_cls() if dont_filter_by_user \
+ else self.controller
+ return [feed for feed in contr.list_fetchable(**args)]
g.api.add_resource(FeedNewAPI, '/feed', endpoint='feed_new.json')
g.api.add_resource(FeedAPI, '/feed/<int:obj_id>', endpoint='feed.json')
diff --git a/pyaggr3g470r/views/article.py b/pyaggr3g470r/views/article.py
index 08c92686..75360485 100644
--- a/pyaggr3g470r/views/article.py
+++ b/pyaggr3g470r/views/article.py
@@ -49,7 +49,7 @@ def article(article_id=None):
next_article = article.source.articles[-1]
return render_template('article.html',
- head_title=utils.clear_string(article.title),
+ head_titles=[utils.clear_string(article.title)],
article=article,
previous_article=previous_article,
next_article=next_article)
diff --git a/pyaggr3g470r/views/feed.py b/pyaggr3g470r/views/feed.py
index 159dce64..bc75aa29 100644
--- a/pyaggr3g470r/views/feed.py
+++ b/pyaggr3g470r/views/feed.py
@@ -53,7 +53,7 @@ def feed(feed_id=None):
elapsed = today - last_article
return render_template('feed.html',
- head_title=utils.clear_string(feed.title),
+ head_titles=[utils.clear_string(feed.title)],
feed=feed, tag_cloud=tag_cloud,
first_post_date=first_article,
end_post_date=last_article,
diff --git a/pyaggr3g470r/views/views.py b/pyaggr3g470r/views/views.py
index 9f4ef0b7..f344ad61 100644
--- a/pyaggr3g470r/views/views.py
+++ b/pyaggr3g470r/views/views.py
@@ -228,15 +228,19 @@ def signup():
flash(gettext('Problem while sending activation email') + ': ' + str(e), 'danger')
return redirect(url_for('home'))
- flash(gettext('Your account has been created. Check your mail to confirm it.'), 'success')
+ flash(gettext('Your account has been created. '
+ 'Check your mail to confirm it.'), 'success')
return redirect(url_for('home'))
return render_template('signup.html', form=form)
-def render_home(filters=None, head_title='', page_to_render='home', **kwargs):
+def render_home(filters=None, head_titles=None,
+ page_to_render='home', **kwargs):
if filters is None:
filters = {}
+ if head_titles is None:
+ head_titles = []
feed_contr = FeedController(g.user.id)
arti_contr = ArticleController(g.user.id)
feeds = {feed.id: feed.title for feed in feed_contr.read()}
@@ -255,8 +259,7 @@ def render_home(filters=None, head_title='', page_to_render='home', **kwargs):
filters['readed'] = filter_ == 'read'
if feed_id:
filters['feed_id'] = feed_id
- head_title = "%s%s" % (feed_contr.get(id=feed_id).title,
- (' - %s' % head_title) if head_title else '')
+ head_titles.append(feed_contr.get(id=feed_id).title)
sort_param = {"feed": Article.title.desc(),
"date": Article.date.desc(),
@@ -286,7 +289,7 @@ def render_home(filters=None, head_title='', page_to_render='home', **kwargs):
return render_template('home.html', gen_url=gen_url, feed_id=feed_id,
filter_=filter_, limit=limit, feeds=feeds,
unread=unread, articles=articles, in_error=in_error,
- head_title=head_title, sort_=sort_, **kwargs)
+ head_titles=head_titles, sort_=sort_, **kwargs)
@app.route('/')
@@ -299,7 +302,7 @@ def home():
@app.route('/favorites')
@login_required
def favorites():
- return render_home({'like': True}, gettext('Favorites'), 'favorites')
+ return render_home({'like': True}, [gettext('Favorites')], 'favorites')
@app.route('/search', methods=['GET'])
@@ -319,7 +322,7 @@ def search():
filters['content__like'] = "%%%s%%" % query
if len(filters) > 1:
filters = {"__or__": filters}
- return render_home(filters, "%s %s" % (gettext('Search:'), query),
+ return render_home(filters, ["%s %s" % (gettext('Search:'), query)],
'search', search_query=query, search_title=search_title,
search_content=search_content)
bgstack15