about summary refs log tree commit diff
diff options
context:
space:
mode:
author Cédric Bonhomme <kimble.mandel+bitbucket@gmail.com> 2015-08-06 23:36:13 +0200
committer Cédric Bonhomme <kimble.mandel+bitbucket@gmail.com> 2015-08-06 23:36:13 +0200
commit 481d30d4b5d46608a7f0b5cf1122293bc61d3ea5 (patch)
tree 4e8690425332988b84e62ebcd334f7bcb30bf89d
parent Ensuere that the arguments is an integer. (diff)
parent fixing bug preventing from bumping error count from api crawler for none admi... (diff)
download newspipe-481d30d4b5d46608a7f0b5cf1122293bc61d3ea5.tar.gz
newspipe-481d30d4b5d46608a7f0b5cf1122293bc61d3ea5.tar.bz2
newspipe-481d30d4b5d46608a7f0b5cf1122293bc61d3ea5.zip
Merged in jaesivsm/pyaggr3g470r (pull request #21)
fixing bug on http crawler and light refact
-rw-r--r-- pyaggr3g470r/controllers/feed.py 5
-rw-r--r-- pyaggr3g470r/lib/crawler.py 94
-rw-r--r-- pyaggr3g470r/templates/admin/user.html 35
-rw-r--r-- pyaggr3g470r/templates/feed_list.html 47
-rw-r--r-- pyaggr3g470r/templates/feeds.html 44
-rw-r--r-- pyaggr3g470r/views/api/article.py 3
-rw-r--r-- pyaggr3g470r/views/api/common.py 16
-rw-r--r-- pyaggr3g470r/views/api/feed.py 3
-rw-r--r-- pyaggr3g470r/views/views.py 17
9 files changed, 106 insertions, 158 deletions
diff --git a/pyaggr3g470r/controllers/feed.py b/pyaggr3g470r/controllers/feed.py
index 6b3c4fb5..a8e96217 100644
--- a/pyaggr3g470r/controllers/feed.py
+++ b/pyaggr3g470r/controllers/feed.py
@@ -21,7 +21,6 @@
import logging
from datetime import datetime, timedelta
-from werkzeug.exceptions import NotFound
import conf
from .abstract import AbstractController
@@ -59,9 +58,7 @@ class FeedController(AbstractController):
if not attrs.get('icon_url'):
return
icon_contr = IconController()
- try:
- icon_contr.get(url=attrs['icon_url'])
- except NotFound:
+ if not icon_contr.read(url=attrs['icon_url']).count():
icon_contr.create(**{'url': attrs['icon_url']})
def create(self, **attrs):
diff --git a/pyaggr3g470r/lib/crawler.py b/pyaggr3g470r/lib/crawler.py
index 216e7a96..91942c59 100644
--- a/pyaggr3g470r/lib/crawler.py
+++ b/pyaggr3g470r/lib/crawler.py
@@ -33,10 +33,8 @@ API_ROOT = "api/v2.0/"
class AbstractCrawler:
__session__ = None
- __counter__ = 0
def __init__(self, auth):
- AbstractCrawler.__counter__ += 1
self.auth = auth
self.session = self.get_session()
self.url = conf.PLATFORM_URL
@@ -50,30 +48,6 @@ class AbstractCrawler:
cls.__session__.verify = False
return cls.__session__
- @classmethod
- def count_on_me(cls, func):
- """A basic decorator which will count +1 at the begining of a call
- and -1 at the end. It kinda allows us to wait for the __counter__ value
- to be 0, meaning nothing is done anymore."""
- @wraps(func)
- def wrapper(*args, **kwargs):
- cls.__counter__ += 1
- try:
- return func(*args, **kwargs)
- except:
- logger.exception('an error occured while %r', func)
- finally:
- cls.__counter__ -= 1
- return wrapper
-
- @classmethod
- def get_counter_callback(cls):
- cls.__counter__ += 1
-
- def debump(*args, **kwargs):
- cls.__counter__ -= 1
- return debump
-
def query_pyagg(self, method, urn, data=None):
"""A wrapper for internal call, method should be ones you can find
on requests (header, post, get, options, ...), urn the distant
@@ -89,17 +63,23 @@ class AbstractCrawler:
'User-Agent': 'pyaggr3g470r'})
@classmethod
- def wait(cls, max_wait=600):
+ def wait(cls, max_wait=300, checks=5, wait_for=2):
"See count_on_me, that method will just wait for the counter to be 0"
- time.sleep(1)
- second_waited = 1
- while cls.__counter__:
+ checked, second_waited = 0, 0
+ checked = 0
+ while True:
+ time.sleep(wait_for)
+ second_waited += wait_for
if second_waited > max_wait:
logger.warn('Exiting after %d seconds, counter at %d',
- max_wait, cls.__counter__)
+ max_wait, len(cls.__counter__))
break
- time.sleep(1)
- second_waited += 1
+ if cls.get_session().executor._work_queue.queue:
+ checked = 0
+ continue
+ checked += 1
+ if checked == checks:
+ break
class PyAggUpdater(AbstractCrawler):
@@ -109,26 +89,26 @@ class PyAggUpdater(AbstractCrawler):
self.entries = entries
self.headers = headers
self.parsed_feed = parsed_feed
- super(PyAggUpdater, self).__init__(auth)
+ super().__init__(auth)
- @AbstractCrawler.count_on_me
def callback(self, response):
"""Will process the result from the challenge, creating missing article
and updating the feed"""
- AbstractCrawler.__counter__ -= 1
- results = response.result().json()
- logger.debug('%r %r - %d entries were not matched and will be created',
- self.feed['id'], self.feed['title'], len(results))
article_created = False
- for id_to_create in results:
- article_created = True
- entry = construct_article(
- self.entries[tuple(sorted(id_to_create.items()))],
- self.feed)
- logger.info('%r %r - creating %r for %r - %r', self.feed['id'],
- self.feed['title'], entry['title'], entry['user_id'],
- id_to_create)
- self.query_pyagg('post', 'article', entry)
+ if response.result().status_code != 204:
+ results = response.result().json()
+ logger.debug('%r %r - %d entries were not matched '
+ 'and will be created',
+ self.feed['id'], self.feed['title'], len(results))
+ for id_to_create in results:
+ article_created = True
+ entry = construct_article(
+ self.entries[tuple(sorted(id_to_create.items()))],
+ self.feed)
+ logger.info('%r %r - creating %r for %r - %r', self.feed['id'],
+ self.feed['title'], entry['title'],
+ entry['user_id'], id_to_create)
+ self.query_pyagg('post', 'article', entry)
logger.debug('%r %r - updating feed etag %r last_mod %r',
self.feed['id'], self.feed['title'],
@@ -160,27 +140,23 @@ class PyAggUpdater(AbstractCrawler):
future = self.query_pyagg('put',
'feed/%d' % self.feed['id'], up_feed)
- future.add_done_callback(self.get_counter_callback())
class FeedCrawler(AbstractCrawler):
def __init__(self, feed, auth):
self.feed = feed
- super(FeedCrawler, self).__init__(auth)
+ super().__init__(auth)
def clean_feed(self):
"""Will reset the errors counters on a feed that have known errors"""
if self.feed.get('error_count') or self.feed.get('last_error'):
future = self.query_pyagg('put', 'feed/%d' % self.feed['id'],
{'error_count': 0, 'last_error': ''})
- future.add_done_callback(self.get_counter_callback())
- @AbstractCrawler.count_on_me
def callback(self, response):
"""will fetch the feed and interprete results (304, etag) or will
challenge pyagg to compare gotten entries with existing ones"""
- AbstractCrawler.__counter__ -= 1
try:
response = response.result()
response.raise_for_status()
@@ -191,8 +167,8 @@ class FeedCrawler(AbstractCrawler):
self.feed['title'], error_count)
future = self.query_pyagg('put', 'feed/%d' % self.feed['id'],
{'error_count': error_count,
- 'last_error': str(error)})
- future.add_done_callback(self.get_counter_callback())
+ 'last_error': str(error),
+ 'user_id': self.feed['user_id']})
return
if response.status_code == 304:
@@ -244,7 +220,6 @@ class CrawlerScheduler(AbstractCrawler):
def __init__(self, username, password):
self.auth = (username, password)
super(CrawlerScheduler, self).__init__(self.auth)
- AbstractCrawler.__counter__ = 0
def prepare_headers(self, feed):
"""For a known feed, will construct some header dictionnary"""
@@ -257,12 +232,13 @@ class CrawlerScheduler(AbstractCrawler):
feed['id'], feed['title'], headers)
return headers
- @AbstractCrawler.count_on_me
def callback(self, response):
"""processes feeds that need to be fetched"""
- AbstractCrawler.__counter__ -= 1
response = response.result()
response.raise_for_status()
+ if response.status_code == 204:
+ logger.debug("No feed to fetch")
+ return
feeds = response.json()
logger.debug('%d to fetch %r', len(feeds), feeds)
for feed in feeds:
@@ -272,11 +248,9 @@ class CrawlerScheduler(AbstractCrawler):
headers=self.prepare_headers(feed))
future.add_done_callback(FeedCrawler(feed, self.auth).callback)
- @AbstractCrawler.count_on_me
def run(self, **kwargs):
"""entry point, will retreive feeds to be fetch
and launch the whole thing"""
logger.debug('retreving fetchable feed')
future = self.query_pyagg('get', 'feeds/fetchable', kwargs)
- AbstractCrawler.__counter__ += 1
future.add_done_callback(self.callback)
diff --git a/pyaggr3g470r/templates/admin/user.html b/pyaggr3g470r/templates/admin/user.html
index 21bcd6b6..d1e08c0d 100644
--- a/pyaggr3g470r/templates/admin/user.html
+++ b/pyaggr3g470r/templates/admin/user.html
@@ -14,39 +14,6 @@
</div>
</div>
</div>
- <div class="well">
- {% if user.feeds.all()|count == 0 %}
- <h1>{{ _('This user is not subscribed to any feed.') }}</h1>
- {% else %}
- <h1>{{ _('Feeds') }}</h1>
- <table class="table table-striped">
- <thead>
- <tr>
- <th>#</th>
- <th>{{ _('Title') }}</th>
- <th>{{ _('Feed link') }}</th>
- <th>{{ _('Site link') }}</th>
- <th>{{ _('(unread) articles') }}</th>
- <th>{{ _('Actions') }}</th>
- </tr>
- </thead>
- <tbody>
- {% for feed in user.feeds|sort(attribute="title") %}
- <tr>
- <td>{{ loop.index }}</td>
- <td><a href="{{ url_for("feed.feed", feed_id=feed.id) }}">{%if feed.icon%}<img src="{{ url_for('feed.icon', feed_id=feed.id) }}" width="16px" />{%endif%}{{ feed.title }}</a></td>
- <td>{{ feed.link }}</td>
- <td>{{ feed.site_link }}</td>
- <td>( {{ unread_article_count.get(feed.id, 0) }} ) {{ article_count.get(feed.id, 0) }}</td>
- <td>
- <a href="{{ url_for("feed.feed", feed_id=feed.id) }}"><i class="glyphicon glyphicon-th-list" title="{{ _('Feed') }}"></i></a>
- <a href="{{ url_for("feed.form", feed_id=feed.id) }}"><i class="glyphicon glyphicon-edit" title="{{ _('Edit this feed') }}"></i></a>
- <a href="{{ url_for("feed.delete", feed_id=feed.id) }}"><i class="glyphicon glyphicon-remove" title="{{ _('Delete this feed') }}" onclick="return confirm('{{ _('You are going to delete this feed.') }}');"></i></a>
- </td>
- {% endfor %}
- </tbody>
- </table>
- {% endif %}
- </div>
+ {% include "feed_list.html" %}
</div>
{% endblock %}
diff --git a/pyaggr3g470r/templates/feed_list.html b/pyaggr3g470r/templates/feed_list.html
new file mode 100644
index 00000000..c5cadab0
--- /dev/null
+++ b/pyaggr3g470r/templates/feed_list.html
@@ -0,0 +1,47 @@
+{% if feeds.all()| count == 0 %}
+ <h1>{{_("No feed")}}</h1>
+{% else %}
+<div class="table-responsive">
+ <table class="table table-striped">
+ <thead>
+ <tr>
+ <th>#</th>
+ <th>{{ _('Status') }}</th>
+ <th>{{ _('Title') }}</th>
+ <th>{{ _('Site') }}</th>
+ <th>{{ _('Articles') }}</th>
+ <th>{{ _('Actions') }}</th>
+ </tr>
+ </thead>
+ <tbody>
+ {% for feed in feeds|sort(attribute="title") %}
+ <tr {% if not feed.enabled %}class="warning"{% endif %}>
+ <td>{{ loop.index }}</td>
+ <td>
+ {% if feed.enabled %}
+ <i class="glyphicon glyphicon-eye-open" title="{{ _('Feed enabled') }}"></i>
+ {% else %}
+ <i class="glyphicon glyphicon-eye-close" title="{{ _('Feed disabled') }}"></i>
+ {% endif %}
+ {% if feed.error_count >= conf.DEFAULT_MAX_ERROR %}
+ <i class="glyphicon glyphicon-exclamation-sign" title="{{ _('Feed encountered too much errors.') }}"></i>
+ {% endif %}
+ </td>
+ <td><a href="{{ url_for("feed.feed", feed_id=feed.id) }}" {% if feed.description %}title="{{ feed.description }}"{% endif %}>
+ {% if feed.icon_url %}<img src="{{ url_for('icon.icon', url=feed.icon_url) }}" width="16px" />{% endif %}
+ {{ feed.title }}
+ </a></td>
+ <td><a href="{{ feed.site_link }}">{{ feed.site_link }}</a></td>
+ <td>( {{ unread_article_count.get(feed.id, 0) }} ) {{ article_count.get(feed.id, 0) }}</td>
+ <td>
+ <a href="{{ url_for("home", feed_id=feed.id, filter_="all") }}"><i class="glyphicon glyphicon-th-list" title="{{ _('Articles') }}"></i></a>
+ <a href="{{ url_for("feed.form", feed_id=feed.id) }}"><i class="glyphicon glyphicon-edit" title="{{ _('Edit this feed') }}"></i></a>
+ <a href="/duplicates/{{ feed.id }}"><i class="glyphicon glyphicon-book" title="{{ _('Duplicate articles') }}"></i></a>
+ <a href="{{ url_for("feed.delete", feed_id=feed.id) }}"><i class="glyphicon glyphicon-remove" title="{{ _('Delete this feed') }}" onclick="return confirm('{{ _('You are going to delete this feed.') }}');"></i></a>
+ </td>
+ </tr>
+ {% endfor %}
+ </tbody>
+ </table>
+</div>
+{% endif %}
diff --git a/pyaggr3g470r/templates/feeds.html b/pyaggr3g470r/templates/feeds.html
index 82af2411..9ba16359 100644
--- a/pyaggr3g470r/templates/feeds.html
+++ b/pyaggr3g470r/templates/feeds.html
@@ -2,48 +2,6 @@
{% block content %}
<div class="container">
<h1>{{ _('You are subscribed to') }} {{ feeds.count() }} {{ _('feeds') }} &middot; {{ _('Add a') }} <a href="{{ url_for("feed.form") }}">{{ _('feed') }}</a></h1>
- <div class="table-responsive">
- <table class="table table-striped">
- <thead>
- <tr>
- <th>#</th>
- <th>{{ _('Status') }}</th>
- <th>{{ _('Title') }}</th>
- <th>{{ _('Site') }}</th>
- <th>{{ _('Articles') }}</th>
- <th>{{ _('Actions') }}</th>
- </tr>
- </thead>
- <tbody>
- {% for feed in feeds|sort(attribute="title") %}
- <tr {% if not feed.enabled %}class="warning"{% endif %}>
- <td>{{ loop.index }}</td>
- <td>
- {% if feed.enabled %}
- <i class="glyphicon glyphicon-eye-open" title="{{ _('Feed enabled') }}"></i>
- {% else %}
- <i class="glyphicon glyphicon-eye-close" title="{{ _('Feed disabled') }}"></i>
- {% endif %}
- {% if feed.error_count > conf.DEFAULT_MAX_ERROR %}
- <i class="glyphicon glyphicon-exclamation-sign" title="{{ _('Feed encountered too much errors.') }}"></i>
- {% endif %}
- </td>
- <td><a href="{{ url_for("feed.feed", feed_id=feed.id) }}" {% if feed.description %}title="{{ feed.description }}"{% endif %}>
- {% if feed.icon_url %}<img src="{{ url_for('icon.icon', url=feed.icon_url) }}" width="16px" />{% endif %}
- {{ feed.title }}
- </a></td>
- <td><a href="{{ feed.site_link }}">{{ feed.site_link }}</a></td>
- <td>( {{ unread_article_count.get(feed.id, 0) }} ) {{ article_count.get(feed.id, 0) }}</td>
- <td>
- <a href="{{ url_for("home", feed_id=feed.id, filter_="all") }}"><i class="glyphicon glyphicon-th-list" title="{{ _('Articles') }}"></i></a>
- <a href="{{ url_for("feed.form", feed_id=feed.id) }}"><i class="glyphicon glyphicon-edit" title="{{ _('Edit this feed') }}"></i></a>
- <a href="/duplicates/{{ feed.id }}"><i class="glyphicon glyphicon-book" title="{{ _('Duplicate articles') }}"></i></a>
- <a href="{{ url_for("feed.delete", feed_id=feed.id) }}"><i class="glyphicon glyphicon-remove" title="{{ _('Delete this feed') }}" onclick="return confirm('{{ _('You are going to delete this feed.') }}');"></i></a>
- </td>
- </tr>
- {% endfor %}
- </tbody>
- </table>
- </div>
+ {% include "feed_list.html" %}
</div><!-- /.container -->
{% endblock %}
diff --git a/pyaggr3g470r/views/api/article.py b/pyaggr3g470r/views/api/article.py
index 03ecdb18..d2969cb0 100644
--- a/pyaggr3g470r/views/api/article.py
+++ b/pyaggr3g470r/views/api/article.py
@@ -51,7 +51,8 @@ class ArticlesChallenge(PyAggAbstractResource):
if key in id_dict:
id_dict[key] = dateutil.parser.parse(id_dict[key])
- return self.wider_controller.challenge(parsed_args['ids'])
+ result = list(self.wider_controller.challenge(parsed_args['ids']))
+ return result or None, 200 if result else 204
g.api.add_resource(ArticleNewAPI, '/article', endpoint='article_new.json')
diff --git a/pyaggr3g470r/views/api/common.py b/pyaggr3g470r/views/api/common.py
index a7068807..acb5dd68 100644
--- a/pyaggr3g470r/views/api/common.py
+++ b/pyaggr3g470r/views/api/common.py
@@ -161,17 +161,17 @@ class PyAggResourceMulti(PyAggAbstractResource):
"""retrieve several objects. filters can be set in the payload on the
different fields of the object, and a limit can be set in there as well
"""
- if 'application/json' not in request.headers.get('Content-Type'):
- raise BadRequest("Content-Type must be application/json")
- limit = 10
try:
limit = request.json.pop('limit', 10)
+ order_by = request.json.pop('order_by', None)
+ query = self.controller.read(**request.json)
except:
- return [res for res in self.controller.read().limit(limit)]
- if not limit:
- return [res for res in self.controller.read(**request.json).all()]
- return [res
- for res in self.controller.read(**request.json).limit(limit)]
+ limit, order_by, query = 10, None, self.controller.read()
+ if order_by:
+ query = query.order_by(order_by)
+ if limit:
+ query = query.limit(limit)
+ return [res for res in query]
def post(self):
"""creating several objects. payload should be a list of dict.
diff --git a/pyaggr3g470r/views/api/feed.py b/pyaggr3g470r/views/api/feed.py
index 7d8cdf38..c80e9a9b 100644
--- a/pyaggr3g470r/views/api/feed.py
+++ b/pyaggr3g470r/views/api/feed.py
@@ -58,7 +58,8 @@ class FetchableFeedAPI(PyAggAbstractResource):
contr = self.wider_controller
else:
contr = self.controller
- return [feed for feed in contr.list_fetchable(**args)]
+ result = [feed for feed in contr.list_fetchable(**args)]
+ return result or None, 200 if result else 204
g.api.add_resource(FeedNewAPI, '/feed', endpoint='feed_new.json')
diff --git a/pyaggr3g470r/views/views.py b/pyaggr3g470r/views/views.py
index 69c2b50b..560bd382 100644
--- a/pyaggr3g470r/views/views.py
+++ b/pyaggr3g470r/views/views.py
@@ -718,8 +718,10 @@ def create_user(user_id=None):
user.activation_key = ""
db.session.add(user)
db.session.commit()
- flash(gettext('User') + ' ' + user.nickname + ' ' + gettext('successfully created.'), 'success')
- return redirect("/admin/edit_user/"+str(user.id))
+ flash("%s %s %s" % (gettext('User'), user.nickname,
+ gettext('successfully created.')),
+ 'success')
+ return redirect(url_for('create_user', user_id=user.id))
else:
return redirect(url_for('create_user'))
@@ -727,11 +729,13 @@ def create_user(user_id=None):
if user_id is not None:
user = User.query.filter(User.id == user_id).first()
form = UserForm(obj=user)
- message = gettext('Edit the user') + ' <i>' + user.nickname + '</i>'
+ message = "%s <i>%s</i>" % (gettext('Edit the user'),
+ user.nickname)
else:
form = UserForm()
message = gettext('Add a new user')
- return render_template('/admin/create_user.html', form=form, message=message)
+ return render_template('/admin/create_user.html',
+ form=form, message=message)
@app.route('/admin/user/<int:user_id>', methods=['GET'])
@login_required
@@ -743,10 +747,9 @@ def user(user_id=None):
user = UserController().get(id=user_id)
if user is not None:
article_contr = ArticleController(user_id)
- return render_template('/admin/user.html', user=user,
+ return render_template('/admin/user.html', user=user, feeds=user.feeds,
article_count=article_contr.count_by_feed(),
- unread_article_count=article_contr.count_by_feed(readed=False),
- )
+ unread_article_count=article_contr.count_by_feed(readed=False))
else:
flash(gettext('This user does not exist.'), 'danger')
bgstack15