From 8c0c605ea0d59355ece2e4f2755acb535ab7b90d Mon Sep 17 00:00:00 2001
From: François Schmidts
Date: Thu, 5 Mar 2015 23:36:52 +0100
Subject: correcting wait counter and reactivating last_retrieved

---
 pyaggr3g470r/controllers/feed.py | 13 ++++++++-----
 pyaggr3g470r/lib/crawler.py      | 41 +++++++++++++++++++++++++++++-----------
 pyaggr3g470r/models/article.py   |  4 +---
 pyaggr3g470r/models/feed.py      |  4 +---
 pyaggr3g470r/models/role.py      |  4 +---
 pyaggr3g470r/models/user.py      |  3 +--
 6 files changed, 42 insertions(+), 27 deletions(-)

(limited to 'pyaggr3g470r')

diff --git a/pyaggr3g470r/controllers/feed.py b/pyaggr3g470r/controllers/feed.py
index a3cdcddd..b8e28ee6 100644
--- a/pyaggr3g470r/controllers/feed.py
+++ b/pyaggr3g470r/controllers/feed.py
@@ -2,6 +2,9 @@ from datetime import datetime, timedelta
 from .abstract import AbstractController
 from pyaggr3g470r.models import Feed
 
+import logging
+logger = logging.getLogger(__name__)
+
 DEFAULT_MAX_ERROR = 3
 DEFAULT_LIMIT = 5
 
@@ -13,11 +16,11 @@ class FeedController(AbstractController):
         from pyaggr3g470r.controllers import UserController
         now = datetime.now()
         user = UserController(self.user_id).get(id=self.user_id)
-        #max_last = now - timedelta(minutes=user.refresh_rate or 60)
+        max_last = now - timedelta(minutes=user.refresh_rate or 60)
         feeds = [feed for feed in self.read(user_id=self.user_id,
-                error_count__lt=max_error, enabled=True).limit(limit)]
-                #last_retrieved__lt=max_last).limit(limit)]
-        """if feeds:
+                error_count__lt=max_error, enabled=True,
+                last_retrieved__lt=max_last).limit(limit)]
+        if feeds:
             self.update({'id__in': [feed.id for feed in feeds]},
-                        {'last_retrieved': now})"""
+                        {'last_retrieved': now})
         return feeds
diff --git a/pyaggr3g470r/lib/crawler.py b/pyaggr3g470r/lib/crawler.py
index 64ef8b6d..97f14363 100644
--- a/pyaggr3g470r/lib/crawler.py
+++ b/pyaggr3g470r/lib/crawler.py
@@ -58,6 +58,7 @@ class AbstractCrawler:
     __counter__ = 0
 
     def __init__(self, auth):
+        AbstractCrawler.__counter__ += 1
         self.auth = auth
         self.session = self.get_session()
         self.url = conf.PLATFORM_URL
@@ -84,6 +85,13 @@ class AbstractCrawler:
             return result
         return wrapper
 
+    @classmethod
+    def get_counter_callback(cls):
+        cls.__counter__ += 1
+        def debump(*args, **kwargs):
+            cls.__counter__ -= 1
+        return debump
+
     def query_pyagg(self, method, urn, data=None):
         """A wrapper for internal call, method should be ones you can find
         on requests (header, post, get, options, ...), urn the distant
@@ -95,13 +103,15 @@ class AbstractCrawler:
         return method("%s%s%s" % (self.url, API_ROOT, urn),
                       auth=self.auth,
                       data=json.dumps(data, default=default_handler),
-                      headers={'Content-Type': 'application/json'})
+                      headers={'Content-Type': 'application/json',
+                               'User-Agent': 'pyaggr3g470r'})
 
     @classmethod
     def wait(cls):
         "See count_on_me, that method will just wait for the counter to be 0"
         time.sleep(1)
         while cls.__counter__:
+            print('running %d' % cls.__counter__)
             time.sleep(1)
 
 
@@ -144,6 +154,7 @@ class PyAggUpdater(AbstractCrawler):
     def callback(self, response):
         """Will process the result from the challenge, creating missing
         article and updating the feed"""
+        AbstractCrawler.__counter__ -= 1
         results = response.result().json()
         logger.debug('%r %r - %d entries were not matched and will be created',
                      self.feed['id'], self.feed['title'], len(results))
@@ -158,10 +169,12 @@ class PyAggUpdater(AbstractCrawler):
                      self.feed['id'], self.feed['title'],
                      self.headers.get('etag'), now)
 
-        self.query_pyagg('put', 'feed/%d' % self.feed['id'], {'error_count': 0,
-                         'etag': self.headers.get('etag', ''),
-                         'last_error': '',
-                         'last_modified': self.headers.get('last-modified', '')})
+        dico = {'error_count': 0, 'last_error': '',
+                'etag': self.headers.get('etag', ''),
+                'last_modified': self.headers.get('last-modified', '')}
+        if any([dico[key] == self.feed.get(key) for key in dico]):
+            future = self.query_pyagg('put', 'feed/%d' % self.feed['id'], dico)
+            future.add_done_callback(self.get_counter_callback())
 
 
 class FeedCrawler(AbstractCrawler):
@@ -173,13 +186,15 @@ class FeedCrawler(AbstractCrawler):
     def clean_feed(self):
         """Will reset the errors counters on a feed that have known errors"""
         if self.feed.get('error_count') or self.feed.get('last_error'):
-            self.query_pyagg('put', 'feed/%d' % self.feed['id'],
-                             {'error_count': 0, 'last_error': ''})
+            future = self.query_pyagg('put', 'feed/%d' % self.feed['id'],
+                                      {'error_count': 0, 'last_error': ''})
+            future.add_done_callback(self.get_counter_callback())
 
     @AbstractCrawler.count_on_me
     def callback(self, response):
         """will fetch the feed and interprete results (304, etag) or will
         challenge pyagg to compare gotten entries with existing ones"""
+        AbstractCrawler.__counter__ -= 1
         try:
             response = response.result()
             response.raise_for_status()
@@ -188,9 +203,10 @@ class FeedCrawler(AbstractCrawler):
             logger.warn('%r %r - an error occured while fetching feed; bumping'
                         ' error count to %r', self.feed['id'],
                         self.feed['title'], error_count)
-            self.query_pyagg('put', 'feed/%d' % self.feed['id'],
-                             {'error_count': error_count,
-                              'last_error': str(error)})
+            future = self.query_pyagg('put', 'feed/%d' % self.feed['id'],
+                                      {'error_count': error_count,
+                                       'last_error': str(error)})
+            future.add_done_callback(self.get_counter_callback())
             return
 
         if response.status_code == 304:
@@ -222,10 +238,11 @@ class CrawlerScheduler(AbstractCrawler):
     def __init__(self, username, password):
         self.auth = (username, password)
         super(CrawlerScheduler, self).__init__(self.auth)
+        AbstractCrawler.__counter__ = 0
 
     def prepare_headers(self, feed):
         """For a known feed, will construct some header dictionnary"""
-        headers = {}
+        headers = {'User-Agent': 'pyaggr3g470r/crawler'}
         if feed.get('etag', None):
             headers['If-None-Match'] = feed['etag']
         if feed.get('last_modified'):
@@ -237,6 +254,7 @@ class CrawlerScheduler(AbstractCrawler):
     @AbstractCrawler.count_on_me
     def callback(self, response):
         """processes feeds that need to be fetched"""
+        AbstractCrawler.__counter__ -= 1
         response = response.result()
         response.raise_for_status()
         feeds = response.json()
@@ -254,4 +272,5 @@ class CrawlerScheduler(AbstractCrawler):
         and launch the whole thing"""
         logger.debug('retreving fetchable feed')
         future = self.query_pyagg('get', 'feeds/fetchable', kwargs)
+        AbstractCrawler.__counter__ += 1
         future.add_done_callback(self.callback)
diff --git a/pyaggr3g470r/models/article.py b/pyaggr3g470r/models/article.py
index 0466bc35..f8f9d2d8 100644
--- a/pyaggr3g470r/models/article.py
+++ b/pyaggr3g470r/models/article.py
@@ -27,12 +27,10 @@ __copyright__ = "Copyright (c) Cedric Bonhomme"
 __license__ = "GPLv3"
 
 import json
+from bootstrap import db
 from datetime import datetime
-from flask import g
 from sqlalchemy import asc, desc
 
-db = g.db
-
 
 class Article(db.Model):
     """
diff --git a/pyaggr3g470r/models/feed.py b/pyaggr3g470r/models/feed.py
index 24542c28..a37744d6 100644
--- a/pyaggr3g470r/models/feed.py
+++ b/pyaggr3g470r/models/feed.py
@@ -26,12 +26,10 @@ __revision__ = "$Date: 2014/04/12 $"
 __copyright__ = "Copyright (c) Cedric Bonhomme"
 __license__ = "GPLv3"
 
+from bootstrap import db
 from datetime import datetime
-from flask import g
 from sqlalchemy import desc
 
-db = g.db
-
 
 class Feed(db.Model):
     """
diff --git a/pyaggr3g470r/models/role.py b/pyaggr3g470r/models/role.py
index 71497caf..f5a18fdc 100644
--- a/pyaggr3g470r/models/role.py
+++ b/pyaggr3g470r/models/role.py
@@ -26,9 +26,7 @@ __revision__ = "$Date: 2014/04/12 $"
 __copyright__ = "Copyright (c) Cedric Bonhomme"
 __license__ = "GPLv3"
 
-from flask import g
-
-db = g.db
+from bootstrap import db
 
 
 class Role(db.Model):
diff --git a/pyaggr3g470r/models/user.py b/pyaggr3g470r/models/user.py
index f2a268db..0bf9fe04 100644
--- a/pyaggr3g470r/models/user.py
+++ b/pyaggr3g470r/models/user.py
@@ -30,11 +30,10 @@ import re
 import random
 import hashlib
 from datetime import datetime
-from flask import g
 from werkzeug import generate_password_hash, check_password_hash
 from flask.ext.login import UserMixin
 
-db = g.db
+from bootstrap import db
 
 
 class User(db.Model, UserMixin):
-- cgit
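The whole series leans on the class-level __counter__ corrected above: every piece of in-flight work bumps it, every completion callback decrements it, and wait() polls until it drains back to zero so the process knows the asynchronous crawl is over. Below is a minimal, self-contained sketch of that idiom; the names are illustrative and not taken from pyaggr3g470r, and a production version would guard the counter with a lock since callbacks fire from worker threads (the patch accepts the same race).

    # Sketch of the counter-and-wait idiom the patch builds on: bump before
    # scheduling, decrement in a done-callback, poll until zero.
    import time
    from concurrent.futures import ThreadPoolExecutor


    class Tracker:
        _pending = 0  # class-level, shared by every tracked future

        @classmethod
        def track(cls, future):
            # bump before registering the callback so the decrement can
            # never run first, mirroring the patch's ordering
            cls._pending += 1
            future.add_done_callback(cls._debump)
            return future

        @classmethod
        def _debump(cls, _future):
            cls._pending -= 1

        @classmethod
        def wait(cls):
            # poll until every tracked future has completed
            while cls._pending:
                time.sleep(0.1)


    if __name__ == '__main__':
        executor = ThreadPoolExecutor(max_workers=2)
        for delay in (0.2, 0.4, 0.1):
            Tracker.track(executor.submit(time.sleep, delay))
        Tracker.wait()  # returns once every callback has fired
        print('all done')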
From 822e59f043ba7b12962c5e65f59f2fd33a339f54 Mon Sep 17 00:00:00 2001
From: François Schmidts
Date: Fri, 6 Mar 2015 11:07:43 +0100
Subject: better crawling control

---
 pyaggr3g470r/controllers/feed.py | 15 ++++++++++-----
 pyaggr3g470r/lib/crawler.py      | 12 ++++++++----
 2 files changed, 18 insertions(+), 9 deletions(-)

(limited to 'pyaggr3g470r')

diff --git a/pyaggr3g470r/controllers/feed.py b/pyaggr3g470r/controllers/feed.py
index b8e28ee6..ff496efc 100644
--- a/pyaggr3g470r/controllers/feed.py
+++ b/pyaggr3g470r/controllers/feed.py
@@ -1,10 +1,10 @@
+import logging
 from datetime import datetime, timedelta
+
 from .abstract import AbstractController
 from pyaggr3g470r.models import Feed
-import logging
 logger = logging.getLogger(__name__)
-
 DEFAULT_MAX_ERROR = 3
 DEFAULT_LIMIT = 5
 
@@ -12,14 +12,19 @@ DEFAULT_LIMIT = 5
 class FeedController(AbstractController):
     _db_cls = Feed
 
+    def list_late(self, max_last, max_error=DEFAULT_MAX_ERROR, limit=DEFAULT_LIMIT):
+        return [feed for feed in self.read(
+                        error_count__lt=max_error, enabled=True,
+                        last_retrieved__lt=max_last)
+                    .order_by('Feed.last_retrieved')
+                    .limit(limit)]
+
     def list_fetchable(self, max_error=DEFAULT_MAX_ERROR, limit=DEFAULT_LIMIT):
         from pyaggr3g470r.controllers import UserController
         now = datetime.now()
         user = UserController(self.user_id).get(id=self.user_id)
         max_last = now - timedelta(minutes=user.refresh_rate or 60)
-        feeds = [feed for feed in self.read(user_id=self.user_id,
-                error_count__lt=max_error, enabled=True,
-                last_retrieved__lt=max_last).limit(limit)]
+        feeds = self.list_late(max_last, max_error, limit)
         if feeds:
             self.update({'id__in': [feed.id for feed in feeds]},
                         {'last_retrieved': now})
diff --git a/pyaggr3g470r/lib/crawler.py b/pyaggr3g470r/lib/crawler.py
index 97f14363..8e61b7cf 100644
--- a/pyaggr3g470r/lib/crawler.py
+++ b/pyaggr3g470r/lib/crawler.py
@@ -111,16 +111,16 @@ class AbstractCrawler:
         "See count_on_me, that method will just wait for the counter to be 0"
         time.sleep(1)
         while cls.__counter__:
-            print('running %d' % cls.__counter__)
             time.sleep(1)
 
 
 class PyAggUpdater(AbstractCrawler):
 
-    def __init__(self, feed, entries, headers, auth):
+    def __init__(self, feed, entries, headers, parsed_feed, auth):
         self.feed = feed
         self.entries = entries
         self.headers = headers
+        self.parsed_feed = parsed_feed.get('feed', {})
         super(PyAggUpdater, self).__init__(auth)
 
     def to_article(self, entry):
@@ -171,7 +171,10 @@ class PyAggUpdater(AbstractCrawler):
 
         dico = {'error_count': 0, 'last_error': '',
                 'etag': self.headers.get('etag', ''),
-                'last_modified': self.headers.get('last-modified', '')}
+                'last_modified': self.headers.get('last-modified', ''),
+                'site_link': self.parsed_feed.get('link')}
+        if not self.feed.get('title'):
+            dico['title'] = self.parsed_feed.get('title', '')
         if any([dico[key] == self.feed.get(key) for key in dico]):
             future = self.query_pyagg('put', 'feed/%d' % self.feed['id'], dico)
             future.add_done_callback(self.get_counter_callback())
@@ -229,7 +232,8 @@ class FeedCrawler(AbstractCrawler):
         logger.debug('%r %r - found %d entries %r',
                      self.feed['id'], self.feed['title'], len(ids), ids)
         future = self.query_pyagg('get', 'articles/challenge', {'ids': ids})
-        updater = PyAggUpdater(self.feed, entries, response.headers, self.auth)
+        updater = PyAggUpdater(self.feed, entries, response.headers,
                               parsed_response, self.auth)
         future.add_done_callback(updater.callback)
-- cgit
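Commit 822e59f factors the "which feeds are due" logic into list_late(), ordering by last_retrieved so the stalest feeds are fetched first and no feed starves behind the limit. Written as plain SQLAlchemy, the query it builds is roughly the following sketch; the session and model are passed in as assumptions, and the column names come from the Feed model as used throughout this series.

    # Roughly the query list_late() expresses through the controller layer.
    from datetime import datetime, timedelta

    def late_feeds(session, Feed, max_error=3, limit=5, refresh_rate=60):
        # enabled feeds, below the error threshold, not refreshed recently,
        # least recently retrieved first
        max_last = datetime.now() - timedelta(minutes=refresh_rate)
        return (session.query(Feed)
                       .filter(Feed.error_count < max_error,
                               Feed.enabled == True,  # noqa: E712
                               Feed.last_retrieved < max_last)
                       .order_by(Feed.last_retrieved)
                       .limit(limit)
                       .all())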
From 0cc0e87d3f3bafba6a22c883cdf24e9962fafe37 Mon Sep 17 00:00:00 2001
From: François Schmidts
Date: Tue, 10 Mar 2015 09:25:10 +0100
Subject: failover for bad counter manipulation

---
 pyaggr3g470r/lib/crawler.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'pyaggr3g470r')

diff --git a/pyaggr3g470r/lib/crawler.py b/pyaggr3g470r/lib/crawler.py
index 8e61b7cf..9df37993 100644
--- a/pyaggr3g470r/lib/crawler.py
+++ b/pyaggr3g470r/lib/crawler.py
@@ -107,11 +107,17 @@ class AbstractCrawler:
                                'User-Agent': 'pyaggr3g470r'})
 
     @classmethod
-    def wait(cls):
+    def wait(cls, max_wait=600):
         "See count_on_me, that method will just wait for the counter to be 0"
         time.sleep(1)
+        second_waited = 1
         while cls.__counter__:
+            if second_waited > max_wait:
+                logger.warn('Exiting after %d seconds, counter at %d',
+                            max_wait, cls.__counter__)
+                break
             time.sleep(1)
+            second_waited += 1
 
 
 class PyAggUpdater(AbstractCrawler):
-- cgit
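With the bounded wait above, a crawler run can no longer hang forever when a callback dies before decrementing the counter; it logs the leak and bails out after max_wait seconds. Usage would look like the sketch below. The entry-point name run() is an assumption on my part: the series only shows that method's body (the one logging 'retreving fetchable feed'), never its def line, and the credentials are examples.

    # Assumed driver for a crawl, under the names above.
    scheduler = CrawlerScheduler('john', 'password')  # hypothetical credentials
    scheduler.run()                       # fans out the asynchronous fetches
    CrawlerScheduler.wait(max_wait=300)   # give up after 5 minutes instead of hanging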
From 4142e50e512bf05bba7e47be232c3cc3ae4e32f5 Mon Sep 17 00:00:00 2001
From: François Schmidts
Date: Tue, 10 Mar 2015 14:57:14 +0100
Subject: implementing cache construction on crawler side (limiting useless pushes)

---
 pyaggr3g470r/lib/crawler.py | 37 ++++++++++++++++++++++++++++---------
 1 file changed, 28 insertions(+), 9 deletions(-)

(limited to 'pyaggr3g470r')

diff --git a/pyaggr3g470r/lib/crawler.py b/pyaggr3g470r/lib/crawler.py
index 9df37993..99967671 100644
--- a/pyaggr3g470r/lib/crawler.py
+++ b/pyaggr3g470r/lib/crawler.py
@@ -19,6 +19,7 @@ import logging
 import requests
 import feedparser
 import dateutil.parser
+from hashlib import md5
 from functools import wraps
 from datetime import datetime
 from concurrent.futures import ThreadPoolExecutor
@@ -29,6 +30,10 @@ logger = logging.getLogger(__name__)
 API_ROOT = "api/v2.0/"
 
 
+def to_hash(text):
+    return md5(text.encode('utf8')).hexdigest()
+
+
 def extract_id(entry, keys=[('link', 'link'),
                             ('published', 'retrieved_date'),
                             ('updated', 'retrieved_date')], force_id=False):
@@ -40,8 +45,8 @@ def extract_id(entry, keys=[('link', 'link'),
     if entry_id:
         return {'entry_id': entry_id}
     if not entry_id and force_id:
-        entry_id = hash("".join(entry[entry_key] for _, entry_key in keys
-                                if entry_key in entry))
+        entry_id = to_hash("".join(entry[entry_key] for _, entry_key in keys
+                                   if entry_key in entry).encode('utf8'))
     else:
         ids = {}
         for entry_key, pyagg_key in keys:
@@ -218,18 +223,32 @@ class FeedCrawler(AbstractCrawler):
             future.add_done_callback(self.get_counter_callback())
             return
 
+        etag_generated = False
         if response.status_code == 304:
             logger.info("%r %r - feed responded with 304",
-                         self.feed['id'], self.feed['title'])
+                        self.feed['id'], self.feed['title'])
             self.clean_feed()
             return
-        if self.feed['etag'] and response.headers.get('etag') \
-                and response.headers.get('etag') == self.feed['etag']:
-            logger.info("%r %r - feed responded with same etag (%d)",
-                        self.feed['id'], self.feed['title'],
-                        response.status_code)
+        if not response.headers.get('etag'):
+            etag_generated = True
+            logger.debug('%r %r - manually generating etag',
+                         self.feed['id'], self.feed['title'])
+            response.headers['etag'] = 'pyagg/"%s"' % to_hash(response.text)
+        if self.feed['etag'] and response.headers['etag'] == self.feed['etag']:
+            if etag_generated:
+                logger.info("%r %r - calculated hash matches (%d)",
+                            self.feed['id'], self.feed['title'],
+                            response.status_code)
+            else:
+                logger.info("%r %r - feed responded with same etag (%d)",
+                            self.feed['id'], self.feed['title'],
+                            response.status_code)
             self.clean_feed()
             return
+        else:
+            logger.info('%r %r - etag mismatch %r != %r',
+                        self.feed['id'], self.feed['title'],
+                        response.headers['etag'], self.feed['etag'])
         ids, entries = [], {}
         parsed_response = feedparser.parse(response.text)
         for entry in parsed_response['entries']:
@@ -253,7 +272,7 @@ class CrawlerScheduler(AbstractCrawler):
     def prepare_headers(self, feed):
         """For a known feed, will construct some header dictionnary"""
         headers = {'User-Agent': 'pyaggr3g470r/crawler'}
-        if feed.get('etag', None):
+        if feed.get('etag') and 'pyagg' not in feed.get('etag', ''):
             headers['If-None-Match'] = feed['etag']
         if feed.get('last_modified'):
             headers['If-Modified-Since'] = feed['last_modified']
-- cgit
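The idea behind commit 4142e50: feeds served without a validator get a synthetic ETag derived from an MD5 of the body, namespaced with a pyagg/ prefix, and a synthetic value is never echoed back in If-None-Match because the origin server would not recognise it; it is only compared locally on the next fetch. Reduced to two helpers, the logic is roughly the sketch below, with hypothetical function names rather than the project's API.

    # Sketch of the fallback validator logic the commit adds.
    from hashlib import md5

    def effective_etag(headers, body):
        # prefer the validator the server sent; otherwise derive one from
        # the payload, namespaced so it can be told apart from a real etag
        if headers.get('etag'):
            return headers['etag']
        return 'pyagg/"%s"' % md5(body.encode('utf8')).hexdigest()

    def conditional_headers(stored_etag, last_modified):
        headers = {}
        if last_modified:
            headers['If-Modified-Since'] = last_modified
        if stored_etag and 'pyagg' not in stored_etag:
            # self-made etags are compared locally, never sent back
            headers['If-None-Match'] = stored_etag
        return headers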
From f2463bc333cc207ffa9ab935b7edf59a9894720d Mon Sep 17 00:00:00 2001
From: François Schmidts
Date: Mon, 6 Apr 2015 10:19:58 +0200
Subject: misc update

updating the way we maintain feed up to date in the database
fixing the counter
bumping the minimum error count
---
 pyaggr3g470r/controllers/feed.py |  5 ++--
 pyaggr3g470r/lib/crawler.py      | 49 ++++++++++++++++++++++++----------------
 pyaggr3g470r/templates/home.html |  4 ++--
 pyaggr3g470r/views/views.py      |  2 +-
 4 files changed, 36 insertions(+), 24 deletions(-)

(limited to 'pyaggr3g470r')

diff --git a/pyaggr3g470r/controllers/feed.py b/pyaggr3g470r/controllers/feed.py
index ff496efc..b99a3a7f 100644
--- a/pyaggr3g470r/controllers/feed.py
+++ b/pyaggr3g470r/controllers/feed.py
@@ -5,14 +5,15 @@ from .abstract import AbstractController
 from pyaggr3g470r.models import Feed
 logger = logging.getLogger(__name__)
 
-DEFAULT_MAX_ERROR = 3
+DEFAULT_MAX_ERROR = 6
 DEFAULT_LIMIT = 5
 
 
 class FeedController(AbstractController):
     _db_cls = Feed
 
-    def list_late(self, max_last, max_error=DEFAULT_MAX_ERROR, limit=DEFAULT_LIMIT):
+    def list_late(self, max_last, max_error=DEFAULT_MAX_ERROR,
+                  limit=DEFAULT_LIMIT):
         return [feed for feed in self.read(
                         error_count__lt=max_error, enabled=True,
                         last_retrieved__lt=max_last)
diff --git a/pyaggr3g470r/lib/crawler.py b/pyaggr3g470r/lib/crawler.py
index 99967671..1ac6029a 100644
--- a/pyaggr3g470r/lib/crawler.py
+++ b/pyaggr3g470r/lib/crawler.py
@@ -85,9 +85,12 @@ class AbstractCrawler:
         @wraps(func)
         def wrapper(*args, **kwargs):
             cls.__counter__ += 1
-            result = func(*args, **kwargs)
-            cls.__counter__ -= 1
-            return result
+            try:
+                return func(*args, **kwargs)
+            except:
+                logger.exception('an error occured while %r', func)
+            finally:
+                cls.__counter__ -= 1
         return wrapper
 
     @classmethod
@@ -172,21 +175,27 @@ class PyAggUpdater(AbstractCrawler):
         for id_to_create in results:
             entry = self.to_article(
                     self.entries[tuple(sorted(id_to_create.items()))])
-            logger.info('creating %r - %r', entry['title'], id_to_create)
+            logger.warn('%r %r - creating %r - %r', self.feed['id'],
+                        self.feed['title'], entry['title'], id_to_create)
             self.query_pyagg('post', 'article', entry)
 
         now = datetime.now()
         logger.debug('%r %r - updating feed etag %r last_mod %r',
                      self.feed['id'], self.feed['title'],
-                     self.headers.get('etag'), now)
+                     self.headers.get('etag', ''),
+                     self.headers.get('last-modified', ''))
 
-        dico = {'error_count': 0, 'last_error': '',
+        dico = {'error_count': 0, 'last_error': None,
                 'etag': self.headers.get('etag', ''),
                 'last_modified': self.headers.get('last-modified', ''),
                 'site_link': self.parsed_feed.get('link')}
         if not self.feed.get('title'):
             dico['title'] = self.parsed_feed.get('title', '')
-        if any([dico[key] == self.feed.get(key) for key in dico]):
+        logger.info('%r %r - pushing feed attrs %r',
+                    self.feed['id'], self.feed['title'],
+                    {key: "%s -> %s" % (dico[key], self.feed.get(key))
+                     for key in dico if dico[key] != self.feed.get(key)})
+        if any([dico[key] != self.feed.get(key) for key in dico]):
             future = self.query_pyagg('put', 'feed/%d' % self.feed['id'], dico)
             future.add_done_callback(self.get_counter_callback())
@@ -223,19 +232,18 @@ class FeedCrawler(AbstractCrawler):
             future.add_done_callback(self.get_counter_callback())
             return
 
-        etag_generated = False
         if response.status_code == 304:
             logger.info("%r %r - feed responded with 304",
                         self.feed['id'], self.feed['title'])
             self.clean_feed()
             return
-        if not response.headers.get('etag'):
-            etag_generated = True
+        if 'etag' not in response.headers:
             logger.debug('%r %r - manually generating etag',
                          self.feed['id'], self.feed['title'])
             response.headers['etag'] = 'pyagg/"%s"' % to_hash(response.text)
-        if self.feed['etag'] and response.headers['etag'] == self.feed['etag']:
-            if etag_generated:
+        if response.headers['etag'] and self.feed['etag'] \
+                and response.headers['etag'] == self.feed['etag']:
+            if 'pyagg' in self.feed['etag']:
                 logger.info("%r %r - calculated hash matches (%d)",
                             self.feed['id'], self.feed['title'],
                             response.status_code)
@@ -248,9 +256,11 @@ class FeedCrawler(AbstractCrawler):
             self.clean_feed()
             return
         else:
-            logger.info('%r %r - etag mismatch %r != %r',
-                        self.feed['id'], self.feed['title'],
-                        response.headers['etag'], self.feed['etag'])
+            logger.debug('%r %r - etag mismatch %r != %r',
+                         self.feed['id'], self.feed['title'],
+                         response.headers['etag'], self.feed['etag'])
+        logger.info('%r %r - cache validation failed, challenging entries',
+                    self.feed['id'], self.feed['title'])
 
         ids, entries = [], {}
         parsed_response = feedparser.parse(response.text)
@@ -273,10 +283,10 @@ class CrawlerScheduler(AbstractCrawler):
     def prepare_headers(self, feed):
         """For a known feed, will construct some header dictionnary"""
         headers = {'User-Agent': 'pyaggr3g470r/crawler'}
-        if feed.get('etag') and 'pyagg' not in feed.get('etag', ''):
-            headers['If-None-Match'] = feed['etag']
         if feed.get('last_modified'):
             headers['If-Modified-Since'] = feed['last_modified']
+        if feed.get('etag') and 'pyagg' not in feed['etag']:
+            headers['If-None-Match'] = feed['etag']
         logger.debug('%r %r - calculated headers %r',
                      feed['id'], feed['title'], headers)
         return headers
@@ -289,8 +299,8 @@ class CrawlerScheduler(AbstractCrawler):
         feeds = response.json()
         logger.debug('%d to fetch %r', len(feeds), feeds)
         for feed in feeds:
-            logger.info('%r %r - fetching resources',
-                        feed['id'], feed['title'])
+            logger.debug('%r %r - fetching resources',
+                         feed['id'], feed['title'])
             future = self.session.get(feed['link'],
                                       headers=self.prepare_headers(feed))
             future.add_done_callback(FeedCrawler(feed, self.auth).callback)
diff --git a/pyaggr3g470r/templates/home.html b/pyaggr3g470r/templates/home.html
index 3a9608d5..d2a961ab 100644
--- a/pyaggr3g470r/templates/home.html
+++ b/pyaggr3g470r/templates/home.html
@@ -22,7 +22,7 @@
           • {% if feed_id == fid %}{% endif %}
           {% if in_error.get(fid, 0) > 0 %}
-          <span style="background-color: {{ "red" if in_error[fid] > 2 else "orange" }} ;" class="badge pull-right" title="Some errors occured while trying to retrieve that feed.">{{ in_error[fid] }}</span>
+          <span style="background-color: {{ "red" if in_error[fid] > 5 else "orange" }} ;" class="badge pull-right" title="Some errors occured while trying to retrieve that feed.">{{ in_error[fid] }}</span>
           {% endif %}
           {{ nbunread }} {{ feeds[fid]|safe }}
@@ -40,7 +40,7 @@
 {% for fid, ftitle in feeds|dictsort(case_sensitive=False, by='value') if not fid in unread %}
           • {% if in_error.get(fid, 0) > 0 %}
-          <span style="background-color: {{ "red" if in_error[fid] > 2 else "orange" }} ;" class="badge pull-right" title="Some errors occured while trying to retrieve that feed.">{{ in_error[fid] }}</span>
+          <span style="background-color: {{ "red" if in_error[fid] > 5 else "orange" }} ;" class="badge pull-right" title="Some errors occured while trying to retrieve that feed.">{{ in_error[fid] }}</span>
           {% endif %}
           {% if feed_id == fid %}{% endif %} {{ ftitle|safe }}
diff --git a/pyaggr3g470r/views/views.py b/pyaggr3g470r/views/views.py
index 0f1f8765..7934eef8 100644
--- a/pyaggr3g470r/views/views.py
+++ b/pyaggr3g470r/views/views.py
@@ -248,7 +248,7 @@ def home():
             .filter(Article.readed == False, Article.user_id == g.user.id)\
             .group_by(Article.feed_id).all()
     in_error = {feed.id: feed.error_count for feed in
-                FeedController(g.user.id).read(error_count__gt=0).all()}
+                FeedController(g.user.id).read(error_count__gt=2).all()}
     def gen_url(filter_=filter_, limit=limit, feed=feed_id):
         return '?filter_=%s&limit=%s&feed=%d' % (filter_, limit, feed)
     return render_template('home.html', gen_url=gen_url, feed_id=feed_id,
-- cgit
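The key behavioural fix in f2463bc is the write-back guard. Commit 822e59f tested any([dico[key] == self.feed.get(key) ...]), meaning "push when at least one attribute is unchanged", which is almost always true; the new != test pushes only when something actually differs, and the 'pushing feed attrs' log makes the changed keys visible. The guard boils down to this sketch, where push stands in for the query_pyagg('put', ...) call:

    # Only issue the PUT when at least one attribute actually moved.
    def push_if_changed(feed, new_attrs, push):
        changes = {key: value for key, value in new_attrs.items()
                   if value != feed.get(key)}
        if changes:
            push(new_attrs)  # the patch pushes the full dict, not the delta
        return changes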
From e41348abe7bb3d336cc474ea1d246dc25390a104 Mon Sep 17 00:00:00 2001
From: François Schmidts
Date: Mon, 6 Apr 2015 12:05:59 +0200
Subject: correcting the way we use the controllers and adding documentation

---
 pyaggr3g470r/controllers/abstract.py | 26 ++++++++++++++++++++++----
 pyaggr3g470r/controllers/user.py     |  2 +-
 pyaggr3g470r/views/views.py          | 20 +++++++++++---------
 3 files changed, 34 insertions(+), 14 deletions(-)

(limited to 'pyaggr3g470r')

diff --git a/pyaggr3g470r/controllers/abstract.py b/pyaggr3g470r/controllers/abstract.py
index c084deb9..f1173817 100644
--- a/pyaggr3g470r/controllers/abstract.py
+++ b/pyaggr3g470r/controllers/abstract.py
@@ -9,11 +9,25 @@ class AbstractController(object):
     _db_cls = None  # reference to the database class
     _user_id_key = 'user_id'
 
-    def __init__(self, user_id):
+    def __init__(self, user_id=None):
+        """User id is a right management mechanism that should be used to
+        filter objects in database on their denormalized "user_id" field
+        (or "id" field for users).
+        Should no user_id be provided, the Controller won't apply any filter
+        allowing for a kind of "super user" mode.
+        """
         self.user_id = user_id
 
     def _to_filters(self, **filters):
-        if self.user_id:
+        """
+        Will translate filters to sqlalchemy filter.
+        This method will also apply user_id restriction if available.
+
+        each parameters of the function is treated as an equality unless the
+        name of the parameter ends with either "__gt", "__lt", "__ge", "__le",
+        "__ne" or "__in".
+        """
+        if self.user_id is not None:
             filters[self._user_id_key] = self.user_id
         db_filters = set()
         for key, value in filters.items():
@@ -37,17 +51,21 @@ class AbstractController(object):
         return self._db_cls.query.filter(*self._to_filters(**filters))
 
     def get(self, **filters):
+        """Will return one single objects corresponding to filters"""
         obj = self._get(**filters).first()
         if not obj:
             raise NotFound({'message': 'No %r (%r)'
                             % (self._db_cls.__class__.__name__, filters)})
-        if getattr(obj, self._user_id_key) != self.user_id:
+        if self.user_id is not None \
+                and getattr(obj, self._user_id_key) != self.user_id:
             raise Forbidden({'message': 'No authorized to access %r (%r)'
                              % (self._db_cls.__class__.__name__, filters)})
         return obj
 
     def create(self, **attrs):
-        attrs[self._user_id_key] = self.user_id
+        assert self._user_id_key in attrs or self.user_id is not None, \
+                "You must provide user_id one way or another"
+        attrs[self._user_id_key] = self.user_id or attrs.get(self._user_id_key)
         obj = self._db_cls(**attrs)
         db.session.add(obj)
         db.session.commit()
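The new docstring above documents Django-style filter suffixes. The translation body itself sits outside the hunk, so the following is only one plausible shape for it, shown for illustration rather than as the project's code; each suffix maps to the corresponding comparison on the model's column attribute.

    # Hypothetical suffix-to-SQLAlchemy translation matching the docstring.
    def to_filter(model, key, value):
        if key.endswith('__gt'):
            return getattr(model, key[:-4]) > value
        if key.endswith('__lt'):
            return getattr(model, key[:-4]) < value
        if key.endswith('__ge'):
            return getattr(model, key[:-4]) >= value
        if key.endswith('__le'):
            return getattr(model, key[:-4]) <= value
        if key.endswith('__ne'):
            return getattr(model, key[:-4]) != value
        if key.endswith('__in'):
            return getattr(model, key[:-4]).in_(value)
        return getattr(model, key) == value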
diff --git a/pyaggr3g470r/controllers/user.py b/pyaggr3g470r/controllers/user.py
index ed46e1e7..c6c1d545 100644
--- a/pyaggr3g470r/controllers/user.py
+++ b/pyaggr3g470r/controllers/user.py
@@ -4,4 +4,4 @@ from pyaggr3g470r.models import User
 
 class UserController(AbstractController):
     _db_cls = User
-    _user_id_key = 'email'
+    _user_id_key = 'id'
diff --git a/pyaggr3g470r/views/views.py b/pyaggr3g470r/views/views.py
index e202ad4d..fd970cba 100644
--- a/pyaggr3g470r/views/views.py
+++ b/pyaggr3g470r/views/views.py
@@ -93,7 +93,7 @@ def before_request():
 @login_manager.user_loader
 def load_user(email):
     # Return an instance of the User model
-    return controllers.UserController(email).get(email=email)
+    return controllers.UserController().get(email=email)
 
 
 #
@@ -153,7 +153,7 @@ def login():
     form = SigninForm()
 
     if form.validate_on_submit():
-        user = controllers.UserController(form.email.data).get(email=form.email.data)
+        user = controllers.UserController().get(email=form.email.data)
         login_user(user)
         g.user = user
         session['email'] = form.email.data
@@ -382,7 +382,7 @@ def inactives():
     List of inactive feeds.
     """
     nb_days = int(request.args.get('nb_days', 365))
-    user = controllers.UserController(g.user.email).get(email=g.user.email)
+    user = controllers.UserController(g.user.id).get(email=g.user.email)
     today = datetime.datetime.now()
     inactives = []
     for feed in user.feeds:
@@ -429,7 +429,7 @@ def export_articles():
     """
     Export all articles to HTML or JSON.
     """
-    user = controllers.UserController(g.user.email).get(id=g.user.id)
+    user = controllers.UserController(g.user.id).get(id=g.user.id)
     if request.args.get('format') == "HTML":
         # Export to HTML
         try:
@@ -439,7 +439,8 @@ def export_articles():
             return redirect(redirect_url())
         response = make_response(archive_file)
         response.headers['Content-Type'] = 'application/x-compressed'
-        response.headers['Content-Disposition'] = 'attachment; filename='+archive_file_name
+        response.headers['Content-Disposition'] = 'attachment; filename=%s' \
+                % archive_file_name
     elif request.args.get('format') == "JSON":
         # Export to JSON
         try:
@@ -461,8 +462,9 @@ def export_opml():
     """
     Export all feeds to OPML.
     """
-    user = controllers.UserController(g.user.email).get(id=g.user.id)
-    response = make_response(render_template('opml.xml', user=user, now=datetime.datetime.now()))
+    user = controllers.UserController(g.user.id).get(id=g.user.id)
+    response = make_response(render_template('opml.xml', user=user,
+                                             now=datetime.datetime.now()))
     response.headers['Content-Type'] = 'application/xml'
     response.headers['Content-Disposition'] = 'attachment; filename=feeds.opml'
     return response
@@ -637,7 +639,7 @@ def profile():
     """
     Edit the profile of the currently logged user.
     """
-    user = controllers.UserController(g.user.email).get(id=g.user.id)
+    user = controllers.UserController(g.user.id).get(id=g.user.id)
     form = ProfileForm()
 
     if request.method == 'POST':
@@ -663,7 +665,7 @@ def delete_account():
     """
    Delete the account of the user (with all its data).
     """
-    user = controllers.UserController(g.user.email).get(id=g.user.id)
+    user = controllers.UserController(g.user.id).get(id=g.user.id)
     if user is not None:
         db.session.delete(user)
         db.session.commit()
-- cgit