From 5572851eca3b2f1bc56aed7232284acc436d2f49 Mon Sep 17 00:00:00 2001 From: François Schmidts Date: Sun, 1 Mar 2015 03:20:12 +0100 Subject: new crawler with cache control and error handling --- bootstrap.py | 2 +- conf.py | 1 + conf/conf.cfg-sample | 1 + migrations/versions/4b5c161e1ced_.py | 24 +++-- pyaggr3g470r/controllers/abstract.py | 15 ++- pyaggr3g470r/controllers/article.py | 11 +- pyaggr3g470r/controllers/feed.py | 17 ++- pyaggr3g470r/lib/crawler.py | 204 +++++++++++++++++++++++++++++++++-- pyaggr3g470r/lib/utils.py | 14 +++ pyaggr3g470r/models/__init__.py | 8 +- pyaggr3g470r/views/api/article.py | 36 +++++-- pyaggr3g470r/views/api/common.py | 79 +++++++------- pyaggr3g470r/views/api/feed.py | 37 +++---- pyaggr3g470r/views/views.py | 27 +---- 14 files changed, 348 insertions(+), 128 deletions(-) create mode 100644 pyaggr3g470r/lib/utils.py diff --git a/bootstrap.py b/bootstrap.py index b9435d5c..5d599146 100644 --- a/bootstrap.py +++ b/bootstrap.py @@ -12,7 +12,7 @@ if not (conf.WEBSERVER_DEBUG or conf.ON_HEROKU): gevent.monkey.patch_thread() -def set_logging(log_path, log_level=logging.INFO, +def set_logging(log_path, log_level=logging.DEBUG, log_format='%(asctime)s %(levelname)s %(message)s'): logger = logging.getLogger('pyaggr3g470r') formater = logging.Formatter(log_format) diff --git a/conf.py b/conf.py index 296f784d..6d963ebe 100644 --- a/conf.py +++ b/conf.py @@ -39,6 +39,7 @@ if not ON_HEROKU: RECAPTCHA_PRIVATE_KEY = config.get('misc', 'recaptcha_private_key') LOG_PATH = config.get('misc', 'log_path') PYTHON = config.get('misc', 'python') + NB_WORKER = config.getint('misc', 'nb_worker') WHOOSH_ENABLED = True diff --git a/conf/conf.cfg-sample b/conf/conf.cfg-sample index aab5ab5f..6e4bb7b7 100644 --- a/conf/conf.cfg-sample +++ b/conf/conf.cfg-sample @@ -5,6 +5,7 @@ recaptcha_public_key = recaptcha_private_key = log_path = ./pyaggr3g470r/var/pyaggr3g470r.log python = python3.3 +nb_worker = 5 [database] uri = postgres://pgsqluser:pgsqlpwd@127.0.0.1:5432/aggregator [feedparser] diff --git a/migrations/versions/4b5c161e1ced_.py b/migrations/versions/4b5c161e1ced_.py index 1efb5f81..1fa91717 100644 --- a/migrations/versions/4b5c161e1ced_.py +++ b/migrations/versions/4b5c161e1ced_.py @@ -5,6 +5,7 @@ Revises: None Create Date: 2015-01-17 01:04:10.187285 """ +from datetime import datetime # revision identifiers, used by Alembic. revision = '4b5c161e1ced' @@ -15,22 +16,27 @@ import sqlalchemy as sa def upgrade(): - ### commands auto generated by Alembic - please adjust! ### - op.add_column('feed', sa.Column('error_count', sa.Integer(), nullable=True)) + unix_start = datetime(1970, 1, 1) + # commands auto generated by Alembic - please adjust! ### + op.add_column('feed', sa.Column('error_count', sa.Integer(), nullable=True, + default=0, server_default="0")) op.add_column('feed', sa.Column('last_error', sa.String(), nullable=True)) - op.add_column('feed', sa.Column('last_modified', sa.DateTime(), nullable=True)) + op.add_column('feed', sa.Column('last_modified', sa.DateTime(), + nullable=True, default=unix_start, server_default=str(unix_start))) + op.add_column('feed', sa.Column('last_retreived', sa.DateTime(), + nullable=True, default=unix_start, server_default=str(unix_start))) op.add_column('feed', sa.Column('etag', sa.String(), nullable=True)) - op.add_column('user', sa.Column('refresh_rate', sa.Integer(), nullable=True)) - op.add_column('article', sa.Column('guid', sa.String(), nullable=True)) - ### end Alembic commands ### + op.add_column('user', sa.Column('refresh_rate', sa.Integer(), + nullable=True)) + # end Alembic commands ### def downgrade(): - ### commands auto generated by Alembic - please adjust! ### + # commands auto generated by Alembic - please adjust! ### op.drop_column('user', 'refresh_rate') op.drop_column('feed', 'last_modified') op.drop_column('feed', 'last_error') op.drop_column('feed', 'error_count') + op.drop_column('feed', 'last_retreived') op.drop_column('feed', 'etag') - op.drop_column('article', 'guid') - ### end Alembic commands ### + # end Alembic commands ### diff --git a/pyaggr3g470r/controllers/abstract.py b/pyaggr3g470r/controllers/abstract.py index 6fe45461..ebb73e30 100644 --- a/pyaggr3g470r/controllers/abstract.py +++ b/pyaggr3g470r/controllers/abstract.py @@ -1,5 +1,9 @@ +import logging from bootstrap import db -from pyaggr3g470r.lib.exceptions import Forbidden, NotFound +from sqlalchemy import update +from werkzeug.exceptions import Forbidden, NotFound + +logger = logging.getLogger(__name__) class AbstractController(object): @@ -9,7 +13,7 @@ class AbstractController(object): def __init__(self, user_id): self.user_id = user_id - def _get(self, **filters): + def _to_filters(self, **filters): if self.user_id: filters[self._user_id_key] = self.user_id db_filters = set() @@ -28,7 +32,10 @@ class AbstractController(object): db_filters.add(getattr(self._db_cls, key[:-4]).in_(value)) else: db_filters.add(getattr(self._db_cls, key) == value) - return self._db_cls.query.filter(*db_filters) + return db_filters + + def _get(self, **filters): + return self._db_cls.query.filter(*self._to_filters(**filters)) def get(self, **filters): obj = self._get(**filters).first() @@ -41,7 +48,9 @@ class AbstractController(object): return obj def create(self, **attrs): + attrs['user_id'] = self.user_id obj = self._db_cls(**attrs) + db.session.add(obj) db.session.commit() return obj diff --git a/pyaggr3g470r/controllers/article.py b/pyaggr3g470r/controllers/article.py index 0de223ee..46ca0988 100644 --- a/pyaggr3g470r/controllers/article.py +++ b/pyaggr3g470r/controllers/article.py @@ -7,9 +7,9 @@ class ArticleController(AbstractController): _db_cls = Article def get(self, **filters): - article = super(ArticleController, self).read(**filters) + article = super(ArticleController, self).get(**filters) if not article.readed: - self.update(article.id, readed=True) + self.update({'id': article.id}, {'readed': True}) return article def delete(self, obj_id): @@ -18,3 +18,10 @@ class ArticleController(AbstractController): import pyaggr3g470r.search as fastsearch fastsearch.delete_article(self.user_id, obj.feed_id, obj_id) return obj + + def challenge(self, ids): + """Will return each id that wasn't found in the database.""" + for id_ in ids: + if self.read(**id_).first(): + continue + yield id_ diff --git a/pyaggr3g470r/controllers/feed.py b/pyaggr3g470r/controllers/feed.py index 56cef997..286bea1e 100644 --- a/pyaggr3g470r/controllers/feed.py +++ b/pyaggr3g470r/controllers/feed.py @@ -13,17 +13,12 @@ class FeedController(AbstractController): from pyaggr3g470r.controllers import UserController now = datetime.now() user = UserController(self.user_id).get(id=self.user_id) - max_last_refresh = now - timedelta(minutes=user.refresh_rate or 60) + max_last = now - timedelta(minutes=user.refresh_rate or 60) feeds = [feed for feed in self.read(user_id=self.user_id, - error_count__le=max_error, - last_modified=max_last_refresh).limit(limit)] + error_count__le=max_error, enabled=True, + last_retreived__lt=max_last).limit(limit)] - self.update({'id__in': [feed.id for feed in feeds]}, - {'last_modified': now}) + if feeds: + self.update({'id__in': [feed.id for feed in feeds]}, + {'last_retreived': now}) return feeds - - def list_last_articles(self, feed_id, limit=50): - from pyaggr3g470r.controllers import ArticleController - return ArticleController(self.user_id)._get(feed_id=feed_id)\ - .order_by(ArticleController._db_cls.retrieved_date.desc())\ - .limit(limit) diff --git a/pyaggr3g470r/lib/crawler.py b/pyaggr3g470r/lib/crawler.py index 1d7fca71..6697e4c3 100644 --- a/pyaggr3g470r/lib/crawler.py +++ b/pyaggr3g470r/lib/crawler.py @@ -1,11 +1,199 @@ +import conf +import json +import logging +import requests import feedparser -import dateutil.parser.parse +import dateutil.parser +from datetime import datetime +from concurrent.futures import ThreadPoolExecutor +from requests_futures.sessions import FuturesSession +from pyaggr3g470r.lib.utils import default_handler +logger = logging.getLogger(__name__) -def get_feed_content(feed): - etag = feed.get('etag', None) - last_modified = None - if feed.get('last_modified'): - last_modified = dateutil.parser.parse(feed['last_modified'])\ - .strftime('%a, %d %b %Y %H:%M:%S %Z') - return feedparser.parse(feed['link'], etag=etag, modified=last_modified) + +def extract_id(entry, keys=[('link', 'link'), + ('published', 'retrieved_date'), + ('updated', 'retrieved_date')], force_id=False): + entry_id = entry.get('entry_id') or entry.get('id') + if entry_id: + return {'entry_id': entry_id} + if not entry_id and force_id: + entry_id = hash("".join(entry[entry_key] for _, entry_key in keys + if entry_key in entry)) + else: + ids = {} + for entry_key, pyagg_key in keys: + if entry_key in entry and pyagg_key not in ids: + ids[pyagg_key] = entry[entry_key] + if 'date' in pyagg_key: + ids[pyagg_key] = dateutil.parser.parse(ids[pyagg_key])\ + .isoformat() + return ids + + +class AbstractCrawler: + __session__ = None + + def __init__(self, auth): + self.auth = auth + self.session = self.get_session() + self.url = conf.PLATFORM_URL + + @classmethod + def get_session(cls): + if cls.__session__ is None: + cls.__session__ = FuturesSession( + executor=ThreadPoolExecutor(max_workers=conf.NB_WORKER)) + cls.__session__.verify = False + return cls.__session__ + + def query_pyagg(self, method, urn, data=None): + if data is None: + data = {} + method = getattr(self.session, method) + return method("%sapi/v1.0/%s" % (self.url, urn), + auth=self.auth, data=json.dumps(data, + default=default_handler), + headers={'Content-Type': 'application/json'}) + + +class PyAggUpdater(AbstractCrawler): + + def __init__(self, feed, entries, headers, auth): + self.feed = feed + self.entries = entries + self.headers = headers + super(PyAggUpdater, self).__init__(auth) + + def to_article(self, entry): + date = datetime.now() + + for date_key in ('published', 'updated'): + if entry.get(date_key): + try: + date = dateutil.parser.parse(entry[date_key]) + except Exception: + pass + else: + break + content = '' + if entry.get('content'): + content = entry['content'][0]['value'] + elif entry.get('summary'): + content = entry['summary'] + + return {'feed_id': self.feed['id'], + 'entry_id': extract_id(entry).get('entry_id', None), + 'link': entry.get('link', self.feed['site_link']), + 'title': entry.get('title', 'No title'), + 'readed': False, 'like': False, + 'content': content, + 'retrieved_date': date.isoformat(), + 'date': date.isoformat()} + + def callback(self, response): + try: + results = response.result().json() + except Exception: + logger.exception('something went wront with feed %r %r %r %r', + self.feed, self.headers, response.result(), + getattr(response.result(), 'data', None)) + return + logger.debug('%r %r - %d entries were not matched', + self.feed['id'], self.feed['title'], len(results)) + for id_to_create in results: + entry = self.entries[tuple(sorted(id_to_create.items()))] + try: + logger.debug('creating %r - %r', entry['title'], id_to_create) + self.to_article(entry) + except: + logger.exception('%r %r %r something failed when parsing %r', + self.feed['title'], self.feed['id'], + self.feed['link'], entry) + self.query_pyagg('post', 'article', self.to_article(entry)) + + now = datetime.now() + logger.debug('%r %r - updating feed etag %r last_mod %r', + self.feed['id'], self.feed['title'], + self.headers.get('etag'), now) + + self.query_pyagg('put', 'feed/%d' % self.feed['id'], {'error_count': 0, + 'etag': self.headers.get('etag', ''), + 'last_modified': self.headers.get('last-modified', '')}) + + +class FeedCrawler(AbstractCrawler): + + def __init__(self, feed, auth): + self.feed = feed + super(FeedCrawler, self).__init__(auth) + + def callback(self, response): + try: + response = response.result() + response.raise_for_status() + except Exception as error: + error_count = self.feed['error_count'] + 1 + logger.warn('%r %r - an error occured while fetching feed; bumping' + ' error count to %r', self.feed['title'], + self.feed['id'], error_count) + self.query_pyagg('put', 'feed/%d' % self.feed['id'], + {'error_count': error_count, + 'last_error': str(error)}) + return + + if response.status_code == 304: + logger.debug("%r %r - feed responded with 304", + self.feed['id'], self.feed['title']) + return + if self.feed['etag'] and response.headers.get('etag') \ + and response.headers.get('etag') == self.feed['etag']: + logger.debug("%r %r - feed responded with same etag (%d) %r", + self.feed['id'], self.feed['title'], + response.status_code, self.feed['link']) + return + ids, entries = [], {} + parsed_response = feedparser.parse(response.text) + for entry in parsed_response['entries']: + entries[tuple(sorted(extract_id(entry).items()))] = entry + ids.append(extract_id(entry)) + logger.debug('%r %r - found %d entries %r', + self.feed['id'], self.feed['title'], len(ids), ids) + future = self.query_pyagg('get', 'articles/challenge', {'ids': ids}) + updater = PyAggUpdater(self.feed, entries, response.headers, self.auth) + future.add_done_callback(updater.callback) + + +class CrawlerScheduler(AbstractCrawler): + + def __init__(self, username, password): + self.auth = (username, password) + super(CrawlerScheduler, self).__init__(self.auth) + + def prepare_headers(self, feed): + headers = {} + if feed.get('etag', None): + headers['If-None-Match'] = feed['etag'] + elif feed.get('last_modified'): + headers['If-Modified-Since'] = feed['last_modified'] + logger.debug('%r %r - calculated headers %r', + feed['id'], feed['title'], headers) + return headers + + def callback(self, response): + response = response.result() + response.raise_for_status() + feeds = response.json() + logger.debug('%d to fetch %r', len(feeds), feeds) + for feed in feeds: + logger.info('%r %r - fetching resources', + feed['id'], feed['title']) + future = self.session.get(feed['link'], + headers=self.prepare_headers(feed)) + future.add_done_callback(FeedCrawler(feed, self.auth).callback) + + def run(self): + logger.debug('retreving fetchable feed') + future = self.query_pyagg('get', 'feeds/fetchable') + future.add_done_callback(self.callback) diff --git a/pyaggr3g470r/lib/utils.py b/pyaggr3g470r/lib/utils.py new file mode 100644 index 00000000..a4f4b3ec --- /dev/null +++ b/pyaggr3g470r/lib/utils.py @@ -0,0 +1,14 @@ +import types + +def default_handler(obj): + """JSON handler for default query formatting""" + if hasattr(obj, 'isoformat'): + return obj.isoformat() + if hasattr(obj, 'dump'): + return obj.dump() + if isinstance(obj, (set, frozenset, types.GeneratorType)): + return list(obj) + if isinstance(obj, BaseException): + return str(obj) + raise TypeError("Object of type %s with value of %r " + "is not JSON serializable" % (type(obj), obj)) diff --git a/pyaggr3g470r/models/__init__.py b/pyaggr3g470r/models/__init__.py index 27ee18b6..25273036 100644 --- a/pyaggr3g470r/models/__init__.py +++ b/pyaggr3g470r/models/__init__.py @@ -116,7 +116,8 @@ class Feed(db.Model): # cache handling etag = db.Column(db.String(), default="") - last_modified = db.Column(db.DateTime(), default=datetime(1970, 1, 1)) + last_modified = db.Column(db.String(), default="") + last_retreived = db.Column(db.DateTime(), default=datetime(1970, 1, 1)) # error logging last_error = db.Column(db.String(), default="") @@ -138,7 +139,9 @@ class Feed(db.Model): "link": self.link, "site_link": self.site_link, "etag": self.etag, - "last_modified": self.last_modified} + "error_count": self.error_count, + "last_modified": self.last_modified, + "last_retreived": self.last_retreived} class Article(db.Model): @@ -154,7 +157,6 @@ class Article(db.Model): like = db.Column(db.Boolean(), default=False) date = db.Column(db.DateTime(), default=datetime.now) retrieved_date = db.Column(db.DateTime(), default=datetime.now) - guid = db.Column(db.String(), default="") user_id = db.Column(db.Integer, db.ForeignKey('user.id')) feed_id = db.Column(db.Integer, db.ForeignKey('feed.id')) diff --git a/pyaggr3g470r/views/api/article.py b/pyaggr3g470r/views/api/article.py index ebda6247..17881412 100644 --- a/pyaggr3g470r/views/api/article.py +++ b/pyaggr3g470r/views/api/article.py @@ -1,36 +1,58 @@ from flask import g +import dateutil.parser from pyaggr3g470r.controllers import ArticleController -from pyaggr3g470r.views.api.common import PyAggResourceNew, \ +from pyaggr3g470r.views.api.common import PyAggAbstractResource,\ + PyAggResourceNew, \ PyAggResourceExisting, \ PyAggResourceMulti -ARTICLE_ATTRS = {'title': {'type': str}, - 'content': {'type': str}, +ARTICLE_ATTRS = {'feed_id': {'type': str}, + 'entry_id': {'type': str}, 'link': {'type': str}, - 'date': {'type': str}, - 'feed_id': {'type': int}, - 'like': {'type': bool}, - 'readed': {'type': bool}} + 'title': {'type': str}, + 'readed': {'type': bool}, 'like': {'type': bool}, + 'content': {'type': str}, + 'date': {'type': str}, 'retrieved_date': {'type': str}} class ArticleNewAPI(PyAggResourceNew): controller_cls = ArticleController attrs = ARTICLE_ATTRS + to_date = ['date', 'retrieved_date'] class ArticleAPI(PyAggResourceExisting): controller_cls = ArticleController attrs = ARTICLE_ATTRS + to_date = ['date', 'retrieved_date'] class ArticlesAPI(PyAggResourceMulti): controller_cls = ArticleController attrs = ARTICLE_ATTRS + to_date = ['date', 'retrieved_date'] + + +class ArticlesChallenge(PyAggAbstractResource): + controller_cls = ArticleController + attrs = {'ids': {'type': list, 'default': []}} + to_date = ['date', 'retrieved_date'] + + def get(self): + parsed_args = self.reqparse_args() + for id_dict in parsed_args['ids']: + for key in self.to_date: + if key in id_dict: + id_dict[key] = dateutil.parser.parse(id_dict[key]) + + return self.controller.challenge(parsed_args['ids']) g.api.add_resource(ArticleNewAPI, '/article', endpoint='article_new.json') g.api.add_resource(ArticleAPI, '/article/', endpoint='article.json') g.api.add_resource(ArticlesAPI, '/articles', endpoint='articles.json') +g.api.add_resource(ArticlesChallenge, '/articles/challenge', + endpoint='articles_challenge.json') diff --git a/pyaggr3g470r/views/api/common.py b/pyaggr3g470r/views/api/common.py index c0759c03..a9d35411 100644 --- a/pyaggr3g470r/views/api/common.py +++ b/pyaggr3g470r/views/api/common.py @@ -1,12 +1,16 @@ import json -import types +import logging +import dateutil.parser from functools import wraps from flask import request, g, session, Response from flask.ext.restful import Resource, reqparse +from pyaggr3g470r.lib.utils import default_handler from pyaggr3g470r.models import User from pyaggr3g470r.lib.exceptions import PyAggError +logger = logging.getLogger(__name__) + def authenticate(func): """ @@ -24,55 +28,47 @@ def authenticate(func): # authentication via HTTP only auth = request.authorization try: - email = auth.username - user = User.query.filter(User.email == email).first() - if user and user.check_password(auth.password) and user.activation_key == "": + user = User.query.filter(User.nickname == auth.username).first() + if user and user.check_password(auth.password) \ + and user.activation_key == "": g.user = user - return func(*args, **kwargs) - except AttributeError: - pass - - return Response('', 401, - {'WWWAuthenticate':'Basic realm="Login Required"'}) + except Exception: + return Response('', 401, + {'WWWAuthenticate': + 'Basic realm="Login Required"'}) + return func(*args, **kwargs) return wrapper -def default_handler(obj): - """JSON handler for default query formatting""" - if hasattr(obj, 'isoformat'): - return obj.isoformat() - if hasattr(obj, 'dump'): - return obj.dump() - if isinstance(obj, (set, frozenset, types.GeneratorType)): - return list(obj) - raise TypeError("Object of type %s with value of %r " - "is not JSON serializable" % (type(obj), obj)) - - def to_response(func): def wrapper(*args, **kwargs): + status_code = 200 try: result = func(*args, **kwargs) except PyAggError as error: - response = Response(json.dumps(result[0], default=default_handler)) - response.status_code = error.status_code - return response - status_code = 200 - if isinstance(result, tuple): - result, status_code = result - response = Response(json.dumps(result, default=default_handler), + return Response(json.dumps(error, default=default_handler), status=status_code) - return response + if isinstance(result, Response): + return result + elif isinstance(result, tuple): + result, status_code = result + return Response(json.dumps(result, default=default_handler), + status=status_code) return wrapper class PyAggAbstractResource(Resource): method_decorators = [authenticate, to_response] + attrs = {} + to_date = [] def __init__(self, *args, **kwargs): - self.controller = self.controller_cls(g.user.id) super(PyAggAbstractResource, self).__init__(*args, **kwargs) + @property + def controller(self): + return self.controller_cls(getattr(g.user, 'id', None)) + def reqparse_args(self, strict=False, default=True): """ strict: bool @@ -83,10 +79,17 @@ class PyAggAbstractResource(Resource): """ parser = reqparse.RequestParser() for attr_name, attrs in self.attrs.items(): - if not default and attr_name not in request.args: + if not default and attr_name not in request.json: continue parser.add_argument(attr_name, location='json', **attrs) - return parser.parse_args(strict=strict) + parsed = parser.parse_args(strict=strict) + for field in self.to_date: + if parsed.get(field): + try: + parsed[field] = dateutil.parser.parse(parsed[field]) + except Exception: + logger.exception('failed to parse %r', parsed[field]) + return parsed class PyAggResourceNew(PyAggAbstractResource): @@ -98,13 +101,13 @@ class PyAggResourceNew(PyAggAbstractResource): class PyAggResourceExisting(PyAggAbstractResource): def get(self, obj_id=None): - return self.controller.get(id=obj_id).dump() + return self.controller.get(id=obj_id) def put(self, obj_id=None): - args = self.reqparse_args() + args = self.reqparse_args(default=False) new_values = {key: args[key] for key in set(args).intersection(self.attrs)} - self.controller.update(obj_id, **new_values) + self.controller.update({'id': obj_id}, new_values) def delete(self, obj_id=None): self.controller.delete(obj_id) @@ -115,7 +118,7 @@ class PyAggResourceMulti(PyAggAbstractResource): def get(self): filters = self.reqparse_args(default=False) - return [res.dump() for res in self.controller.read(**filters).all()] + return [res for res in self.controller.read(**filters).all()] def post(self): status = 201 @@ -137,7 +140,7 @@ class PyAggResourceMulti(PyAggAbstractResource): try: new_values = {key: args[key] for key in set(attrs).intersection(self.editable_attrs)} - self.controller.update(obj_id, **new_values) + self.controller.update({'id': obj_id}, new_values) results.append('ok') except Exception as error: status = 206 diff --git a/pyaggr3g470r/views/api/feed.py b/pyaggr3g470r/views/api/feed.py index e6f74cfd..625ad52d 100644 --- a/pyaggr3g470r/views/api/feed.py +++ b/pyaggr3g470r/views/api/feed.py @@ -1,11 +1,10 @@ -from datetime import datetime from flask import g -from flask.ext.restful import Resource, reqparse from pyaggr3g470r.controllers.feed import FeedController, \ DEFAULT_MAX_ERROR, DEFAULT_LIMIT -from pyaggr3g470r.views.api.common import PyAggResourceNew, \ +from pyaggr3g470r.views.api.common import PyAggAbstractResource, \ + PyAggResourceNew, \ PyAggResourceExisting, \ PyAggResourceMulti @@ -16,44 +15,40 @@ FEED_ATTRS = {'title': {'type': str}, 'site_link': {'type': str}, 'email_notification': {'type': bool, 'default': False}, 'enabled': {'type': bool, 'default': True}, - 'etag': {'type': str, 'default': None}, - 'last_modified': {'type': datetime}, - 'last_error': {'type': datetime}, + 'etag': {'type': str, 'default': ''}, + 'last_modified': {'type': str}, + 'last_retreived': {'type': str}, + 'last_error': {'type': str}, 'error_count': {'type': int, 'default': 0}} class FeedNewAPI(PyAggResourceNew): controller_cls = FeedController attrs = FEED_ATTRS + to_date = ['date', 'last_retreived'] class FeedAPI(PyAggResourceExisting): - pass controller_cls = FeedController attrs = FEED_ATTRS + to_date = ['date', 'last_retreived'] class FeedsAPI(PyAggResourceMulti): - pass controller_cls = FeedController attrs = FEED_ATTRS + to_date = ['date', 'last_retreived'] -class FetchableFeedAPI(Resource): - - def __init__(self): - self.reqparse = reqparse.RequestParser() - self.reqparse.add_argument('max_error', type=int, location='json', - default=DEFAULT_MAX_ERROR) - self.reqparse.add_argument('limit', type=int, location='json', - default=DEFAULT_LIMIT) - super(FetchableFeedAPI, self).__init__() +class FetchableFeedAPI(PyAggAbstractResource): + controller_cls = FeedController + to_date = ['date', 'last_retreived'] + attrs = {'max_error': {'type': int, 'default': DEFAULT_MAX_ERROR}, + 'limit': {'type': int, 'default': DEFAULT_LIMIT}} def get(self): - args = self.reqparse.parse_args() - controller = FeedController(g.user.id) - return [feed for feed in controller.list_fetchable( - max_error=args['max_error'], limit=args['limit'])] + return [feed for feed in self.controller.list_fetchable( + **self.reqparse_args())] g.api.add_resource(FeedNewAPI, '/feed', endpoint='feed_new.json') diff --git a/pyaggr3g470r/views/views.py b/pyaggr3g470r/views/views.py index 053bb473..66170a77 100644 --- a/pyaggr3g470r/views/views.py +++ b/pyaggr3g470r/views/views.py @@ -156,35 +156,12 @@ def login(): login_user(user) g.user = user session['email'] = form.email.data - identity_changed.send(current_app._get_current_object(), identity=Identity(user.id)) + identity_changed.send(current_app._get_current_object(), + identity=Identity(user.id)) flash(gettext("Logged in successfully."), 'success') return redirect(url_for('home')) return render_template('login.html', form=form) -@app.route('/api/csrf', methods=['GET']) -def get_csrf(): - try: - data = json.loads(request.data.decode()) - except ValueError: - return Response(status=400) - email = data.get('email') - password = data.get('password') - if login is None or password is None: - return Response(status=401) - user = User.query.filter(User.email == email).first() - if not user: - return Response(status=404) - if not user.check_password(password): - return Response(status=401) - if not user.activation_key == "": - return Response(status=403) - login_user(user) - g.user = user - session['email'] = email - identity_changed.send(current_app._get_current_object(), - identity=Identity(user.id)) - return 'ok', 200 - @app.route('/logout') @login_required -- cgit