diff options
-rw-r--r-- | pyaggr3g470r/controllers/abstract.py | 4 | ||||
-rw-r--r-- | pyaggr3g470r/lib/crawler.py | 41 | ||||
-rw-r--r-- | pyaggr3g470r/views/api/common.py | 51 |
3 files changed, 85 insertions, 11 deletions
diff --git a/pyaggr3g470r/controllers/abstract.py b/pyaggr3g470r/controllers/abstract.py index ebb73e30..a99e67f3 100644 --- a/pyaggr3g470r/controllers/abstract.py +++ b/pyaggr3g470r/controllers/abstract.py @@ -7,7 +7,7 @@ logger = logging.getLogger(__name__) class AbstractController(object): - _db_cls = None + _db_cls = None # reference to the database class _user_id_key = 'user_id' def __init__(self, user_id): @@ -48,7 +48,7 @@ class AbstractController(object): return obj def create(self, **attrs): - attrs['user_id'] = self.user_id + attrs[self._user_id_key] = self.user_id obj = self._db_cls(**attrs) db.session.add(obj) db.session.commit() diff --git a/pyaggr3g470r/lib/crawler.py b/pyaggr3g470r/lib/crawler.py index 1b9f5d60..c00b0dbf 100644 --- a/pyaggr3g470r/lib/crawler.py +++ b/pyaggr3g470r/lib/crawler.py @@ -1,3 +1,17 @@ +""" +Here's a summary of how it works: + +CrawlerScheduler.run + will retrieve a list of feeds to be refreshed and pass result to +CrawlerScheduler.callback + which will retrieve each feed and treat result with +FeedCrawler.callback + which will interpret the result (status_code, etag) collect ids + and match them against pyagg which will cause +PyAggUpdater.callback + to create the missing entries +""" + import time import conf import json @@ -18,6 +32,10 @@ API_ROOT = "api/v2.0/" def extract_id(entry, keys=[('link', 'link'), ('published', 'retrieved_date'), ('updated', 'retrieved_date')], force_id=False): + """For a given entry will return a dict that allows to identify it. The + dict will be constructed on the uid of the entry. If that identifier is + absent, the dict will be constructed upon the values of "keys". 
+ """ entry_id = entry.get('entry_id') or entry.get('id') if entry_id: return {'entry_id': entry_id} @@ -46,6 +64,7 @@ class AbstractCrawler: @classmethod def get_session(cls): + """method that allows us to treat session as a singleton""" if cls.__session__ is None: cls.__session__ = FuturesSession( executor=ThreadPoolExecutor(max_workers=conf.NB_WORKER)) @@ -54,6 +73,9 @@ class AbstractCrawler: @classmethod def count_on_me(cls, func): + """A basic decorator which will count +1 at the beginning of a call + and -1 at the end. It kinda allows us to wait for the __counter__ value + to be 0, meaning nothing is done anymore.""" @wraps(func) def wrapper(*args, **kwargs): cls.__counter__ += 1 @@ -63,6 +85,10 @@ class AbstractCrawler: return wrapper def query_pyagg(self, method, urn, data=None): + """A wrapper for internal call, method should be ones you can find + on requests (head, post, get, options, ...), urn the distant + resources you want to access on pyagg, and data, the data you wanna + transmit.""" if data is None: data = {} method = getattr(self.session, method) @@ -72,9 +98,10 @@ class AbstractCrawler: headers={'Content-Type': 'application/json'}) @classmethod - def wait(self): + def wait(cls): + "See count_on_me, that method will just wait for the counter to be 0" time.sleep(1) - while self.__counter__: + while cls.__counter__: time.sleep(1) @@ -87,6 +114,7 @@ class PyAggUpdater(AbstractCrawler): super(PyAggUpdater, self).__init__(auth) def to_article(self, entry): + "Safe method to transform a feedparser entry into an article" date = datetime.now() for date_key in ('published', 'updated'): @@ -114,6 +142,8 @@ class PyAggUpdater(AbstractCrawler): @AbstractCrawler.count_on_me def callback(self, response): + """Will process the result from the challenge, creating missing articles + and updating the feed""" results = response.result().json() logger.debug('%r %r - %d entries were not matched and will be created', self.feed['id'], self.feed['title'], len(results)) 
@@ -140,12 +170,15 @@ class FeedCrawler(AbstractCrawler): super(FeedCrawler, self).__init__(auth) def clean_feed(self): + """Will reset the error counters on a feed that has known errors""" if self.feed.get('error_count') or self.feed.get('last_error'): self.query_pyagg('put', 'feed/%d' % self.feed['id'], {'error_count': 0, 'last_error': ''}) @AbstractCrawler.count_on_me def callback(self, response): + """will fetch the feed and interpret results (304, etag) or will + challenge pyagg to compare gotten entries with existing ones""" try: response = response.result() response.raise_for_status() @@ -190,6 +223,7 @@ class CrawlerScheduler(AbstractCrawler): super(CrawlerScheduler, self).__init__(self.auth) def prepare_headers(self, feed): + """For a known feed, will construct some header dictionary""" headers = {} if feed.get('etag', None): headers['If-None-Match'] = feed['etag'] @@ -201,6 +235,7 @@ class CrawlerScheduler(AbstractCrawler): @AbstractCrawler.count_on_me def callback(self, response): + """processes feeds that need to be fetched""" response = response.result() response.raise_for_status() feeds = response.json() @@ -214,6 +249,8 @@ class CrawlerScheduler(AbstractCrawler): @AbstractCrawler.count_on_me def run(self, **kwargs): + """entry point, will retrieve feeds to be fetched + and launch the whole thing""" logger.debug('retreving fetchable feed') future = self.query_pyagg('get', 'feeds/fetchable', kwargs) future.add_done_callback(self.callback) diff --git a/pyaggr3g470r/views/api/common.py b/pyaggr3g470r/views/api/common.py index e4f80bf7..c59bb1fc 100644 --- a/pyaggr3g470r/views/api/common.py +++ b/pyaggr3g470r/views/api/common.py @@ -1,3 +1,23 @@ +"""For a given resource, classes in the module intend to create the following +routes : + GET resource/<id> + -> to retrieve one + POST resource + -> to create one + PUT resource/<id> + -> to update one + DELETE resource/<id> + -> to delete one + + GET resources + -> to retrieve several + POST resources + -> 
to create several + PUT resources + -> to update several + DELETE resources + -> to delete several +""" import json import logging import dateutil.parser @@ -41,6 +61,8 @@ def authenticate(func): def to_response(func): + """Will cast results of func as a Response, and try to extract + a status_code for the Response object""" def wrapper(*args, **kwargs): status_code = 200 result = func(*args, **kwargs) @@ -56,7 +78,7 @@ def to_response(func): class PyAggAbstractResource(Resource): method_decorators = [authenticate, to_response] attrs = {} - to_date = [] + to_date = [] # list of fields to cast to datetime def __init__(self, *args, **kwargs): super(PyAggAbstractResource, self).__init__(*args, **kwargs) @@ -71,6 +93,8 @@ class PyAggAbstractResource(Resource): if True will throw 400 error if args are defined and not in request default: bool if True, won't return defaults + args: dict + the args to parse, if None, self.attrs will be used """ parser = reqparse.RequestParser() for attr_name, attrs in (args or self.attrs).items(): @@ -95,21 +119,25 @@ class PyAggAbstractResource(Resource): class PyAggResourceNew(PyAggAbstractResource): def post(self): + """Create a single new object""" return self.controller.create(**self.reqparse_args()), 201 class PyAggResourceExisting(PyAggAbstractResource): def get(self, obj_id=None): + """Retrieve a single object""" return self.controller.get(id=obj_id) def put(self, obj_id=None): + """update an object, new attrs should be passed in the payload""" args = self.reqparse_args(default=False) new_values = {key: args[key] for key in set(args).intersection(self.attrs)} self.controller.update({'id': obj_id}, new_values) def delete(self, obj_id=None): + """delete an object""" self.controller.delete(obj_id) return None, 204 @@ -117,6 +145,9 @@ class PyAggResourceExisting(PyAggAbstractResource): class PyAggResourceMulti(PyAggAbstractResource): def get(self): + """retrieve several objects. 
filters can be set in the payload on the + different fields of the object, and a limit can be set in there as well + """ args = deepcopy(self.attrs) args['limit'] = {'type': int, 'default': 10, 'force_default': True} filters = self.reqparse_args(default=False, strict=False, args=args) @@ -126,10 +157,12 @@ class PyAggResourceMulti(PyAggAbstractResource): return [res for res in self.controller.read(**filters).limit(limit)] def post(self): + """creating several objects. payload should be a list of dicts. + """ status = 201 results = [] args = [] # FIXME - for arg in args: + for attrs in request.json(): try: results.append(self.controller.create(**arg).id) except Exception as error: @@ -138,10 +171,14 @@ class PyAggResourceMulti(PyAggAbstractResource): return results, status def put(self): + """updating several objects. payload should be: + >>> payload + [[obj_id1, {attr1: val1, attr2: val2}], + [obj_id2, {attr1: val1, attr2: val2}]] + """ status = 200 results = [] - args = {} # FIXME - for obj_id, attrs in args.items(): + for obj_id, attrs in request.json(): try: new_values = {key: args[key] for key in set(attrs).intersection(self.editable_attrs)} @@ -153,10 +190,10 @@ class PyAggResourceMulti(PyAggAbstractResource): return results, status def delete(self): + """will delete several objects, + a list of their ids should be in the payload""" status = 204 - results = [] - obj_ids = [] # FIXME extract some real ids - for obj_id in obj_ids: + for obj_id in request.json(): try: self.controller.delete(obj_id) results.append('ok') |