aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--pyaggr3g470r/controllers/abstract.py4
-rw-r--r--pyaggr3g470r/lib/crawler.py41
-rw-r--r--pyaggr3g470r/views/api/common.py51
3 files changed, 85 insertions, 11 deletions
diff --git a/pyaggr3g470r/controllers/abstract.py b/pyaggr3g470r/controllers/abstract.py
index ebb73e30..a99e67f3 100644
--- a/pyaggr3g470r/controllers/abstract.py
+++ b/pyaggr3g470r/controllers/abstract.py
@@ -7,7 +7,7 @@ logger = logging.getLogger(__name__)
class AbstractController(object):
- _db_cls = None
+ _db_cls = None # reference to the database class
_user_id_key = 'user_id'
def __init__(self, user_id):
@@ -48,7 +48,7 @@ class AbstractController(object):
return obj
def create(self, **attrs):
- attrs['user_id'] = self.user_id
+ attrs[self._user_id_key] = self.user_id
obj = self._db_cls(**attrs)
db.session.add(obj)
db.session.commit()
diff --git a/pyaggr3g470r/lib/crawler.py b/pyaggr3g470r/lib/crawler.py
index 1b9f5d60..c00b0dbf 100644
--- a/pyaggr3g470r/lib/crawler.py
+++ b/pyaggr3g470r/lib/crawler.py
@@ -1,3 +1,17 @@
+"""
+Here's a summary of how it works:
+
+CrawlerScheduler.run
+ will retrieve a list of feeds to be refreshed and pass the result to
+CrawlerScheduler.callback
+ which will retrieve each feed and treat the result with
+FeedCrawler.callback
+ which will interpret the result (status_code, etag), collect ids
+ and match them against pyagg, which will cause
+PyAggUpdater.callback
+ to create the missing entries
+"""
+
import time
import conf
import json
@@ -18,6 +32,10 @@ API_ROOT = "api/v2.0/"
def extract_id(entry, keys=[('link', 'link'),
('published', 'retrieved_date'),
('updated', 'retrieved_date')], force_id=False):
+ """For a given entry, return a dict that allows identifying it. The
+ dict will be built from the uid of the entry. If that identifier is
+ absent, the dict will be built from the values of "keys".
+ """
entry_id = entry.get('entry_id') or entry.get('id')
if entry_id:
return {'entry_id': entry_id}
@@ -46,6 +64,7 @@ class AbstractCrawler:
@classmethod
def get_session(cls):
+ """Method that allows us to treat the session as a singleton"""
if cls.__session__ is None:
cls.__session__ = FuturesSession(
executor=ThreadPoolExecutor(max_workers=conf.NB_WORKER))
@@ -54,6 +73,9 @@ class AbstractCrawler:
@classmethod
def count_on_me(cls, func):
+ """A basic decorator which will count +1 at the beginning of a call
+ and -1 at the end. It allows us to wait for the __counter__ value
+ to be 0, meaning no decorated call is in progress anymore."""
@wraps(func)
def wrapper(*args, **kwargs):
cls.__counter__ += 1
@@ -63,6 +85,10 @@ class AbstractCrawler:
return wrapper
def query_pyagg(self, method, urn, data=None):
+ """A wrapper for internal calls. method should be one of those found
+ on requests (head, post, get, options, ...), urn the remote
+ resource you want to access on pyagg, and data the data you want to
+ transmit."""
if data is None:
data = {}
method = getattr(self.session, method)
@@ -72,9 +98,10 @@ class AbstractCrawler:
headers={'Content-Type': 'application/json'})
@classmethod
- def wait(self):
+ def wait(cls):
+ "See count_on_me, that method will just wait for the counter to be 0"
time.sleep(1)
- while self.__counter__:
+ while cls.__counter__:
time.sleep(1)
@@ -87,6 +114,7 @@ class PyAggUpdater(AbstractCrawler):
super(PyAggUpdater, self).__init__(auth)
def to_article(self, entry):
+ "Safe method to transform a feedparser entry into an article"
date = datetime.now()
for date_key in ('published', 'updated'):
@@ -114,6 +142,8 @@ class PyAggUpdater(AbstractCrawler):
@AbstractCrawler.count_on_me
def callback(self, response):
+ """Will process the result from the challenge, creating missing articles
+ and updating the feed"""
results = response.result().json()
logger.debug('%r %r - %d entries were not matched and will be created',
self.feed['id'], self.feed['title'], len(results))
@@ -140,12 +170,15 @@ class FeedCrawler(AbstractCrawler):
super(FeedCrawler, self).__init__(auth)
def clean_feed(self):
+ """Will reset the error counters on a feed that has known errors"""
if self.feed.get('error_count') or self.feed.get('last_error'):
self.query_pyagg('put', 'feed/%d' % self.feed['id'],
{'error_count': 0, 'last_error': ''})
@AbstractCrawler.count_on_me
def callback(self, response):
+ """will fetch the feed and interpret results (304, etag) or will
+ challenge pyagg to compare fetched entries with existing ones"""
try:
response = response.result()
response.raise_for_status()
@@ -190,6 +223,7 @@ class CrawlerScheduler(AbstractCrawler):
super(CrawlerScheduler, self).__init__(self.auth)
def prepare_headers(self, feed):
+ """For a known feed, will construct a dictionary of headers"""
headers = {}
if feed.get('etag', None):
headers['If-None-Match'] = feed['etag']
@@ -201,6 +235,7 @@ class CrawlerScheduler(AbstractCrawler):
@AbstractCrawler.count_on_me
def callback(self, response):
+ """processes feeds that need to be fetched"""
response = response.result()
response.raise_for_status()
feeds = response.json()
@@ -214,6 +249,8 @@ class CrawlerScheduler(AbstractCrawler):
@AbstractCrawler.count_on_me
def run(self, **kwargs):
+ """entry point, will retrieve feeds to be fetched
+ and launch the whole thing"""
logger.debug('retreving fetchable feed')
future = self.query_pyagg('get', 'feeds/fetchable', kwargs)
future.add_done_callback(self.callback)
diff --git a/pyaggr3g470r/views/api/common.py b/pyaggr3g470r/views/api/common.py
index e4f80bf7..c59bb1fc 100644
--- a/pyaggr3g470r/views/api/common.py
+++ b/pyaggr3g470r/views/api/common.py
@@ -1,3 +1,23 @@
+"""For a given resource, the classes in this module create the following
+routes:
+ GET resource/<id>
+ -> to retrieve one
+ POST resource
+ -> to create one
+ PUT resource/<id>
+ -> to update one
+ DELETE resource/<id>
+ -> to delete one
+
+ GET resources
+ -> to retrieve several
+ POST resources
+ -> to create several
+ PUT resources
+ -> to update several
+ DELETE resources
+ -> to delete several
+"""
import json
import logging
import dateutil.parser
@@ -41,6 +61,8 @@ def authenticate(func):
def to_response(func):
+ """Will cast the result of func into a Response, and try to extract
+ a status_code for the Response object"""
def wrapper(*args, **kwargs):
status_code = 200
result = func(*args, **kwargs)
@@ -56,7 +78,7 @@ def to_response(func):
class PyAggAbstractResource(Resource):
method_decorators = [authenticate, to_response]
attrs = {}
- to_date = []
+ to_date = [] # list of fields to cast to datetime
def __init__(self, *args, **kwargs):
super(PyAggAbstractResource, self).__init__(*args, **kwargs)
@@ -71,6 +93,8 @@ class PyAggAbstractResource(Resource):
if True will throw 400 error if args are defined and not in request
default: bool
if True, won't return defaults
+ args: dict
+ the args to parse, if None, self.attrs will be used
"""
parser = reqparse.RequestParser()
for attr_name, attrs in (args or self.attrs).items():
@@ -95,21 +119,25 @@ class PyAggAbstractResource(Resource):
class PyAggResourceNew(PyAggAbstractResource):
def post(self):
+ """Create a single new object"""
return self.controller.create(**self.reqparse_args()), 201
class PyAggResourceExisting(PyAggAbstractResource):
def get(self, obj_id=None):
+ """Retrieve a single object"""
return self.controller.get(id=obj_id)
def put(self, obj_id=None):
+ """update an object, new attrs should be passed in the payload"""
args = self.reqparse_args(default=False)
new_values = {key: args[key] for key in
set(args).intersection(self.attrs)}
self.controller.update({'id': obj_id}, new_values)
def delete(self, obj_id=None):
+ """delete an object"""
self.controller.delete(obj_id)
return None, 204
@@ -117,6 +145,9 @@ class PyAggResourceExisting(PyAggAbstractResource):
class PyAggResourceMulti(PyAggAbstractResource):
def get(self):
+ """retrieve several objects. filters can be set in the payload on the
+ different fields of the object, and a limit can be set in there as well
+ """
args = deepcopy(self.attrs)
args['limit'] = {'type': int, 'default': 10, 'force_default': True}
filters = self.reqparse_args(default=False, strict=False, args=args)
@@ -126,10 +157,12 @@ class PyAggResourceMulti(PyAggAbstractResource):
return [res for res in self.controller.read(**filters).limit(limit)]
def post(self):
+ """creating several objects. payload should be a list of dict.
+ """
status = 201
results = []
args = [] # FIXME
- for arg in args:
+ for attrs in request.json():
try:
results.append(self.controller.create(**arg).id)
except Exception as error:
@@ -138,10 +171,14 @@ class PyAggResourceMulti(PyAggAbstractResource):
return results, status
def put(self):
+ """updating several objects. payload should be:
+ >>> payload
+ [[obj_id1, {attr1: val1, attr2: val2}],
+ [obj_id2, {attr1: val1, attr2: val2}]]
+ """
status = 200
results = []
- args = {} # FIXME
- for obj_id, attrs in args.items():
+ for obj_id, attrs in request.json():
try:
new_values = {key: args[key] for key in
set(attrs).intersection(self.editable_attrs)}
@@ -153,10 +190,10 @@ class PyAggResourceMulti(PyAggAbstractResource):
return results, status
def delete(self):
+ """will delete several objects,
+ a list of their ids should be in the payload"""
status = 204
- results = []
- obj_ids = [] # FIXME extract some real ids
- for obj_id in obj_ids:
+ for obj_id in request.json():
try:
self.controller.delete(obj_id)
results.append('ok')
bgstack15