aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: François Schmidts <francois.schmidts@gmail.com> 2015-04-21 14:32:05 +0200
committer: François Schmidts <francois.schmidts@gmail.com> 2015-04-22 10:51:12 +0200
commit: 2531887d45e5469fec6171fbd0c63058ded33136 (patch)
tree: d70ef91ffeb819a9dde302c06a98596f9e88ad52
parent: better title handling (diff)
download: newspipe-2531887d45e5469fec6171fbd0c63058ded33136.tar.gz
newspipe-2531887d45e5469fec6171fbd0c63058ded33136.tar.bz2
newspipe-2531887d45e5469fec6171fbd0c63058ded33136.zip
making admin able to update all other users' feeds
-rwxr-xr-x manager.py 6
-rw-r--r-- pyaggr3g470r/controllers/abstract.py 4
-rw-r--r-- pyaggr3g470r/controllers/feed.py 10
-rw-r--r-- pyaggr3g470r/lib/crawler.py 15
-rw-r--r-- pyaggr3g470r/models/feed.py 1
-rw-r--r-- pyaggr3g470r/views/api/article.py 3
-rw-r--r-- pyaggr3g470r/views/api/common.py 7
-rw-r--r-- pyaggr3g470r/views/api/feed.py 21
8 files changed, 44 insertions, 23 deletions
diff --git a/manager.py b/manager.py
index 020a0f4c..e2dc863b 100755
--- a/manager.py
+++ b/manager.py
@@ -27,11 +27,11 @@ def db_create():
pyaggr3g470r.models.db_create(db)
@manager.command
-def fetch(user, password, limit=100):
+def fetch(user, password, limit=100, retreive_all=False):
"Crawl the feeds with the client crawler."
from pyaggr3g470r.lib.crawler import CrawlerScheduler
scheduler = CrawlerScheduler(user, password)
- scheduler.run(limit=limit)
+ scheduler.run(limit=limit, retreive_all=retreive_all)
scheduler.wait()
@manager.command
@@ -61,4 +61,4 @@ def fetch_asyncio(user_id, feed_id):
feed_getter = crawler.retrieve_feed(user, feed_id)
if __name__ == '__main__':
- manager.run()
\ No newline at end of file
+ manager.run()
diff --git a/pyaggr3g470r/controllers/abstract.py b/pyaggr3g470r/controllers/abstract.py
index 9a9004af..95f9e211 100644
--- a/pyaggr3g470r/controllers/abstract.py
+++ b/pyaggr3g470r/controllers/abstract.py
@@ -70,7 +70,9 @@ class AbstractController(object):
def create(self, **attrs):
assert self._user_id_key in attrs or self.user_id is not None, \
"You must provide user_id one way or another"
- attrs[self._user_id_key] = self.user_id or attrs.get(self._user_id_key)
+
+ if self._user_id_key not in attrs:
+ attrs[self._user_id_key] = self.user_id
obj = self._db_cls(**attrs)
db.session.add(obj)
db.session.commit()
diff --git a/pyaggr3g470r/controllers/feed.py b/pyaggr3g470r/controllers/feed.py
index 8db279ae..82714e39 100644
--- a/pyaggr3g470r/controllers/feed.py
+++ b/pyaggr3g470r/controllers/feed.py
@@ -27,8 +27,9 @@ from .abstract import AbstractController
from pyaggr3g470r.models import Feed
logger = logging.getLogger(__name__)
-DEFAULT_MAX_ERROR = conf.DEFAULT_MAX_ERROR
DEFAULT_LIMIT = 5
+DEFAULT_REFRESH_RATE = 60
+DEFAULT_MAX_ERROR = conf.DEFAULT_MAX_ERROR
class FeedController(AbstractController):
@@ -42,11 +43,10 @@ class FeedController(AbstractController):
.order_by('Feed.last_retrieved')
.limit(limit)]
- def list_fetchable(self, max_error=DEFAULT_MAX_ERROR, limit=DEFAULT_LIMIT):
- from pyaggr3g470r.controllers import UserController
+ def list_fetchable(self, max_error=DEFAULT_MAX_ERROR, limit=DEFAULT_LIMIT,
+ refresh_rate=DEFAULT_REFRESH_RATE):
now = datetime.now()
- user = UserController(self.user_id).get(id=self.user_id)
- max_last = now - timedelta(minutes=user.refresh_rate or 60)
+ max_last = now - timedelta(minutes=refresh_rate)
feeds = self.list_late(max_last, max_error, limit)
if feeds:
self.update({'id__in': [feed.id for feed in feeds]},
diff --git a/pyaggr3g470r/lib/crawler.py b/pyaggr3g470r/lib/crawler.py
index 1cb61973..339c4b12 100644
--- a/pyaggr3g470r/lib/crawler.py
+++ b/pyaggr3g470r/lib/crawler.py
@@ -16,7 +16,6 @@ import time
import conf
import json
import logging
-import requests
import feedparser
import dateutil.parser
from hashlib import md5
@@ -97,6 +96,7 @@ class AbstractCrawler:
@classmethod
def get_counter_callback(cls):
cls.__counter__ += 1
+
def debump(*args, **kwargs):
cls.__counter__ -= 1
return debump
@@ -157,6 +157,7 @@ class PyAggUpdater(AbstractCrawler):
content = entry['summary']
return {'feed_id': self.feed['id'],
+ 'user_id': self.feed['user_id'],
'entry_id': extract_id(entry).get('entry_id', None),
'link': entry.get('link', self.feed['site_link']),
'title': entry.get('title', 'No title'),
@@ -176,11 +177,11 @@ class PyAggUpdater(AbstractCrawler):
for id_to_create in results:
entry = self.to_article(
self.entries[tuple(sorted(id_to_create.items()))])
- logger.warn('%r %r - creating %r - %r', self.feed['id'],
- self.feed['title'], entry['title'], id_to_create)
+ logger.warn('%r %r - creating %r for %r - %r', self.feed['id'],
+ self.feed['title'], entry['title'], entry['user_id'],
+ id_to_create)
self.query_pyagg('post', 'article', entry)
- now = datetime.now()
logger.debug('%r %r - updating feed etag %r last_mod %r',
self.feed['id'], self.feed['title'],
self.headers.get('etag', ''),
@@ -264,8 +265,10 @@ class FeedCrawler(AbstractCrawler):
ids, entries = [], {}
parsed_response = feedparser.parse(response.text)
for entry in parsed_response['entries']:
- entries[tuple(sorted(extract_id(entry).items()))] = entry
- ids.append(extract_id(entry))
+ entry_ids = extract_id(entry)
+ entry_ids['feed_id'] = self.feed['id']
+ entries[tuple(sorted(entry_ids.items()))] = entry
+ ids.append(entry_ids)
logger.debug('%r %r - found %d entries %r',
self.feed['id'], self.feed['title'], len(ids), ids)
future = self.query_pyagg('get', 'articles/challenge', {'ids': ids})
diff --git a/pyaggr3g470r/models/feed.py b/pyaggr3g470r/models/feed.py
index aff11460..e43045f1 100644
--- a/pyaggr3g470r/models/feed.py
+++ b/pyaggr3g470r/models/feed.py
@@ -63,6 +63,7 @@ class Feed(db.Model):
def dump(self):
return {"id": self.id,
+ "user_id": self.user_id,
"title": self.title,
"description": self.description,
"link": self.link,
diff --git a/pyaggr3g470r/views/api/article.py b/pyaggr3g470r/views/api/article.py
index c3ec2d34..516eef8f 100644
--- a/pyaggr3g470r/views/api/article.py
+++ b/pyaggr3g470r/views/api/article.py
@@ -11,7 +11,8 @@ from pyaggr3g470r.views.api.common import PyAggAbstractResource,\
PyAggResourceMulti
-ARTICLE_ATTRS = {'feed_id': {'type': str},
+ARTICLE_ATTRS = {'user_id': {'type': int},
+ 'feed_id': {'type': int},
'entry_id': {'type': str},
'link': {'type': str},
'title': {'type': str},
diff --git a/pyaggr3g470r/views/api/common.py b/pyaggr3g470r/views/api/common.py
index b8477d4b..ca344c04 100644
--- a/pyaggr3g470r/views/api/common.py
+++ b/pyaggr3g470r/views/api/common.py
@@ -51,7 +51,8 @@ def authenticate(func):
# authentication via HTTP only
auth = request.authorization
if auth is not None:
- user = User.query.filter(User.nickname == auth.username).first()
+ user = User.query.filter(
+ User.nickname == auth.username).first()
if user and user.check_password(auth.password) \
and user.activation_key == "":
g.user = user
@@ -61,6 +62,7 @@ def authenticate(func):
raise Unauthorized({'WWWAuthenticate': 'Basic realm="Login Required"'})
return wrapper
+
def to_response(func):
"""Will cast results of func as a result, and try to extract
a status_code for the Response object"""
@@ -158,7 +160,8 @@ class PyAggResourceMulti(PyAggAbstractResource):
return [res for res in self.controller.read().limit(limit)]
if not limit:
return [res for res in self.controller.read(**request.json).all()]
- return [res for res in self.controller.read(**request.json).limit(limit)]
+ return [res
+ for res in self.controller.read(**request.json).limit(limit)]
def post(self):
"""creating several objects. payload should be a list of dict.
diff --git a/pyaggr3g470r/views/api/feed.py b/pyaggr3g470r/views/api/feed.py
index 7d0e2862..ad185de9 100644
--- a/pyaggr3g470r/views/api/feed.py
+++ b/pyaggr3g470r/views/api/feed.py
@@ -3,8 +3,10 @@
from flask import g
-from pyaggr3g470r.controllers.feed import FeedController, \
- DEFAULT_MAX_ERROR, DEFAULT_LIMIT
+from pyaggr3g470r.controllers.feed import (FeedController,
+ DEFAULT_MAX_ERROR,
+ DEFAULT_LIMIT,
+ DEFAULT_REFRESH_RATE)
from pyaggr3g470r.views.api.common import PyAggAbstractResource, \
PyAggResourceNew, \
@@ -41,11 +43,20 @@ class FetchableFeedAPI(PyAggAbstractResource):
controller_cls = FeedController
to_date = ['date', 'last_retrieved']
attrs = {'max_error': {'type': int, 'default': DEFAULT_MAX_ERROR},
- 'limit': {'type': int, 'default': DEFAULT_LIMIT}}
+ 'limit': {'type': int, 'default': DEFAULT_LIMIT},
+ 'refresh_rate': {'type': int, 'default': DEFAULT_REFRESH_RATE},
+ 'retreive_all': {'type': bool, 'default': False}}
def get(self):
- return [feed for feed in self.controller.list_fetchable(
- **self.reqparse_args())]
+ args = self.reqparse_args()
+ if g.user.refresh_rate:
+ args['refresh_rate'] = g.user.refresh_rate
+
+ dont_filter_by_user = args.pop('retreive_all') and g.user.is_admin()
+
+ contr = self.controller_cls() if dont_filter_by_user \
+ else self.controller
+ return [feed for feed in contr.list_fetchable(**args)]
g.api.add_resource(FeedNewAPI, '/feed', endpoint='feed_new.json')
g.api.add_resource(FeedAPI, '/feed/<int:obj_id>', endpoint='feed.json')
bgstack15