diff options
-rw-r--r-- | conf.py | 4 | ||||
-rw-r--r-- | conf/conf.cfg-sample | 2 | ||||
-rwxr-xr-x | manager.py | 10 | ||||
-rw-r--r-- | pyaggr3g470r/controllers/article.py | 6 | ||||
-rw-r--r-- | pyaggr3g470r/views/api/feed.py | 3 | ||||
-rw-r--r-- | scripts/__init__.py | 0 | ||||
-rw-r--r-- | scripts/probes.py | 72 |
7 files changed, 93 insertions, 4 deletions
@@ -28,6 +28,8 @@ DEFAULTS = {"platform_url": "https://pyaggr3g470r.herokuapp.com/", "recaptcha_public_key": "", "recaptcha_private_key": "", "nb_worker": "100", + "api_login": "", + "api_passwd": "", "default_max_error": "3", "log_path": "pyaggr3g470r.log", "log_level": "info", @@ -79,6 +81,8 @@ RECAPTCHA_PRIVATE_KEY = config.get('misc', 'recaptcha_private_key') LOG_PATH = config.get('misc', 'log_path') NB_WORKER = config.getint('misc', 'nb_worker') +API_LOGIN = config.get('crawler', 'api_login') +API_PASSWD = config.get('crawler', 'api_passwd') WHOOSH_ENABLED = True diff --git a/conf/conf.cfg-sample b/conf/conf.cfg-sample index 2f30b04a..acd244c8 100644 --- a/conf/conf.cfg-sample +++ b/conf/conf.cfg-sample @@ -18,6 +18,8 @@ default_max_error = 6 http_proxy = user_agent = pyAggr3g470r (https://bitbucket.org/cedricbonhomme/pyaggr3g470r) resolve_article_url = false +api_login = +api_passwd = [notification] notification_email = pyAggr3g470r@no-reply.com host = smtp.googlemail.com @@ -1,7 +1,7 @@ #! /usr/bin/env python # -*- coding: utf-8 -*- -from bootstrap import application, db, populate_g +from bootstrap import application, db, populate_g, conf from flask.ext.script import Manager from flask.ext.migrate import Migrate, MigrateCommand @@ -27,10 +27,10 @@ def db_create(): pyaggr3g470r.models.db_create(db) @manager.command -def fetch(user, password, limit=100, retreive_all=False): +def fetch(limit=100, retreive_all=False): "Crawl the feeds with the client crawler." from pyaggr3g470r.lib.crawler import CrawlerScheduler - scheduler = CrawlerScheduler(user, password) + scheduler = CrawlerScheduler(conf.API_LOGIN, conf.API_PASSWD) scheduler.run(limit=limit, retreive_all=retreive_all) scheduler.wait() @@ -66,5 +66,9 @@ def fetch_asyncio(user_id, feed_id): feed_getter = crawler.retrieve_feed(loop, g.user, feed_id) loop.close() +from scripts.probes import ArticleProbe, FeedProbe +manager.add_command('probe_articles', ArticleProbe()) +manager.add_command('probe_feeds', FeedProbe()) + if __name__ == '__main__': manager.run() diff --git a/pyaggr3g470r/controllers/article.py b/pyaggr3g470r/controllers/article.py index 70b9d2dd..21b4b5e7 100644 --- a/pyaggr3g470r/controllers/article.py +++ b/pyaggr3g470r/controllers/article.py @@ -33,6 +33,12 @@ class ArticleController(AbstractController): .filter(*self._to_filters(**filters)) .group_by(Article.feed_id).all()) + def count_by_user_id(self, **filters): + return dict(db.session.query(Article.user_id, + func.count(Article.id)) + .filter(*self._to_filters(**filters)) + .group_by(Article.user_id).all()) + def create(self, **attrs): # handling special denorm for article rights assert 'feed_id' in attrs diff --git a/pyaggr3g470r/views/api/feed.py b/pyaggr3g470r/views/api/feed.py index 530f3fef..ae2cd735 100644 --- a/pyaggr3g470r/views/api/feed.py +++ b/pyaggr3g470r/views/api/feed.py @@ -3,6 +3,7 @@ from flask import g +import conf from pyaggr3g470r.controllers.feed import (FeedController, DEFAULT_MAX_ERROR, DEFAULT_LIMIT, @@ -54,7 +55,7 @@ class FetchableFeedAPI(PyAggAbstractResource): if g.user.refresh_rate: args['refresh_rate'] = g.user.refresh_rate - if args.pop('retreive_all'): + if args.pop('retreive_all', False): contr = self.wider_controller else: contr = self.controller diff --git a/scripts/__init__.py b/scripts/__init__.py new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/scripts/__init__.py diff --git a/scripts/probes.py b/scripts/probes.py new file mode 100644 index 00000000..bfad4e6e --- /dev/null +++ b/scripts/probes.py @@ -0,0 +1,72 @@ +#!/usr/bin/python3 +import sys +from datetime import datetime, timedelta +from flask.ext.script import Command, Option + +from pyaggr3g470r.controllers \ + import UserController, FeedController, ArticleController +DEFAULT_HEADERS = {'Content-Type': 'application/json', 'User-Agent': 'munin'} +LATE_AFTER = 60 +FETCH_RATE = 3 + + +class AbstractMuninPlugin(Command): + urn = None + + def execute(self): + raise NotImplementedError() + + def config(self): + raise NotImplementedError() + + def get_options(self): + if sys.argv[-1] == 'config': + return [Option(dest='config', default=sys.argv[-1] == 'config')] + return [] + + def run(self, config=False): + if config: + self.config() + else: + self.execute() + + +class FeedProbe(AbstractMuninPlugin): + + def config(self): + print("graph_title PyAgg - Feeds counts") + print("graph_vlabel feeds") + print("feeds.label Late feeds") + print("feeds_total.label Total feeds") + print("feeds.warning 15") + print("feeds.critical 30") + print("graph_category web") + print("graph_scale yes") + + def execute(self): + delta = datetime.now() - timedelta(minutes=LATE_AFTER + FETCH_RATE + 1) + + print("feeds.value %d" % len(FeedController().list_late(delta))) + print("feeds_total.value %d" % FeedController().read().count()) + + +class ArticleProbe(AbstractMuninPlugin): + + def config(self): + print("graph_title Pyagg - Articles adding rate") + print("graph_vlabel Articles per sec") + print("articles.label Overall rate") + print("articles.type DERIVE") + print("articles.min 0") + for id_ in sorted(user.id for user in UserController().read()): + print("articles_user_%s.label Rate for user %s" % (id_, id_)) + print("articles_user_%s.type DERIVE" % id_) + print("articles_user_%s.min 0" % id_) + print("graph_category web") + print("graph_scale yes") + + def execute(self): + counts = ArticleController().count_by_user_id() + print("articles.value %s" % sum(counts.values())) + for user, count in counts.items(): + print("articles_user_%s.value %s" % (user, count)) |