# ArticleController.get_history (src/web/controllers/article.py).
def get_history(self, year=None, month=None):
    """
    Count articles per period and return the matching query.

    Without ``year`` the articles are counted per year. With ``year`` the
    query is restricted to that year and the articles are counted per
    month; ``month`` further restricts the query to that month.

    NOTE(review): the ``month`` filter is only applied together with
    ``year`` (year -> month drill-down) -- confirm against callers.

    :param year: year to restrict the articles to, or None for all years
    :param month: month to restrict the articles to (used with ``year``)
    :return: tuple ``(articles_counter, articles)`` where
             ``articles_counter`` is a ``collections.Counter`` keyed by
             year (no ``year`` given) or by month (``year`` given), and
             ``articles`` is the (possibly filtered) query
    """
    articles = self.read()
    if year is not None:
        articles = articles.filter(
            sqlalchemy.extract('year', Article.date) == year)
        if month is not None:
            articles = articles.filter(
                sqlalchemy.extract('month', Article.date) == month)

    # The grouping granularity does not depend on the article, so it is
    # decided once instead of being re-evaluated for every row.
    period = 'month' if year is not None else 'year'
    articles_counter = Counter(
        getattr(article.date, period) for article in articles.all())
    return articles_counter, articles
# FeedController.get_duplicates / FeedController.get_inactives
# (src/web/controllers/feed.py).
def get_duplicates(self, feed_id):
    """
    Find the pairs of articles of a feed that look like duplicates.

    Every pair of articles of the feed is compared. Two articles are
    considered duplicates when their titles are equal once passed through
    ``clear_string`` and their dates are less than one day apart. Within
    each pair the article with the earlier "retrieved date" comes first.

    :param feed_id: id of the feed to inspect
    :return: tuple ``(feed, duplicates)`` where ``duplicates`` is a list
             of ``(article, article)`` tuples
    """
    feed = self.get(id=feed_id)
    one_day = timedelta(days=1)
    duplicates = []
    for first, second in itertools.combinations(feed.articles, 2):
        if clear_string(first.title) != clear_string(second.title):
            continue
        # The distance must be absolute: a plain ``first.date -
        # second.date`` is negative (hence always "< 1 day") whenever
        # ``first`` is the older article, however far apart they are.
        if abs(first.date - second.date) >= one_day:
            continue
        if first.retrieved_date < second.retrieved_date:
            duplicates.append((first, second))
        else:
            duplicates.append((second, first))
    return feed, duplicates


def get_inactives(self, nb_days):
    """
    List the feeds whose latest article is older than ``nb_days`` days.

    Feeds without any article are skipped.

    NOTE(review): ``feed.articles[0]`` is taken as the latest post, which
    presumably relies on the relationship being ordered newest first --
    confirm against the model.

    :param nb_days: number of days without a post after which a feed is
                    reported as inactive
    :return: list of ``(feed, elapsed)`` tuples, ``elapsed`` being the
             ``timedelta`` since that post, longest inactivity first
    """
    today = datetime.now()
    threshold = timedelta(days=nb_days)
    inactives = []
    for feed in self.read():
        try:
            last_post = feed.articles[0].date
        except IndexError:
            # No article at all: nothing to measure inactivity against.
            continue
        elapsed = today - last_post
        if elapsed > threshold:
            inactives.append((feed, elapsed))
    inactives.sort(key=lambda tup: tup[1], reverse=True)
    return inactives