aboutsummaryrefslogtreecommitdiff
path: root/src/web/controllers
diff options
context:
space:
mode:
Diffstat (limited to 'src/web/controllers')
-rw-r--r-- src/web/controllers/article.py 21
-rw-r--r-- src/web/controllers/feed.py 33
2 files changed, 54 insertions, 0 deletions
diff --git a/src/web/controllers/article.py b/src/web/controllers/article.py
index 3d8d5c01..72288a09 100644
--- a/src/web/controllers/article.py
+++ b/src/web/controllers/article.py
@@ -1,6 +1,8 @@
import re
import logging
+import sqlalchemy
from sqlalchemy import func
+from collections import Counter
from bootstrap import db
from .abstract import AbstractController
@@ -70,3 +72,22 @@ class ArticleController(AbstractController):
attrs['link'])
return super().create(**attrs)
+
def get_history(self, year=None, month=None):
    """
    Count the articles per period and return the counts with the query.

    The selection starts from ``self.read()`` and is narrowed to ``year``
    and/or ``month`` (matched against ``Article.date``) when they are given.
    When ``year`` is given the articles are counted per month number,
    otherwise they are counted per year.

    Returns a ``(counter, articles)`` tuple: the ``Counter`` of articles
    per period and the (possibly filtered) object obtained from
    ``self.read()``.
    """
    selection = self.read()
    if year is not None:
        selection = selection.filter(
            sqlalchemy.extract('year', Article.date) == year)
    if month is not None:
        selection = selection.filter(
            sqlalchemy.extract('month', Article.date) == month)

    # Inside a given year the interesting bucket is the month; across
    # all years it is the year itself.
    bucket_by_month = year is not None
    per_period = Counter(
        entry.date.month if bucket_by_month else entry.date.year
        for entry in selection.all())
    return per_period, selection
diff --git a/src/web/controllers/feed.py b/src/web/controllers/feed.py
index 3b05b294..b76c4e42 100644
--- a/src/web/controllers/feed.py
+++ b/src/web/controllers/feed.py
@@ -20,12 +20,14 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import logging
+import itertools
from datetime import datetime, timedelta
import conf
from .abstract import AbstractController
from .icon import IconController
from web.models import Feed
+from web.lib.utils import clear_string
logger = logging.getLogger(__name__)
DEFAULT_LIMIT = 5
@@ -54,6 +56,37 @@ class FeedController(AbstractController):
{'last_retrieved': now})
return feeds
def get_duplicates(self, feed_id):
    """
    Find the pairs of articles of a feed that look like duplicates.

    Every pair of articles of the feed is compared. Two articles are
    reported as duplicates when their titles are equal once passed
    through ``clear_string`` and their dates are less than one day apart,
    whichever of the two is the older one.

    Returns a ``(feed, duplicates)`` tuple where ``duplicates`` is a list
    of 2-tuples of articles; inside each tuple the article with the
    earlier "retrieved date" comes first.
    """
    feed = self.get(id=feed_id)
    one_day = timedelta(days=1)
    # Normalise each title once instead of once per compared pair.
    candidates = [(clear_string(article.title), article)
                  for article in feed.articles]
    duplicates = []
    for (title1, first), (title2, second) in \
            itertools.combinations(candidates, 2):
        if title1 != title2:
            continue
        # abs() is required: a signed difference is negative whenever
        # `first` is older than `second`, which would always compare as
        # "less than one day" no matter how far apart the dates are.
        if abs(first.date - second.date) >= one_day:
            continue
        if first.retrieved_date < second.retrieved_date:
            duplicates.append((first, second))
        else:
            duplicates.append((second, first))
    return feed, duplicates
+
def get_inactives(self, nb_days):
    """
    List the feeds whose first article is older than ``nb_days`` days.

    For every feed returned by ``self.read()`` the date of
    ``feed.articles[0]`` is compared with the current local time; feeds
    without any article are ignored.
    NOTE(review): this presumably relies on ``feed.articles`` being
    ordered most recent first -- confirm against the model.

    Returns a list of ``(feed, elapsed)`` tuples, ``elapsed`` being the
    ``timedelta`` since that article, sorted from the longest to the
    shortest inactivity.
    """
    now = datetime.now()
    stale = []
    for candidate in self.read():
        try:
            reference_date = candidate.articles[0].date
        except IndexError:
            # Feed without article: nothing to measure.
            continue
        idle = now - reference_date
        if idle > timedelta(days=nb_days):
            stale.append((candidate, idle))
    return sorted(stale, key=lambda entry: entry[1], reverse=True)
+
def _ensure_icon(self, attrs):
if not attrs.get('icon_url'):
return
bgstack15