aboutsummaryrefslogtreecommitdiff
path: root/newspipe/controllers/feed.py
blob: 9461a798bac156a58343bca67ed82dab6c668fe7 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
import itertools
import logging
from datetime import datetime
from datetime import timedelta

from .abstract import AbstractController
from .icon import IconController
from newspipe.bootstrap import application
from newspipe.lib.utils import clear_string
from newspipe.models import Feed
from newspipe.models import User

# Default ``limit`` argument of FeedController.list_late / list_fetchable.
DEFAULT_LIMIT = 5
# Default ``max_error`` argument of the same methods; the value comes from
# the application configuration and is read once, at import time.
DEFAULT_MAX_ERROR = application.config["DEFAULT_MAX_ERROR"]

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


class FeedController(AbstractController):
    """Controller for ``Feed`` rows.

    Adds feed-specific queries (late/fetchable feeds, duplicate articles,
    inactive feeds, counts) and keeps icons and article categories in sync
    when feeds are created or updated.
    """

    _db_cls = Feed

    def list_late(self, max_last, max_error=DEFAULT_MAX_ERROR, limit=DEFAULT_LIMIT):
        """Return the enabled feeds of active users that are due for a fetch.

        :param max_last: only feeds whose ``last_retrieved`` is strictly
            before this datetime are returned.
        :param max_error: only feeds whose ``error_count`` is strictly below
            this value are returned.
        :param limit: maximum number of feeds returned.
        :return: a list of feeds ordered by ``last_retrieved``.
        """
        query = (
            self.read(
                error_count__lt=max_error, enabled=True, last_retrieved__lt=max_last
            )
            .join(User)
            # ``== True`` is deliberate: it builds a SQL expression, it is not
            # a Python truthiness test.
            .filter(User.is_active == True)  # noqa
            .order_by("last_retrieved")
            .limit(limit)
        )
        return list(query)

    def list_fetchable(self, max_error=DEFAULT_MAX_ERROR, limit=DEFAULT_LIMIT):
        """Return feeds not retrieved for 60 minutes and mark them as retrieved.

        The selected feeds get ``last_retrieved`` set to the current time, so
        a subsequent call within the hour does not select them again.

        :param max_error: forwarded to :meth:`list_late`.
        :param limit: forwarded to :meth:`list_late`.
        :return: the list of feeds selected by :meth:`list_late`.
        """
        now = datetime.now()
        max_last = now - timedelta(minutes=60)
        feeds = self.list_late(max_last, max_error, limit)
        if feeds:
            self.update(
                {"id__in": [feed.id for feed in feeds]}, {"last_retrieved": now}
            )
        return feeds

    def get_duplicates(self, feed_id):
        """Find pairs of articles of a feed that look like duplicates.

        Two articles form a duplicate pair when their titles are equal once
        passed through ``clear_string`` and their ``date`` values are less
        than one day apart, in either direction. Only the first 1000 articles
        of the feed are compared.

        :param feed_id: id of the feed to inspect.
        :return: a ``(feed, duplicates)`` tuple where ``duplicates`` is a list
            of ``(earlier, later)`` article pairs, ordered within each pair by
            ``retrieved_date``.
        """
        feed = self.get(id=feed_id)
        # Normalise every title once instead of twice per compared pair.
        candidates = [
            (article, clear_string(article.title))
            for article in feed.articles[:1000]
        ]
        one_day = timedelta(days=1)
        duplicates = []
        for (first, first_title), (second, second_title) in itertools.combinations(
            candidates, 2
        ):
            if first_title != second_title:
                continue
            # abs(): a signed difference is negative whenever ``first`` is the
            # older article, and a negative timedelta is always "< 1 day",
            # which would pair up articles published arbitrarily far apart.
            if abs(first.date - second.date) >= one_day:
                continue
            if first.retrieved_date < second.retrieved_date:
                duplicates.append((first, second))
            else:
                duplicates.append((second, first))
        return feed, duplicates

    def get_inactives(self, nb_days):
        """Return the feeds whose first listed article is older than *nb_days*.

        Feeds without any article are skipped. Any other error raised while
        reading the article date is logged and the feed is skipped.

        NOTE(review): this relies on ``feed.articles[0]`` being the most
        recent article - confirm the ordering of the relationship.

        :param nb_days: inactivity threshold, in days.
        :return: a list of ``(feed, elapsed)`` tuples, longest elapsed time
            first, where ``elapsed`` is a ``timedelta``.
        """
        today = datetime.now()
        threshold = timedelta(days=nb_days)
        inactives = []
        for feed in self.read():
            try:
                last_post = feed.articles[0].date
            except IndexError:
                # The feed has no article at all.
                continue
            except Exception as e:
                logger.exception(e)
                continue
            elapsed = today - last_post
            if elapsed > threshold:
                inactives.append((feed, elapsed))
        inactives.sort(key=lambda tup: tup[1], reverse=True)
        return inactives

    def count_by_category(self, **filters):
        """Count feeds grouped by ``Feed.category_id`` (delegates to ``_count_by``)."""
        return self._count_by(Feed.category_id, filters)

    def count_by_link(self, **filters):
        """Count feeds grouped by ``Feed.link`` (delegates to ``_count_by``)."""
        return self._count_by(Feed.link, filters)

    def _ensure_icon(self, attrs):
        """Create the icon referenced by ``attrs["icon_url"]`` if it is unknown.

        Does nothing when ``icon_url`` is missing or empty.
        """
        icon_url = attrs.get("icon_url")
        if not icon_url:
            return
        icon_contr = IconController()
        if not icon_contr.read(url=icon_url).count():
            icon_contr.create(url=icon_url)

    def create(self, **attrs):
        """Create a feed, creating its icon first when needed.

        :param attrs: attributes of the new feed; ``link`` is mandatory.
        :raises AssertionError: when ``link`` is not provided.
        :return: whatever the parent controller's ``create`` returns.
        """
        # Raised explicitly rather than with ``assert`` so that the check is
        # still enforced when Python runs with -O; the exception type and
        # message are unchanged.
        if "link" not in attrs:
            raise AssertionError("A feed must have a link.")
        self._ensure_icon(attrs)
        return super().create(**attrs)

    def update(self, filters, attrs):
        """Update the feeds matching *filters* with *attrs*.

        When ``category_id`` is part of *attrs*, the articles of every matched
        feed are moved to that category as well.

        :param filters: filters selecting the feeds to update.
        :param attrs: attributes to set on the selected feeds.
        :return: whatever the parent controller's ``update`` returns.
        """
        # Imported here rather than at module level - presumably to avoid a
        # circular import between the controllers.
        from .article import ArticleController

        self._ensure_icon(attrs)
        if "category_id" in attrs:
            art_contr = ArticleController(self.user_id)
            for feed in self.read(**filters):
                art_contr.update(
                    {"feed_id": feed.id}, {"category_id": attrs["category_id"]}
                )
        return super().update(filters, attrs)
bgstack15