1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
|
import itertools
import logging
from datetime import datetime
from datetime import timedelta
from .abstract import AbstractController
from .icon import IconController
from newspipe.bootstrap import application
from newspipe.lib.utils import clear_string
from newspipe.models import Feed
from newspipe.models import User
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Default value of the ``limit`` argument of FeedController.list_late() and
# FeedController.list_fetchable(), i.e. how many feeds they return at most.
DEFAULT_LIMIT = 5
# Default value of the ``max_error`` argument of the same methods, read from
# the application configuration. It is passed as an ``error_count__lt`` filter.
DEFAULT_MAX_ERROR = application.config["DEFAULT_MAX_ERROR"]
class FeedController(AbstractController):
    """Controller for ``Feed`` objects: listings, statistics and write hooks."""

    _db_cls = Feed

    def list_late(self, max_last, max_error=DEFAULT_MAX_ERROR, limit=DEFAULT_LIMIT):
        """Return the feeds that are due for a refresh.

        The query keeps enabled feeds whose owner is active, filtered with
        ``error_count__lt=max_error`` and ``last_retrieved__lt=max_last``,
        ordered by ``last_retrieved``.

        :param max_last: datetime bound used for the ``last_retrieved`` filter.
        :param max_error: bound used for the ``error_count`` filter.
        :param limit: maximum number of feeds to return.
        :return: a list of ``Feed`` objects.
        """
        query = (
            self.read(
                error_count__lt=max_error, enabled=True, last_retrieved__lt=max_last
            )
            .join(User)
            # "== True" is intentional: it builds an SQL expression, so it
            # must not be rewritten as "is True".
            .filter(User.is_active == True)  # noqa
            .order_by("last_retrieved")
            .limit(limit)
        )
        return list(query)

    def list_fetchable(self, max_error=DEFAULT_MAX_ERROR, limit=DEFAULT_LIMIT):
        """Return feeds not retrieved for 60 minutes and stamp them as retrieved.

        The selected feeds get ``last_retrieved`` set to the current time
        before being returned.

        :param max_error: forwarded to :meth:`list_late`.
        :param limit: forwarded to :meth:`list_late`.
        :return: the list of feeds selected by :meth:`list_late`.
        """
        now = datetime.now()
        max_last = now - timedelta(minutes=60)
        feeds = self.list_late(max_last, max_error, limit)
        if feeds:
            self.update(
                {"id__in": [feed.id for feed in feeds]}, {"last_retrieved": now}
            )
        return feeds

    def get_duplicates(self, feed_id):
        """Find pairs of articles of a feed that look like duplicates.

        Only the first 1000 articles of the feed are compared, pair by pair.
        Two articles are duplicates when their titles are equal once passed
        through ``clear_string`` and their dates are less than one day apart.

        :param feed_id: id of the feed to inspect.
        :return: ``(feed, duplicates)`` where ``duplicates`` is a list of
            ``(article, article)`` tuples; within a tuple the article with
            the earlier ``retrieved_date`` comes first.
        """
        feed = self.get(id=feed_id)
        # Normalize every title once rather than once per compared pair.
        candidates = [
            (clear_string(article.title), article) for article in feed.articles[:1000]
        ]
        one_day = timedelta(days=1)
        duplicates = []
        for (title1, art1), (title2, art2) in itertools.combinations(candidates, 2):
            # abs(): the pair can come in either chronological order, and a
            # negative difference would otherwise always pass the test.
            if title1 == title2 and abs(art1.date - art2.date) < one_day:
                if art1.retrieved_date < art2.retrieved_date:
                    duplicates.append((art1, art2))
                else:
                    duplicates.append((art2, art1))
        return feed, duplicates

    def get_inactives(self, nb_days):
        """Return the feeds whose first article is older than ``nb_days`` days.

        Feeds without any article are skipped. Feeds whose articles cannot
        be read are logged and skipped.

        NOTE(review): this treats ``feed.articles[0]`` as the most recent
        article; confirm the ordering of the relationship.

        :param nb_days: inactivity threshold, in days.
        :return: a list of ``(feed, elapsed)`` tuples, ``elapsed`` being a
            ``timedelta``, sorted with the longest inactivity first.
        """
        today = datetime.now()
        threshold = timedelta(days=nb_days)
        inactives = []
        for feed in self.read():
            try:
                last_post = feed.articles[0].date
            except IndexError:
                # The feed has no article at all.
                continue
            except Exception as e:
                logger.exception(e)
                continue
            elapsed = today - last_post
            if elapsed > threshold:
                inactives.append((feed, elapsed))
        inactives.sort(key=lambda tup: tup[1], reverse=True)
        return inactives

    def count_by_category(self, **filters):
        """Return ``_count_by`` applied to ``Feed.category_id`` with ``filters``."""
        return self._count_by(Feed.category_id, filters)

    def count_by_link(self, **filters):
        """Return ``_count_by`` applied to ``Feed.link`` with ``filters``."""
        return self._count_by(Feed.link, filters)

    def _ensure_icon(self, attrs):
        """Create the icon referenced by ``attrs["icon_url"]`` if it is unknown.

        Does nothing when ``attrs`` has no (or an empty) ``icon_url``.
        """
        icon_url = attrs.get("icon_url")
        if not icon_url:
            return
        icon_contr = IconController()
        if not icon_contr.read(url=icon_url).count():
            icon_contr.create(url=icon_url)

    def create(self, **attrs):
        """Create a feed, making sure its icon exists first.

        :param attrs: attributes of the new feed; ``link`` is mandatory.
        :raises AssertionError: when ``link`` is missing.
        :return: whatever the parent ``create`` returns.
        """
        # NOTE(review): this check is stripped when Python runs with -O; it is
        # kept as an assert so the exception type seen by callers is unchanged.
        assert "link" in attrs, "A feed must have a link."
        self._ensure_icon(attrs)
        return super().create(**attrs)

    def update(self, filters, attrs):
        """Update the feeds matching ``filters`` with ``attrs``.

        The icon referenced by ``attrs`` is created when needed. When
        ``category_id`` is part of ``attrs``, the articles of every matching
        feed are given that ``category_id`` before the feeds are updated.

        :param filters: dict of filters selecting the feeds to update.
        :param attrs: dict of attributes to set.
        :return: whatever the parent ``update`` returns.
        """
        # Imported here rather than at module level (presumably to avoid a
        # circular import between the controllers; confirm before moving it).
        from .article import ArticleController

        self._ensure_icon(attrs)
        # NOTE: a ``category_id`` of 0 is passed through unchanged; the former
        # special case that dropped it from ``attrs`` is disabled.
        if "category_id" in attrs:
            art_contr = ArticleController(self.user_id)
            for feed in self.read(**filters):
                art_contr.update(
                    {"feed_id": feed.id}, {"category_id": attrs["category_id"]}
                )
        return super().update(filters, attrs)
|