aboutsummaryrefslogtreecommitdiff
path: root/newspipe/web/controllers/feed.py
diff options
context:
space:
mode:
author: Cédric Bonhomme <cedric@cedricbonhomme.org>, 2020-02-26 11:27:31 +0100
committer: Cédric Bonhomme <cedric@cedricbonhomme.org>, 2020-02-26 11:27:31 +0100
commit: 62b3afeeedfe054345f86093e2d243e956c1e3c9 (patch)
tree: bbd58f5c8c07f5d87b1c1cca73fa1d5af6178f48 /newspipe/web/controllers/feed.py
parent: Updated Python dependencies. (diff)
downloadnewspipe-62b3afeeedfe054345f86093e2d243e956c1e3c9.tar.gz
newspipe-62b3afeeedfe054345f86093e2d243e956c1e3c9.tar.bz2
newspipe-62b3afeeedfe054345f86093e2d243e956c1e3c9.zip
The project is now using Poetry.
Diffstat (limited to 'newspipe/web/controllers/feed.py')
-rw-r--r-- newspipe/web/controllers/feed.py | 98
1 files changed, 98 insertions, 0 deletions
diff --git a/newspipe/web/controllers/feed.py b/newspipe/web/controllers/feed.py
new file mode 100644
index 00000000..d75cd994
--- /dev/null
+++ b/newspipe/web/controllers/feed.py
@@ -0,0 +1,98 @@
+import logging
+import itertools
+from datetime import datetime, timedelta
+
+import conf
+from .abstract import AbstractController
+from .icon import IconController
+from web.models import User, Feed
+from lib.utils import clear_string
+
+logger = logging.getLogger(__name__)
+DEFAULT_LIMIT = 5
+DEFAULT_MAX_ERROR = conf.DEFAULT_MAX_ERROR
+
+
+class FeedController(AbstractController):
+    """Controller exposing feed-specific queries on top of AbstractController.
+
+    Besides the generic operations inherited from the base class, it selects
+    feeds that are due for a fetch, reports duplicated articles and inactive
+    feeds, and keeps icons and article categories in step with feed changes.
+    """
+    _db_cls = Feed
+
+    def list_late(self, max_last, max_error=DEFAULT_MAX_ERROR,
+                  limit=DEFAULT_LIMIT):
+        """Return at most `limit` feeds whose last retrieval is overdue.
+
+        The lookup passes ``error_count__lt=max_error``, ``enabled=True`` and
+        ``last_retrieved__lt=max_last`` to ``self.read`` (the ``__lt`` suffix
+        is interpreted by the base controller, presumably as "less than"),
+        keeps only feeds joined to an active user, and orders the result by
+        ``last_retrieved``.
+        """
+        query = self.read(error_count__lt=max_error, enabled=True,
+                          last_retrieved__lt=max_last)
+        # `== True` is kept as written: filter() receives the comparison
+        # built by the model attribute (presumably a SQLAlchemy column), so
+        # rewriting it as `is True` would not be equivalent.
+        query = query.join(User).filter(User.is_active == True)  # noqa: E712
+        return list(query.order_by('last_retrieved').limit(limit))
+
+    def list_fetchable(self, max_error=DEFAULT_MAX_ERROR, limit=DEFAULT_LIMIT):
+        """Return the feeds due for a fetch and stamp them as retrieved now.
+
+        A feed is due when `list_late` selects it with a cut-off of 60
+        minutes before the current local time. The selected feeds get their
+        ``last_retrieved`` set to the current time before being returned.
+        """
+        now = datetime.now()
+        max_last = now - timedelta(minutes=60)
+        feeds = self.list_late(max_last, max_error, limit)
+        if feeds:
+            # NOTE(review): presumably stamped up front so that another
+            # fetcher does not select the same feeds -- confirm with callers.
+            self.update({'id__in': [feed.id for feed in feeds]},
+                        {'last_retrieved': now})
+        return feeds
+
+    def get_duplicates(self, feed_id):
+        """Find pairs of articles of a feed that look like duplicates.
+
+        Two articles are reported when their titles are equal once passed
+        through `clear_string` and their dates are less than one day apart.
+        Only the first 1000 articles of the feed are compared.
+
+        Returns a ``(feed, duplicates)`` tuple where each item of
+        ``duplicates`` is an ``(article, article)`` pair, the article with
+        the earlier ``retrieved_date`` coming first.
+        """
+        feed = self.get(id=feed_id)
+        # Clean every title once instead of once per compared pair.
+        candidates = [(article, clear_string(article.title))
+                      for article in feed.articles[:1000]]
+        duplicates = []
+        for (first, title1), (second, title2) in itertools.combinations(
+                candidates, 2):
+            if title1 != title2:
+                continue
+            # The gap must be absolute: a plain `first.date - second.date`
+            # is negative whenever `first` is the older article, which made
+            # every such pair pass the "< 1 day" test whatever the distance.
+            if abs(first.date - second.date) >= timedelta(days=1):
+                continue
+            if first.retrieved_date < second.retrieved_date:
+                duplicates.append((first, second))
+            else:
+                duplicates.append((second, first))
+        return feed, duplicates
+
+    def get_inactives(self, nb_days):
+        """Return the feeds with no recent article, least active first.
+
+        A feed is inactive when the date of its first listed article is more
+        than `nb_days` days before now. The result is a list of
+        ``(feed, elapsed)`` tuples sorted by decreasing ``elapsed``. Feeds
+        without articles are skipped; feeds whose articles cannot be read
+        are logged and skipped.
+        """
+        today = datetime.now()
+        inactives = []
+        for feed in self.read():
+            try:
+                # presumably articles[0] is the most recent article; this
+                # depends on the relationship ordering, defined elsewhere
+                last_post = feed.articles[0].date
+            except IndexError:
+                # feed without any article
+                continue
+            except Exception as e:
+                logger.exception(e)
+                continue
+            elapsed = today - last_post
+            if elapsed > timedelta(days=nb_days):
+                inactives.append((feed, elapsed))
+        inactives.sort(key=lambda tup: tup[1], reverse=True)
+        return inactives
+
+    def count_by_category(self, **filters):
+        """Delegate to `_count_by` on ``Feed.category_id`` with `filters`."""
+        return self._count_by(Feed.category_id, filters)
+
+    def count_by_link(self, **filters):
+        """Delegate to `_count_by` on ``Feed.link`` with `filters`."""
+        return self._count_by(Feed.link, filters)
+
+    def _ensure_icon(self, attrs):
+        """Create the icon named by ``attrs['icon_url']`` if none exists.
+
+        Does nothing when `attrs` has no ``icon_url`` or an empty one.
+        """
+        icon_url = attrs.get('icon_url')
+        if not icon_url:
+            return
+        icon_contr = IconController()
+        if not icon_contr.read(url=icon_url).count():
+            icon_contr.create(url=icon_url)
+
+    def create(self, **attrs):
+        """Create a feed from `attrs`, creating its icon first if needed."""
+        self._ensure_icon(attrs)
+        return super().create(**attrs)
+
+    def update(self, filters, attrs):
+        """Update the feeds matching `filters` with `attrs`.
+
+        The icon named by ``attrs['icon_url']`` is created first if needed.
+        A ``category_id`` of 0 is removed from `attrs` (the dict is modified
+        in place), so no category change is passed on. Any other
+        ``category_id`` is first applied to the articles of every matching
+        feed, then the feeds themselves are updated.
+        """
+        # Imported here rather than at module level, presumably to avoid a
+        # circular import between the feed and article controllers.
+        from .article import ArticleController
+        self._ensure_icon(attrs)
+        if 'category_id' in attrs:
+            if attrs['category_id'] == 0:
+                del attrs['category_id']
+            else:
+                art_contr = ArticleController(self.user_id)
+                for feed in self.read(**filters):
+                    art_contr.update({'feed_id': feed.id},
+                                     {'category_id': attrs['category_id']})
+        return super().update(filters, attrs)
bgstack15