author    | Cédric Bonhomme <cedric@cedricbonhomme.org> | 2016-11-17 08:30:06 +0100
committer | Cédric Bonhomme <cedric@cedricbonhomme.org> | 2016-11-17 08:30:06 +0100
commit    | b0e987fbafaa28226c54157fb11993079c5341e2 (patch)
tree      | 1f0cd04a505dce4680155f8bb4c7bb757984c030
parent    | Bugfix: should import Article in order to resolve the 'date' column for the o... (diff)
download  | newspipe-b0e987fbafaa28226c54157fb11993079c5341e2.tar.gz
            newspipe-b0e987fbafaa28226c54157fb11993079c5341e2.tar.bz2
            newspipe-b0e987fbafaa28226c54157fb11993079c5341e2.zip
cleaning the mess in the libs directories
30 files changed, 217 insertions, 677 deletions
@@ -5,7 +5,7 @@ Newspipe
 Presentation
 ------------
 
-`Newspipe <https://github.com/Newspipe/Newspipe>`_ is a web-based news
+`Newspipe <https://github.com/newspipe/newspipe>`_ is a web-based news
 aggregator and reader.
 
 Main features
@@ -36,7 +36,7 @@ provides different ways to install Newspipe.
 License
 -------
 
-`Newspipe <https://github.com/Newspipe/Newspipe>`_ is under the
+`Newspipe <https://github.com/newspipe/newspipe>`_ is under the
 `GNU Affero General Public License version 3 <https://www.gnu.org/licenses/agpl-3.0.html>`_.
 
 Contact
diff --git a/src/bootstrap.py b/src/bootstrap.py
index f9de381a..5af29c69 100644
--- a/src/bootstrap.py
+++ b/src/bootstrap.py
@@ -18,6 +18,10 @@ def set_logging(log_path=None, log_level=logging.INFO, modules=(),
     if conf.ON_HEROKU:
         log_format = '%(levelname)s %(message)s'
     if log_path:
+        if not os.path.exists(os.path.dirname(log_path)):
+            os.makedirs(os.path.dirname(log_path))
+        if not os.path.exists(log_path):
+            open(log_path, 'w').close()
         handler = logging.FileHandler(log_path)
     else:
         handler = logging.StreamHandler()
diff --git a/src/conf/conf.cfg-sample b/src/conf/conf.cfg-sample
index 6fae48b5..7c4668af 100644
--- a/src/conf/conf.cfg-sample
+++ b/src/conf/conf.cfg-sample
@@ -9,7 +9,7 @@ platform_url = http://127.0.0.1:5000/
 admin_email =
 security_password_salt = a secret to confirm user account
 token_validity_period = 3600
-log_path = ./src/web/var/newspipe.log
+log_path = ./var/newspipe.log
 nb_worker = 5
 log_level = info
 [database]
@@ -17,9 +17,7 @@ database_url = postgres://pgsqluser:pgsqlpwd@127.0.0.1:5432/aggregator
 [crawler]
 crawling_method = classic
 default_max_error = 6
-user_agent = Newspipe (https://github.com/Newspipe/Newspipe)
-api_login =
-api_passwd =
+user_agent = Newspipe (https://github.com/newspipe/newspipe)
 timeout = 30
 resolv = true
 feed_refresh_interval = 120
diff --git a/src/crawler/classic_crawler.py b/src/crawler/classic_crawler.py
index eb75b78f..34726a83 100644
--- a/src/crawler/classic_crawler.py
+++ b/src/crawler/classic_crawler.py
@@ -37,8 +37,8 @@ import conf
 from bootstrap import db
 from web.models import User
 from web.controllers import FeedController, ArticleController
-from web.lib.feed_utils import construct_feed_from, is_parsing_ok
-from web.lib.article_utils import construct_article, extract_id, \
+from lib.feed_utils import construct_feed_from, is_parsing_ok
+from lib.article_utils import construct_article, extract_id, \
     get_article_content
 
 logger = logging.getLogger(__name__)
diff --git a/src/crawler/http_crawler.py b/src/crawler/http_crawler.py
deleted file mode 100644
index f480fe96..00000000
--- a/src/crawler/http_crawler.py
+++ /dev/null
@@ -1,251 +0,0 @@
-"""
-Here's a sum up on how it works :
-
-CrawlerScheduler.run
-    will retreive a list of feeds to be refreshed and pass result to
-CrawlerScheduler.callback
-    which will retreive each feed and treat result with
-FeedCrawler.callback
-    which will interprete the result (status_code, etag) collect ids
-    and match them agaisnt pyagg which will cause
-PyAggUpdater.callback
-    to create the missing entries
-"""
-
-import time
-import conf
-import json
-import logging
-import feedparser
-from datetime import datetime, timedelta
-from time import strftime, gmtime
-from concurrent.futures import ThreadPoolExecutor
-from requests_futures.sessions import FuturesSession
-from web.lib.utils import default_handler, to_hash
-from web.lib.feed_utils import construct_feed_from
-from web.lib.article_utils import extract_id, construct_article
-
-logger = logging.getLogger(__name__)
-logging.captureWarnings(True)
-API_ROOT = "api/v2.0/"
-
-
-class AbstractCrawler:
-
-    def __init__(self, auth, pool=None, session=None):
-        self.auth = auth
-        self.pool = pool or ThreadPoolExecutor(max_workers=conf.NB_WORKER)
-        self.session = session or FuturesSession(executor=self.pool)
-        self.session.verify = False
-        self.url = conf.PLATFORM_URL
-
-    def query_pyagg(self, method, urn, data=None):
-        """A wrapper for internal call, method should be ones you can find
-        on requests (header, post, get, options, ...), urn the distant
-        resources you want to access on pyagg, and data, the data you wanna
-        transmit."""
-        if data is None:
-            data = {}
-        method = getattr(self.session, method)
-        return method("%s%s%s" % (self.url, API_ROOT, urn),
-                      auth=self.auth, data=json.dumps(data,
-                                                      default=default_handler),
-                      headers={'Content-Type': 'application/json',
-                               'User-Agent': conf.USER_AGENT})
-
-    def wait(self, max_wait=300, checks=5, wait_for=2):
-        checked, second_waited = 0, 0
-        while True:
-            time.sleep(wait_for)
-            second_waited += wait_for
-            if second_waited > max_wait:
-                logger.warn('Exiting after %d seconds', second_waited)
-                break
-            if self.pool._work_queue.qsize():
-                checked = 0
-                continue
-            checked += 1
-            if checked == checks:
-                break
-
-
-class PyAggUpdater(AbstractCrawler):
-
-    def __init__(self, feed, entries, headers, parsed_feed,
-                 auth, pool=None, session=None):
-        self.feed = feed
-        self.entries = entries
-        self.headers = headers
-        self.parsed_feed = parsed_feed
-        super().__init__(auth, pool, session)
-
-    def callback(self, response):
-        """Will process the result from the challenge, creating missing article
-        and updating the feed"""
-        article_created = False
-        if response.result().status_code != 204:
-            results = response.result().json()
-            logger.debug('%r %r - %d entries were not matched '
-                         'and will be created',
-                         self.feed['id'], self.feed['title'], len(results))
-            for id_to_create in results:
-                article_created = True
-                entry = construct_article(
-                        self.entries[tuple(sorted(id_to_create.items()))],
-                        self.feed)
-                logger.info('%r %r - creating %r for %r - %r', self.feed['id'],
-                            self.feed['title'], entry['title'],
-                            entry['user_id'], id_to_create)
-                self.query_pyagg('post', 'article', entry)
-
-        logger.debug('%r %r - updating feed etag %r last_mod %r',
-                     self.feed['id'], self.feed['title'],
-                     self.headers.get('etag', ''),
-                     self.headers.get('last-modified', ''))
-
-        up_feed = {'error_count': 0, 'last_error': None,
-                   'etag': self.headers.get('etag', ''),
-                   'last_modified': self.headers.get('last-modified',
-                       strftime('%a, %d %b %Y %X %Z', gmtime()))}
-        fresh_feed = construct_feed_from(url=self.feed['link'],
-                                         fp_parsed=self.parsed_feed)
-        for key in ('description', 'site_link', 'icon_url'):
-            if fresh_feed.get(key) and fresh_feed[key] != self.feed.get(key):
-                up_feed[key] = fresh_feed[key]
-        if not self.feed.get('title'):
-            up_feed['title'] = fresh_feed.get('title', '')
-        up_feed['user_id'] = self.feed['user_id']
-        # re-getting that feed earlier since new entries appeared
-        if article_created:
-            up_feed['last_retrieved'] \
-                    = (datetime.now() - timedelta(minutes=45)).isoformat()
-
-        diff_keys = {key for key in up_feed
-                     if up_feed[key] != self.feed.get(key)}
-        if not diff_keys:
-            return  # no change in the feed, no update
-        if not article_created and diff_keys == {'last_modified', 'etag'}:
-            return  # meaningless if no new article has been published
-        logger.info('%r %r - pushing feed attrs %r',
-                    self.feed['id'], self.feed['title'],
-                    {key: "%s -> %s" % (up_feed[key],
-                                        self.feed.get(key))
-                     for key in up_feed if up_feed[key] != self.feed.get(key)})
-
-        self.query_pyagg('put', 'feed/%d' % self.feed['id'], up_feed)
-
-
-class FeedCrawler(AbstractCrawler):
-
-    def __init__(self, feed, auth, pool=None, session=None):
-        self.feed = feed
-        super().__init__(auth, pool, session)
-
-    def clean_feed(self):
-        """Will reset the errors counters on a feed that have known errors"""
-        if self.feed.get('error_count') or self.feed.get('last_error'):
-            self.query_pyagg('put', 'feed/%d' % self.feed['id'],
-                             {'error_count': 0, 'last_error': ''})
-
-    def callback(self, response):
-        """will fetch the feed and interprete results (304, etag) or will
-        challenge pyagg to compare gotten entries with existing ones"""
-        try:
-            response = response.result()
-            response.raise_for_status()
-        except Exception as error:
-            error_count = self.feed['error_count'] + 1
-            logger.exception('%r %r - an error occured while fetching '
-                             'feed; bumping error count to %r',
-                             self.feed['id'], self.feed['title'], error_count)
-            future = self.query_pyagg('put', 'feed/%d' % self.feed['id'],
-                                      {'error_count': error_count,
-                                       'last_error': str(error),
-                                       'user_id': self.feed['user_id']})
-            return
-
-        if response.status_code == 304:
-            logger.info("%r %r - feed responded with 304",
-                        self.feed['id'], self.feed['title'])
-            self.clean_feed()
-            return
-        if 'etag' not in response.headers:
-            logger.debug('%r %r - manually generating etag',
-                         self.feed['id'], self.feed['title'])
-            response.headers['etag'] = 'pyagg/"%s"' % to_hash(response.text)
-        if response.headers['etag'] and self.feed['etag'] \
-                and response.headers['etag'] == self.feed['etag']:
-            if 'pyagg' in self.feed['etag']:
-                logger.info("%r %r - calculated hash matches (%d)",
-                            self.feed['id'], self.feed['title'],
-                            response.status_code)
-            else:
-                logger.info("%r %r - feed responded with same etag (%d)",
-                            self.feed['id'], self.feed['title'],
-                            response.status_code)
-            self.clean_feed()
-            return
-        else:
-            logger.debug('%r %r - etag mismatch %r != %r',
-                         self.feed['id'], self.feed['title'],
-                         response.headers['etag'], self.feed['etag'])
-        logger.info('%r %r - cache validation failed, challenging entries',
-                    self.feed['id'], self.feed['title'])
-
-        ids, entries = [], {}
-        parsed_response = feedparser.parse(response.content)
-        for entry in parsed_response['entries']:
-            entry_ids = extract_id(entry)
-            entry_ids['feed_id'] = self.feed['id']
-            entry_ids['user_id'] = self.feed['user_id']
-            entries[tuple(sorted(entry_ids.items()))] = entry
-            ids.append(entry_ids)
-        logger.debug('%r %r - found %d entries %r',
-                     self.feed['id'], self.feed['title'], len(ids), ids)
-        future = self.query_pyagg('get', 'articles/challenge', {'ids': ids})
-        updater = PyAggUpdater(self.feed, entries, response.headers,
-                               parsed_response,
-                               self.auth, self.pool, self.session)
-        future.add_done_callback(updater.callback)
-
-
-class CrawlerScheduler(AbstractCrawler):
-
-    def __init__(self, username, password, pool=None, session=None):
-        self.auth = (username, password)
-        super(CrawlerScheduler, self).__init__(self.auth, pool, session)
-
-    def prepare_headers(self, feed):
-        """For a known feed, will construct some header dictionnary"""
-        headers = {'User-Agent': conf.USER_AGENT}
-        if feed.get('last_modified'):
-            headers['If-Modified-Since'] = feed['last_modified']
-        if feed.get('etag') and 'pyagg' not in feed['etag']:
-            headers['If-None-Match'] = feed['etag']
-        logger.debug('%r %r - calculated headers %r',
-                     feed['id'], feed['title'], headers)
-        return headers
-
-    def callback(self, response):
"""processes feeds that need to be fetched""" - response = response.result() - response.raise_for_status() - if response.status_code == 204: - logger.debug("No feed to fetch") - return - feeds = response.json() - logger.debug('%d to fetch %r', len(feeds), feeds) - for feed in feeds: - logger.debug('%r %r - fetching resources', - feed['id'], feed['title']) - future = self.session.get(feed['link'], - headers=self.prepare_headers(feed)) - - feed_crwlr = FeedCrawler(feed, self.auth, self.pool, self.session) - future.add_done_callback(feed_crwlr.callback) - - def run(self, **kwargs): - """entry point, will retreive feeds to be fetch - and launch the whole thing""" - logger.debug('retreving fetchable feed') - future = self.query_pyagg('get', 'feeds/fetchable', kwargs) - future.add_done_callback(self.callback) diff --git a/src/tests/__init__.py b/src/lib/__init__.py index e69de29b..e69de29b 100644 --- a/src/tests/__init__.py +++ b/src/lib/__init__.py diff --git a/src/web/lib/article_utils.py b/src/lib/article_utils.py index 2c5ea8c3..49494e85 100644 --- a/src/web/lib/article_utils.py +++ b/src/lib/article_utils.py @@ -10,7 +10,7 @@ from bs4 import BeautifulSoup, SoupStrainer from requests.exceptions import MissingSchema import conf -from web.lib.utils import jarr_get +from lib.utils import jarr_get logger = logging.getLogger(__name__) PROCESSED_DATE_KEYS = {'published', 'created', 'updated'} diff --git a/src/lib/data.py b/src/lib/data.py new file mode 100644 index 00000000..d887c003 --- /dev/null +++ b/src/lib/data.py @@ -0,0 +1,162 @@ +#! /usr/bin/env python +#-*- coding: utf-8 -*- + +# Newspipe - A Web based news aggregator. +# Copyright (C) 2010-2016 Cédric Bonhomme - https://www.cedricbonhomme.org +# +# For more information : https://github.com/newspipe/newspipe +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +__author__ = "Cedric Bonhomme" +__version__ = "$Revision: 0.1 $" +__date__ = "$Date: 2016/11/17 $" +__revision__ = "$Date: 2016/11/17 $" +__copyright__ = "Copyright (c) Cedric Bonhomme" +__license__ = "AGPLv3" + +# +# This file contains the import/export functions of Newspipe. +# + +import json +import opml +import datetime +from flask import jsonify + +from bootstrap import db +from web.models import User, Feed, Article + + +def import_opml(email, opml_content): + """ + Import new feeds from an OPML file. + """ + user = User.query.filter(User.email == email).first() + try: + subscriptions = opml.from_string(opml_content) + except: + logger.exception("Parsing OPML file failed:") + raise + + def read(subsubscription, nb=0): + """ + Parse recursively through the categories and sub-categories. 
+ """ + for subscription in subsubscription: + if len(subscription) != 0: + nb = read(subscription, nb) + else: + try: + title = subscription.text + except: + title = "" + try: + description = subscription.description + except: + description = "" + try: + link = subscription.xmlUrl + except: + continue + if None != Feed.query.filter(Feed.user_id == user.id, Feed.link == link).first(): + continue + try: + site_link = subscription.htmlUrl + except: + site_link = "" + new_feed = Feed(title=title, description=description, + link=link, site_link=site_link, + enabled=True) + user.feeds.append(new_feed) + nb += 1 + return nb + nb = read(subscriptions) + db.session.commit() + return nb + + +def import_json(email, json_content): + """ + Import an account from a JSON file. + """ + user = User.query.filter(User.email == email).first() + json_account = json.loads(json_content.decode("utf-8")) + nb_feeds, nb_articles = 0, 0 + # Create feeds: + for feed in json_account["result"]: + if None != Feed.query.filter(Feed.user_id == user.id, + Feed.link == feed["link"]).first(): + continue + new_feed = Feed(title=feed["title"], + description="", + link=feed["link"], + site_link=feed["site_link"], + created_date=datetime.datetime. + fromtimestamp(int(feed["created_date"])), + enabled=feed["enabled"]) + user.feeds.append(new_feed) + nb_feeds += 1 + db.session.commit() + # Create articles: + for feed in json_account["result"]: + user_feed = Feed.query.filter(Feed.user_id == user.id, + Feed.link == feed["link"]).first() + if None != user_feed: + for article in feed["articles"]: + if None == Article.query.filter(Article.user_id == user.id, + Article.feed_id == user_feed.id, + Article.link == article["link"]).first(): + new_article = Article(entry_id=article["link"], + link=article["link"], + title=article["title"], + content=article["content"], + readed=article["readed"], + like=article["like"], + retrieved_date=datetime.datetime. + fromtimestamp(int(article["retrieved_date"])), + date=datetime.datetime. + fromtimestamp(int(article["date"])), + user_id=user.id, + feed_id=user_feed.id) + user_feed.articles.append(new_article) + nb_articles += 1 + db.session.commit() + return nb_feeds, nb_articles + + +def export_json(user): + """ + Export all articles of user in JSON. + """ + result = [] + for feed in user.feeds: + result.append({ + "title": feed.title, + "description": feed.description, + "link": feed.link, + "site_link": feed.site_link, + "enabled": feed.enabled, + "created_date": feed.created_date.strftime('%s'), + "articles": [ { + "title": article.title, + "link": article.link, + "content": article.content, + "readed": article.readed, + "like": article.like, + "date": article.date.strftime('%s'), + "retrieved_date": article.retrieved_date.strftime('%s') + } for article in feed.articles ] + }) + return jsonify(result=result) diff --git a/src/web/lib/feed_utils.py b/src/lib/feed_utils.py index ef5d4f08..492391aa 100644 --- a/src/web/lib/feed_utils.py +++ b/src/lib/feed_utils.py @@ -6,7 +6,7 @@ import feedparser from conf import CRAWLER_USER_AGENT from bs4 import BeautifulSoup, SoupStrainer -from web.lib.utils import try_keys, try_get_icon_url, rebuild_url +from lib.utils import try_keys, try_get_icon_url, rebuild_url logger = logging.getLogger(__name__) logging.captureWarnings(True) diff --git a/src/web/lib/misc_utils.py b/src/lib/misc_utils.py index 6a0e00ec..d594c01e 100755 --- a/src/web/lib/misc_utils.py +++ b/src/lib/misc_utils.py @@ -4,7 +4,7 @@ # Newspipe - A Web based news aggregator. 
 # Copyright (C) 2010-2016 Cédric Bonhomme - https://www.cedricbonhomme.org
 #
-# For more information : https://github.com/Newspipe/Newspipe
+# For more information : https://github.com/newspipe/newspipe
 #
 # This program is free software: you can redistribute it and/or modify
 # it under the terms of the GNU Affero General Public License as
@@ -20,27 +20,18 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 
 __author__ = "Cedric Bonhomme"
-__version__ = "$Revision: 1.8 $"
+__version__ = "$Revision: 1.9 $"
 __date__ = "$Date: 2010/12/07 $"
-__revision__ = "$Date: 2016/04/10 $"
+__revision__ = "$Date: 2016/01/17 $"
 __copyright__ = "Copyright (c) Cedric Bonhomme"
 __license__ = "AGPLv3"
 
-#
-# This file provides functions used for:
-# - import from a JSON file;
-# - generation of tags cloud;
-# - HTML processing.
-#
-
 import re
 import os
 import sys
 import glob
-import opml
 import json
 import logging
-import datetime
 import operator
 import urllib
 import subprocess
@@ -49,21 +40,19 @@
 try:
     from urlparse import urlparse, parse_qs, urlunparse
 except:
     from urllib.parse import urlparse, parse_qs, urlunparse, urljoin
-from bs4 import BeautifulSoup
 from collections import Counter
 from contextlib import contextmanager
 from flask import request
 
 import conf
-from bootstrap import db
-from web import controllers
-from web.models import User, Feed, Article
-from web.lib.utils import clear_string
+from web.controllers import ArticleController
+from lib.utils import clear_string
 
 logger = logging.getLogger(__name__)
 
 ALLOWED_EXTENSIONS = set(['xml', 'opml', 'json'])
 
+
 def is_safe_url(target):
     """
     Ensures that a redirect target will lead to the same server.
@@ -73,6 +62,7 @@ def is_safe_url(target):
     return test_url.scheme in ('http', 'https') and \
            ref_url.netloc == test_url.netloc
 
+
 def get_redirect_target():
     """
     Looks at various hints to find the redirect target.
@@ -83,6 +73,7 @@ def get_redirect_target():
         if is_safe_url(target):
             return target
 
+
 def allowed_file(filename):
     """
     Check if the uploaded file is allowed.
@@ -90,6 +81,7 @@ def allowed_file(filename):
     return '.' in filename and \
            filename.rsplit('.', 1)[1] in ALLOWED_EXTENSIONS
 
+
 @contextmanager
 def opened_w_error(filename, mode="r"):
     try:
@@ -102,6 +94,7 @@ def opened_w_error(filename, mode="r"):
         finally:
             f.close()
 
+
 def fetch(id, feed_id=None):
     """
     Fetch the feeds in a new processus.
@@ -113,16 +106,17 @@ def fetch(id, feed_id=None):
         cmd.append('--feed_id='+str(feed_id))
     return subprocess.Popen(cmd, stdout=subprocess.PIPE)
 
+
 def history(user_id, year=None, month=None):
     """
     Sort articles by year and month.
     """
     articles_counter = Counter()
-    articles = controllers.ArticleController(user_id).read()
+    articles = ArticleController(user_id).read()
    if None != year:
-        articles = articles.filter(sqlalchemy.extract('year', Article.date) == year)
+        articles = articles.filter(sqlalchemy.extract('year', 'Article.date') == year)
     if None != month:
-        articles = articles.filter(sqlalchemy.extract('month', Article.date) == month)
+        articles = articles.filter(sqlalchemy.extract('month', 'Article.date') == month)
     for article in articles.all():
         if None != year:
             articles_counter[article.date.month] += 1
@@ -130,100 +124,6 @@
             articles_counter[article.date.year] += 1
     return articles_counter, articles
 
-def import_opml(email, opml_content):
-    """
-    Import new feeds from an OPML file.
- """ - user = User.query.filter(User.email == email).first() - try: - subscriptions = opml.from_string(opml_content) - except: - logger.exception("Parsing OPML file failed:") - raise - - def read(subsubscription, nb=0): - """ - Parse recursively through the categories and sub-categories. - """ - for subscription in subsubscription: - if len(subscription) != 0: - nb = read(subscription, nb) - else: - try: - title = subscription.text - except: - title = "" - try: - description = subscription.description - except: - description = "" - try: - link = subscription.xmlUrl - except: - continue - if None != Feed.query.filter(Feed.user_id == user.id, Feed.link == link).first(): - continue - try: - site_link = subscription.htmlUrl - except: - site_link = "" - new_feed = Feed(title=title, description=description, - link=link, site_link=site_link, - enabled=True) - user.feeds.append(new_feed) - nb += 1 - return nb - nb = read(subscriptions) - db.session.commit() - return nb - -def import_json(email, json_content): - """ - Import an account from a JSON file. - """ - user = User.query.filter(User.email == email).first() - json_account = json.loads(json_content.decode("utf-8")) - nb_feeds, nb_articles = 0, 0 - # Create feeds: - for feed in json_account["result"]: - if None != Feed.query.filter(Feed.user_id == user.id, - Feed.link == feed["link"]).first(): - continue - new_feed = Feed(title=feed["title"], - description="", - link=feed["link"], - site_link=feed["site_link"], - created_date=datetime.datetime. - fromtimestamp(int(feed["created_date"])), - enabled=feed["enabled"]) - user.feeds.append(new_feed) - nb_feeds += 1 - db.session.commit() - # Create articles: - for feed in json_account["result"]: - user_feed = Feed.query.filter(Feed.user_id == user.id, - Feed.link == feed["link"]).first() - if None != user_feed: - for article in feed["articles"]: - if None == Article.query.filter(Article.user_id == user.id, - Article.feed_id == user_feed.id, - Article.link == article["link"]).first(): - new_article = Article(entry_id=article["link"], - link=article["link"], - title=article["title"], - content=article["content"], - readed=article["readed"], - like=article["like"], - retrieved_date=datetime.datetime. - fromtimestamp(int(article["retrieved_date"])), - date=datetime.datetime. - fromtimestamp(int(article["date"])), - user_id=user.id, - feed_id=user_feed.id) - user_feed.articles.append(new_article) - nb_articles += 1 - db.session.commit() - return nb_feeds, nb_articles def clean_url(url): """ @@ -242,6 +142,7 @@ def clean_url(url): parsed_url.fragment ]).rstrip('=') + def load_stop_words(): """ Load the stop words and return them in a list. @@ -258,6 +159,7 @@ def load_stop_words(): stop_words += stop_wods_file.read().split(";") return stop_words + def top_words(articles, n=10, size=5): """ Return the n most frequent words in a list. @@ -272,15 +174,12 @@ def top_words(articles, n=10, size=5): words[word] += 1 return words.most_common(n) + def tag_cloud(tags): """ Generates a tags cloud. 
""" tags.sort(key=operator.itemgetter(0)) return '\n'.join([('<font size=%d>%s</font>' % \ - (min(1 + count * 7 / max([tag[1] for tag in tags]), 7), word)) \ - for (word, count) in tags]) - -if __name__ == "__main__": - import_opml("root@newspipe.localhost", "./var/feeds_test.opml") - #import_opml("root@newspipe.localhost", "./var/Newspipe.opml") + (min(1 + count * 7 / max([tag[1] for tag in tags]), 7), word)) \ + for (word, count) in tags]) diff --git a/src/web/lib/utils.py b/src/lib/utils.py index d206b769..d206b769 100644 --- a/src/web/lib/utils.py +++ b/src/lib/utils.py diff --git a/src/tests/base.py b/src/tests/base.py deleted file mode 100644 index d6f62583..00000000 --- a/src/tests/base.py +++ /dev/null @@ -1,41 +0,0 @@ -import os -os.environ['PYAGG_TESTING'] = 'true' - -import unittest -from bootstrap import db -import runserver -from tests.fixtures import populate_db, reset_db -from werkzeug.exceptions import NotFound - - -class BasePyaggTest(unittest.TestCase): - _contr_cls = None - - def _get_from_contr(self, obj_id, user_id=None): - return self._contr_cls(user_id).get(id=obj_id).dump() - - def _test_controller_rights(self, obj, user_id): - obj_id = obj['id'] - self.assertEquals(obj, self._get_from_contr(obj_id)) - self.assertEquals(obj, self._get_from_contr(obj_id, user_id)) - # fetching non existent object - self.assertRaises(NotFound, self._get_from_contr, 99, user_id) - # fetching object with inexistent user - self.assertRaises(NotFound, self._get_from_contr, obj_id, 99) - # fetching object with wrong user - self.assertRaises(NotFound, self._get_from_contr, obj_id, user_id + 1) - self.assertRaises(NotFound, self._contr_cls().delete, 99) - self.assertRaises(NotFound, self._contr_cls(user_id).delete, 99) - self.assertEquals(obj['id'], - self._contr_cls(user_id).delete(obj_id).id) - self.assertRaises(NotFound, self._contr_cls(user_id).delete, obj_id) - - def setUp(self): - populate_db(db) - - def tearDown(self): - reset_db(db) - - -if __name__ == '__main__': - unittest.main() diff --git a/src/tests/controllers/__init__.py b/src/tests/controllers/__init__.py deleted file mode 100644 index 26922c43..00000000 --- a/src/tests/controllers/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -from tests.controllers.feed import FeedControllerTest -from tests.controllers.article import ArticleControllerTest - - -__all__ = ['FeedControllerTest', 'ArticleControllerTest'] diff --git a/src/tests/controllers/article.py b/src/tests/controllers/article.py deleted file mode 100644 index a62d1a83..00000000 --- a/src/tests/controllers/article.py +++ /dev/null @@ -1,117 +0,0 @@ -from tests.base import BasePyaggTest -from web.controllers import ArticleController -from web.controllers import FeedController - - -class ArticleControllerTest(BasePyaggTest): - _contr_cls = ArticleController - - def test_article_rights(self): - article = ArticleController(2).read()[0].dump() - self.assertFalse(article['readed']) - article['readed'] = True # article get read when retreived through get - self._test_controller_rights(article, article['user_id']) - - def test_article_challange_method(self): - self.assertEquals(0, len(list(ArticleController().challenge( - [{'id': art.id} for art in ArticleController(3).read()])))) - self.assertEquals(9, len(list(ArticleController(2).challenge( - [{'id': art.id} for art in ArticleController(3).read()])))) - self.assertEquals(9, len(list(ArticleController(2).challenge( - [{'entry_id': art.id} for art in ArticleController(3).read()] - )))) - - def test_article_get_unread(self): - 
-        self.assertEquals({1: 3, 2: 3, 3: 3},
-                ArticleController(2).count_by_feed(readed=False))
-        self.assertEquals({4: 3, 5: 3, 6: 3},
-                ArticleController(3).count_by_feed(readed=False))
-
-    def test_create_using_filters(self):
-        feed_ctr = FeedController(2)
-        feed1 = feed_ctr.read()[0].dump()
-        feed2 = feed_ctr.read()[1].dump()
-        feed3 = feed_ctr.read()[2].dump()
-        feed_ctr.update({'id': feed1['id']},
-                        {'filters': [{"type": "simple match",
-                                      "pattern": "no see pattern",
-                                      "action on": "match",
-                                      "action": "mark as read"}]})
-        feed_ctr.update({'id': feed3['id']},
-                        {'filters': [{"type": "regex",
-                                      "pattern": ".*(pattern1|pattern2).*",
-                                      "action on": "no match",
-                                      "action": "mark as favorite"},
-                                     {"type": "simple match",
-                                      "pattern": "no see pattern",
-                                      "action on": "match",
-                                      "action": "mark as read"}]})
-        art1 = ArticleController(2).create(
-                entry_id="thisisnotatest",
-                feed_id=feed1['id'],
-                title="garbage no see pattern garbage",
-                content="doesn't matter",
-                link="doesn't matter either")
-        art2 = ArticleController(2).create(
-                entry_id="thisisnotatesteither",
-                feed_id=feed1['id'],
-                title="garbage see pattern garbage",
-                content="doesn't matter2",
-                link="doesn't matter either2")
-
-        art3 = ArticleController(2).create(
-                entry_id="thisisnotatest",
-                user_id=2,
-                feed_id=feed2['id'],
-                title="garbage no see pattern garbage",
-                content="doesn't matter",
-                link="doesn't matter either")
-        art4 = ArticleController(2).create(
-                entry_id="thisisnotatesteither",
-                user_id=2,
-                feed_id=feed2['id'],
-                title="garbage see pattern garbage",
-                content="doesn't matter2",
-                link="doesn't matter either2")
-
-        art5 = ArticleController(2).create(
-                entry_id="thisisnotatest",
-                feed_id=feed3['id'],
-                title="garbage pattern1 garbage",
-                content="doesn't matter",
-                link="doesn't matter either")
-        art6 = ArticleController(2).create(
-                entry_id="thisisnotatesteither",
-                feed_id=feed3['id'],
-                title="garbage pattern2 garbage",
-                content="doesn't matter2",
-                link="doesn't matter either2")
-        art7 = ArticleController(2).create(
-                entry_id="thisisnotatesteither",
-                feed_id=feed3['id'],
-                title="garbage no see pattern3 garbage",
-                content="doesn't matter3",
-                link="doesn't matter either3")
-        art8 = ArticleController(2).create(
-                entry_id="thisisnotatesteither",
-                feed_id=feed3['id'],
-                title="garbage pattern4 garbage",
-                content="doesn't matter4",
-                link="doesn't matter either4")
-
-        self.assertTrue(art1.readed)
-        self.assertFalse(art1.like)
-        self.assertFalse(art2.readed)
-        self.assertFalse(art2.like)
-        self.assertFalse(art3.readed)
-        self.assertFalse(art3.like)
-        self.assertFalse(art4.readed)
-        self.assertFalse(art4.like)
-        self.assertFalse(art5.readed)
-        self.assertFalse(art5.like)
-        self.assertFalse(art6.readed)
-        self.assertFalse(art6.like)
-        self.assertTrue(art7.readed)
-        self.assertTrue(art7.like)
-        self.assertFalse(art8.readed)
-        self.assertTrue(art8.like)
diff --git a/src/tests/controllers/feed.py b/src/tests/controllers/feed.py
deleted file mode 100644
index 7dd77295..00000000
--- a/src/tests/controllers/feed.py
+++ /dev/null
@@ -1,27 +0,0 @@
-from tests.base import BasePyaggTest
-from web.controllers import FeedController
-from web.controllers import ArticleController
-
-
-class FeedControllerTest(BasePyaggTest):
-    _contr_cls = FeedController
-
-    def test_feed_rights(self):
-        feed = FeedController(2).read()[0].dump()
-        self.assertTrue(3,
-                ArticleController().read(feed_id=feed['id']).count())
-        self._test_controller_rights(feed, feed['user_id'])
-        # checking articles are deleted after the feed has been deleted
-
-    def test_feed_article_deletion(self):
-        feed_ctr = FeedController(2)
-        feed = feed_ctr.read()[0].dump()
-        feed_ctr.delete(feed['id'])
-        self.assertFalse(0,
-                ArticleController().read(feed_id=feed['id']).count())
-
-    def test_feed_list_fetchable(self):
-        self.assertEquals(3, len(FeedController(3).list_fetchable()))
-        self.assertEquals(0, len(FeedController(3).list_fetchable()))
-        self.assertEquals(3, len(FeedController().list_fetchable()))
-        self.assertEquals(0, len(FeedController().list_fetchable()))
diff --git a/src/tests/fixtures.py b/src/tests/fixtures.py
deleted file mode 100644
index 16a9cb81..00000000
--- a/src/tests/fixtures.py
+++ /dev/null
@@ -1,31 +0,0 @@
-from web.models import db_create, db_empty, User, Article, Feed
-
-
-def populate_db(db):
-    role_admin, role_user = db_create(db)
-    user1, user2 = [User(nickname=name, email="%s@test.te" % name,
-                         pwdhash=name, roles=[role_user], enabled=True)
-                    for name in ["user1", "user2"]]
-    db.session.add(user1)
-    db.session.add(user2)
-    db.session.commit()
-
-    for user in (user1, user2):
-        for feed_name in ['feed1', 'feed2', 'feed3']:
-            feed = Feed(link=feed_name, user_id=user.id,
-                        title="%r %r" % (user.nickname, feed_name))
-            db.session.add(feed)
-            db.session.commit()
-            for article in ['article1', 'article2', 'article3']:
-                entry = "%s %s %s" % (user.nickname, feed.title, article)
-                article = Article(entry_id=entry, link=article,
-                                  feed_id=feed.id, user_id=user.id,
-                                  title=entry, content=article)
-                db.session.add(article)
-                db.session.commit()
-
-    db.session.commit()
-
-
-def reset_db(db):
-    db_empty(db)
diff --git a/src/web/controllers/article.py b/src/web/controllers/article.py
index 4607b225..d7058229 100644
--- a/src/web/controllers/article.py
+++ b/src/web/controllers/article.py
@@ -6,7 +6,7 @@ from collections import Counter
 
 from bootstrap import db
 from .abstract import AbstractController
-from web.lib.article_utils import process_filters
+from lib.article_utils import process_filters
 from web.controllers import CategoryController, FeedController
 from web.models import Article
 
diff --git a/src/web/controllers/feed.py b/src/web/controllers/feed.py
index 7203c37e..a77fd926 100644
--- a/src/web/controllers/feed.py
+++ b/src/web/controllers/feed.py
@@ -6,7 +6,7 @@ import conf
 from .abstract import AbstractController
 from .icon import IconController
 from web.models import User, Feed
-from web.lib.utils import clear_string
+from lib.utils import clear_string
 
 logger = logging.getLogger(__name__)
 DEFAULT_LIMIT = 5
diff --git a/src/web/export.py b/src/web/export.py
deleted file mode 100644
index 98473c9e..00000000
--- a/src/web/export.py
+++ /dev/null
@@ -1,58 +0,0 @@
-#! /usr/bin/env python
-#-*- coding: utf-8 -*-
-
-# Newspipe - A Web based news aggregator.
-# Copyright (C) 2010-2016 Cédric Bonhomme - https://www.cedricbonhomme.org
-#
-# For more information : https://github.com/Newspipe/Newspipe
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Affero General Public License as
-# published by the Free Software Foundation, either version 3 of the
-# License, or (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU Affero General Public License for more details.
-#
-# You should have received a copy of the GNU Affero General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-
-__author__ = "Cedric Bonhomme"
-__version__ = "$Revision: 0.7 $"
-__date__ = "$Date: 2011/10/24 $"
-__revision__ = "$Date: 2016/10/06 $"
-__copyright__ = "Copyright (c) Cedric Bonhomme"
-__license__ = "AGPLv3"
-
-#
-# This file contains the export functions of newspipe.
-#
-
-from flask import jsonify
-
-def export_json(user):
-    """
-    Export all articles of user in JSON.
-    """
-    result = []
-    for feed in user.feeds:
-        result.append({
-            "title": feed.title,
-            "description": feed.description,
-            "link": feed.link,
-            "site_link": feed.site_link,
-            "enabled": feed.enabled,
-            "created_date": feed.created_date.strftime('%s'),
-            "articles": [ {
-                "title": article.title,
-                "link": article.link,
-                "content": article.content,
-                "readed": article.readed,
-                "like": article.like,
-                "date": article.date.strftime('%s'),
-                "retrieved_date": article.retrieved_date.strftime('%s')
-            } for article in feed.articles ]
-        })
-    return jsonify(result=result)
diff --git a/src/web/forms.py b/src/web/forms.py
index be1650d8..8088f27b 100644
--- a/src/web/forms.py
+++ b/src/web/forms.py
@@ -34,7 +34,7 @@ from wtforms import TextField, TextAreaField, PasswordField, BooleanField, \
     SubmitField, IntegerField, SelectField, validators, HiddenField
 from wtforms.fields.html5 import EmailField, URLField
 
-from web.lib import misc_utils
+from lib import misc_utils
 from web.controllers import UserController
 from web.models import User
 
diff --git a/src/web/lib/view_utils.py b/src/web/lib/view_utils.py
index d4c119da..1d8c6aed 100644
--- a/src/web/lib/view_utils.py
+++ b/src/web/lib/view_utils.py
@@ -1,6 +1,6 @@
 from functools import wraps
 from flask import request, Response, make_response
-from web.lib.utils import to_hash
+from lib.utils import to_hash
 
 
 def etag_match(func):
diff --git a/src/web/models/category.py b/src/web/models/category.py
index 15b616bf..2da7809a 100644
--- a/src/web/models/category.py
+++ b/src/web/models/category.py
@@ -1,3 +1,6 @@
+#! /usr/bin/env python
+# -*- coding: utf-8 -*-
+
 from bootstrap import db
 from sqlalchemy import Index
 from web.models.right_mixin import RightMixin
@@ -10,7 +13,7 @@ class Category(db.Model, RightMixin):
     # relationships
     user_id = db.Column(db.Integer, db.ForeignKey('user.id'))
     feeds = db.relationship('Feed', cascade='all,delete-orphan')
-    articles = db.relationship('Article',
+    articles = db.relationship('Article',
                                cascade='all,delete-orphan')
 
     # index
diff --git a/src/web/models/icon.py b/src/web/models/icon.py
index 22ef1164..adc9cf69 100644
--- a/src/web/models/icon.py
+++ b/src/web/models/icon.py
@@ -1,3 +1,6 @@
+#! /usr/bin/env python
+# -*- coding: utf-8 -*-
+
 from bootstrap import db
diff --git a/src/web/views/admin.py b/src/web/views/admin.py
index a9e1e43d..4de4009a 100644
--- a/src/web/views/admin.py
+++ b/src/web/views/admin.py
@@ -4,8 +4,8 @@ from flask_babel import gettext, format_timedelta
 from flask_login import login_required, current_user
 from werkzeug import generate_password_hash
 
+from lib.utils import redirect_url
 from web.views.common import admin_permission
-from web.lib.utils import redirect_url
 from web.controllers import UserController
 from web.forms import InformationMessageForm, UserForm
 
diff --git a/src/web/views/article.py b/src/web/views/article.py
index 283ef001..640de8b4 100644
--- a/src/web/views/article.py
+++ b/src/web/views/article.py
@@ -7,8 +7,8 @@ from flask_login import login_required, current_user
 
 from bootstrap import db
 
-from web.export import export_json
-from web.lib.utils import clear_string, redirect_url
+from lib.utils import clear_string, redirect_url
+from lib.data import export_json
 from web.controllers import (ArticleController, UserController,
                              CategoryController)
 from web.lib.view_utils import etag_match
diff --git a/src/web/views/category.py b/src/web/views/category.py
index 1a81a5c4..2bdcf9cc 100644
--- a/src/web/views/category.py
+++ b/src/web/views/category.py
@@ -3,7 +3,7 @@ from flask_babel import gettext
 from flask_login import login_required, current_user
 
 from web.forms import CategoryForm
-from web.lib.utils import redirect_url
+from lib.utils import redirect_url
 from web.lib.view_utils import etag_match
 from web.controllers import ArticleController, FeedController, \
                             CategoryController
diff --git a/src/web/views/common.py b/src/web/views/common.py
index f9613c01..e422fd57 100644
--- a/src/web/views/common.py
+++ b/src/web/views/common.py
@@ -6,7 +6,7 @@ from flask_login import login_user
 from flask_principal import (Identity, Permission, RoleNeed,
                              session_identity_loader, identity_changed)
 from web.controllers import UserController
-from web.lib.utils import default_handler
+from lib.utils import default_handler
 
 admin_role = RoleNeed('admin')
 api_role = RoleNeed('api')
diff --git a/src/web/views/feed.py b/src/web/views/feed.py
index 3edb942e..fa5cfc77 100644
--- a/src/web/views/feed.py
+++ b/src/web/views/feed.py
@@ -10,9 +10,9 @@ from flask_babel import gettext
 from flask_login import login_required, current_user
 
 import conf
-from web.lib import misc_utils, utils
+from lib import misc_utils, utils
+from lib.feed_utils import construct_feed_from
 from web.lib.view_utils import etag_match
-from web.lib.feed_utils import construct_feed_from
 from web.forms import AddFeedForm
 from web.controllers import (CategoryController, FeedController,
                              ArticleController)
diff --git a/src/web/views/home.py b/src/web/views/home.py
index 179f3f9d..5274dc12 100644
--- a/src/web/views/home.py
+++ b/src/web/views/home.py
@@ -9,8 +9,8 @@ from flask_babel import gettext, get_locale
 from babel.dates import format_datetime, format_timedelta
 
 import conf
-from web.lib.utils import redirect_url
-from web.lib import misc_utils
+from lib.utils import redirect_url
+from lib import misc_utils
 from web.lib.view_utils import etag_match
 from web.views.common import jsonify
 
diff --git a/src/web/views/user.py b/src/web/views/user.py
index 91cf7e4a..58c23dd2 100644
--- a/src/web/views/user.py
+++ b/src/web/views/user.py
@@ -8,7 +8,8 @@ from flask_login import login_required, current_user
 
 import conf
 from notifications import notifications
-from web.lib import misc_utils
+from lib import misc_utils
+from lib.data import import_opml, import_json
 from web.lib.user_utils import confirm_token
 from web.controllers import (UserController, FeedController,
                              ArticleController, CategoryController)
@@ -59,7 +60,7 @@ def management():
                 flash(gettext('File not allowed.'), 'danger')
             else:
                 try:
-                    nb = misc_utils.import_opml(current_user.email, data.read())
+                    nb = import_opml(current_user.email, data.read())
                     if conf.CRAWLING_METHOD == "classic":
                         misc_utils.fetch(current_user.email, None)
                     flash(str(nb) + ' ' + gettext('feeds imported.'),
@@ -75,7 +76,7 @@ def management():
                 flash(gettext('File not allowed.'), 'danger')
             else:
                 try:
-                    nb = misc_utils.import_json(current_user.email, data.read())
+                    nb = import_json(current_user.email, data.read())
                     flash(gettext('Account imported.'), "success")
                 except:
                     flash(gettext("Impossible to import the account."),
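Taken together, the commit moves the shared helpers out of `web/lib` into a top-level `lib` package and regroups the OPML/JSON import/export code, previously split between `web/export.py` and `web/lib/misc_utils.py`, into the new `src/lib/data.py`. A minimal sketch of the import style calling code uses after this change, based only on the module paths, function names, and the test e-mail address visible in the diff above (it assumes Newspipe's `src/` directory is on `sys.path` and the Flask app, configuration, and database are already set up):

```python
# Sketch only: paths and names are taken from the diff above, not a guaranteed API.
from lib.utils import clear_string, redirect_url            # was web.lib.utils
from lib.feed_utils import construct_feed_from              # was web.lib.feed_utils
from lib.data import import_opml, import_json, export_json  # new src/lib/data.py

# Example with a hypothetical OPML file name: import a user's subscriptions,
# the same call web/views/user.py now makes when a file is uploaded.
with open("feeds.opml") as f:
    nb = import_opml("root@newspipe.localhost", f.read())
print("%d feeds imported" % nb)
```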