author     Cédric Bonhomme <cedric@cedricbonhomme.org>  2016-11-17 08:30:06 +0100
committer  Cédric Bonhomme <cedric@cedricbonhomme.org>  2016-11-17 08:30:06 +0100
commit     b0e987fbafaa28226c54157fb11993079c5341e2 (patch)
tree       1f0cd04a505dce4680155f8bb4c7bb757984c030 /src
parent     Bugfix: should import Article in order to resolve the 'date' column for the o... (diff)
cleaning the mess in the libs directories
Diffstat (limited to 'src')
-rw-r--r--  src/bootstrap.py | 4
-rw-r--r--  src/conf/conf.cfg-sample | 6
-rw-r--r--  src/crawler/classic_crawler.py | 4
-rw-r--r--  src/crawler/http_crawler.py | 251
-rw-r--r--  src/lib/__init__.py (renamed from src/tests/__init__.py) | 0
-rw-r--r--  src/lib/article_utils.py (renamed from src/web/lib/article_utils.py) | 2
-rw-r--r--  src/lib/data.py | 162
-rw-r--r--  src/lib/feed_utils.py (renamed from src/web/lib/feed_utils.py) | 2
-rwxr-xr-x  src/lib/misc_utils.py (renamed from src/web/lib/misc_utils.py) | 139
-rw-r--r--  src/lib/utils.py (renamed from src/web/lib/utils.py) | 0
-rw-r--r--  src/tests/base.py | 41
-rw-r--r--  src/tests/controllers/__init__.py | 5
-rw-r--r--  src/tests/controllers/article.py | 117
-rw-r--r--  src/tests/controllers/feed.py | 27
-rw-r--r--  src/tests/fixtures.py | 31
-rw-r--r--  src/web/controllers/article.py | 2
-rw-r--r--  src/web/controllers/feed.py | 2
-rw-r--r--  src/web/export.py | 58
-rw-r--r--  src/web/forms.py | 2
-rw-r--r--  src/web/lib/view_utils.py | 2
-rw-r--r--  src/web/models/category.py | 5
-rw-r--r--  src/web/models/icon.py | 3
-rw-r--r--  src/web/views/admin.py | 2
-rw-r--r--  src/web/views/article.py | 4
-rw-r--r--  src/web/views/category.py | 2
-rw-r--r--  src/web/views/common.py | 2
-rw-r--r--  src/web/views/feed.py | 4
-rw-r--r--  src/web/views/home.py | 4
-rw-r--r--  src/web/views/user.py | 7
29 files changed, 215 insertions, 675 deletions
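Most of what follows relocates the shared helpers from src/web/lib/ into a new top-level src/lib/ package and folds the old import/export code into lib/data.py, so call sites now import from lib instead of web.lib. A minimal sketch of the resulting import style; the module and function names are taken from the diffs below, while the surrounding snippet itself is hypothetical:

# Hypothetical call site after this commit: helpers live in the top-level
# `lib` package instead of `web.lib`.
from lib.utils import clear_string                  # was: web.lib.utils
from lib.feed_utils import construct_feed_from      # was: web.lib.feed_utils
from lib.article_utils import construct_article     # was: web.lib.article_utils
from lib.data import import_opml, export_json       # was: web.lib.misc_utils / web.export

def feed_title(feed_url):
    # Toy illustration only: build a feed description from a URL and clean its title.
    feed = construct_feed_from(url=feed_url)
    return clear_string(feed.get('title', ''))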
diff --git a/src/bootstrap.py b/src/bootstrap.py
index f9de381a..5af29c69 100644
--- a/src/bootstrap.py
+++ b/src/bootstrap.py
@@ -18,6 +18,10 @@ def set_logging(log_path=None, log_level=logging.INFO, modules=(),
if conf.ON_HEROKU:
log_format = '%(levelname)s %(message)s'
if log_path:
+ if not os.path.exists(os.path.dirname(log_path)):
+ os.makedirs(os.path.dirname(log_path))
+ if not os.path.exists(log_path):
+ open(log_path, 'w').close()
handler = logging.FileHandler(log_path)
else:
handler = logging.StreamHandler()
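For context, the bootstrap.py hunk above makes sure the log directory and file exist before logging.FileHandler opens the path. A standalone equivalent with a placeholder path; note that FileHandler creates a missing file by itself, so the directory creation is the part that matters:

import logging
import os

log_path = './var/newspipe.log'                           # placeholder; matches the new conf.cfg-sample default
os.makedirs(os.path.dirname(log_path) or '.', exist_ok=True)  # exist_ok=True replaces the explicit existence test
handler = logging.FileHandler(log_path)                   # opens (and creates) the file itself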
diff --git a/src/conf/conf.cfg-sample b/src/conf/conf.cfg-sample
index 6fae48b5..7c4668af 100644
--- a/src/conf/conf.cfg-sample
+++ b/src/conf/conf.cfg-sample
@@ -9,7 +9,7 @@ platform_url = http://127.0.0.1:5000/
admin_email =
security_password_salt = a secret to confirm user account
token_validity_period = 3600
-log_path = ./src/web/var/newspipe.log
+log_path = ./var/newspipe.log
nb_worker = 5
log_level = info
[database]
@@ -17,9 +17,7 @@ database_url = postgres://pgsqluser:pgsqlpwd@127.0.0.1:5432/aggregator
[crawler]
crawling_method = classic
default_max_error = 6
-user_agent = Newspipe (https://github.com/Newspipe/Newspipe)
-api_login =
-api_passwd =
+user_agent = Newspipe (https://github.com/newspipe/newspipe)
timeout = 30
resolv = true
feed_refresh_interval = 120
diff --git a/src/crawler/classic_crawler.py b/src/crawler/classic_crawler.py
index eb75b78f..34726a83 100644
--- a/src/crawler/classic_crawler.py
+++ b/src/crawler/classic_crawler.py
@@ -37,8 +37,8 @@ import conf
from bootstrap import db
from web.models import User
from web.controllers import FeedController, ArticleController
-from web.lib.feed_utils import construct_feed_from, is_parsing_ok
-from web.lib.article_utils import construct_article, extract_id, \
+from lib.feed_utils import construct_feed_from, is_parsing_ok
+from lib.article_utils import construct_article, extract_id, \
get_article_content
logger = logging.getLogger(__name__)
diff --git a/src/crawler/http_crawler.py b/src/crawler/http_crawler.py
deleted file mode 100644
index f480fe96..00000000
--- a/src/crawler/http_crawler.py
+++ /dev/null
@@ -1,251 +0,0 @@
-"""
-Here's a sum up on how it works :
-
-CrawlerScheduler.run
- will retreive a list of feeds to be refreshed and pass result to
-CrawlerScheduler.callback
- which will retreive each feed and treat result with
-FeedCrawler.callback
- which will interprete the result (status_code, etag) collect ids
- and match them agaisnt pyagg which will cause
-PyAggUpdater.callback
- to create the missing entries
-"""
-
-import time
-import conf
-import json
-import logging
-import feedparser
-from datetime import datetime, timedelta
-from time import strftime, gmtime
-from concurrent.futures import ThreadPoolExecutor
-from requests_futures.sessions import FuturesSession
-from web.lib.utils import default_handler, to_hash
-from web.lib.feed_utils import construct_feed_from
-from web.lib.article_utils import extract_id, construct_article
-
-logger = logging.getLogger(__name__)
-logging.captureWarnings(True)
-API_ROOT = "api/v2.0/"
-
-
-class AbstractCrawler:
-
- def __init__(self, auth, pool=None, session=None):
- self.auth = auth
- self.pool = pool or ThreadPoolExecutor(max_workers=conf.NB_WORKER)
- self.session = session or FuturesSession(executor=self.pool)
- self.session.verify = False
- self.url = conf.PLATFORM_URL
-
- def query_pyagg(self, method, urn, data=None):
- """A wrapper for internal call, method should be ones you can find
- on requests (header, post, get, options, ...), urn the distant
- resources you want to access on pyagg, and data, the data you wanna
- transmit."""
- if data is None:
- data = {}
- method = getattr(self.session, method)
- return method("%s%s%s" % (self.url, API_ROOT, urn),
- auth=self.auth, data=json.dumps(data,
- default=default_handler),
- headers={'Content-Type': 'application/json',
- 'User-Agent': conf.USER_AGENT})
-
- def wait(self, max_wait=300, checks=5, wait_for=2):
- checked, second_waited = 0, 0
- while True:
- time.sleep(wait_for)
- second_waited += wait_for
- if second_waited > max_wait:
- logger.warn('Exiting after %d seconds', second_waited)
- break
- if self.pool._work_queue.qsize():
- checked = 0
- continue
- checked += 1
- if checked == checks:
- break
-
-
-class PyAggUpdater(AbstractCrawler):
-
- def __init__(self, feed, entries, headers, parsed_feed,
- auth, pool=None, session=None):
- self.feed = feed
- self.entries = entries
- self.headers = headers
- self.parsed_feed = parsed_feed
- super().__init__(auth, pool, session)
-
- def callback(self, response):
- """Will process the result from the challenge, creating missing article
- and updating the feed"""
- article_created = False
- if response.result().status_code != 204:
- results = response.result().json()
- logger.debug('%r %r - %d entries were not matched '
- 'and will be created',
- self.feed['id'], self.feed['title'], len(results))
- for id_to_create in results:
- article_created = True
- entry = construct_article(
- self.entries[tuple(sorted(id_to_create.items()))],
- self.feed)
- logger.info('%r %r - creating %r for %r - %r', self.feed['id'],
- self.feed['title'], entry['title'],
- entry['user_id'], id_to_create)
- self.query_pyagg('post', 'article', entry)
-
- logger.debug('%r %r - updating feed etag %r last_mod %r',
- self.feed['id'], self.feed['title'],
- self.headers.get('etag', ''),
- self.headers.get('last-modified', ''))
-
- up_feed = {'error_count': 0, 'last_error': None,
- 'etag': self.headers.get('etag', ''),
- 'last_modified': self.headers.get('last-modified',
- strftime('%a, %d %b %Y %X %Z', gmtime()))}
- fresh_feed = construct_feed_from(url=self.feed['link'],
- fp_parsed=self.parsed_feed)
- for key in ('description', 'site_link', 'icon_url'):
- if fresh_feed.get(key) and fresh_feed[key] != self.feed.get(key):
- up_feed[key] = fresh_feed[key]
- if not self.feed.get('title'):
- up_feed['title'] = fresh_feed.get('title', '')
- up_feed['user_id'] = self.feed['user_id']
- # re-getting that feed earlier since new entries appeared
- if article_created:
- up_feed['last_retrieved'] \
- = (datetime.now() - timedelta(minutes=45)).isoformat()
-
- diff_keys = {key for key in up_feed
- if up_feed[key] != self.feed.get(key)}
- if not diff_keys:
- return # no change in the feed, no update
- if not article_created and diff_keys == {'last_modified', 'etag'}:
- return # meaningless if no new article has been published
- logger.info('%r %r - pushing feed attrs %r',
- self.feed['id'], self.feed['title'],
- {key: "%s -> %s" % (up_feed[key], self.feed.get(key))
- for key in up_feed if up_feed[key] != self.feed.get(key)})
-
- self.query_pyagg('put', 'feed/%d' % self.feed['id'], up_feed)
-
-
-class FeedCrawler(AbstractCrawler):
-
- def __init__(self, feed, auth, pool=None, session=None):
- self.feed = feed
- super().__init__(auth, pool, session)
-
- def clean_feed(self):
- """Will reset the errors counters on a feed that have known errors"""
- if self.feed.get('error_count') or self.feed.get('last_error'):
- self.query_pyagg('put', 'feed/%d' % self.feed['id'],
- {'error_count': 0, 'last_error': ''})
-
- def callback(self, response):
- """will fetch the feed and interprete results (304, etag) or will
- challenge pyagg to compare gotten entries with existing ones"""
- try:
- response = response.result()
- response.raise_for_status()
- except Exception as error:
- error_count = self.feed['error_count'] + 1
- logger.exception('%r %r - an error occured while fetching '
- 'feed; bumping error count to %r',
- self.feed['id'], self.feed['title'], error_count)
- future = self.query_pyagg('put', 'feed/%d' % self.feed['id'],
- {'error_count': error_count,
- 'last_error': str(error),
- 'user_id': self.feed['user_id']})
- return
-
- if response.status_code == 304:
- logger.info("%r %r - feed responded with 304",
- self.feed['id'], self.feed['title'])
- self.clean_feed()
- return
- if 'etag' not in response.headers:
- logger.debug('%r %r - manually generating etag',
- self.feed['id'], self.feed['title'])
- response.headers['etag'] = 'pyagg/"%s"' % to_hash(response.text)
- if response.headers['etag'] and self.feed['etag'] \
- and response.headers['etag'] == self.feed['etag']:
- if 'pyagg' in self.feed['etag']:
- logger.info("%r %r - calculated hash matches (%d)",
- self.feed['id'], self.feed['title'],
- response.status_code)
- else:
- logger.info("%r %r - feed responded with same etag (%d)",
- self.feed['id'], self.feed['title'],
- response.status_code)
- self.clean_feed()
- return
- else:
- logger.debug('%r %r - etag mismatch %r != %r',
- self.feed['id'], self.feed['title'],
- response.headers['etag'], self.feed['etag'])
- logger.info('%r %r - cache validation failed, challenging entries',
- self.feed['id'], self.feed['title'])
-
- ids, entries = [], {}
- parsed_response = feedparser.parse(response.content)
- for entry in parsed_response['entries']:
- entry_ids = extract_id(entry)
- entry_ids['feed_id'] = self.feed['id']
- entry_ids['user_id'] = self.feed['user_id']
- entries[tuple(sorted(entry_ids.items()))] = entry
- ids.append(entry_ids)
- logger.debug('%r %r - found %d entries %r',
- self.feed['id'], self.feed['title'], len(ids), ids)
- future = self.query_pyagg('get', 'articles/challenge', {'ids': ids})
- updater = PyAggUpdater(self.feed, entries, response.headers,
- parsed_response,
- self.auth, self.pool, self.session)
- future.add_done_callback(updater.callback)
-
-
-class CrawlerScheduler(AbstractCrawler):
-
- def __init__(self, username, password, pool=None, session=None):
- self.auth = (username, password)
- super(CrawlerScheduler, self).__init__(self.auth, pool, session)
-
- def prepare_headers(self, feed):
- """For a known feed, will construct some header dictionnary"""
- headers = {'User-Agent': conf.USER_AGENT}
- if feed.get('last_modified'):
- headers['If-Modified-Since'] = feed['last_modified']
- if feed.get('etag') and 'pyagg' not in feed['etag']:
- headers['If-None-Match'] = feed['etag']
- logger.debug('%r %r - calculated headers %r',
- feed['id'], feed['title'], headers)
- return headers
-
- def callback(self, response):
- """processes feeds that need to be fetched"""
- response = response.result()
- response.raise_for_status()
- if response.status_code == 204:
- logger.debug("No feed to fetch")
- return
- feeds = response.json()
- logger.debug('%d to fetch %r', len(feeds), feeds)
- for feed in feeds:
- logger.debug('%r %r - fetching resources',
- feed['id'], feed['title'])
- future = self.session.get(feed['link'],
- headers=self.prepare_headers(feed))
-
- feed_crwlr = FeedCrawler(feed, self.auth, self.pool, self.session)
- future.add_done_callback(feed_crwlr.callback)
-
- def run(self, **kwargs):
- """entry point, will retreive feeds to be fetch
- and launch the whole thing"""
- logger.debug('retreving fetchable feed')
- future = self.query_pyagg('get', 'feeds/fetchable', kwargs)
- future.add_done_callback(self.callback)
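The deleted http_crawler.py was organised around chained futures: every HTTP call returns a future and the next stage is attached with add_done_callback, following the chain described in its docstring (CrawlerScheduler.run, then FeedCrawler.callback, then PyAggUpdater.callback). A stripped-down sketch of that pattern using only the standard library; the URLs and the fake fetch() are placeholders, not the removed code:

import logging
import threading
from concurrent.futures import ThreadPoolExecutor

logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)
pool = ThreadPoolExecutor(max_workers=5)
done = threading.Event()

def fetch(url):
    # Stand-in for session.get(feed['link']) in the removed crawler.
    logger.debug('fetching %s', url)
    return '<rss>...</rss>'

def updater_callback(future):
    # Last stage: where PyAggUpdater.callback would create the missing articles.
    logger.debug('challenge answered with %r', future.result())
    done.set()

def feed_callback(future):
    # Middle stage: interpret the fetched feed, then chain the next request,
    # as FeedCrawler.callback did when challenging the API.
    logger.debug('feed fetched, %d bytes', len(future.result()))
    nxt = pool.submit(fetch, 'https://example.org/api/articles/challenge')
    nxt.add_done_callback(updater_callback)

first = pool.submit(fetch, 'https://example.org/feed.xml')   # CrawlerScheduler.run
first.add_done_callback(feed_callback)
done.wait(timeout=10)          # crude equivalent of AbstractCrawler.wait()
pool.shutdown(wait=True)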
diff --git a/src/tests/__init__.py b/src/lib/__init__.py
index e69de29b..e69de29b 100644
--- a/src/tests/__init__.py
+++ b/src/lib/__init__.py
diff --git a/src/web/lib/article_utils.py b/src/lib/article_utils.py
index 2c5ea8c3..49494e85 100644
--- a/src/web/lib/article_utils.py
+++ b/src/lib/article_utils.py
@@ -10,7 +10,7 @@ from bs4 import BeautifulSoup, SoupStrainer
from requests.exceptions import MissingSchema
import conf
-from web.lib.utils import jarr_get
+from lib.utils import jarr_get
logger = logging.getLogger(__name__)
PROCESSED_DATE_KEYS = {'published', 'created', 'updated'}
diff --git a/src/lib/data.py b/src/lib/data.py
new file mode 100644
index 00000000..d887c003
--- /dev/null
+++ b/src/lib/data.py
@@ -0,0 +1,162 @@
+#! /usr/bin/env python
+#-*- coding: utf-8 -*-
+
+# Newspipe - A Web based news aggregator.
+# Copyright (C) 2010-2016 Cédric Bonhomme - https://www.cedricbonhomme.org
+#
+# For more information : https://github.com/newspipe/newspipe
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+__author__ = "Cedric Bonhomme"
+__version__ = "$Revision: 0.1 $"
+__date__ = "$Date: 2016/11/17 $"
+__revision__ = "$Date: 2016/11/17 $"
+__copyright__ = "Copyright (c) Cedric Bonhomme"
+__license__ = "AGPLv3"
+
+#
+# This file contains the import/export functions of Newspipe.
+#
+
+import json
+import opml
+import datetime
+from flask import jsonify
+
+from bootstrap import db
+from web.models import User, Feed, Article
+
+
+def import_opml(email, opml_content):
+ """
+ Import new feeds from an OPML file.
+ """
+ user = User.query.filter(User.email == email).first()
+ try:
+ subscriptions = opml.from_string(opml_content)
+ except:
+ logger.exception("Parsing OPML file failed:")
+ raise
+
+ def read(subsubscription, nb=0):
+ """
+ Parse recursively through the categories and sub-categories.
+ """
+ for subscription in subsubscription:
+ if len(subscription) != 0:
+ nb = read(subscription, nb)
+ else:
+ try:
+ title = subscription.text
+ except:
+ title = ""
+ try:
+ description = subscription.description
+ except:
+ description = ""
+ try:
+ link = subscription.xmlUrl
+ except:
+ continue
+ if None != Feed.query.filter(Feed.user_id == user.id, Feed.link == link).first():
+ continue
+ try:
+ site_link = subscription.htmlUrl
+ except:
+ site_link = ""
+ new_feed = Feed(title=title, description=description,
+ link=link, site_link=site_link,
+ enabled=True)
+ user.feeds.append(new_feed)
+ nb += 1
+ return nb
+ nb = read(subscriptions)
+ db.session.commit()
+ return nb
+
+
+def import_json(email, json_content):
+ """
+ Import an account from a JSON file.
+ """
+ user = User.query.filter(User.email == email).first()
+ json_account = json.loads(json_content.decode("utf-8"))
+ nb_feeds, nb_articles = 0, 0
+ # Create feeds:
+ for feed in json_account["result"]:
+ if None != Feed.query.filter(Feed.user_id == user.id,
+ Feed.link == feed["link"]).first():
+ continue
+ new_feed = Feed(title=feed["title"],
+ description="",
+ link=feed["link"],
+ site_link=feed["site_link"],
+ created_date=datetime.datetime.
+ fromtimestamp(int(feed["created_date"])),
+ enabled=feed["enabled"])
+ user.feeds.append(new_feed)
+ nb_feeds += 1
+ db.session.commit()
+ # Create articles:
+ for feed in json_account["result"]:
+ user_feed = Feed.query.filter(Feed.user_id == user.id,
+ Feed.link == feed["link"]).first()
+ if None != user_feed:
+ for article in feed["articles"]:
+ if None == Article.query.filter(Article.user_id == user.id,
+ Article.feed_id == user_feed.id,
+ Article.link == article["link"]).first():
+ new_article = Article(entry_id=article["link"],
+ link=article["link"],
+ title=article["title"],
+ content=article["content"],
+ readed=article["readed"],
+ like=article["like"],
+ retrieved_date=datetime.datetime.
+ fromtimestamp(int(article["retrieved_date"])),
+ date=datetime.datetime.
+ fromtimestamp(int(article["date"])),
+ user_id=user.id,
+ feed_id=user_feed.id)
+ user_feed.articles.append(new_article)
+ nb_articles += 1
+ db.session.commit()
+ return nb_feeds, nb_articles
+
+
+def export_json(user):
+ """
+ Export all articles of user in JSON.
+ """
+ result = []
+ for feed in user.feeds:
+ result.append({
+ "title": feed.title,
+ "description": feed.description,
+ "link": feed.link,
+ "site_link": feed.site_link,
+ "enabled": feed.enabled,
+ "created_date": feed.created_date.strftime('%s'),
+ "articles": [ {
+ "title": article.title,
+ "link": article.link,
+ "content": article.content,
+ "readed": article.readed,
+ "like": article.like,
+ "date": article.date.strftime('%s'),
+ "retrieved_date": article.retrieved_date.strftime('%s')
+ } for article in feed.articles ]
+ })
+ return jsonify(result=result)
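lib/data.py now gathers the OPML/JSON import and the JSON export that previously lived in web/lib/misc_utils.py and web/export.py. A hedged usage sketch, assuming the Flask application and database from bootstrap are already configured (as runserver.py does) and that the account and file path below exist:

from lib.data import import_opml, import_json, export_json

with open('./var/feeds.opml', 'rb') as opml_file:            # placeholder path
    nb = import_opml('user@example.org', opml_file.read())   # placeholder account
print('%d feeds imported' % nb)

# export_json(user) builds a Flask JSON response with every feed and article
# of the given User object, so it has to be called inside a request context.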
diff --git a/src/web/lib/feed_utils.py b/src/lib/feed_utils.py
index ef5d4f08..492391aa 100644
--- a/src/web/lib/feed_utils.py
+++ b/src/lib/feed_utils.py
@@ -6,7 +6,7 @@ import feedparser
from conf import CRAWLER_USER_AGENT
from bs4 import BeautifulSoup, SoupStrainer
-from web.lib.utils import try_keys, try_get_icon_url, rebuild_url
+from lib.utils import try_keys, try_get_icon_url, rebuild_url
logger = logging.getLogger(__name__)
logging.captureWarnings(True)
diff --git a/src/web/lib/misc_utils.py b/src/lib/misc_utils.py
index 6a0e00ec..d594c01e 100755
--- a/src/web/lib/misc_utils.py
+++ b/src/lib/misc_utils.py
@@ -4,7 +4,7 @@
# Newspipe - A Web based news aggregator.
# Copyright (C) 2010-2016 Cédric Bonhomme - https://www.cedricbonhomme.org
#
-# For more information : https://github.com/Newspipe/Newspipe
+# For more information : https://github.com/newspipe/newspipe
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
@@ -20,27 +20,18 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
__author__ = "Cedric Bonhomme"
-__version__ = "$Revision: 1.8 $"
+__version__ = "$Revision: 1.9 $"
__date__ = "$Date: 2010/12/07 $"
-__revision__ = "$Date: 2016/04/10 $"
+__revision__ = "$Date: 2016/01/17 $"
__copyright__ = "Copyright (c) Cedric Bonhomme"
__license__ = "AGPLv3"
-#
-# This file provides functions used for:
-# - import from a JSON file;
-# - generation of tags cloud;
-# - HTML processing.
-#
-
import re
import os
import sys
import glob
-import opml
import json
import logging
-import datetime
import operator
import urllib
import subprocess
@@ -49,21 +40,19 @@ try:
from urlparse import urlparse, parse_qs, urlunparse
except:
from urllib.parse import urlparse, parse_qs, urlunparse, urljoin
-from bs4 import BeautifulSoup
from collections import Counter
from contextlib import contextmanager
from flask import request
import conf
-from bootstrap import db
-from web import controllers
-from web.models import User, Feed, Article
-from web.lib.utils import clear_string
+from web.controllers import ArticleController
+from lib.utils import clear_string
logger = logging.getLogger(__name__)
ALLOWED_EXTENSIONS = set(['xml', 'opml', 'json'])
+
def is_safe_url(target):
"""
Ensures that a redirect target will lead to the same server.
@@ -73,6 +62,7 @@ def is_safe_url(target):
return test_url.scheme in ('http', 'https') and \
ref_url.netloc == test_url.netloc
+
def get_redirect_target():
"""
Looks at various hints to find the redirect target.
@@ -83,6 +73,7 @@ def get_redirect_target():
if is_safe_url(target):
return target
+
def allowed_file(filename):
"""
Check if the uploaded file is allowed.
@@ -90,6 +81,7 @@ def allowed_file(filename):
return '.' in filename and \
filename.rsplit('.', 1)[1] in ALLOWED_EXTENSIONS
+
@contextmanager
def opened_w_error(filename, mode="r"):
try:
@@ -102,6 +94,7 @@ def opened_w_error(filename, mode="r"):
finally:
f.close()
+
def fetch(id, feed_id=None):
"""
    Fetch the feeds in a new process.
@@ -113,16 +106,17 @@ def fetch(id, feed_id=None):
cmd.append('--feed_id='+str(feed_id))
return subprocess.Popen(cmd, stdout=subprocess.PIPE)
+
def history(user_id, year=None, month=None):
"""
Sort articles by year and month.
"""
articles_counter = Counter()
- articles = controllers.ArticleController(user_id).read()
+ articles = ArticleController(user_id).read()
if None != year:
- articles = articles.filter(sqlalchemy.extract('year', Article.date) == year)
+ articles = articles.filter(sqlalchemy.extract('year', 'Article.date') == year)
if None != month:
- articles = articles.filter(sqlalchemy.extract('month', Article.date) == month)
+ articles = articles.filter(sqlalchemy.extract('month', 'Article.date') == month)
for article in articles.all():
if None != year:
articles_counter[article.date.month] += 1
@@ -130,100 +124,6 @@ def history(user_id, year=None, month=None):
articles_counter[article.date.year] += 1
return articles_counter, articles
-def import_opml(email, opml_content):
- """
- Import new feeds from an OPML file.
- """
- user = User.query.filter(User.email == email).first()
- try:
- subscriptions = opml.from_string(opml_content)
- except:
- logger.exception("Parsing OPML file failed:")
- raise
-
- def read(subsubscription, nb=0):
- """
- Parse recursively through the categories and sub-categories.
- """
- for subscription in subsubscription:
- if len(subscription) != 0:
- nb = read(subscription, nb)
- else:
- try:
- title = subscription.text
- except:
- title = ""
- try:
- description = subscription.description
- except:
- description = ""
- try:
- link = subscription.xmlUrl
- except:
- continue
- if None != Feed.query.filter(Feed.user_id == user.id, Feed.link == link).first():
- continue
- try:
- site_link = subscription.htmlUrl
- except:
- site_link = ""
- new_feed = Feed(title=title, description=description,
- link=link, site_link=site_link,
- enabled=True)
- user.feeds.append(new_feed)
- nb += 1
- return nb
- nb = read(subscriptions)
- db.session.commit()
- return nb
-
-def import_json(email, json_content):
- """
- Import an account from a JSON file.
- """
- user = User.query.filter(User.email == email).first()
- json_account = json.loads(json_content.decode("utf-8"))
- nb_feeds, nb_articles = 0, 0
- # Create feeds:
- for feed in json_account["result"]:
- if None != Feed.query.filter(Feed.user_id == user.id,
- Feed.link == feed["link"]).first():
- continue
- new_feed = Feed(title=feed["title"],
- description="",
- link=feed["link"],
- site_link=feed["site_link"],
- created_date=datetime.datetime.
- fromtimestamp(int(feed["created_date"])),
- enabled=feed["enabled"])
- user.feeds.append(new_feed)
- nb_feeds += 1
- db.session.commit()
- # Create articles:
- for feed in json_account["result"]:
- user_feed = Feed.query.filter(Feed.user_id == user.id,
- Feed.link == feed["link"]).first()
- if None != user_feed:
- for article in feed["articles"]:
- if None == Article.query.filter(Article.user_id == user.id,
- Article.feed_id == user_feed.id,
- Article.link == article["link"]).first():
- new_article = Article(entry_id=article["link"],
- link=article["link"],
- title=article["title"],
- content=article["content"],
- readed=article["readed"],
- like=article["like"],
- retrieved_date=datetime.datetime.
- fromtimestamp(int(article["retrieved_date"])),
- date=datetime.datetime.
- fromtimestamp(int(article["date"])),
- user_id=user.id,
- feed_id=user_feed.id)
- user_feed.articles.append(new_article)
- nb_articles += 1
- db.session.commit()
- return nb_feeds, nb_articles
def clean_url(url):
"""
@@ -242,6 +142,7 @@ def clean_url(url):
parsed_url.fragment
]).rstrip('=')
+
def load_stop_words():
"""
Load the stop words and return them in a list.
@@ -258,6 +159,7 @@ def load_stop_words():
stop_words += stop_wods_file.read().split(";")
return stop_words
+
def top_words(articles, n=10, size=5):
"""
Return the n most frequent words in a list.
@@ -272,15 +174,12 @@ def top_words(articles, n=10, size=5):
words[word] += 1
return words.most_common(n)
+
def tag_cloud(tags):
"""
Generates a tags cloud.
"""
tags.sort(key=operator.itemgetter(0))
return '\n'.join([('<font size=%d>%s</font>' % \
- (min(1 + count * 7 / max([tag[1] for tag in tags]), 7), word)) \
- for (word, count) in tags])
-
-if __name__ == "__main__":
- import_opml("root@newspipe.localhost", "./var/feeds_test.opml")
- #import_opml("root@newspipe.localhost", "./var/Newspipe.opml")
+ (min(1 + count * 7 / max([tag[1] for tag in tags]), 7), word)) \
+ for (word, count) in tags])
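misc_utils.py keeps the word-frequency helpers shown in the hunk above; a small illustration of the tag_cloud() contract with made-up counts (top_words() would normally produce such pairs from article titles):

from lib.misc_utils import tag_cloud   # import path after this commit

# tag_cloud() expects (word, count) pairs, sorts them alphabetically and
# scales each word to an HTML <font size=1..7> element.
print(tag_cloud([('newspipe', 12), ('rss', 4), ('python', 9)]))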
diff --git a/src/web/lib/utils.py b/src/lib/utils.py
index d206b769..d206b769 100644
--- a/src/web/lib/utils.py
+++ b/src/lib/utils.py
diff --git a/src/tests/base.py b/src/tests/base.py
deleted file mode 100644
index d6f62583..00000000
--- a/src/tests/base.py
+++ /dev/null
@@ -1,41 +0,0 @@
-import os
-os.environ['PYAGG_TESTING'] = 'true'
-
-import unittest
-from bootstrap import db
-import runserver
-from tests.fixtures import populate_db, reset_db
-from werkzeug.exceptions import NotFound
-
-
-class BasePyaggTest(unittest.TestCase):
- _contr_cls = None
-
- def _get_from_contr(self, obj_id, user_id=None):
- return self._contr_cls(user_id).get(id=obj_id).dump()
-
- def _test_controller_rights(self, obj, user_id):
- obj_id = obj['id']
- self.assertEquals(obj, self._get_from_contr(obj_id))
- self.assertEquals(obj, self._get_from_contr(obj_id, user_id))
- # fetching non existent object
- self.assertRaises(NotFound, self._get_from_contr, 99, user_id)
- # fetching object with inexistent user
- self.assertRaises(NotFound, self._get_from_contr, obj_id, 99)
- # fetching object with wrong user
- self.assertRaises(NotFound, self._get_from_contr, obj_id, user_id + 1)
- self.assertRaises(NotFound, self._contr_cls().delete, 99)
- self.assertRaises(NotFound, self._contr_cls(user_id).delete, 99)
- self.assertEquals(obj['id'],
- self._contr_cls(user_id).delete(obj_id).id)
- self.assertRaises(NotFound, self._contr_cls(user_id).delete, obj_id)
-
- def setUp(self):
- populate_db(db)
-
- def tearDown(self):
- reset_db(db)
-
-
-if __name__ == '__main__':
- unittest.main()
diff --git a/src/tests/controllers/__init__.py b/src/tests/controllers/__init__.py
deleted file mode 100644
index 26922c43..00000000
--- a/src/tests/controllers/__init__.py
+++ /dev/null
@@ -1,5 +0,0 @@
-from tests.controllers.feed import FeedControllerTest
-from tests.controllers.article import ArticleControllerTest
-
-
-__all__ = ['FeedControllerTest', 'ArticleControllerTest']
diff --git a/src/tests/controllers/article.py b/src/tests/controllers/article.py
deleted file mode 100644
index a62d1a83..00000000
--- a/src/tests/controllers/article.py
+++ /dev/null
@@ -1,117 +0,0 @@
-from tests.base import BasePyaggTest
-from web.controllers import ArticleController
-from web.controllers import FeedController
-
-
-class ArticleControllerTest(BasePyaggTest):
- _contr_cls = ArticleController
-
- def test_article_rights(self):
- article = ArticleController(2).read()[0].dump()
- self.assertFalse(article['readed'])
- article['readed'] = True # article get read when retreived through get
- self._test_controller_rights(article, article['user_id'])
-
- def test_article_challange_method(self):
- self.assertEquals(0, len(list(ArticleController().challenge(
- [{'id': art.id} for art in ArticleController(3).read()]))))
- self.assertEquals(9, len(list(ArticleController(2).challenge(
- [{'id': art.id} for art in ArticleController(3).read()]))))
- self.assertEquals(9, len(list(ArticleController(2).challenge(
- [{'entry_id': art.id} for art in ArticleController(3).read()]
- ))))
-
- def test_article_get_unread(self):
- self.assertEquals({1: 3, 2: 3, 3: 3},
- ArticleController(2).count_by_feed(readed=False))
- self.assertEquals({4: 3, 5: 3, 6: 3},
- ArticleController(3).count_by_feed(readed=False))
-
- def test_create_using_filters(self):
- feed_ctr = FeedController(2)
- feed1 = feed_ctr.read()[0].dump()
- feed2 = feed_ctr.read()[1].dump()
- feed3 = feed_ctr.read()[2].dump()
- feed_ctr.update({'id': feed1['id']},
- {'filters': [{"type": "simple match",
- "pattern": "no see pattern",
- "action on": "match",
- "action": "mark as read"}]})
- feed_ctr.update({'id': feed3['id']},
- {'filters': [{"type": "regex",
- "pattern": ".*(pattern1|pattern2).*",
- "action on": "no match",
- "action": "mark as favorite"},
- {"type": "simple match",
- "pattern": "no see pattern",
- "action on": "match",
- "action": "mark as read"}]})
- art1 = ArticleController(2).create(
- entry_id="thisisnotatest",
- feed_id=feed1['id'],
- title="garbage no see pattern garbage",
- content="doesn't matter",
- link="doesn't matter either")
- art2 = ArticleController(2).create(
- entry_id="thisisnotatesteither",
- feed_id=feed1['id'],
- title="garbage see pattern garbage",
- content="doesn't matter2",
- link="doesn't matter either2")
-
- art3 = ArticleController(2).create(
- entry_id="thisisnotatest",
- user_id=2,
- feed_id=feed2['id'],
- title="garbage no see pattern garbage",
- content="doesn't matter",
- link="doesn't matter either")
- art4 = ArticleController(2).create(
- entry_id="thisisnotatesteither",
- user_id=2,
- feed_id=feed2['id'],
- title="garbage see pattern garbage",
- content="doesn't matter2",
- link="doesn't matter either2")
-
- art5 = ArticleController(2).create(
- entry_id="thisisnotatest",
- feed_id=feed3['id'],
- title="garbage pattern1 garbage",
- content="doesn't matter",
- link="doesn't matter either")
- art6 = ArticleController(2).create(
- entry_id="thisisnotatesteither",
- feed_id=feed3['id'],
- title="garbage pattern2 garbage",
- content="doesn't matter2",
- link="doesn't matter either2")
- art7 = ArticleController(2).create(
- entry_id="thisisnotatesteither",
- feed_id=feed3['id'],
- title="garbage no see pattern3 garbage",
- content="doesn't matter3",
- link="doesn't matter either3")
- art8 = ArticleController(2).create(
- entry_id="thisisnotatesteither",
- feed_id=feed3['id'],
- title="garbage pattern4 garbage",
- content="doesn't matter4",
- link="doesn't matter either4")
-
- self.assertTrue(art1.readed)
- self.assertFalse(art1.like)
- self.assertFalse(art2.readed)
- self.assertFalse(art2.like)
- self.assertFalse(art3.readed)
- self.assertFalse(art3.like)
- self.assertFalse(art4.readed)
- self.assertFalse(art4.like)
- self.assertFalse(art5.readed)
- self.assertFalse(art5.like)
- self.assertFalse(art6.readed)
- self.assertFalse(art6.like)
- self.assertTrue(art7.readed)
- self.assertTrue(art7.like)
- self.assertFalse(art8.readed)
- self.assertTrue(art8.like)
diff --git a/src/tests/controllers/feed.py b/src/tests/controllers/feed.py
deleted file mode 100644
index 7dd77295..00000000
--- a/src/tests/controllers/feed.py
+++ /dev/null
@@ -1,27 +0,0 @@
-from tests.base import BasePyaggTest
-from web.controllers import FeedController
-from web.controllers import ArticleController
-
-
-class FeedControllerTest(BasePyaggTest):
- _contr_cls = FeedController
-
- def test_feed_rights(self):
- feed = FeedController(2).read()[0].dump()
- self.assertTrue(3,
- ArticleController().read(feed_id=feed['id']).count())
- self._test_controller_rights(feed, feed['user_id'])
- # checking articles are deleted after the feed has been deleted
-
- def test_feed_article_deletion(self):
- feed_ctr = FeedController(2)
- feed = feed_ctr.read()[0].dump()
- feed_ctr.delete(feed['id'])
- self.assertFalse(0,
- ArticleController().read(feed_id=feed['id']).count())
-
- def test_feed_list_fetchable(self):
- self.assertEquals(3, len(FeedController(3).list_fetchable()))
- self.assertEquals(0, len(FeedController(3).list_fetchable()))
- self.assertEquals(3, len(FeedController().list_fetchable()))
- self.assertEquals(0, len(FeedController().list_fetchable()))
diff --git a/src/tests/fixtures.py b/src/tests/fixtures.py
deleted file mode 100644
index 16a9cb81..00000000
--- a/src/tests/fixtures.py
+++ /dev/null
@@ -1,31 +0,0 @@
-from web.models import db_create, db_empty, User, Article, Feed
-
-
-def populate_db(db):
- role_admin, role_user = db_create(db)
- user1, user2 = [User(nickname=name, email="%s@test.te" % name,
- pwdhash=name, roles=[role_user], enabled=True)
- for name in ["user1", "user2"]]
- db.session.add(user1)
- db.session.add(user2)
- db.session.commit()
-
- for user in (user1, user2):
- for feed_name in ['feed1', 'feed2', 'feed3']:
- feed = Feed(link=feed_name, user_id=user.id,
- title="%r %r" % (user.nickname, feed_name))
- db.session.add(feed)
- db.session.commit()
- for article in ['article1', 'article2', 'article3']:
- entry = "%s %s %s" % (user.nickname, feed.title, article)
- article = Article(entry_id=entry, link=article,
- feed_id=feed.id, user_id=user.id,
- title=entry, content=article)
- db.session.add(article)
- db.session.commit()
-
- db.session.commit()
-
-
-def reset_db(db):
- db_empty(db)
diff --git a/src/web/controllers/article.py b/src/web/controllers/article.py
index 4607b225..d7058229 100644
--- a/src/web/controllers/article.py
+++ b/src/web/controllers/article.py
@@ -6,7 +6,7 @@ from collections import Counter
from bootstrap import db
from .abstract import AbstractController
-from web.lib.article_utils import process_filters
+from lib.article_utils import process_filters
from web.controllers import CategoryController, FeedController
from web.models import Article
diff --git a/src/web/controllers/feed.py b/src/web/controllers/feed.py
index 7203c37e..a77fd926 100644
--- a/src/web/controllers/feed.py
+++ b/src/web/controllers/feed.py
@@ -6,7 +6,7 @@ import conf
from .abstract import AbstractController
from .icon import IconController
from web.models import User, Feed
-from web.lib.utils import clear_string
+from lib.utils import clear_string
logger = logging.getLogger(__name__)
DEFAULT_LIMIT = 5
diff --git a/src/web/export.py b/src/web/export.py
deleted file mode 100644
index 98473c9e..00000000
--- a/src/web/export.py
+++ /dev/null
@@ -1,58 +0,0 @@
-#! /usr/bin/env python
-#-*- coding: utf-8 -*-
-
-# Newspipe - A Web based news aggregator.
-# Copyright (C) 2010-2016 Cédric Bonhomme - https://www.cedricbonhomme.org
-#
-# For more information : https://github.com/Newspipe/Newspipe
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Affero General Public License as
-# published by the Free Software Foundation, either version 3 of the
-# License, or (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU Affero General Public License for more details.
-#
-# You should have received a copy of the GNU Affero General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-
-__author__ = "Cedric Bonhomme"
-__version__ = "$Revision: 0.7 $"
-__date__ = "$Date: 2011/10/24 $"
-__revision__ = "$Date: 2016/10/06 $"
-__copyright__ = "Copyright (c) Cedric Bonhomme"
-__license__ = "AGPLv3"
-
-#
-# This file contains the export functions of newspipe.
-#
-
-from flask import jsonify
-
-def export_json(user):
- """
- Export all articles of user in JSON.
- """
- result = []
- for feed in user.feeds:
- result.append({
- "title": feed.title,
- "description": feed.description,
- "link": feed.link,
- "site_link": feed.site_link,
- "enabled": feed.enabled,
- "created_date": feed.created_date.strftime('%s'),
- "articles": [ {
- "title": article.title,
- "link": article.link,
- "content": article.content,
- "readed": article.readed,
- "like": article.like,
- "date": article.date.strftime('%s'),
- "retrieved_date": article.retrieved_date.strftime('%s')
- } for article in feed.articles ]
- })
- return jsonify(result=result)
diff --git a/src/web/forms.py b/src/web/forms.py
index be1650d8..8088f27b 100644
--- a/src/web/forms.py
+++ b/src/web/forms.py
@@ -34,7 +34,7 @@ from wtforms import TextField, TextAreaField, PasswordField, BooleanField, \
SubmitField, IntegerField, SelectField, validators, HiddenField
from wtforms.fields.html5 import EmailField, URLField
-from web.lib import misc_utils
+from lib import misc_utils
from web.controllers import UserController
from web.models import User
diff --git a/src/web/lib/view_utils.py b/src/web/lib/view_utils.py
index d4c119da..1d8c6aed 100644
--- a/src/web/lib/view_utils.py
+++ b/src/web/lib/view_utils.py
@@ -1,6 +1,6 @@
from functools import wraps
from flask import request, Response, make_response
-from web.lib.utils import to_hash
+from lib.utils import to_hash
def etag_match(func):
diff --git a/src/web/models/category.py b/src/web/models/category.py
index 15b616bf..2da7809a 100644
--- a/src/web/models/category.py
+++ b/src/web/models/category.py
@@ -1,3 +1,6 @@
+#! /usr/bin/env python
+# -*- coding: utf-8 -*-
+
from bootstrap import db
from sqlalchemy import Index
from web.models.right_mixin import RightMixin
@@ -10,7 +13,7 @@ class Category(db.Model, RightMixin):
# relationships
user_id = db.Column(db.Integer, db.ForeignKey('user.id'))
feeds = db.relationship('Feed', cascade='all,delete-orphan')
- articles = db.relationship('Article',
+ articles = db.relationship('Article',
cascade='all,delete-orphan')
# index
diff --git a/src/web/models/icon.py b/src/web/models/icon.py
index 22ef1164..adc9cf69 100644
--- a/src/web/models/icon.py
+++ b/src/web/models/icon.py
@@ -1,3 +1,6 @@
+#! /usr/bin/env python
+# -*- coding: utf-8 -*-
+
from bootstrap import db
diff --git a/src/web/views/admin.py b/src/web/views/admin.py
index a9e1e43d..4de4009a 100644
--- a/src/web/views/admin.py
+++ b/src/web/views/admin.py
@@ -4,8 +4,8 @@ from flask_babel import gettext, format_timedelta
from flask_login import login_required, current_user
from werkzeug import generate_password_hash
+from lib.utils import redirect_url
from web.views.common import admin_permission
-from web.lib.utils import redirect_url
from web.controllers import UserController
from web.forms import InformationMessageForm, UserForm
diff --git a/src/web/views/article.py b/src/web/views/article.py
index 283ef001..640de8b4 100644
--- a/src/web/views/article.py
+++ b/src/web/views/article.py
@@ -7,8 +7,8 @@ from flask_login import login_required, current_user
from bootstrap import db
-from web.export import export_json
-from web.lib.utils import clear_string, redirect_url
+from lib.utils import clear_string, redirect_url
+from lib.data import export_json
from web.controllers import (ArticleController, UserController,
CategoryController)
from web.lib.view_utils import etag_match
diff --git a/src/web/views/category.py b/src/web/views/category.py
index 1a81a5c4..2bdcf9cc 100644
--- a/src/web/views/category.py
+++ b/src/web/views/category.py
@@ -3,7 +3,7 @@ from flask_babel import gettext
from flask_login import login_required, current_user
from web.forms import CategoryForm
-from web.lib.utils import redirect_url
+from lib.utils import redirect_url
from web.lib.view_utils import etag_match
from web.controllers import ArticleController, FeedController, \
CategoryController
diff --git a/src/web/views/common.py b/src/web/views/common.py
index f9613c01..e422fd57 100644
--- a/src/web/views/common.py
+++ b/src/web/views/common.py
@@ -6,7 +6,7 @@ from flask_login import login_user
from flask_principal import (Identity, Permission, RoleNeed,
session_identity_loader, identity_changed)
from web.controllers import UserController
-from web.lib.utils import default_handler
+from lib.utils import default_handler
admin_role = RoleNeed('admin')
api_role = RoleNeed('api')
diff --git a/src/web/views/feed.py b/src/web/views/feed.py
index 3edb942e..fa5cfc77 100644
--- a/src/web/views/feed.py
+++ b/src/web/views/feed.py
@@ -10,9 +10,9 @@ from flask_babel import gettext
from flask_login import login_required, current_user
import conf
-from web.lib import misc_utils, utils
+from lib import misc_utils, utils
+from lib.feed_utils import construct_feed_from
from web.lib.view_utils import etag_match
-from web.lib.feed_utils import construct_feed_from
from web.forms import AddFeedForm
from web.controllers import (CategoryController, FeedController,
ArticleController)
diff --git a/src/web/views/home.py b/src/web/views/home.py
index 179f3f9d..5274dc12 100644
--- a/src/web/views/home.py
+++ b/src/web/views/home.py
@@ -9,8 +9,8 @@ from flask_babel import gettext, get_locale
from babel.dates import format_datetime, format_timedelta
import conf
-from web.lib.utils import redirect_url
-from web.lib import misc_utils
+from lib.utils import redirect_url
+from lib import misc_utils
from web.lib.view_utils import etag_match
from web.views.common import jsonify
diff --git a/src/web/views/user.py b/src/web/views/user.py
index 91cf7e4a..58c23dd2 100644
--- a/src/web/views/user.py
+++ b/src/web/views/user.py
@@ -8,7 +8,8 @@ from flask_login import login_required, current_user
import conf
from notifications import notifications
-from web.lib import misc_utils
+from lib import misc_utils
+from lib.data import import_opml, import_json
from web.lib.user_utils import confirm_token
from web.controllers import (UserController, FeedController, ArticleController,
CategoryController)
@@ -59,7 +60,7 @@ def management():
flash(gettext('File not allowed.'), 'danger')
else:
try:
- nb = misc_utils.import_opml(current_user.email, data.read())
+ nb = import_opml(current_user.email, data.read())
if conf.CRAWLING_METHOD == "classic":
misc_utils.fetch(current_user.email, None)
flash(str(nb) + ' ' + gettext('feeds imported.'),
@@ -75,7 +76,7 @@ def management():
flash(gettext('File not allowed.'), 'danger')
else:
try:
- nb = misc_utils.import_json(current_user.email, data.read())
+ nb = import_json(current_user.email, data.read())
flash(gettext('Account imported.'), "success")
except:
flash(gettext("Impossible to import the account."),