diff options
-rw-r--r-- | README.rst | 2 | ||||
-rw-r--r-- | pyaggr3g470r/duplicate.py (renamed from pyaggr3g470r/compare.py) | 6 | ||||
-rw-r--r-- | pyaggr3g470r/views.py | 4 | ||||
-rw-r--r-- | vagrant/bootstrap.sh | 2 |
4 files changed, 11 insertions, 3 deletions
@@ -69,6 +69,7 @@ The geek way
     $ cd pyaggr3g470r
     $ heroku create
     $ heroku addons:add heroku-postgresql:dev
+    $ heroku config:set BUILDPACK_URL=https://github.com/cedricbonhomme/heroku-buildpack-scipy
     $ heroku config:set HEROKU=1
     $ git push heroku master
     $ heroku run init
@@ -111,6 +112,7 @@ Deploying the application on a traditional server
     $ sudo apt-get install python libpq-dev python-dev python-pip build-essential git
     $ sudo apt-get install libatlas-base-dev gfortran # for scipy
     $ sudo apt-get install libxml2-dev libxslt1-dev # for lxml
+    $ sudo apt-get install python-nose # for scikit-learn
     $ git clone https://bitbucket.org/cedricbonhomme/pyaggr3g470r.git
     $ cd pyaggr3g470r
     $ sudo pip install --upgrade -r requirements.txt
diff --git a/pyaggr3g470r/compare.py b/pyaggr3g470r/duplicate.py
index 80f3d694..6220a3cb 100644
--- a/pyaggr3g470r/compare.py
+++ b/pyaggr3g470r/duplicate.py
@@ -37,7 +37,11 @@ def compare_documents(feed):
     """
     Compare a list of documents by pair.
     """
-    nltk.download("punkt")
+    downloaded = nltk.download("punkt")
+    if not downloaded:
+        # Ubuntu packaged version still uses old URL
+        dl = nltk.downloader.Downloader("https://nltk.github.com/nltk_data/")
+        dl.download("punkt")
     duplicates = []
     for pair in itertools.combinations(feed.articles, 2):
         try:
diff --git a/pyaggr3g470r/views.py b/pyaggr3g470r/views.py
index c9acec2d..c6c7b5b3 100644
--- a/pyaggr3g470r/views.py
+++ b/pyaggr3g470r/views.py
@@ -42,7 +42,7 @@
 from sqlalchemy.exc import IntegrityError
 from werkzeug import generate_password_hash
 import conf
-from pyaggr3g470r import utils, notifications, export, compare
+from pyaggr3g470r import utils, notifications, export, duplicate
 from pyaggr3g470r import app, db, allowed_file, babel
 from pyaggr3g470r.models import User, Feed, Article, Role
 from pyaggr3g470r.decorators import feed_access_required
@@ -484,7 +484,7 @@ def duplicates(feed_id=None):
     """
     feed = Feed.query.filter(Feed.user_id == g.user.id, Feed.id == feed_id).first()
     duplicates = []
-    duplicates = compare.compare_documents(feed)
+    duplicates = duplicate.compare_documents(feed)
     return render_template('duplicates.html', duplicates=duplicates)
 
 @app.route('/index_database', methods=['GET'])
diff --git a/vagrant/bootstrap.sh b/vagrant/bootstrap.sh
index 38f3b689..750fe4f5 100644
--- a/vagrant/bootstrap.sh
+++ b/vagrant/bootstrap.sh
@@ -17,6 +17,8 @@ cd pyaggr3g470r
 apt-get install -y libxml2-dev libxslt1-dev
# For scipy:
apt-get install -y libatlas-base-dev gfortran
+# For scikit-learn:
+apt-get install -y python-nose
# installation with pip
sudo pip install --upgrade -r requirements.txt
# copy of the default configuration files for vagrant
|