about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- README.rst 2
-rw-r--r-- pyaggr3g470r/duplicate.py (renamed from pyaggr3g470r/compare.py) 6
-rw-r--r-- pyaggr3g470r/views.py 4
-rw-r--r-- vagrant/bootstrap.sh 2
4 files changed, 11 insertions, 3 deletions
diff --git a/README.rst b/README.rst
index 46c8b866..52a22cfd 100644
--- a/README.rst
+++ b/README.rst
@@ -69,6 +69,7 @@ The geek way
$ cd pyaggr3g470r
$ heroku create
$ heroku addons:add heroku-postgresql:dev
+ $ heroku config:set BUILDPACK_URL=https://github.com/cedricbonhomme/heroku-buildpack-scipy
$ heroku config:set HEROKU=1
$ git push heroku master
$ heroku run init
@@ -111,6 +112,7 @@ Deploying the application on a traditional server
$ sudo apt-get install python libpq-dev python-dev python-pip build-essential git
$ sudo apt-get install libatlas-base-dev gfortran # for scipy
$ sudo apt-get install libxml2-dev libxslt1-dev # for lxml
+ $ sudo apt-get install python-nose # for scikit-learn
$ git clone https://bitbucket.org/cedricbonhomme/pyaggr3g470r.git
$ cd pyaggr3g470r
$ sudo pip install --upgrade -r requirements.txt
diff --git a/pyaggr3g470r/compare.py b/pyaggr3g470r/duplicate.py
index 80f3d694..6220a3cb 100644
--- a/pyaggr3g470r/compare.py
+++ b/pyaggr3g470r/duplicate.py
@@ -37,7 +37,11 @@ def compare_documents(feed):
"""
Compare a list of documents by pair.
"""
- nltk.download("punkt")
+ downloaded = nltk.download("punkt")
+ if not downloaded:
+ # Ubuntu packaged version still uses old URL
+ dl = nltk.downloader.Downloader("https://nltk.github.com/nltk_data/")
+ dl.download("punkt")
duplicates = []
for pair in itertools.combinations(feed.articles, 2):
try:
diff --git a/pyaggr3g470r/views.py b/pyaggr3g470r/views.py
index c9acec2d..c6c7b5b3 100644
--- a/pyaggr3g470r/views.py
+++ b/pyaggr3g470r/views.py
@@ -42,7 +42,7 @@ from sqlalchemy.exc import IntegrityError
from werkzeug import generate_password_hash
import conf
-from pyaggr3g470r import utils, notifications, export, compare
+from pyaggr3g470r import utils, notifications, export, duplicate
from pyaggr3g470r import app, db, allowed_file, babel
from pyaggr3g470r.models import User, Feed, Article, Role
from pyaggr3g470r.decorators import feed_access_required
@@ -484,7 +484,7 @@ def duplicates(feed_id=None):
"""
feed = Feed.query.filter(Feed.user_id == g.user.id, Feed.id == feed_id).first()
duplicates = []
- duplicates = compare.compare_documents(feed)
+ duplicates = duplicate.compare_documents(feed)
return render_template('duplicates.html', duplicates=duplicates)
@app.route('/index_database', methods=['GET'])
diff --git a/vagrant/bootstrap.sh b/vagrant/bootstrap.sh
index 38f3b689..750fe4f5 100644
--- a/vagrant/bootstrap.sh
+++ b/vagrant/bootstrap.sh
@@ -17,6 +17,8 @@ cd pyaggr3g470r
apt-get install -y libxml2-dev libxslt1-dev
# For scipy:
apt-get install -y libatlas-base-dev gfortran
+# For scikit-learn:
+apt-get install -y python-nose
# installation with pip
sudo pip install --upgrade -r requirements.txt
# copy of the default configuration files for vagrant
bgstack15