about | summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: Cédric Bonhomme <cedric@cedricbonhomme.org> 2016-09-21 07:57:00 +0200
committer: Cédric Bonhomme <cedric@cedricbonhomme.org> 2016-09-21 07:57:00 +0200
commit: 5ab832be1911d7c80d6fe1aaac916346643357c0 (patch)
tree: f7046b3fea0332b68c835d693fc6b4f296de10ed /src
parent: Added more comments. (diff)
download: newspipe-5ab832be1911d7c80d6fe1aaac916346643357c0.tar.gz
download: newspipe-5ab832be1911d7c80d6fe1aaac916346643357c0.tar.bz2
download: newspipe-5ab832be1911d7c80d6fe1aaac916346643357c0.zip
Add new stop words.
Diffstat (limited to 'src')
-rwxr-xr-x  src/web/lib/misc_utils.py  4
-rw-r--r--  src/web/var/french-stop-words.txt  18
-rw-r--r--  src/web/var/stop_words/french-stop-words-list.txt  2
-rw-r--r--  src/web/views/user.py  5
4 files changed, 25 insertions, 4 deletions
diff --git a/src/web/lib/misc_utils.py b/src/web/lib/misc_utils.py
index ea0b18ce..fc49dd6d 100755
--- a/src/web/lib/misc_utils.py
+++ b/src/web/lib/misc_utils.py
@@ -34,6 +34,7 @@ __license__ = "AGPLv3"
#
import re
+import os
import sys
import glob
import opml
@@ -242,7 +243,8 @@ def load_stop_words():
"""
Load the stop words and return them in a list.
"""
- stop_words_lists = glob.glob('./JARR/var/stop_words/*.txt')
+ stop_words_lists = glob.glob(os.path.join(conf.BASE_DIR,
+ 'web/var/stop_words/*.txt'))
stop_words = []
for stop_wods_list in stop_words_lists:
diff --git a/src/web/var/french-stop-words.txt b/src/web/var/french-stop-words.txt
index 08a2f5d7..2af35642 100644
--- a/src/web/var/french-stop-words.txt
+++ b/src/web/var/french-stop-words.txt
@@ -173,4 +173,20 @@ quels | which
quelle | which
quelles | which
sans | without
-soi | oneself
\ No newline at end of file
+soi | oneself
+
+
+| Later additions (from Cédric Bonhomme)
+quelques
+beaucoup
+encore
+toujours
+maintenant
+toutes
+tous
+chaque
+plusieurs
+eacute
+egrave
+vraiment
+permet
diff --git a/src/web/var/stop_words/french-stop-words-list.txt b/src/web/var/stop_words/french-stop-words-list.txt
index a6a36c79..e48bd8d0 100644
--- a/src/web/var/stop_words/french-stop-words-list.txt
+++ b/src/web/var/stop_words/french-stop-words-list.txt
@@ -1 +1 @@
-à;ai;aie;aient;aies;ait;as;au;aura;aurai;auraient;aurais;aurait;auras;aurez;auriez;aurions;aurons;auront;aux;avaient;avais;avait;avec;avez;aviez;avions;avons;ayant;ayez;ayons;c;ce;ceci;celà;ces;cet;cette;d;dans;de;des;du;elle;en;es;est;et;étaient;étais;était;étant;été;étée;étées;êtes;étés;étiez;étions;eu;eue;eues;eûmes;eurent;eus;eusse;eussent;eusses;eussiez;eussions;eut;eût;eûtes;eux;fûmes;furent;fus;fusse;fussent;fusses;fussiez;fussions;fut;fût;fûtes;ici;il;ils;j;je;l;la;le;les;leur;leurs;lui;m;ma;mais;me;même;mes;moi;mon;n;ne;nos;notre;nous;on;ont;ou;par;pas;pour;qu;que;quel;quelle;quelles;quels;qui;s;sa;sans;se;sera;serai;seraient;serais;serait;seras;serez;seriez;serions;serons;seront;ses;soi;soient;sois;soit;sommes;son;sont;soyez;soyons;suis;sur;t;ta;te;tes;toi;ton;tu;toujours;un;une;vos;votre;vous;y;
+ai;aie;aient;aies;ait;as;au;aura;aurai;auraient;aurais;aurait;auras;aurez;auriez;aurions;aurons;auront;aux;avaient;avais;avait;avec;avez;aviez;avions;avons;ayant;ayez;ayons;beaucoup;c;ce;ceci;celà;ces;cet;cette;chaque;d;dans;de;des;du;eacute;egrave;elle;en;encore;es;est;et;eu;eue;eues;eûmes;eurent;eus;eusse;eussent;eusses;eussiez;eussions;eut;eût;eûtes;eux;fûmes;furent;fus;fusse;fussent;fusses;fussiez;fussions;fut;fût;fûtes;ici;il;ils;j;je;l;la;le;les;leur;leurs;lui;m;ma;maintenant;mais;me;même;mes;moi;mon;n;ne;nos;notre;nous;on;ont;ou;par;pas;permet;plusieurs;pour;qu;que;quel;quelle;quelles;quelques;quels;qui;s;sa;sans;se;sera;serai;seraient;serais;serait;seras;serez;seriez;serions;serons;seront;ses;soi;soient;sois;soit;sommes;son;sont;soyez;soyons;suis;sur;t;ta;te;tes;toi;ton;toujours;tous;toutes;tu;un;une;vos;votre;vous;vraiment;y; \ No newline at end of file
diff --git a/src/web/views/user.py b/src/web/views/user.py
index 01e26325..c7c318e3 100644
--- a/src/web/views/user.py
+++ b/src/web/views/user.py
@@ -1,5 +1,6 @@
import string
import random
+from datetime import datetime, timedelta
from flask import (Blueprint, g, render_template, redirect,
flash, url_for, request)
from flask_babel import gettext
@@ -29,7 +30,9 @@ def profile_public(nickname=None):
return redirect(url_for('home'))
word_size = 6
- articles = ArticleController(user.id).read().all()
+ filters = {}
+ filters['retrieved_date__gt'] = datetime.now() - timedelta(weeks=24)
+ articles = ArticleController(user.id).read(**filters).all()
top_words = misc_utils.top_words(articles, n=50, size=int(word_size))
tag_cloud = misc_utils.tag_cloud(top_words)
bgstack15