From 5ab832be1911d7c80d6fe1aaac916346643357c0 Mon Sep 17 00:00:00 2001 From: Cédric Bonhomme Date: Wed, 21 Sep 2016 07:57:00 +0200 Subject: Add new stop words. --- src/web/lib/misc_utils.py | 4 +++- src/web/var/french-stop-words.txt | 18 +++++++++++++++++- src/web/var/stop_words/french-stop-words-list.txt | 2 +- src/web/views/user.py | 5 ++++- 4 files changed, 25 insertions(+), 4 deletions(-) diff --git a/src/web/lib/misc_utils.py b/src/web/lib/misc_utils.py index ea0b18ce..fc49dd6d 100755 --- a/src/web/lib/misc_utils.py +++ b/src/web/lib/misc_utils.py @@ -34,6 +34,7 @@ __license__ = "AGPLv3" # import re +import os import sys import glob import opml @@ -242,7 +243,8 @@ def load_stop_words(): """ Load the stop words and return them in a list. """ - stop_words_lists = glob.glob('./JARR/var/stop_words/*.txt') + stop_words_lists = glob.glob(os.path.join(conf.BASE_DIR, + 'web/var/stop_words/*.txt')) stop_words = [] for stop_wods_list in stop_words_lists: diff --git a/src/web/var/french-stop-words.txt b/src/web/var/french-stop-words.txt index 08a2f5d7..2af35642 100644 --- a/src/web/var/french-stop-words.txt +++ b/src/web/var/french-stop-words.txt @@ -173,4 +173,20 @@ quels | which quelle | which quelles | which sans | without -soi | oneself \ No newline at end of file +soi | oneself + + +| Later additions (from Cédric Bonhomme) +quelques +beaucoup +encore +toujours +maintenant +toutes +tous +chaque +plusieurs +eacute +egrave +vraiment +permet diff --git a/src/web/var/stop_words/french-stop-words-list.txt b/src/web/var/stop_words/french-stop-words-list.txt index a6a36c79..e48bd8d0 100644 --- a/src/web/var/stop_words/french-stop-words-list.txt +++ b/src/web/var/stop_words/french-stop-words-list.txt @@ -1 +1 @@ -à;ai;aie;aient;aies;ait;as;au;aura;aurai;auraient;aurais;aurait;auras;aurez;auriez;aurions;aurons;auront;aux;avaient;avais;avait;avec;avez;aviez;avions;avons;ayant;ayez;ayons;c;ce;ceci;celà;ces;cet;cette;d;dans;de;des;du;elle;en;es;est;et;étaient;étais;était;étant;été;étée;étées;êtes;étés;étiez;étions;eu;eue;eues;eûmes;eurent;eus;eusse;eussent;eusses;eussiez;eussions;eut;eût;eûtes;eux;fûmes;furent;fus;fusse;fussent;fusses;fussiez;fussions;fut;fût;fûtes;ici;il;ils;j;je;l;la;le;les;leur;leurs;lui;m;ma;mais;me;même;mes;moi;mon;n;ne;nos;notre;nous;on;ont;ou;par;pas;pour;qu;que;quel;quelle;quelles;quels;qui;s;sa;sans;se;sera;serai;seraient;serais;serait;seras;serez;seriez;serions;serons;seront;ses;soi;soient;sois;soit;sommes;son;sont;soyez;soyons;suis;sur;t;ta;te;tes;toi;ton;tu;toujours;un;une;vos;votre;vous;y; +ai;aie;aient;aies;ait;as;au;aura;aurai;auraient;aurais;aurait;auras;aurez;auriez;aurions;aurons;auront;aux;avaient;avais;avait;avec;avez;aviez;avions;avons;ayant;ayez;ayons;beaucoup;c;ce;ceci;celà;ces;cet;cette;chaque;d;dans;de;des;du;eacute;egrave;elle;en;encore;es;est;et;eu;eue;eues;eûmes;eurent;eus;eusse;eussent;eusses;eussiez;eussions;eut;eût;eûtes;eux;fûmes;furent;fus;fusse;fussent;fusses;fussiez;fussions;fut;fût;fûtes;ici;il;ils;j;je;l;la;le;les;leur;leurs;lui;m;ma;maintenant;mais;me;même;mes;moi;mon;n;ne;nos;notre;nous;on;ont;ou;par;pas;permet;plusieurs;pour;qu;que;quel;quelle;quelles;quelques;quels;qui;s;sa;sans;se;sera;serai;seraient;serais;serait;seras;serez;seriez;serions;serons;seront;ses;soi;soient;sois;soit;sommes;son;sont;soyez;soyons;suis;sur;t;ta;te;tes;toi;ton;toujours;tous;toutes;tu;un;une;vos;votre;vous;vraiment;y; \ No newline at end of file diff --git a/src/web/views/user.py b/src/web/views/user.py index 01e26325..c7c318e3 100644 --- a/src/web/views/user.py +++ b/src/web/views/user.py @@ -1,5 +1,6 @@ import string import random +from datetime import datetime, timedelta from flask import (Blueprint, g, render_template, redirect, flash, url_for, request) from flask_babel import gettext @@ -29,7 +30,9 @@ def profile_public(nickname=None): return redirect(url_for('home')) word_size = 6 - articles = ArticleController(user.id).read().all() + filters = {} + filters['retrieved_date__gt'] = datetime.now() - timedelta(weeks=24) + articles = ArticleController(user.id).read(**filters).all() top_words = misc_utils.top_words(articles, n=50, size=int(word_size)) tag_cloud = misc_utils.tag_cloud(top_words) -- cgit