From 81bcf97ed8828fdfc7ae611fd70d57ba0e0de458 Mon Sep 17 00:00:00 2001 From: Cédric Bonhomme Date: Sun, 4 Nov 2018 22:30:31 +0100 Subject: Removed now useless stop words lists. --- src/web/var/english-stop-words.txt | 311 --------------------- src/web/var/french-stop-words.txt | 192 ------------- src/web/var/generate-top-words-list.sh | 8 - src/web/var/stop_words/english-stop-words-list.txt | 1 - src/web/var/stop_words/french-stop-words-list.txt | 1 - 5 files changed, 513 deletions(-) delete mode 100644 src/web/var/english-stop-words.txt delete mode 100644 src/web/var/french-stop-words.txt delete mode 100755 src/web/var/generate-top-words-list.sh delete mode 100644 src/web/var/stop_words/english-stop-words-list.txt delete mode 100644 src/web/var/stop_words/french-stop-words-list.txt (limited to 'src/web') diff --git a/src/web/var/english-stop-words.txt b/src/web/var/english-stop-words.txt deleted file mode 100644 index 497a1f96..00000000 --- a/src/web/var/english-stop-words.txt +++ /dev/null @@ -1,311 +0,0 @@ - - | An English stop word list. Comments begin with vertical bar. Each stop - | word is at the start of a line. - - | Many of the forms below are quite rare (e.g. "yourselves") but included for - | completeness. - - | PRONOUNS FORMS - | 1st person sing - -i | subject, always in upper case of course - -me | object -my | possessive adjective - | the possessive pronoun `mine' is best suppressed, because of the - | sense of coal-mine etc. -myself | reflexive - | 1st person plural -we | subject - -| us | object - | care is required here because US = United States. It is usually - | safe to remove it if it is in lower case. -our | possessive adjective -ours | possessive pronoun -ourselves | reflexive - | second person (archaic `thou' forms not included) -you | subject and object -your | possessive adjective -yours | possessive pronoun -yourself | reflexive (singular) -yourselves | reflexive (plural) - | third person singular -he | subject -him | object -his | possessive adjective and pronoun -himself | reflexive - -she | subject -her | object and possessive adjective -hers | possessive pronoun -herself | reflexive - -it | subject and object -its | possessive adjective -itself | reflexive - | third person plural -they | subject -them | object -their | possessive adjective -theirs | possessive pronoun -themselves | reflexive - | other forms (demonstratives, interrogatives) -what -which -who -whom -this -that -these -those - - | VERB FORMS (using F.R. Palmer's nomenclature) - | BE -am | 1st person, present -is | -s form (3rd person, present) -are | present -was | 1st person, past -were | past -be | infinitive -been | past participle -being | -ing form - | HAVE -have | simple -has | -s form -had | past -having | -ing form - | DO -do | simple -does | -s form -did | past -doing | -ing form - - | The forms below are, I believe, best omitted, because of the significant - | homonym forms: - - | He made a WILL - | old tin CAN - | merry month of MAY - | a smell of MUST - | fight the good fight with all thy MIGHT - - | would, could, should, ought might however be included - - | | AUXILIARIES - | | WILL - |will - -would - - | | SHALL - |shall - -should - - | | CAN - |can - -could - - | | MAY - |may - |might - | | MUST - |must - | | OUGHT - -ought - - | COMPOUND FORMS, increasingly encountered nowadays in 'formal' writing - | pronoun + verb - -i'm -you're -he's -she's -it's -we're -they're -i've -you've -we've -they've -i'd -you'd -he'd -she'd -we'd -they'd -i'll -you'll -he'll -she'll -we'll -they'll - - | verb + negation - -isn't -aren't -wasn't -weren't -hasn't -haven't -hadn't -doesn't -don't -didn't - - | auxiliary + negation - -won't -wouldn't -shan't -shouldn't -can't -cannot -couldn't -mustn't - - | miscellaneous forms - -let's -that's -who's -what's -here's -there's -when's -where's -why's -how's - - | rarer forms - - | daren't needn't - - | doubtful forms - - | oughtn't mightn't - - | ARTICLES -a -an -the - - | THE REST (Overlap among prepositions, conjunctions, adverbs etc is so - | high, that classification is pointless.) -and -but -if -or -because -as -until -while - -of -at -by -for -with -about -against -between -into -through -during -before -after -above -below -to -from -up -down -in -out -on -off -over -under - -again -further -then -once - -here -there -when -where -why -how - -all -any -both -each -few -more -most -other -some -such - -no -nor -not -only -own -same -so -than -too -very - - | Just for the record, the following words are among the commonest in English - - | one - | every - | least - | less - | many - | now - | ever - | never - | say - | says - | said - | also - | get - | go - | goes - | just - | made - | make - | put - | see - | seen - | whether - | like - | well - | back - | even - | still - | way - | take - | since - | another - | however - | two - | three - | four - | five - | first - | second - | new - | old - | high - | long \ No newline at end of file diff --git a/src/web/var/french-stop-words.txt b/src/web/var/french-stop-words.txt deleted file mode 100644 index 2af35642..00000000 --- a/src/web/var/french-stop-words.txt +++ /dev/null @@ -1,192 +0,0 @@ - - | A French stop word list. Comments begin with vertical bar. Each stop - | word is at the start of a line. - -au | a + le -aux | a + les -avec | with -ce | this -ces | these -dans | with -de | of -des | de + les -du | de + le -elle | she -en | `of them' etc -et | and -eux | them -il | he -je | I -la | the -le | the -leur | their -lui | him -ma | my (fem) -mais | but -me | me -même | same; as in moi-même (myself) etc -mes | me (pl) -moi | me -mon | my (masc) -ne | not -nos | our (pl) -notre | our -nous | we -on | one -ou | where -par | by -pas | not -pour | for -qu | que before vowel -que | that -qui | who -sa | his, her (fem) -se | oneself -ses | his (pl) -son | his, her (masc) -sur | on -ta | thy (fem) -te | thee -tes | thy (pl) -toi | thee -ton | thy (masc) -tu | thou -un | a -une | a -vos | your (pl) -votre | your -vous | you - - | single letter forms - -c | c' -d | d' -j | j' -l | l' -à | to, at -m | m' -n | n' -s | s' -t | t' -y | there - - | forms of être (not including the infinitive): -été -étée -étées -étés -étant -suis -es -est -sommes -êtes -sont -serai -seras -sera -serons -serez -seront -serais -serait -serions -seriez -seraient -étais -était -étions -étiez -étaient -fus -fut -fûmes -fûtes -furent -sois -soit -soyons -soyez -soient -fusse -fusses -fût -fussions -fussiez -fussent - - | forms of avoir (not including the infinitive): -ayant -eu -eue -eues -eus -ai -as -avons -avez -ont -aurai -auras -aura -aurons -aurez -auront -aurais -aurait -aurions -auriez -auraient -avais -avait -avions -aviez -avaient -eut -eûmes -eûtes -eurent -aie -aies -ait -ayons -ayez -aient -eusse -eusses -eût -eussions -eussiez -eussent - - | Later additions (from Jean-Christophe Deschamps) -ceci | this -celà | that -cet | this -cette | this -ici | here -ils | they -les | the (pl) -leurs | their (pl) -quel | which -quels | which -quelle | which -quelles | which -sans | without -soi | oneself - - -| Later additions (from Cédric Bonhomme) -quelques -beaucoup -encore -toujours -maintenant -toutes -tous -chaque -plusieurs -eacute -egrave -vraiment -permet diff --git a/src/web/var/generate-top-words-list.sh b/src/web/var/generate-top-words-list.sh deleted file mode 100755 index 2a87e147..00000000 --- a/src/web/var/generate-top-words-list.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/sh - -if test $# != 2 ; then - echo No input files given 1>&2 - exit 1 -fi - -awk 'BEGIN{FS = " "} { if ($1 ~ /^[A-Za-z]/) {print $1}}' $1 | sort | tr '\n' ';' > $2 \ No newline at end of file diff --git a/src/web/var/stop_words/english-stop-words-list.txt b/src/web/var/stop_words/english-stop-words-list.txt deleted file mode 100644 index caa26aaf..00000000 --- a/src/web/var/stop_words/english-stop-words-list.txt +++ /dev/null @@ -1 +0,0 @@ -a;about;above;after;again;against;all;am;an;and;any;are;aren't;as;at;be;because;been;before;being;below;between;both;but;by;cannot;can't;could;couldn't;did;didn't;do;does;doesn't;doing;don't;down;during;each;few;for;from;further;had;hadn't;has;hasn't;have;haven't;having;he;he'd;he'll;her;here;here's;hers;herself;he's;him;himself;his;how;how's;i;i'd;if;i'll;i'm;in;into;is;isn't;it;its;it's;itself;i've;let's;me;more;most;mustn't;my;myself;no;nor;not;of;off;on;once;only;or;other;ought;our;ours;ourselves;out;over;own;same;shan't;she;she'd;she'll;she's;should;shouldn't;slashdot;so;some;such;than;that;that's;the;their;theirs;them;themselves;then;there;there's;these;they;they'd;they'll;they're;they've;this;those;through;to;too;under;until;up;very;was;wasn't;we;we'd;we'll;were;we're;weren't;we've;what;what's;when;when's;where;where's;which;while;who;whom;who's;why;why's;with;won't;would;wouldn't;writes;you;you'd;you'll;your;you're;yours;yourself;yourselves;you've; diff --git a/src/web/var/stop_words/french-stop-words-list.txt b/src/web/var/stop_words/french-stop-words-list.txt deleted file mode 100644 index e48bd8d0..00000000 --- a/src/web/var/stop_words/french-stop-words-list.txt +++ /dev/null @@ -1 +0,0 @@ -ai;aie;aient;aies;ait;as;au;aura;aurai;auraient;aurais;aurait;auras;aurez;auriez;aurions;aurons;auront;aux;avaient;avais;avait;avec;avez;aviez;avions;avons;ayant;ayez;ayons;beaucoup;c;ce;ceci;celà;ces;cet;cette;chaque;d;dans;de;des;du;eacute;egrave;elle;en;encore;es;est;et;eu;eue;eues;eûmes;eurent;eus;eusse;eussent;eusses;eussiez;eussions;eut;eût;eûtes;eux;fûmes;furent;fus;fusse;fussent;fusses;fussiez;fussions;fut;fût;fûtes;ici;il;ils;j;je;l;la;le;les;leur;leurs;lui;m;ma;maintenant;mais;me;même;mes;moi;mon;n;ne;nos;notre;nous;on;ont;ou;par;pas;permet;plusieurs;pour;qu;que;quel;quelle;quelles;quelques;quels;qui;s;sa;sans;se;sera;serai;seraient;serais;serait;seras;serez;seriez;serions;serons;seront;ses;soi;soient;sois;soit;sommes;son;sont;soyez;soyons;suis;sur;t;ta;te;tes;toi;ton;toujours;tous;toutes;tu;un;une;vos;votre;vous;vraiment;y; \ No newline at end of file -- cgit