#!/bin/sh
#
# generate-top-words-list.sh
#
# Provenance: introduced as source/var/generate-top-words-list.sh (mode 100755)
# by commit 84a79ec06541c7db92af48b43d1d4d379cded730
#   Author:  Cédric Bonhomme
#   Date:    Wed, 28 Nov 2012 11:39:23 +0100
#   Subject: Ignore stop words when calculating top words.
#
# Usage: generate-top-words-list.sh INPUT OUTPUT
#
#   INPUT   text file to read ("-" makes awk read standard input).
#   OUTPUT  file to (over)write with the result.
#
# For every INPUT line whose first whitespace-separated field starts with a
# character matched by [A-Za-z], that first field is kept. The kept words are
# sorted and written to OUTPUT on a single line, each word followed by ';'
# (so the output ends with ';' and has no trailing newline).
#
# Exit status: 1 on wrong argument count or unreadable INPUT.

# Numeric comparison; exactly two operands are required.
if [ "$#" -ne 2 ]; then
  echo No input files given 1>&2
  printf 'Usage: %s INPUT OUTPUT\n' "${0##*/}" 1>&2
  exit 1
fi

# Fail before OUTPUT is truncated: without this check a missing INPUT would
# leave an empty OUTPUT and still exit 0, because a pipeline's status is that
# of its last command (tr).
if [ "$1" != "-" ] && [ ! -r "$1" ]; then
  printf 'Cannot read input file: %s\n' "$1" 1>&2
  exit 1
fi

# Operands are quoted so paths containing spaces or glob characters survive.
# awk's default field separator already splits on blanks, so no FS is set.
awk '$1 ~ /^[A-Za-z]/ { print $1 }' "$1" \
  | sort \
  | tr '\n' ';' > "$2"