# stop.sh
#
# Generate a clean stop-word list ('words.txt') from the 'stop.txt' file.
# Original source: http://snowball.tartarus.org/algorithms/english/stop.txt
# NOTE: in our experiments, stop.txt has been modified to include the last stop
# words (stop.txt is included).
# Reduce stop.txt to one bare word per line and write the result to words.txt.
# A single sed pass applies three steps, in order, to every input line:
#   1. drop everything from the first '|' (the comment delimiter) to the end
#      of the line;
#   2. delete all whitespace -- spaces, tabs and any stray CR left by CRLF
#      line endings; [[:space:]] is POSIX, unlike the GNU-only '\+' operator;
#   3. discard lines that are empty after steps 1 and 2.
# The input redirection is listed first on purpose: if stop.txt cannot be
# opened, the command fails (non-zero status) before words.txt is opened, so
# an existing word list is not truncated.
sed \
  -e 's/|.*//' \
  -e 's/[[:space:]]//g' \
  -e '/^$/d' \
  <stop.txt >words.txt