Files
nlnieuws/namen.sh
2026-03-04 13:30:59 +01:00

97 lines
1.5 KiB
Bash
Executable File

#!/bin/bash
#
# Runs an `alto` query over one or more *.data.dz files and prints each
# distinct result together with the number of times it occurred, most
# frequent first.

# Name this script was invoked as; interpolated into the usage text.
PROGNAME="$0"
#######################################
# Print the usage text and terminate the script.
#
# Every call site in this script is an error path (unknown option, argument
# that is not a .data.dz file, wrong combination of -i and file arguments),
# so the text goes to stderr and the exit status is non-zero; that way
# callers and pipelines can tell misuse apart from a successful run.
#
# Globals:   PROGNAME (read) - script name shown in the usage text
# Arguments: none
# Outputs:   usage text on stderr
# Returns:   never returns; exits the shell with status 2
#######################################
usage() {
  # Unquoted delimiter so $PROGNAME is expanded. The blank first and last
  # lines are part of the original message and are kept on purpose.
  cat >&2 <<EOF

gebruik:
$PROGNAME [opties] -i
$PROGNAME [opties] file.data.dz ...
-i : kies interactief
overige opties:
-n int : max aantal resultaten
-p : gebruik pager
-s : tel hits één keer per bericht

EOF
  exit 2
}
# ---------------------------------------------------------------------------
# Option parsing and construction of the pipeline fragments used by search().
# ---------------------------------------------------------------------------

# Option state, filled in by the getopts loop below.
SINGLE=0    # -s: count a hit only once per message
SELECT=0    # -i: choose the corpus file interactively
USEPAGER=0  # -p: send the result through a pager
LIMIT=0     # -n: maximum number of result lines; 0 means no limit

while getopts 'sin:p' opt; do
  case "$opt" in
    i) SELECT=1 ;;
    n) LIMIT="$OPTARG" ;;
    p) USEPAGER=1 ;;
    s) SINGLE=1 ;;
    *) usage ;;
  esac
done
shift "$((OPTIND - 1))"

# LIMIT ends up inside a string that search() runs through eval, so it must
# be a plain non-negative integer. Anything else (letters, signs, shell
# metacharacters, empty string) is rejected here instead of being executed
# or tripping an "integer expression expected" error further down.
case "$LIMIT" in
  '' | *[!0-9]*)
    printf '%s: ongeldige waarde voor -n (geheel getal >= 0 verwacht): %s\n' \
      "${PROGNAME:-$0}" "$LIMIT" >&2
    exit 2
    ;;
esac

# TAIL is a shell pipeline fragment appended to the end of the search
# pipeline: an optional "head" to cap the output, then an optional pager.
TAIL=''
if [ "$LIMIT" -gt 0 ]; then
  TAIL=" | head -n $LIMIT"
fi
if [ "$USEPAGER" = 1 ]; then
  # PAGER is inserted as shell text on purpose so it may carry its own options.
  TAIL="$TAIL | ${PAGER:-less}"
fi

# Query handed to alto unchanged. Read literally: nodes with cat="mwu" that
# contain a node with pt="spec", excluding nodes whose his is "normal" or
# whose his_1 is "decap".
EXPR='fp://node[@cat="mwu" and node[@pt="spec"] and not(@his="normal") and not(@his_1="decap")]'

# SORT is the pipeline fragment placed between alto and "uniq -c";
# TEMPLATE is the output template handed to alto.
if [ "$SINGLE" = 1 ]; then
  # Each output line is "<%w><TAB><%I>". Strip a trailing ".<digits>" from the
  # line, drop duplicate lines, then cut everything from the tab onward so
  # only the %w part is counted.
  # NOTE(review): presumably %I is an id whose numeric suffix distinguishes
  # parts of the same message - confirm against the alto documentation.
  # The \t in the last sed expression relies on sed treating it as a tab.
  SORT="sed -e 's/\.[0-9][0-9]*$//' | sort | uniq | sed -e 's/\t.*//'"
  TEMPLATE='tt:%w\t%I'
else
  SORT=sort
  TEMPLATE='tt:%w'
fi
#######################################
# Run the alto query over the given files and print a frequency table.
#
# The pipeline is: alto | $SORT | uniq -c | sort -nr $TAIL
#
# Globals:   EXPR (read)     - query passed to alto
#            TEMPLATE (read) - output template passed to alto
#            SORT (read)     - shell pipeline fragment run before "uniq -c"
#            TAIL (read)     - shell pipeline fragment appended at the end
# Arguments: zero or more file names, handed to alto in the given order
# Outputs:   "uniq -c" style lines on stdout, highest count first
# Returns:   exit status of the evaluated pipeline
#######################################
search () {
  local files='' expr template

  # SORT and TAIL are shell code, so the pipeline has to go through eval.
  # Everything that is *data* is escaped with %q first, so that spaces or
  # shell metacharacters in a file name (or in EXPR/TEMPLATE) reach alto as
  # a single literal argument instead of being split or executed.
  if [ "$#" -gt 0 ]; then
    files=$(printf '%q ' "$@")
  fi
  expr=$(printf '%q' "$EXPR")
  template=$(printf '%q' "$TEMPLATE")

  # The 2> /dev/null applies to the final "sort -nr" only.
  eval "alto $files$expr $template | $SORT | uniq -c | sort -nr 2> /dev/null $TAIL"
}
# ---------------------------------------------------------------------------
# Argument validation and dispatch.
# ---------------------------------------------------------------------------

# Every remaining argument must name a .data.dz file; anything else is a
# usage error.
for i in "$@"; do
  case "$i" in
    *.data.dz) ;;
    *) usage ;;
  esac
done

if [[ $# -eq 0 && $SELECT == 1 ]]; then
  # Interactive mode: offer every *data.dz file below the corpus directory.
  # Stop if the directory is unavailable; otherwise find would silently scan
  # whatever directory the script happened to be started from.
  cd /net/corpora/nlnieuws || exit 1

  # Collect the paths line by line so names containing spaces stay intact.
  corpora=()
  while IFS= read -r i; do
    corpora+=("$i")
  done < <(find . -name '*data.dz' | sort)

  select i in "${corpora[@]}"; do
    # An invalid reply leaves $i empty; prompt again instead of running the
    # query without any input file.
    [[ -n $i ]] || continue
    search "$i"
  done
elif [[ $# -gt 0 && $SELECT == 0 ]]; then
  # Batch mode: query all files given on the command line in one go.
  search "$@"
else
  # Either nothing to do, or -i combined with explicit files.
  usage
fi