#!/bin/bash

# Name under which the script was invoked; interpolated into the usage text.
PROGNAME="$0"
# Print the usage summary and terminate the script.
#
# Every call site in this script is an error path (unknown option, an
# argument that is not a *.data.dz file, or an invalid combination of -i and
# file arguments), so the text goes to stderr and the exit status is non-zero.
#
# NOTE(review): the help lines are kept flush-left as they appear in this
# copy of the file; re-indent them if the original text was indented.
#
# Globals:   PROGNAME (read)
# Arguments: none
# Outputs:   usage text on stderr
# Returns:   never; exits with status 2
usage() {
  cat >&2 <<EOF

gebruik:

$PROGNAME [opties] -i
$PROGNAME [opties] file.data.dz ...

-i : kies interactief

overige opties:

-n int : max aantal resultaten
-p : gebruik pager
-s : tel hits één keer per bericht

EOF
  exit 2
}
# Option state; the defaults below are overwritten by parse_options.
SINGLE=0    # -s: count a hit at most once per message
SELECT=0    # -i: pick the corpus file interactively
USEPAGER=0  # -p: send the result through a pager
LIMIT=0     # -n: maximum number of result lines; 0 means no limit

# Parse the command-line flags into SINGLE, SELECT, USEPAGER and LIMIT.
#
# The value of -n must consist of digits only: it is later used in a numeric
# test and as the argument of `head -n`, so anything else is rejected here
# with the usage message instead of failing (or misbehaving) further on.
#
# Globals:   SINGLE, SELECT, USEPAGER, LIMIT (written); OPTIND (advanced)
# Arguments: the script's command-line arguments
# Returns:   0; calls usage (which exits) on an unknown option or bad -n value
parse_options() {
  local opt
  while getopts 'sin:p' opt; do
    case "$opt" in
      i)
        SELECT=1
        ;;
      n)
        case "$OPTARG" in
          '' | *[!0-9]*) usage ;;
        esac
        LIMIT="$OPTARG"
        ;;
      p)
        USEPAGER=1
        ;;
      s)
        SINGLE=1
        ;;
      *)
        usage
        ;;
    esac
  done
}

parse_options "$@"
# Drop the parsed options so that "$@" holds only the file arguments.
shift "$((OPTIND - 1))"
# Query handed verbatim to alto by search().
# NOTE(review): presumably "fp:" introduces an XPath filter and "tt:" an
# output template — confirm against the alto documentation.
EXPR='fp://node[@cat="mwu" and node[@pt="spec"] and not(@his="normal") and not(@his_1="decap")]'

# Derive the shell fragments that search() splices into its command line.
#
#   TAIL      "" or " | head -n N" and/or " | <pager>", appended after the
#             final sort
#   SORT      the stage between alto and `uniq -c`
#   TEMPLATE  the alto output template matching SORT
#
# LIMIT is only used when it consists of digits: it ends up inside a string
# that is evaluated by the shell, and an unquoted value such as "1 -o 1"
# would otherwise both pass the numeric test and be spliced in as-is.
#
# Globals:   LIMIT, USEPAGER, SINGLE, PAGER (read); TAIL, SORT, TEMPLATE (written)
# Arguments: none
build_pipeline_config() {
  TAIL=''
  case "${LIMIT:-0}" in
    *[!0-9]*)
      printf 'ongeldige waarde voor -n genegeerd: %s\n' "$LIMIT" >&2
      ;;
    *)
      if [ "${LIMIT:-0}" -gt 0 ]; then
        TAIL=" | head -n $LIMIT"
      fi
      ;;
  esac

  # PAGER is the user's own setting and may carry options, so it is kept as a
  # command-line fragment.
  if [ "$USEPAGER" = 1 ]; then
    TAIL="$TAIL | ${PAGER:-less}"
  fi

  if [ "$SINGLE" = 1 ]; then
    # With -s the template also emits %I after a tab; the first sed drops a
    # trailing ".<digits>" from it, sort | uniq collapses repeats, and the
    # second sed removes the tab and everything after it again.
    SORT="sed -e 's/\.[0-9][0-9]*$//' | sort | uniq | sed -e 's/\t.*//'"
    TEMPLATE='tt:%w\t%I'
  else
    SORT=sort
    TEMPLATE='tt:%w'
  fi
}

build_pipeline_config
# Run the alto query over the given corpus files and print a frequency table.
#
# Stages: alto output -> sort (with -s: first reduce to one line per distinct
# text/id pair) -> uniq -c -> sort by count, descending -> optional head ->
# optional pager.
#
# This is a real pipeline rather than an eval'd string, so file names that
# contain spaces or shell metacharacters reach alto as single, literal
# arguments and are never re-parsed by the shell.
#
# Globals:   EXPR, TEMPLATE, SINGLE, LIMIT, USEPAGER, PAGER (read)
# Arguments: one or more corpus files
# Outputs:   `uniq -c` style lines ("<count> <text>"), highest count first
search() {
  local limit=0

  # Anything that is not a plain number means "no limit".
  case "$LIMIT" in
    '' | *[!0-9]*) ;;
    *) limit=$LIMIT ;;
  esac

  alto "$@" "$EXPR" "$TEMPLATE" |
    if [ "$SINGLE" = 1 ]; then
      # Drop the trailing ".<digits>" of the id column, collapse repeats,
      # then strip the id column. The final sort makes equal texts adjacent
      # again for uniq -c, whatever the collation order of the tab.
      sed -e 's/\.[0-9][0-9]*$//' | sort | uniq | sed -e 's/\t.*//' | sort
    else
      sort
    fi |
    uniq -c |
    # stderr of this sort is discarded on purpose: it is cut short whenever
    # head or the pager stops reading early.
    sort -nr 2>/dev/null |
    if [ "$limit" -gt 0 ]; then
      head -n "$limit"
    else
      cat
    fi |
    if [ "$USEPAGER" = 1 ]; then
      # PAGER is the user's own setting and may carry options, so it is
      # interpreted as a shell command line, exactly as before.
      eval "${PAGER:-less}"
    else
      cat
    fi
}
# Validate the file arguments and dispatch to the requested mode.
#
#   -i and no files : offer a menu of the corpus files found under
#                     /net/corpora/nlnieuws and search each one picked
#   files and no -i : run one search over all files given
#   anything else   : usage error
#
# Globals:   SELECT (read)
# Arguments: the arguments left after option parsing
# Returns:   status of the last search; exits via usage on invalid input and
#            with status 1 when the corpus directory cannot be entered
main() {
  local arg choice
  local -a corpora=()

  # Only *.data.dz files are accepted as arguments.
  for arg in "$@"; do
    case "$arg" in
      *.data.dz) ;;
      *) usage ;;
    esac
  done

  if [ "$#" -eq 0 ] && [ "$SELECT" = 1 ]; then
    # Without this check a failed cd would make find scan whatever directory
    # the script happened to be started from.
    cd /net/corpora/nlnieuws || exit 1

    # Read the list line by line so that paths containing spaces stay whole.
    while IFS= read -r choice; do
      corpora+=("$choice")
    done < <(find . -name '*data.dz' | sort)

    select choice in "${corpora[@]}"; do
      # select leaves the variable empty on an invalid answer; ask again
      # instead of running a search without a file.
      [ -n "$choice" ] || continue
      search "$choice"
    done
  elif [ "$#" -gt 0 ] && [ "$SELECT" = 0 ]; then
    search "$@"
  else
    usage
  fi
}

main "$@"