#!/bin/bash
#
# Frequency list of corpus hits for one fixed XPath query.
#
# Runs `alto` on the given corpus files/directories with the expression in
# EXPR and the output template in TEMPLATE, then counts identical output
# lines and prints them most-frequent first.
#
# Options:
#   -i      interactive: choose a corpus from a menu of the *data.dz files
#           found below CORPUS_ROOT (only valid without corpus arguments)
#   -n INT  print at most INT result lines (0 = no limit)
#   -p      send the result through ${PAGER:-less}
#   -s      count each hit once per distinct (word, id) pair; see the
#           POSTPROCESS assignment below for the exact steps
#   -v      print the command line that is executed (on stderr)
#
# Deliberately no `set -e` / `pipefail`: with -n, `head` closes the pipe
# early, so earlier pipeline stages failing with a broken pipe is expected.

PROGNAME=$0

# Root directory searched for corpora in interactive (-i) mode.
readonly CORPUS_ROOT=/net/corpora/nlnieuws

# Print the (Dutch) usage text on stderr and terminate with status 1.
usage() {
  cat >&2 <<EOF

gebruik:

    $PROGNAME [opties] -i
    $PROGNAME [opties] corpusfile(s) en/of corpusdirectory(s)

    -i : kies interactief

overige opties:

    -n int : max aantal resultaten
    -p     : gebruik pager
    -s     : tel hits één keer per bericht
    -v     : verbose

EOF
  exit 1
}

SINGLE=0
SELECT=0
USEPAGER=0
LIMIT=0
VERBOSE=0

while getopts 'sin:pv' opt; do
  case "$opt" in
    i) SELECT=1 ;;
    n) LIMIT=$OPTARG ;;
    p) USEPAGER=1 ;;
    s) SINGLE=1 ;;
    v) VERBOSE=1 ;;
    *) usage ;;
  esac
done
shift $((OPTIND - 1))

# LIMIT ends up inside a command string that is eval'ed, so accept nothing
# but a plain decimal number. 10# keeps values such as "08" from being read
# as (invalid) octal.
if [[ ! $LIMIT =~ ^[0-9]+$ ]]; then
  usage
fi
LIMIT=$((10#$LIMIT))

EXPR='fp://node[@cat="mwu" and node[@pt="spec"] and not(@his="normal") and not(@his_1="decap")]'

# POSTPROCESS is the shell pipeline that consumes alto's output. It is built
# only from constants in this script, the validated LIMIT and the user's own
# $PAGER, which is why it is acceptable to eval it in search().
if ((SINGLE)); then
  # The template emits "<%w>TAB<%I>". Strip a trailing ".<digits>" from the
  # line, drop duplicate lines, then drop the TAB and everything after it.
  # NOTE(review): presumably %I is an id whose last ".N" component numbers
  # the sentence within a message - confirm against the alto documentation.
  # The final `sort` is required: after the second sed, equal words are not
  # guaranteed to be adjacent (locale collation may order "a b<TAB>x"
  # between "a<TAB>w" and "a<TAB>z"), and `uniq -c` only merges neighbours.
  TEMPLATE='tt:%w\t%I'
  POSTPROCESS="sed -e 's/\.[0-9][0-9]*\$//' | sort | uniq | sed -e 's/\t.*//' | sort"
else
  TEMPLATE='tt:%w'
  POSTPROCESS='sort'
fi

# stderr of the final sort is discarded, as in the original pipeline;
# presumably to hide "broken pipe" noise when head or the pager stops early.
POSTPROCESS+=' | uniq -c | sort -nr 2> /dev/null'
if ((LIMIT > 0)); then
  POSTPROCESS+=" | head -n $LIMIT"
fi
if ((USEPAGER)); then
  POSTPROCESS+=" | ${PAGER:-less}"
fi

#######################################
# Run the query on the given corpora and print the frequency list.
# Globals:   EXPR, TEMPLATE, POSTPROCESS, VERBOSE (all read)
# Arguments: corpus files and/or directories, handed to alto unchanged
# Outputs:   frequency list on stdout; with -v the command line on stderr
#######################################
search() {
  if ((VERBOSE)); then
    local shown
    printf -v shown ' %q' "$@" "$EXPR" "$TEMPLATE"
    printf 'alto%s | %s\n' "$shown" "$POSTPROCESS" >&2
  fi
  # Corpus paths are passed as real arguments, never through eval, so
  # spaces or shell metacharacters in file names cannot be re-interpreted.
  alto "$@" "$EXPR" "$TEMPLATE" | eval "$POSTPROCESS"
}

# Every argument must either carry a known corpus extension or be a directory.
for i in "$@"; do
  case "$i" in
    *.xml | *.dact | *.dbxml | *.data.dz | *.index | *.zip) ;;
    *)
      if [[ ! -d "$i" ]]; then
        usage
      fi
      ;;
  esac
done

if (($# == 0 && SELECT == 1)); then
  cd "$CORPUS_ROOT" || exit 1
  # Read the find output line by line so paths containing blanks stay intact.
  corpora=()
  while IFS= read -r corpus; do
    corpora+=("$corpus")
  done < <(find . -name '*data.dz' | sort)
  select i in "${corpora[@]}"; do
    # An invalid menu reply leaves $i empty; show the prompt again instead
    # of running alto without a corpus.
    [[ -n "$i" ]] || continue
    search "$i"
  done
elif (($# > 0 && SELECT == 0)); then
  search "$@"
else
  usage
fi