#!/bin/bash
#
# namen.sh - interactively pick a corpus file and print the 40 most frequent
# multi-word units found in it.
#
# Usage: namen.sh
#
# Presents a numbered menu of every '*data.dz' file below the corpus
# directory. For each file chosen, `alto` is run with an XPath filter that
# selects nodes with cat="mwu" that have a child node with pt="spec", and
# that carry neither his="normal" nor his_1="decap". The resulting lines are
# counted and the 40 most frequent are printed, highest count first.
# The menu repeats until end-of-file (Ctrl-D) on stdin.
#
# NOTE(review): `alto` is presumably the Alpino treebank tool and `tt:%w`
# presumably prints the words of each match -- confirm against its docs.
#
# `set -e` / `pipefail` are deliberately not enabled: `head` closing the pipe
# early can make the upstream stages exit non-zero, which would abort the
# interactive loop.

corpus_dir=/net/corpora/nlnieuws

# Without this check a failed cd would make `find .` silently scan whatever
# directory the script happened to be started from.
cd "$corpus_dir" || {
  printf '%s: cannot cd to %s\n' "${0##*/}" "$corpus_dir" >&2
  exit 1
}

# Read the candidates line by line into an array so that paths containing
# spaces or glob characters stay intact as single menu entries.
files=()
while IFS= read -r path; do
  files+=("$path")
done < <(find . -name '*data.dz' | sort)

if (( ${#files[@]} == 0 )); then
  printf '%s: no *data.dz files found under %s\n' "${0##*/}" "$corpus_dir" >&2
  exit 1
fi

select i in "${files[@]}"; do
  # An out-of-range answer leaves $i empty but still runs the body; re-prompt
  # instead of handing an empty file name to alto.
  if [[ -z "$i" ]]; then
    printf 'invalid choice: %s\n' "$REPLY" >&2
    continue
  fi

  alto "$i" \
    fp:'//node[@cat="mwu" and node[@pt="spec"] and not(@his="normal") and not(@his_1="decap")]' \
    tt:%w \
    | sort | uniq -c | sort -nr | head -n 40
done