This commit is contained in:
Peter Kleiweg
2026-06-18 12:52:40 +02:00
parent a8bea0ab44
commit 01e6d48665
13 changed files with 15363 additions and 8 deletions

View File

@@ -163,14 +163,57 @@ do
| sed -e 's/\([0-9]\) */\1\t/' | sort -f -k 2 | sort -n -r -k 1,1 -s \
> $part-nieuwe-adjww-extra-$ds-$i
# ranglijsten
# kale tellingen
say $part-rang-$ds-$i
say $part-count-word-$ds-$i
alto \
'fp://node[((@pt="n" or @neclass) and not(@rel="mwp")) or (@cat="mwu" and .//node[@neclass])]' \
'tt:%w\t%I' $files \
| sed -e 's/\.[0-9][0-9]*$//' | sort | uniq | rang \
> $part-rang-$ds-$i
'fp://node[(@pt and not(@pt="let" or @rel="mwp" or @neclass)) or (@cat="mwu" and not(.//node[@neclass]))]' \
'tt:%l\t%I' $files \
| sed -e 's/\.[0-9][0-9]*$//' | sort | uniq \
| sed -e 's/\t.*//' | uniq -c \
| grep -v '^ *1 ' \
| sed -e 's/\([0-9]\) */\1\t/' | sort -f -k 2 | sort -n -r -k 1,1 -s \
> $part-count-word-$ds-$i
say $part-count-loc-$ds-$i
alto \
'fp://node[(@neclass="LOC" and not(@rel="mwp")) or (@cat="mwu" and .//node[@neclass="LOC" ])]' \
'tt:%l\t%I' $files \
| sed -e 's/\.[0-9][0-9]*$//' | sort | uniq \
| sed -e 's/\t.*//' | uniq -c \
| grep -v '^ *1 ' \
| sed -e 's/\([0-9]\) */\1\t/' | sort -f -k 2 | sort -n -r -k 1,1 -s \
> $part-count-loc-$ds-$i
say $part-count-per-$ds-$i
alto \
'fp://node[(@neclass="PER" and not(@rel="mwp")) or (@cat="mwu" and .//node[@neclass="PER" ])]' \
'tt:%l\t%I' $files \
| sed -e 's/\.[0-9][0-9]*$//' | sort | uniq \
| sed -e 's/\t.*//' | uniq -c \
| grep -v '^ *1 ' \
| sed -e 's/\([0-9]\) */\1\t/' | sort -f -k 2 | sort -n -r -k 1,1 -s \
> $part-count-per-$ds-$i
say $part-count-org-$ds-$i
alto \
'fp://node[(@neclass="ORG" and not(@rel="mwp")) or (@cat="mwu" and .//node[@neclass="ORG" ])]' \
'tt:%l\t%I' $files \
| sed -e 's/\.[0-9][0-9]*$//' | sort | uniq \
| sed -e 's/\t.*//' | uniq -c \
| grep -v '^ *1 ' \
| sed -e 's/\([0-9]\) */\1\t/' | sort -f -k 2 | sort -n -r -k 1,1 -s \
> $part-count-org-$ds-$i
say $part-count-misc-$ds-$i
alto \
'fp://node[(@neclass="MISC" and not(@rel="mwp")) or (@cat="mwu" and .//node[@neclass="MISC" ])]' \
'tt:%l\t%I' $files \
| sed -e 's/\.[0-9][0-9]*$//' | sort | uniq \
| sed -e 's/\t.*//' | uniq -c \
| grep -v '^ *1 ' \
| sed -e 's/\([0-9]\) */\1\t/' | sort -f -k 2 | sort -n -r -k 1,1 -s \
> $part-count-misc-$ds-$i
done
done