update stijgers2json.py

This commit is contained in:
Peter Kleiweg
2026-06-20 12:41:48 +02:00
parent c2389c65af
commit ce8ed07327
2 changed files with 22 additions and 14 deletions

View File

@@ -143,6 +143,14 @@ do
| sed -e 's/\.[0-9][0-9]*$//' | sort | uniq \
| items2count > $part-overige-namen-$ds-$i
# Build the "allewoorden" table for the current $part / $ds / $i.
# `say` and `items2count` are helpers that are not visible in this chunk;
# `say` is handed the output file name (presumably to report progress --
# confirm against its definition).
say $part-allewoorden-$ds-$i
# Pipeline, stage by stage:
#   1. alto runs over $files and selects nodes that have @pt, except those
#      with pt="let", rel="mwp" or an @neclass attribute, plus cat="mwu"
#      nodes that have no descendant carrying @neclass. Matches are printed
#      through the tab-separated tt: template.
#      NOTE(review): the template has a literal "i" directly in front of %d;
#      confirm that this is intended and not a stray keystroke.
#   2. The first sed deletes the first `pubdate: "<digits and dashes>"` text
#      found on each line.
#   3. The second sed drops a trailing ".<digits>" from each line.
#   4. sort | uniq collapses identical lines.
#   5. items2count reads the result on stdin; whatever it prints becomes the
#      file $part-allewoorden-$ds-$i.
alto \
'fp://node[(@pt and not(@pt="let" or @rel="mwp" or @neclass)) or (@cat="mwu" and not(.//node[@neclass]))]' \
'tt:%l\ti%d\t%I' $files \
| sed -e 's/pubdate: "[-0-9]*"//' \
| sed -e 's/\.[0-9][0-9]*$//' | sort | uniq \
| items2count > $part-allewoorden-$ds-$i
# tellingen met tags en postags
say $part-nieuwe-woorden-extra-$ds-$i
@@ -166,18 +174,6 @@ do
| sed -e 's/\([0-9]\) */\1\t/' | sort -f -k 2 | sort -n -r -k 1,1 -s \
> $part-nieuwe-adjww-extra-$ds-$i
# bare counts
#
# Write a frequency table to $part-allewoorden-$ds-$i. `say` is a helper that
# is not visible in this chunk; it is handed the output file name (presumably
# to report progress -- confirm against its definition).
#
# Pipeline, stage by stage:
#   1. alto runs over $files and selects nodes that have @pt, except those
#      with pt="let", rel="mwp" or an @neclass attribute, plus cat="mwu"
#      nodes that have no descendant carrying @neclass. Each match is printed
#      as two tab-separated fields (template '%l\t%I').
#   2. sed drops a trailing ".<digits>" from each line, then sort | uniq
#      collapses identical lines.
#   3. sed 's/\t.*//' keeps only the text before the first tab (this relies
#      on sed understanding \t, as GNU sed does); uniq -c then prefixes each
#      run of identical lines with its count.
#   4. grep -v drops every entry whose count is exactly 1.
#   5. The last sed replaces the blanks between count and text with one tab.
#      It must match the WHOLE run of digits: matching a single digit would
#      put the tab inside any count >= 10 ("12 word" -> "1<TAB>2 word") and
#      break the numeric sort below.
#   6. sort -f -k 2 orders case-insensitively from the second field on; the
#      stable (-s) reverse numeric sort on field 1 then puts the highest
#      counts first while keeping that order among equal counts.
say $part-allewoorden-$ds-$i
alto \
'fp://node[(@pt and not(@pt="let" or @rel="mwp" or @neclass)) or (@cat="mwu" and not(.//node[@neclass]))]' \
'tt:%l\t%I' $files \
| sed -e 's/\.[0-9][0-9]*$//' | sort | uniq \
| sed -e 's/\t.*//' | uniq -c \
| grep -v '^ *1 ' \
| sed -e 's/\([0-9][0-9]*\) */\1\t/' | sort -f -k 2 | sort -n -r -k 1,1 -s \
> $part-allewoorden-$ds-$i
done
# score