diverse aanpassingen

This commit is contained in:
Peter Kleiweg
2026-04-22 19:02:34 +02:00
parent c6c2abb387
commit 9d974b3725
6 changed files with 68 additions and 39 deletions

View File

@@ -62,7 +62,11 @@ do
for i in 1 4
do
files=$(find .. $(week2files $ds $i) | grep -E "$regex")
files=$(find .. $(week2files $ds $i) | grep -E "$regex") || true
if [ -z "$files" ]
then
continue
fi
# tellingen met tags
@@ -86,7 +90,7 @@ do
say $part-nieuwe-woorden-$ds-$i
alto \
'fp://node[@his and not(@rel="mwp" or @cat="mwu") and not(@his="normal" or @his="name" or @his="prefix_name" or @his_1="decap" or @his_1="0" or @his="skip" or @his="robust_skip" or @his="w_dia" or @his="wo_dia" or @his="within_word_conjunct")]' \
'fp://node[@his and not(@rel="mwp" or @cat="mwu") and not(@his="normal" or @his="decap" or @his="name" or @his="prefix_name" or @his_1="decap" or @his_1="0" or @his="skip" or @his="robust_skip" or @his="w_dia" or @his="wo_dia" or @his="within_word_conjunct")]' \
'tt:%w\t%d\t%I' $files \
| sed -e 's/pubdate: "[-0-9]*"//' \
| sed -e 's/\.[0-9][0-9]*$//' | sort | uniq \
@@ -125,18 +129,18 @@ do
| sed -e 's/\.[0-9][0-9]*$//' | sort | uniq \
| items2count > $part-overige-namen-$ds-$i
# tellingen met postags
# tellingen met tags en postags
say $part-nieuwe-woorden-extra-$ds-$i
alto \
'fp://node[@his and not(@rel="mwp" or @cat="mwu") and not(@his="normal" or @his="name" or @his="prefix_name" or @his_1="decap" or @his_1="0" or @his="skip" or @his="robust_skip" or @his="w_dia" or @his="wo_dia" or @his="within_word_conjunct")]' \
'tt:%w\t%l\t%P\t%I' $files \
'fp://node[@his and not(@rel="mwp" or @cat="mwu") and not(@his="normal" or @his="decap" or @his="name" or @his="prefix_name" or @his_1="decap" or @his_1="0" or @his="skip" or @his="robust_skip" or @his="w_dia" or @his="wo_dia" or @his="within_word_conjunct")]' \
'tt:%w\t%d\t%l\t%P\t%I' $files \
| sed -e 's/pubdate: "[-0-9]*"//' \
| sed -e 's/\.[0-9][0-9]*$//' | sort | uniq \
| sed -e 's/\(.*\)\t.*/\1/' | uniq -c \
| grep -v '^ *1 ' \
| sed -e 's/\([0-9]\) */\1\t/' | sort -f -k 2 | sort -n -r -k 1,1 -s \
> $part-nieuwe-woorden-extra-$ds-$i
| items2count > $part-nieuwe-woorden-extra-$ds-$i
top20 $part-nieuwe-woorden-extra-$ds-$i
# tellingen met postags
say $part-nieuwe-adjww-extra-$ds-$i
alto \