update stijgers2json.py
This commit is contained in:
20
collect.sh
20
collect.sh
@@ -143,6 +143,14 @@ do
|
||||
| sed -e 's/\.[0-9][0-9]*$//' | sort | uniq \
|
||||
| items2count > $part-overige-namen-$ds-$i
|
||||
|
||||
# Builds the $part-allewoorden-$ds-$i file ("allewoorden" = "all words").
# `say` (defined outside this view) is called with the output file name first.
# The alto node query keeps nodes that carry @pt, excluding pt="let",
# rel="mwp" and nodes with @neclass, plus cat="mwu" nodes that have no
# descendant node with @neclass.
# The tt: template emits three tab-separated fields: %l, a literal "i"
# followed by %d, and %I -- presumably lemma, metadata and sentence id;
# TODO confirm against the alto documentation.
# The first sed removes any `pubdate: "<digits and dashes>"` text; the second
# strips a trailing ".<digits>" suffix from each line. `sort | uniq` then drops
# duplicate lines before items2count (defined elsewhere) produces the file.
say $part-allewoorden-$ds-$i
|
||||
alto \
|
||||
'fp://node[(@pt and not(@pt="let" or @rel="mwp" or @neclass)) or (@cat="mwu" and not(.//node[@neclass]))]' \
|
||||
'tt:%l\ti%d\t%I' $files \
|
||||
| sed -e 's/pubdate: "[-0-9]*"//' \
|
||||
| sed -e 's/\.[0-9][0-9]*$//' | sort | uniq \
|
||||
| items2count > $part-allewoorden-$ds-$i
|
||||
|
||||
# counts with tags and POS tags
|
||||
|
||||
say $part-nieuwe-woorden-extra-$ds-$i
|
||||
@@ -166,18 +174,6 @@ do
|
||||
| sed -e 's/\([0-9]\) */\1\t/' | sort -f -k 2 | sort -n -r -k 1,1 -s \
|
||||
> $part-nieuwe-adjww-extra-$ds-$i
|
||||
|
||||
# bare counts
# Builds the $part-allewoorden-$ds-$i file ("allewoorden" = "all words").
# Stages, in order:
#   1. alto prints two tab-separated fields per matching node (%l and %I --
#      presumably lemma and sentence id; TODO confirm against the alto docs).
#      The node query is: nodes with @pt, excluding pt="let", rel="mwp" and
#      nodes with @neclass, plus cat="mwu" nodes without a @neclass descendant.
#   2. sed strips a trailing ".<digits>" suffix; `sort | uniq` drops duplicates.
#   3. sed removes everything from the first tab onward, then `uniq -c`
#      counts runs of identical lines.
#   4. grep -v discards lines whose count is exactly 1.
#   5. sed puts a tab after the count; the two sorts order by key 2
#      case-insensitively and then, stably, by count in descending numeric order.
# NOTE(review): as displayed, ` *` in step 5 can match zero spaces, so the
# substitution would fire right after the first digit of a multi-digit count.
# The real file may have two spaces there (whitespace may have been collapsed
# in this rendering) -- confirm before relying on this pipeline.
|
||||
|
||||
say $part-allewoorden-$ds-$i
|
||||
alto \
|
||||
'fp://node[(@pt and not(@pt="let" or @rel="mwp" or @neclass)) or (@cat="mwu" and not(.//node[@neclass]))]' \
|
||||
'tt:%l\t%I' $files \
|
||||
| sed -e 's/\.[0-9][0-9]*$//' | sort | uniq \
|
||||
| sed -e 's/\t.*//' | uniq -c \
|
||||
| grep -v '^ *1 ' \
|
||||
| sed -e 's/\([0-9]\) */\1\t/' | sort -f -k 2 | sort -n -r -k 1,1 -s \
|
||||
> $part-allewoorden-$ds-$i
|
||||
|
||||
done
|
||||
|
||||
# score
|
||||
|
||||
Reference in New Issue
Block a user