cats en tags

This commit is contained in:
Peter Kleiweg
2026-04-01 19:30:10 +02:00
parent 1fb1867550
commit e550b58889
14 changed files with 61 additions and 22 deletions

View File

@@ -90,7 +90,7 @@ func main() {
fp, err := os.Create("out/" + filename[:len(filename)-4] + ".txt")
x(err)
for _, cat := range item.Cats {
x(fmt.Fprintf(fp, "##META text cat = %s\n", fixSpace(cat)))
x(fmt.Fprintf(fp, "##META text tag = %s\n", fixSpace(cat)))
}
x(fp.WriteString(text))
x(fp.Close())

View File

@@ -58,6 +58,9 @@ Alpino -flag treebank xml debug=1 end_hook=xml user_max=900000 -parse < $corpus.
cd xml
alto -o $corpus.data.dz *.xml 2> /dev/null
# telling per bericht, niet per zin
/net/corpora/nlnieuws/namen.sh -x 10 -s $corpus.data.dz > $corpus.tag.txt
cd ../..
rm -fr out