tags; .De -> . De

This commit is contained in:
Peter Kleiweg
2026-05-29 12:22:57 +02:00
parent 66581d4e98
commit ca4e7af8fa
21 changed files with 123 additions and 22 deletions

View File

@@ -7,6 +7,7 @@ import (
"encoding/json"
"fmt"
"os"
"regexp"
"strconv"
"strings"
"time"
@@ -49,13 +50,14 @@ var (
// parts maps a part name (the key makeValues receives as its part argument)
// to the settings used for that part.
parts = map[string]struct {
// NOTE(review): presumably the base name of the data file for this part;
// its use is not visible in this section — confirm.
file string
// NOTE(review): presumably a suffix appended to the file name (".t20" or
// empty) — confirm against the code that opens the file.
suffix string
// re is optional. When non-nil, makeValues skips every word that matches
// it; nil means no word is filtered out.
re *regexp.Regexp
}{
"nieuwe namen": {"nieuwe-namen", ".t20"},
"nieuwe woorden": {"nieuwe-woorden-extra", ".t20"},
"personen": {"personen", ""},
"andere namen": {"overige-namen", ""},
"locaties": {"locaties", ""},
"organisaties": {"organisaties", ""},
"nieuwe namen": {"nieuwe-namen", ".t20", nil},
"nieuwe woorden": {"nieuwe-woorden-extra", ".t20", nil},
"personen": {"personen", "", nil},
"andere namen": {"overige-namen", "", nil},
"locaties": {"locaties", "", nil},
"organisaties": {"organisaties", "", regexp.MustCompile(`^(ANP|AT5)`)},
}
maanden = strings.Fields("x januari februari maart april mei juni juli augustus september oktober november december")
@@ -142,12 +144,15 @@ func makeValues(source, part string) [][5]any {
scanner := bufio.NewScanner(fp)
lineno := 0
for scanner.Scan() {
lineno++
line := scanner.Text()
aa := strings.Split(line, "\t")
count, err := strconv.Atoi(strings.TrimSpace(aa[0]))
x(err)
word := aa[1]
if parts[part].re != nil && parts[part].re.MatchString(word) {
continue
}
lineno++
var tags, lemma, postag string
if len(aa) > 2 {
tags = aa[2]