tags; .De -> . De
This commit is contained in:
@@ -7,6 +7,7 @@ import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
@@ -49,13 +50,14 @@ var (
|
||||
parts = map[string]struct {
|
||||
file string
|
||||
suffix string
|
||||
re *regexp.Regexp
|
||||
}{
|
||||
"nieuwe namen": {"nieuwe-namen", ".t20"},
|
||||
"nieuwe woorden": {"nieuwe-woorden-extra", ".t20"},
|
||||
"personen": {"personen", ""},
|
||||
"andere namen": {"overige-namen", ""},
|
||||
"locaties": {"locaties", ""},
|
||||
"organisaties": {"organisaties", ""},
|
||||
"nieuwe namen": {"nieuwe-namen", ".t20", nil},
|
||||
"nieuwe woorden": {"nieuwe-woorden-extra", ".t20", nil},
|
||||
"personen": {"personen", "", nil},
|
||||
"andere namen": {"overige-namen", "", nil},
|
||||
"locaties": {"locaties", "", nil},
|
||||
"organisaties": {"organisaties", "", regexp.MustCompile(`^(ANP|AT5)`)},
|
||||
}
|
||||
|
||||
maanden = strings.Fields("x januari februari maart april mei juni juli augustus september oktober november december")
|
||||
@@ -142,12 +144,15 @@ func makeValues(source, part string) [][5]any {
|
||||
scanner := bufio.NewScanner(fp)
|
||||
lineno := 0
|
||||
for scanner.Scan() {
|
||||
lineno++
|
||||
line := scanner.Text()
|
||||
aa := strings.Split(line, "\t")
|
||||
count, err := strconv.Atoi(strings.TrimSpace(aa[0]))
|
||||
x(err)
|
||||
word := aa[1]
|
||||
if parts[part].re != nil && parts[part].re.MatchString(word) {
|
||||
continue
|
||||
}
|
||||
lineno++
|
||||
var tags, lemma, postag string
|
||||
if len(aa) > 2 {
|
||||
tags = aa[2]
|
||||
|
||||
Reference in New Issue
Block a user