diverse aanpassingen
This commit is contained in:
@@ -27,18 +27,12 @@ type Data struct {
|
||||
}
|
||||
|
||||
type Parts struct {
|
||||
NieuweNamen [][3]any `json:"nieuwe namen"`
|
||||
NieuweWoorden [][3]any `json:"nieuwe woorden"`
|
||||
Personen [][3]any `json:"personen"`
|
||||
AndereNamen [][3]any `json:"andere namen"`
|
||||
Locaties [][3]any `json:"locaties"`
|
||||
Organisaties [][3]any `json:"organisaties"`
|
||||
}
|
||||
|
||||
type Value struct {
|
||||
Word string `json:"word"`
|
||||
Tags string `json:"tags"`
|
||||
Count int `json:"count"`
|
||||
NieuweNamen [][5]any `json:"nieuwe namen"`
|
||||
NieuweWoorden [][5]any `json:"nieuwe woorden"`
|
||||
Personen [][5]any `json:"personen"`
|
||||
AndereNamen [][5]any `json:"andere namen"`
|
||||
Locaties [][5]any `json:"locaties"`
|
||||
Organisaties [][5]any `json:"organisaties"`
|
||||
}
|
||||
|
||||
var (
|
||||
@@ -55,7 +49,7 @@ var (
|
||||
suffix string
|
||||
}{
|
||||
"nieuwe namen": {"nieuwe-namen", ".t20"},
|
||||
"nieuwe woorden": {"nieuwe-woorden", ".t20"},
|
||||
"nieuwe woorden": {"nieuwe-woorden-extra", ".t20"},
|
||||
"personen": {"personen", ""},
|
||||
"andere namen": {"overige-namen", ""},
|
||||
"locaties": {"locaties", ""},
|
||||
@@ -127,8 +121,8 @@ func makeParts(source string) *Parts {
|
||||
}
|
||||
}
|
||||
|
||||
func makeValues(source, part string) [][3]any {
|
||||
v := make([][3]any, 0)
|
||||
func makeValues(source, part string) [][5]any {
|
||||
v := make([][5]any, 0)
|
||||
|
||||
filename := fmt.Sprintf("/net/corpora/nlnieuws/data/%s-%s-%d-%02d-%d%s",
|
||||
sources[source],
|
||||
@@ -149,11 +143,17 @@ func makeValues(source, part string) [][3]any {
|
||||
count, err := strconv.Atoi(strings.TrimSpace(aa[0]))
|
||||
x(err)
|
||||
word := aa[1]
|
||||
var tags string
|
||||
var tags, lemma, postag string
|
||||
if len(aa) > 2 {
|
||||
tags = aa[2]
|
||||
}
|
||||
v = append(v, [3]any{count, word, tags})
|
||||
if len(aa) > 3 {
|
||||
lemma = aa[3]
|
||||
}
|
||||
if len(aa) > 4 {
|
||||
postag = aa[4]
|
||||
}
|
||||
v = append(v, [5]any{count, word, tags, lemma, postag})
|
||||
if lineno == 20 {
|
||||
break
|
||||
}
|
||||
@@ -173,7 +173,7 @@ func dates() (start, first, last string) {
|
||||
// zoek juiste week
|
||||
var y, w int
|
||||
for {
|
||||
y, w = t.ISOWeek()
|
||||
y, _ = t.ISOWeek()
|
||||
if y < year {
|
||||
t = t.AddDate(0, 12, 0)
|
||||
continue
|
||||
|
||||
@@ -48,6 +48,12 @@ func main() {
|
||||
word := aa[0]
|
||||
tags := aa[1]
|
||||
lbl := aa[2]
|
||||
if n := len(aa); n > 3 {
|
||||
lbl = aa[n-1]
|
||||
for i := 2; i < n-1; i++ {
|
||||
word += "\t" + aa[i]
|
||||
}
|
||||
}
|
||||
w, ok := words[word]
|
||||
if !ok {
|
||||
w = &Word{
|
||||
@@ -87,7 +93,13 @@ func main() {
|
||||
})
|
||||
|
||||
for _, w := range wordlist {
|
||||
fmt.Printf("%6d\t%s\t%s\n", w.count, w.word, getTag(w.tags))
|
||||
var tail string
|
||||
i := strings.Index(w.word, "\t")
|
||||
if i > 0 {
|
||||
tail = w.word[i:]
|
||||
w.word = w.word[:i]
|
||||
}
|
||||
fmt.Printf("%6d\t%s\t%s%s\n", w.count, w.word, getTag(w.tags), tail)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user