diverse aanpassingen

This commit is contained in:
Peter Kleiweg
2026-04-22 19:02:34 +02:00
parent c6c2abb387
commit 9d974b3725
6 changed files with 68 additions and 39 deletions

View File

@@ -27,18 +27,12 @@ type Data struct {
} }
type Parts struct { type Parts struct {
NieuweNamen [][3]any `json:"nieuwe namen"` NieuweNamen [][5]any `json:"nieuwe namen"`
NieuweWoorden [][3]any `json:"nieuwe woorden"` NieuweWoorden [][5]any `json:"nieuwe woorden"`
Personen [][3]any `json:"personen"` Personen [][5]any `json:"personen"`
AndereNamen [][3]any `json:"andere namen"` AndereNamen [][5]any `json:"andere namen"`
Locaties [][3]any `json:"locaties"` Locaties [][5]any `json:"locaties"`
Organisaties [][3]any `json:"organisaties"` Organisaties [][5]any `json:"organisaties"`
}
type Value struct {
Word string `json:"word"`
Tags string `json:"tags"`
Count int `json:"count"`
} }
var ( var (
@@ -55,7 +49,7 @@ var (
suffix string suffix string
}{ }{
"nieuwe namen": {"nieuwe-namen", ".t20"}, "nieuwe namen": {"nieuwe-namen", ".t20"},
"nieuwe woorden": {"nieuwe-woorden", ".t20"}, "nieuwe woorden": {"nieuwe-woorden-extra", ".t20"},
"personen": {"personen", ""}, "personen": {"personen", ""},
"andere namen": {"overige-namen", ""}, "andere namen": {"overige-namen", ""},
"locaties": {"locaties", ""}, "locaties": {"locaties", ""},
@@ -127,8 +121,8 @@ func makeParts(source string) *Parts {
} }
} }
func makeValues(source, part string) [][3]any { func makeValues(source, part string) [][5]any {
v := make([][3]any, 0) v := make([][5]any, 0)
filename := fmt.Sprintf("/net/corpora/nlnieuws/data/%s-%s-%d-%02d-%d%s", filename := fmt.Sprintf("/net/corpora/nlnieuws/data/%s-%s-%d-%02d-%d%s",
sources[source], sources[source],
@@ -149,11 +143,17 @@ func makeValues(source, part string) [][3]any {
count, err := strconv.Atoi(strings.TrimSpace(aa[0])) count, err := strconv.Atoi(strings.TrimSpace(aa[0]))
x(err) x(err)
word := aa[1] word := aa[1]
var tags string var tags, lemma, postag string
if len(aa) > 2 { if len(aa) > 2 {
tags = aa[2] tags = aa[2]
} }
v = append(v, [3]any{count, word, tags}) if len(aa) > 3 {
lemma = aa[3]
}
if len(aa) > 4 {
postag = aa[4]
}
v = append(v, [5]any{count, word, tags, lemma, postag})
if lineno == 20 { if lineno == 20 {
break break
} }
@@ -173,7 +173,7 @@ func dates() (start, first, last string) {
// zoek juiste week // zoek juiste week
var y, w int var y, w int
for { for {
y, w = t.ISOWeek() y, _ = t.ISOWeek()
if y < year { if y < year {
t = t.AddDate(0, 12, 0) t = t.AddDate(0, 12, 0)
continue continue

View File

@@ -48,6 +48,12 @@ func main() {
word := aa[0] word := aa[0]
tags := aa[1] tags := aa[1]
lbl := aa[2] lbl := aa[2]
if n := len(aa); n > 3 {
lbl = aa[n-1]
for i := 2; i < n-1; i++ {
word += "\t" + aa[i]
}
}
w, ok := words[word] w, ok := words[word]
if !ok { if !ok {
w = &Word{ w = &Word{
@@ -87,7 +93,13 @@ func main() {
}) })
for _, w := range wordlist { for _, w := range wordlist {
fmt.Printf("%6d\t%s\t%s\n", w.count, w.word, getTag(w.tags)) var tail string
i := strings.Index(w.word, "\t")
if i > 0 {
tail = w.word[i:]
w.word = w.word[:i]
}
fmt.Printf("%6d\t%s\t%s%s\n", w.count, w.word, getTag(w.tags), tail)
} }
} }

View File

@@ -62,7 +62,11 @@ do
for i in 1 4 for i in 1 4
do do
files=$(find .. $(week2files $ds $i) | grep -E "$regex") files=$(find .. $(week2files $ds $i) | grep -E "$regex") || true
if [ -z "$files" ]
then
continue
fi
# tellingen met tags # tellingen met tags
@@ -86,7 +90,7 @@ do
say $part-nieuwe-woorden-$ds-$i say $part-nieuwe-woorden-$ds-$i
alto \ alto \
'fp://node[@his and not(@rel="mwp" or @cat="mwu") and not(@his="normal" or @his="name" or @his="prefix_name" or @his_1="decap" or @his_1="0" or @his="skip" or @his="robust_skip" or @his="w_dia" or @his="wo_dia" or @his="within_word_conjunct")]' \ 'fp://node[@his and not(@rel="mwp" or @cat="mwu") and not(@his="normal" or @his="decap" or @his="name" or @his="prefix_name" or @his_1="decap" or @his_1="0" or @his="skip" or @his="robust_skip" or @his="w_dia" or @his="wo_dia" or @his="within_word_conjunct")]' \
'tt:%w\t%d\t%I' $files \ 'tt:%w\t%d\t%I' $files \
| sed -e 's/pubdate: "[-0-9]*"//' \ | sed -e 's/pubdate: "[-0-9]*"//' \
| sed -e 's/\.[0-9][0-9]*$//' | sort | uniq \ | sed -e 's/\.[0-9][0-9]*$//' | sort | uniq \
@@ -125,18 +129,18 @@ do
| sed -e 's/\.[0-9][0-9]*$//' | sort | uniq \ | sed -e 's/\.[0-9][0-9]*$//' | sort | uniq \
| items2count > $part-overige-namen-$ds-$i | items2count > $part-overige-namen-$ds-$i
# tellingen met postags # tellingen met tags en postags
say $part-nieuwe-woorden-extra-$ds-$i say $part-nieuwe-woorden-extra-$ds-$i
alto \ alto \
'fp://node[@his and not(@rel="mwp" or @cat="mwu") and not(@his="normal" or @his="name" or @his="prefix_name" or @his_1="decap" or @ 'fp://node[@his and not(@rel="mwp" or @cat="mwu") and not(@his="normal" or @his="decap" or @his="name" or @his="prefix_name" or @his_1="decap" or @his_1="0" or @his="skip" or @his="robust_skip" or @his="w_dia" or @his="wo_dia" or @his="within_word_conjunct")]' \
his_1="0" or @his="skip" or @his="robust_skip" or @his="w_dia" or @his="wo_dia" or @his="within_word_conjunct")]' \ 'tt:%w\t%d\t%l\t%P\t%I' $files \
'tt:%w\t%l\t%P\t%I' $files \ | sed -e 's/pubdate: "[-0-9]*"//' \
| sed -e 's/\.[0-9][0-9]*$//' | sort | uniq \ | sed -e 's/\.[0-9][0-9]*$//' | sort | uniq \
| sed -e 's/\(.*\)\t.*/\1/' | uniq -c \ | items2count > $part-nieuwe-woorden-extra-$ds-$i
| grep -v '^ *1 ' \ top20 $part-nieuwe-woorden-extra-$ds-$i
| sed -e 's/\([0-9]\) */\1\t/' | sort -f -k 2 | sort -n -r -k 1,1 -s \
> $part-nieuwe-woorden-extra-$ds-$i # tellingen met postags
say $part-nieuwe-adjww-extra-$ds-$i say $part-nieuwe-adjww-extra-$ds-$i
alto \ alto \

View File

@@ -76,13 +76,13 @@ case $XN in
;; ;;
2) 2)
# nieuwe woorden # nieuwe woorden
EXPR='fp://node[@his and not(@rel="mwp" or @cat="mwu") and not(@his="normal" or @his="name" or @his="prefix_name" or @his_1="decap" or @his_1="0" or @his="skip" or @his="robust_skip" or @his="w_dia" or @his="wo_dia" or @his="within_word_conjunct")]' EXPR='fp://node[@his and not(@rel="mwp" or @cat="mwu") and not(@his="normal" or @his="decap" or @his="name" or @his="prefix_name" or @his_1="decap" or @his_1="0" or @his="skip" or @his="robust_skip" or @his="w_dia" or @his="wo_dia" or @his="within_word_conjunct")]'
TEMPLATE='tt:%w' TEMPLATE='tt:%w'
XVALID=1 XVALID=1
;; ;;
3) 3)
# nieuwe woorden met postag en lemma # nieuwe woorden met postag en lemma
EXPR='fp://node[@his and not(@rel="mwp" or @cat="mwu") and not(@his="normal" or @his="name" or @his="prefix_name" or @his_1="decap" or @his_1="0" or @his="skip" or @his="robust_skip" or @his="w_dia" or @his="wo_dia" or @his="within_word_conjunct")]' EXPR='fp://node[@his and not(@rel="mwp" or @cat="mwu") and not(@his="normal" or @his="decap" or @his="name" or @his="prefix_name" or @his_1="decap" or @his_1="0" or @his="skip" or @his="robust_skip" or @his="w_dia" or @his="wo_dia" or @his="within_word_conjunct")]'
TEMPLATE='tt:%w\t%l\t%P' TEMPLATE='tt:%w\t%l\t%P'
XVALID=1 XVALID=1
;; ;;

View File

@@ -56,7 +56,7 @@ function makeTD(title, values) {
if (i < values.length) { if (i < values.length) {
value = values[i] value = values[i]
} else { } else {
value = [0, '\xa0', ''] value = [0, '\xa0', '', '', '']
} }
if (i == 0) { if (i == 0) {
max = value[0] max = value[0]
@@ -68,9 +68,24 @@ function makeTD(title, values) {
tr.classList.add('tags') tr.classList.add('tags')
t2 = '<br><small>' + escape(value[2]) + '</small>' t2 = '<br><small>' + escape(value[2]) + '</small>'
} }
var t3 = ''
if (value[3] && value[4]) {
t3 =
'<hr><small><em>lemma:</em> ' +
escape(value[3]) +
'<br><em>postag:</em> ' +
escape(value[4]) +
'</small>'
}
tr.setAttribute( tr.setAttribute(
'onmouseover', 'onmouseover',
"tooltip.show('" + value[0] + ' \xa0 ' + escape(value[1]) + t2 + "')", "tooltip.show('" +
value[0] +
' \xa0 ' +
escape(value[1]) +
t2 +
t3 +
"')",
) )
tr.setAttribute('onmouseout', 'tooltip.hide()') tr.setAttribute('onmouseout', 'tooltip.hide()')
} }
@@ -97,7 +112,7 @@ async function loadSource(source, week) {
} }
document.getElementById('subtitle').innerHTML = document.getElementById('subtitle').innerHTML =
source + '' + data[week].year + ' week ' + data[week].week source + '<br>' + data[week].start + ' t/m ' + data[week].last
const d = document.createElement('div') const d = document.createElement('div')
const tab = document.createElement('table') const tab = document.createElement('table')
@@ -118,7 +133,7 @@ async function loadPart(part, week) {
} }
document.getElementById('subtitle').innerHTML = document.getElementById('subtitle').innerHTML =
part + '' + data[week].year + ' week ' + data[week].week part + '<br>' + data[week].start + ' t/m ' + data[week].last
const d = document.createElement('div') const d = document.createElement('div')
const tab = document.createElement('table') const tab = document.createElement('table')
@@ -148,9 +163,7 @@ async function loadWeken(source, part) {
data[week] = await getJSON('DATA-' + week + '-4.json') data[week] = await getJSON('DATA-' + week + '-4.json')
} }
var values = data[week][source][part] var values = data[week][source][part]
tr.appendChild( tr.appendChild(makeTD('t/m ' + data[week].last, values))
makeTD(data[week].year + ' week ' + data[week].week, values),
)
} }
} }

View File

@@ -146,6 +146,6 @@ label:hover {
transition: all 200ms linear; transition: all 200ms linear;
} }
#data.fade { #data.fade {
background-color: #ffa54f; background-color: #00fa9a;
transition: all 20ms linear; transition: all 20ms linear;
} }