diverse aanpassingen
This commit is contained in:
@@ -27,18 +27,12 @@ type Data struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
type Parts struct {
|
type Parts struct {
|
||||||
NieuweNamen [][3]any `json:"nieuwe namen"`
|
NieuweNamen [][5]any `json:"nieuwe namen"`
|
||||||
NieuweWoorden [][3]any `json:"nieuwe woorden"`
|
NieuweWoorden [][5]any `json:"nieuwe woorden"`
|
||||||
Personen [][3]any `json:"personen"`
|
Personen [][5]any `json:"personen"`
|
||||||
AndereNamen [][3]any `json:"andere namen"`
|
AndereNamen [][5]any `json:"andere namen"`
|
||||||
Locaties [][3]any `json:"locaties"`
|
Locaties [][5]any `json:"locaties"`
|
||||||
Organisaties [][3]any `json:"organisaties"`
|
Organisaties [][5]any `json:"organisaties"`
|
||||||
}
|
|
||||||
|
|
||||||
type Value struct {
|
|
||||||
Word string `json:"word"`
|
|
||||||
Tags string `json:"tags"`
|
|
||||||
Count int `json:"count"`
|
|
||||||
}
|
}
|
||||||
|
|
||||||
var (
|
var (
|
||||||
@@ -55,7 +49,7 @@ var (
|
|||||||
suffix string
|
suffix string
|
||||||
}{
|
}{
|
||||||
"nieuwe namen": {"nieuwe-namen", ".t20"},
|
"nieuwe namen": {"nieuwe-namen", ".t20"},
|
||||||
"nieuwe woorden": {"nieuwe-woorden", ".t20"},
|
"nieuwe woorden": {"nieuwe-woorden-extra", ".t20"},
|
||||||
"personen": {"personen", ""},
|
"personen": {"personen", ""},
|
||||||
"andere namen": {"overige-namen", ""},
|
"andere namen": {"overige-namen", ""},
|
||||||
"locaties": {"locaties", ""},
|
"locaties": {"locaties", ""},
|
||||||
@@ -127,8 +121,8 @@ func makeParts(source string) *Parts {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func makeValues(source, part string) [][3]any {
|
func makeValues(source, part string) [][5]any {
|
||||||
v := make([][3]any, 0)
|
v := make([][5]any, 0)
|
||||||
|
|
||||||
filename := fmt.Sprintf("/net/corpora/nlnieuws/data/%s-%s-%d-%02d-%d%s",
|
filename := fmt.Sprintf("/net/corpora/nlnieuws/data/%s-%s-%d-%02d-%d%s",
|
||||||
sources[source],
|
sources[source],
|
||||||
@@ -149,11 +143,17 @@ func makeValues(source, part string) [][3]any {
|
|||||||
count, err := strconv.Atoi(strings.TrimSpace(aa[0]))
|
count, err := strconv.Atoi(strings.TrimSpace(aa[0]))
|
||||||
x(err)
|
x(err)
|
||||||
word := aa[1]
|
word := aa[1]
|
||||||
var tags string
|
var tags, lemma, postag string
|
||||||
if len(aa) > 2 {
|
if len(aa) > 2 {
|
||||||
tags = aa[2]
|
tags = aa[2]
|
||||||
}
|
}
|
||||||
v = append(v, [3]any{count, word, tags})
|
if len(aa) > 3 {
|
||||||
|
lemma = aa[3]
|
||||||
|
}
|
||||||
|
if len(aa) > 4 {
|
||||||
|
postag = aa[4]
|
||||||
|
}
|
||||||
|
v = append(v, [5]any{count, word, tags, lemma, postag})
|
||||||
if lineno == 20 {
|
if lineno == 20 {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
@@ -173,7 +173,7 @@ func dates() (start, first, last string) {
|
|||||||
// zoek juiste week
|
// zoek juiste week
|
||||||
var y, w int
|
var y, w int
|
||||||
for {
|
for {
|
||||||
y, w = t.ISOWeek()
|
y, _ = t.ISOWeek()
|
||||||
if y < year {
|
if y < year {
|
||||||
t = t.AddDate(0, 12, 0)
|
t = t.AddDate(0, 12, 0)
|
||||||
continue
|
continue
|
||||||
|
|||||||
@@ -48,6 +48,12 @@ func main() {
|
|||||||
word := aa[0]
|
word := aa[0]
|
||||||
tags := aa[1]
|
tags := aa[1]
|
||||||
lbl := aa[2]
|
lbl := aa[2]
|
||||||
|
if n := len(aa); n > 3 {
|
||||||
|
lbl = aa[n-1]
|
||||||
|
for i := 2; i < n-1; i++ {
|
||||||
|
word += "\t" + aa[i]
|
||||||
|
}
|
||||||
|
}
|
||||||
w, ok := words[word]
|
w, ok := words[word]
|
||||||
if !ok {
|
if !ok {
|
||||||
w = &Word{
|
w = &Word{
|
||||||
@@ -87,7 +93,13 @@ func main() {
|
|||||||
})
|
})
|
||||||
|
|
||||||
for _, w := range wordlist {
|
for _, w := range wordlist {
|
||||||
fmt.Printf("%6d\t%s\t%s\n", w.count, w.word, getTag(w.tags))
|
var tail string
|
||||||
|
i := strings.Index(w.word, "\t")
|
||||||
|
if i > 0 {
|
||||||
|
tail = w.word[i:]
|
||||||
|
w.word = w.word[:i]
|
||||||
|
}
|
||||||
|
fmt.Printf("%6d\t%s\t%s%s\n", w.count, w.word, getTag(w.tags), tail)
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
24
collect.sh
24
collect.sh
@@ -62,7 +62,11 @@ do
|
|||||||
|
|
||||||
for i in 1 4
|
for i in 1 4
|
||||||
do
|
do
|
||||||
files=$(find .. $(week2files $ds $i) | grep -E "$regex")
|
files=$(find .. $(week2files $ds $i) | grep -E "$regex") || true
|
||||||
|
if [ -z "$files" ]
|
||||||
|
then
|
||||||
|
continue
|
||||||
|
fi
|
||||||
|
|
||||||
# tellingen met tags
|
# tellingen met tags
|
||||||
|
|
||||||
@@ -86,7 +90,7 @@ do
|
|||||||
|
|
||||||
say $part-nieuwe-woorden-$ds-$i
|
say $part-nieuwe-woorden-$ds-$i
|
||||||
alto \
|
alto \
|
||||||
'fp://node[@his and not(@rel="mwp" or @cat="mwu") and not(@his="normal" or @his="name" or @his="prefix_name" or @his_1="decap" or @his_1="0" or @his="skip" or @his="robust_skip" or @his="w_dia" or @his="wo_dia" or @his="within_word_conjunct")]' \
|
'fp://node[@his and not(@rel="mwp" or @cat="mwu") and not(@his="normal" or @his="decap" or @his="name" or @his="prefix_name" or @his_1="decap" or @his_1="0" or @his="skip" or @his="robust_skip" or @his="w_dia" or @his="wo_dia" or @his="within_word_conjunct")]' \
|
||||||
'tt:%w\t%d\t%I' $files \
|
'tt:%w\t%d\t%I' $files \
|
||||||
| sed -e 's/pubdate: "[-0-9]*"//' \
|
| sed -e 's/pubdate: "[-0-9]*"//' \
|
||||||
| sed -e 's/\.[0-9][0-9]*$//' | sort | uniq \
|
| sed -e 's/\.[0-9][0-9]*$//' | sort | uniq \
|
||||||
@@ -125,18 +129,18 @@ do
|
|||||||
| sed -e 's/\.[0-9][0-9]*$//' | sort | uniq \
|
| sed -e 's/\.[0-9][0-9]*$//' | sort | uniq \
|
||||||
| items2count > $part-overige-namen-$ds-$i
|
| items2count > $part-overige-namen-$ds-$i
|
||||||
|
|
||||||
# tellingen met postags
|
# tellingen met tags en postags
|
||||||
|
|
||||||
say $part-nieuwe-woorden-extra-$ds-$i
|
say $part-nieuwe-woorden-extra-$ds-$i
|
||||||
alto \
|
alto \
|
||||||
'fp://node[@his and not(@rel="mwp" or @cat="mwu") and not(@his="normal" or @his="name" or @his="prefix_name" or @his_1="decap" or @
|
'fp://node[@his and not(@rel="mwp" or @cat="mwu") and not(@his="normal" or @his="decap" or @his="name" or @his="prefix_name" or @his_1="decap" or @his_1="0" or @his="skip" or @his="robust_skip" or @his="w_dia" or @his="wo_dia" or @his="within_word_conjunct")]' \
|
||||||
his_1="0" or @his="skip" or @his="robust_skip" or @his="w_dia" or @his="wo_dia" or @his="within_word_conjunct")]' \
|
'tt:%w\t%d\t%l\t%P\t%I' $files \
|
||||||
'tt:%w\t%l\t%P\t%I' $files \
|
| sed -e 's/pubdate: "[-0-9]*"//' \
|
||||||
| sed -e 's/\.[0-9][0-9]*$//' | sort | uniq \
|
| sed -e 's/\.[0-9][0-9]*$//' | sort | uniq \
|
||||||
| sed -e 's/\(.*\)\t.*/\1/' | uniq -c \
|
| items2count > $part-nieuwe-woorden-extra-$ds-$i
|
||||||
| grep -v '^ *1 ' \
|
top20 $part-nieuwe-woorden-extra-$ds-$i
|
||||||
| sed -e 's/\([0-9]\) */\1\t/' | sort -f -k 2 | sort -n -r -k 1,1 -s \
|
|
||||||
> $part-nieuwe-woorden-extra-$ds-$i
|
# tellingen met postags
|
||||||
|
|
||||||
say $part-nieuwe-adjww-extra-$ds-$i
|
say $part-nieuwe-adjww-extra-$ds-$i
|
||||||
alto \
|
alto \
|
||||||
|
|||||||
4
namen.sh
4
namen.sh
@@ -76,13 +76,13 @@ case $XN in
|
|||||||
;;
|
;;
|
||||||
2)
|
2)
|
||||||
# nieuwe woorden
|
# nieuwe woorden
|
||||||
EXPR='fp://node[@his and not(@rel="mwp" or @cat="mwu") and not(@his="normal" or @his="name" or @his="prefix_name" or @his_1="decap" or @his_1="0" or @his="skip" or @his="robust_skip" or @his="w_dia" or @his="wo_dia" or @his="within_word_conjunct")]'
|
EXPR='fp://node[@his and not(@rel="mwp" or @cat="mwu") and not(@his="normal" or @his="decap" or @his="name" or @his="prefix_name" or @his_1="decap" or @his_1="0" or @his="skip" or @his="robust_skip" or @his="w_dia" or @his="wo_dia" or @his="within_word_conjunct")]'
|
||||||
TEMPLATE='tt:%w'
|
TEMPLATE='tt:%w'
|
||||||
XVALID=1
|
XVALID=1
|
||||||
;;
|
;;
|
||||||
3)
|
3)
|
||||||
# nieuwe woorden met postag en lemma
|
# nieuwe woorden met postag en lemma
|
||||||
EXPR='fp://node[@his and not(@rel="mwp" or @cat="mwu") and not(@his="normal" or @his="name" or @his="prefix_name" or @his_1="decap" or @his_1="0" or @his="skip" or @his="robust_skip" or @his="w_dia" or @his="wo_dia" or @his="within_word_conjunct")]'
|
EXPR='fp://node[@his and not(@rel="mwp" or @cat="mwu") and not(@his="normal" or @his="decap" or @his="name" or @his="prefix_name" or @his_1="decap" or @his_1="0" or @his="skip" or @his="robust_skip" or @his="w_dia" or @his="wo_dia" or @his="within_word_conjunct")]'
|
||||||
TEMPLATE='tt:%w\t%l\t%P'
|
TEMPLATE='tt:%w\t%l\t%P'
|
||||||
XVALID=1
|
XVALID=1
|
||||||
;;
|
;;
|
||||||
|
|||||||
27
www/app.js
27
www/app.js
@@ -56,7 +56,7 @@ function makeTD(title, values) {
|
|||||||
if (i < values.length) {
|
if (i < values.length) {
|
||||||
value = values[i]
|
value = values[i]
|
||||||
} else {
|
} else {
|
||||||
value = [0, '\xa0', '']
|
value = [0, '\xa0', '', '', '']
|
||||||
}
|
}
|
||||||
if (i == 0) {
|
if (i == 0) {
|
||||||
max = value[0]
|
max = value[0]
|
||||||
@@ -68,9 +68,24 @@ function makeTD(title, values) {
|
|||||||
tr.classList.add('tags')
|
tr.classList.add('tags')
|
||||||
t2 = '<br><small>' + escape(value[2]) + '</small>'
|
t2 = '<br><small>' + escape(value[2]) + '</small>'
|
||||||
}
|
}
|
||||||
|
var t3 = ''
|
||||||
|
if (value[3] && value[4]) {
|
||||||
|
t3 =
|
||||||
|
'<hr><small><em>lemma:</em> ' +
|
||||||
|
escape(value[3]) +
|
||||||
|
'<br><em>postag:</em> ' +
|
||||||
|
escape(value[4]) +
|
||||||
|
'</small>'
|
||||||
|
}
|
||||||
tr.setAttribute(
|
tr.setAttribute(
|
||||||
'onmouseover',
|
'onmouseover',
|
||||||
"tooltip.show('" + value[0] + ' \xa0 ' + escape(value[1]) + t2 + "')",
|
"tooltip.show('" +
|
||||||
|
value[0] +
|
||||||
|
' \xa0 ' +
|
||||||
|
escape(value[1]) +
|
||||||
|
t2 +
|
||||||
|
t3 +
|
||||||
|
"')",
|
||||||
)
|
)
|
||||||
tr.setAttribute('onmouseout', 'tooltip.hide()')
|
tr.setAttribute('onmouseout', 'tooltip.hide()')
|
||||||
}
|
}
|
||||||
@@ -97,7 +112,7 @@ async function loadSource(source, week) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
document.getElementById('subtitle').innerHTML =
|
document.getElementById('subtitle').innerHTML =
|
||||||
source + ' — ' + data[week].year + ' week ' + data[week].week
|
source + '<br>' + data[week].start + ' t/m ' + data[week].last
|
||||||
|
|
||||||
const d = document.createElement('div')
|
const d = document.createElement('div')
|
||||||
const tab = document.createElement('table')
|
const tab = document.createElement('table')
|
||||||
@@ -118,7 +133,7 @@ async function loadPart(part, week) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
document.getElementById('subtitle').innerHTML =
|
document.getElementById('subtitle').innerHTML =
|
||||||
part + ' — ' + data[week].year + ' week ' + data[week].week
|
part + '<br>' + data[week].start + ' t/m ' + data[week].last
|
||||||
|
|
||||||
const d = document.createElement('div')
|
const d = document.createElement('div')
|
||||||
const tab = document.createElement('table')
|
const tab = document.createElement('table')
|
||||||
@@ -148,9 +163,7 @@ async function loadWeken(source, part) {
|
|||||||
data[week] = await getJSON('DATA-' + week + '-4.json')
|
data[week] = await getJSON('DATA-' + week + '-4.json')
|
||||||
}
|
}
|
||||||
var values = data[week][source][part]
|
var values = data[week][source][part]
|
||||||
tr.appendChild(
|
tr.appendChild(makeTD('t/m ' + data[week].last, values))
|
||||||
makeTD(data[week].year + ' week ' + data[week].week, values),
|
|
||||||
)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -146,6 +146,6 @@ label:hover {
|
|||||||
transition: all 200ms linear;
|
transition: all 200ms linear;
|
||||||
}
|
}
|
||||||
#data.fade {
|
#data.fade {
|
||||||
background-color: #ffa54f;
|
background-color: #00fa9a;
|
||||||
transition: all 20ms linear;
|
transition: all 20ms linear;
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user