gone, trends

This commit is contained in:
Peter Kleiweg
2026-06-06 17:10:38 +02:00
parent 9f29222909
commit 1f4a084624
6 changed files with 145 additions and 9 deletions

2
.gitignore vendored
View File

@@ -45,9 +45,11 @@ VRT/vrt
bin/data2json
bin/dates2json
bin/flush
bin/gone
bin/items2count
bin/rang
bin/top20
bin/trends
bin/week2files
20??
corpus

View File

@@ -21,9 +21,11 @@ all:
make bin/data2json
make bin/dates2json
make bin/flush
make bin/gone
make bin/items2count
make bin/rang
make bin/top20
make bin/trends
make bin/week2files
bin/data2json: cmd/data2json/*.go
@@ -35,6 +37,9 @@ bin/dates2json: cmd/dates2json/*.go
bin/flush: cmd/flush/*.go
go build -o $@ $^
bin/gone: cmd/gone/*.go
go build -o $@ $^
bin/items2count: cmd/items2count/*.go
go build -o $@ $^
@@ -44,6 +49,9 @@ bin/rang: cmd/rang/*.go
bin/top20: cmd/top20/*.go
go build -o $@ $^
bin/trends: cmd/trends/*.go
go build -o $@ $^
bin/week2files: cmd/week2files/*.go
go build -o $@ $^

41
cmd/gone/gone.go Normal file
View File

@@ -0,0 +1,41 @@
package main
import (
e "codeberg.org/pebbe/errors"
"bufio"
"fmt"
"os"
"strings"
)
var (
// x is a short alias for e.ExitErr; main passes every error value it
// obtains to it.
// NOTE(review): presumably terminates the program when the error is
// non-nil — confirm in codeberg.org/pebbe/errors.
x = e.ExitErr
)
// main lists the entries of the first input file whose word does not occur
// in the second input file.
//
// Usage: gone EARLIER CURRENT
//
// Both files are read line by line and split on tabs; the second field of a
// line is its word. Every line of EARLIER (os.Args[1]) whose word is absent
// from CURRENT (os.Args[2]) is written to standard output as
// "first-field<TAB>word". After that, one closing line is always written: the
// first field of the last line read from EARLIER, a tab, and an empty word.
//
// A missing argument produces a usage message and exit status 1. A line with
// fewer than two tab-separated fields is reported through x together with
// its file name and line number, instead of causing an index panic.
func main() {
	if len(os.Args) < 3 {
		fmt.Fprintln(os.Stderr, "Usage: gone EARLIER CURRENT")
		os.Exit(1)
	}
	earlierName, currentName := os.Args[1], os.Args[2]

	// Collect the set of words present in CURRENT.
	current := make(map[string]bool)
	fp, err := os.Open(currentName)
	x(err)
	scanner := bufio.NewScanner(fp)
	for lineno := 1; scanner.Scan(); lineno++ {
		aa := strings.Split(scanner.Text(), "\t")
		if len(aa) < 2 {
			x(fmt.Errorf("%s:%d: expected at least two tab-separated fields", currentName, lineno))
		}
		current[aa[1]] = true
	}
	x(scanner.Err())
	fp.Close()

	// Walk EARLIER and report every word that is not in the set.
	var last string
	fp, err = os.Open(earlierName)
	x(err)
	scanner = bufio.NewScanner(fp)
	for lineno := 1; scanner.Scan(); lineno++ {
		aa := strings.Split(scanner.Text(), "\t")
		if len(aa) < 2 {
			x(fmt.Errorf("%s:%d: expected at least two tab-separated fields", earlierName, lineno))
		}
		if !current[aa[1]] {
			fmt.Printf("%s\t%s\n", aa[0], aa[1])
		}
		// Remember the first field so the closing line can repeat it.
		last = aa[0]
	}
	x(scanner.Err())
	fp.Close()

	// Closing line: first field of the final EARLIER line with an empty word.
	fmt.Printf("%s\t\n", last)
}

View File

@@ -1,5 +1,7 @@
package main
// alto 'fp://node[....]' 'tt:%w\t%I' $files | sed -e 's/\.[0-9][0-9]*$//' | sort | uniq | rang
import (
e "codeberg.org/pebbe/errors"
@@ -59,9 +61,3 @@ func main() {
}
}
/*
alto 'fp://node[@pt="n"]' 'tt:%w\t%I' $files | sed -e 's/\.[0-9][0-9]*$//' | sort | uniq | rang
*/

88
cmd/trends/trends.go Normal file
View File

@@ -0,0 +1,88 @@
package main
import (
e "codeberg.org/pebbe/errors"
"bufio"
"fmt"
"os"
"sort"
"strconv"
"strings"
)
// Item pairs a word with the difference value that main computes for it.
type Item struct {
// word is the second tab-separated field of an input line.
word string
// diff is the word's number divided by refmax (reference file) minus its
// number divided by curmax (current file), as calculated in main.
diff float64
}
var (
// x is a short alias for e.ExitErr; main passes every error value it
// obtains to it.
// NOTE(review): presumably terminates the program when the error is
// non-nil — confirm in codeberg.org/pebbe/errors.
x = e.ExitErr
)
// main compares the numbers attached to words in a reference file with those
// in a current file and prints the words whose normalised number changed by
// more than 0.05 in either direction.
//
// Usage: trends REFERENCE CURRENT
//
// Both files hold lines of the form "number<TAB>word".
//
//   - REFERENCE (os.Args[1]) is loaded into a map from word to number.
//     refmax is the largest number seen plus one.
//   - CURRENT (os.Args[2]) is read completely. curmax is the number on its
//     last line plus one, so the file is expected to end with its largest
//     number.
//   - For every CURRENT line, diff = ref/refmax - cur/curmax. A word that is
//     missing from REFERENCE is given the value refmax.
//
// Output lines are "diff<TAB>word", sorted by descending diff and then by
// ascending word.
//
// A missing argument produces a usage message and exit status 1. A line with
// fewer than two tab-separated fields is reported through x with its file
// name and line number. An empty CURRENT file produces no output. These
// cases previously caused an index panic.
func main() {
	// Differences with an absolute value at or below this are not reported.
	const threshold = 0.05

	if len(os.Args) < 3 {
		fmt.Fprintln(os.Stderr, "Usage: trends REFERENCE CURRENT")
		os.Exit(1)
	}
	refName, curName := os.Args[1], os.Args[2]

	// Load the reference numbers and track the largest one.
	refs := make(map[string]int)
	refmax := 0
	fp, err := os.Open(refName)
	x(err)
	scanner := bufio.NewScanner(fp)
	for lineno := 1; scanner.Scan(); lineno++ {
		aa := strings.Split(scanner.Text(), "\t")
		if len(aa) < 2 {
			x(fmt.Errorf("%s:%d: expected at least two tab-separated fields", refName, lineno))
		}
		n, err := strconv.Atoi(aa[0])
		x(err)
		refs[aa[1]] = n
		if n > refmax {
			refmax = n
		}
	}
	x(scanner.Err())
	fp.Close()
	// One past the largest reference number; also keeps the divisor non-zero.
	refmax++

	// The current file is buffered because curmax comes from its last line
	// and is needed before any line can be evaluated.
	var lines []string
	fp, err = os.Open(curName)
	x(err)
	scanner = bufio.NewScanner(fp)
	for scanner.Scan() {
		lines = append(lines, scanner.Text())
	}
	x(scanner.Err())
	fp.Close()

	if len(lines) == 0 {
		// Nothing to compare.
		return
	}

	curmax, err := strconv.Atoi(strings.Split(lines[len(lines)-1], "\t")[0])
	x(err)
	curmax++

	var items []Item
	for i, line := range lines {
		aa := strings.Split(line, "\t")
		if len(aa) < 2 {
			x(fmt.Errorf("%s:%d: expected at least two tab-separated fields", curName, i+1))
		}
		n, err := strconv.Atoi(aa[0])
		x(err)
		m, ok := refs[aa[1]]
		if !ok {
			// Words unknown to the reference are kept (not skipped) and
			// placed one past the largest reference number.
			m = refmax
		}
		diff := float64(m)/float64(refmax) - float64(n)/float64(curmax)
		if diff > threshold || diff < -threshold {
			items = append(items, Item{
				word: aa[1],
				diff: diff,
			})
		}
	}

	// Largest positive difference first; equal differences ordered by word.
	sort.Slice(items, func(a, b int) bool {
		if items[a].diff == items[b].diff {
			return items[a].word < items[b].word
		}
		return items[a].diff > items[b].diff
	})

	for _, item := range items {
		fmt.Printf("%f\t%s\n", item.diff, item.word)
	}
}

View File

@@ -165,11 +165,12 @@ do
# ranglijsten
say $part-rang-noun=$ds=$i
say $part-rang-$ds-$i
alto \
'fp://node[@pt="n"]' 'tt:%w\t%I' $files \
'fp://node[((@pt="n" or @neclass) and not(@rel="mwp")) or (@cat="mwu" and .//node[@neclass])]' \
'tt:%w\t%I' $files \
| sed -e 's/\.[0-9][0-9]*$//' | sort | uniq | rang \
> $part-rang-noun=$ds=$i
> $part-rang-$ds-$i
done
done