gone, trends
This commit is contained in:
2
.gitignore
vendored
2
.gitignore
vendored
@@ -45,9 +45,11 @@ VRT/vrt
|
||||
bin/data2json
|
||||
bin/dates2json
|
||||
bin/flush
|
||||
bin/gone
|
||||
bin/items2count
|
||||
bin/rang
|
||||
bin/top20
|
||||
bin/trends
|
||||
bin/week2files
|
||||
20??
|
||||
corpus
|
||||
|
||||
8
Makefile
8
Makefile
@@ -21,9 +21,11 @@ all:
|
||||
make bin/data2json
|
||||
make bin/dates2json
|
||||
make bin/flush
|
||||
make bin/gone
|
||||
make bin/items2count
|
||||
make bin/rang
|
||||
make bin/top20
|
||||
make bin/trends
|
||||
make bin/week2files
|
||||
|
||||
bin/data2json: cmd/data2json/*.go
|
||||
@@ -35,6 +37,9 @@ bin/dates2json: cmd/dates2json/*.go
|
||||
bin/flush: cmd/flush/*.go
|
||||
go build -o $@ $^
|
||||
|
||||
bin/gone: cmd/gone/*.go
|
||||
go build -o $@ $^
|
||||
|
||||
bin/items2count: cmd/items2count/*.go
|
||||
go build -o $@ $^
|
||||
|
||||
@@ -44,6 +49,9 @@ bin/rang: cmd/rang/*.go
|
||||
bin/top20: cmd/top20/*.go
|
||||
go build -o $@ $^
|
||||
|
||||
bin/trends: cmd/trends/*.go
|
||||
go build -o $@ $^
|
||||
|
||||
bin/week2files: cmd/week2files/*.go
|
||||
go build -o $@ $^
|
||||
|
||||
|
||||
41
cmd/gone/gone.go
Normal file
41
cmd/gone/gone.go
Normal file
@@ -0,0 +1,41 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
e "codeberg.org/pebbe/errors"
|
||||
|
||||
"bufio"
|
||||
"fmt"
|
||||
"os"
|
||||
"strings"
|
||||
)
|
||||
|
||||
var (
|
||||
x = e.ExitErr
|
||||
)
|
||||
|
||||
func main() {
|
||||
current := make(map[string]bool)
|
||||
fp, err := os.Open(os.Args[2])
|
||||
x(err)
|
||||
scanner := bufio.NewScanner(fp)
|
||||
for scanner.Scan() {
|
||||
current[strings.Split(scanner.Text(), "\t")[1]] = true
|
||||
}
|
||||
x(scanner.Err())
|
||||
fp.Close()
|
||||
|
||||
var last string
|
||||
fp, err = os.Open(os.Args[1])
|
||||
x(err)
|
||||
scanner = bufio.NewScanner(fp)
|
||||
for scanner.Scan() {
|
||||
aa := strings.Split(scanner.Text(), "\t")
|
||||
if !current[aa[1]] {
|
||||
fmt.Printf("%s\t%s\n", aa[0], aa[1])
|
||||
}
|
||||
last = aa[0]
|
||||
}
|
||||
x(scanner.Err())
|
||||
fp.Close()
|
||||
fmt.Printf("%s\t\n", last)
|
||||
}
|
||||
@@ -1,5 +1,7 @@
|
||||
package main
|
||||
|
||||
// alto 'fp://node[....]' 'tt:%w\t%I' $files | sed -e 's/\.[0-9][0-9]*$//' | sort | uniq | rang
|
||||
|
||||
import (
|
||||
e "codeberg.org/pebbe/errors"
|
||||
|
||||
@@ -59,9 +61,3 @@ func main() {
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/*
|
||||
|
||||
alto 'fp://node[@pt="n"]' 'tt:%w\t%I' $files | sed -e 's/\.[0-9][0-9]*$//' | sort | uniq | rang
|
||||
|
||||
*/
|
||||
|
||||
88
cmd/trends/trends.go
Normal file
88
cmd/trends/trends.go
Normal file
@@ -0,0 +1,88 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
e "codeberg.org/pebbe/errors"
|
||||
|
||||
"bufio"
|
||||
"fmt"
|
||||
"os"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// Item is one reported word together with the difference computed for it.
type Item struct {
	// word is the second tab-separated field of the input record.
	word string
	// diff is the reference ratio minus the current ratio for this word.
	diff float64
}
|
||||
|
||||
var (
|
||||
x = e.ExitErr
|
||||
)
|
||||
|
||||
func main() {
|
||||
|
||||
refs := make(map[string]int)
|
||||
refmax := 0
|
||||
fp, err := os.Open(os.Args[1])
|
||||
x(err)
|
||||
scanner := bufio.NewScanner(fp)
|
||||
for scanner.Scan() {
|
||||
aa := strings.Split(scanner.Text(), "\t")
|
||||
n, err := strconv.Atoi(aa[0])
|
||||
x(err)
|
||||
refs[aa[1]] = n
|
||||
if n > refmax {
|
||||
refmax = n
|
||||
}
|
||||
}
|
||||
x(scanner.Err())
|
||||
fp.Close()
|
||||
refmax++
|
||||
|
||||
lines := make([]string, 0)
|
||||
fp, err = os.Open(os.Args[2])
|
||||
x(err)
|
||||
scanner = bufio.NewScanner(fp)
|
||||
for scanner.Scan() {
|
||||
lines = append(lines, scanner.Text())
|
||||
}
|
||||
x(scanner.Err())
|
||||
fp.Close()
|
||||
|
||||
curmax, err := strconv.Atoi(strings.Split(lines[len(lines)-1], "\t")[0])
|
||||
x(err)
|
||||
curmax++
|
||||
|
||||
items := make([]Item, 0)
|
||||
|
||||
for _, line := range lines {
|
||||
aa := strings.Split(line, "\t")
|
||||
n, err := strconv.Atoi(aa[0])
|
||||
x(err)
|
||||
m, ok := refs[aa[1]]
|
||||
if !ok {
|
||||
//continue
|
||||
m = refmax
|
||||
}
|
||||
diff := float64(m)/float64(refmax) - float64(n)/float64(curmax)
|
||||
if diff > 0.05 || diff < -0.05 {
|
||||
items = append(items, Item{
|
||||
word: aa[1],
|
||||
diff: diff,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
sort.Slice(items, func(a, b int) bool {
|
||||
if items[a].diff == items[b].diff {
|
||||
return items[a].word < items[b].word
|
||||
}
|
||||
return items[a].diff > items[b].diff
|
||||
})
|
||||
|
||||
for _, item := range items {
|
||||
fmt.Printf("%f\t%s\n", item.diff, item.word)
|
||||
}
|
||||
|
||||
}
|
||||
@@ -165,11 +165,12 @@ do
|
||||
|
||||
# ranglijsten
|
||||
|
||||
say $part-rang-noun=$ds=$i
|
||||
say $part-rang-$ds-$i
|
||||
alto \
|
||||
'fp://node[@pt="n"]' 'tt:%w\t%I' $files \
|
||||
'fp://node[((@pt="n" or @neclass) and not(@rel="mwp")) or (@cat="mwu" and .//node[@neclass])]' \
|
||||
'tt:%w\t%I' $files \
|
||||
| sed -e 's/\.[0-9][0-9]*$//' | sort | uniq | rang \
|
||||
> $part-rang-noun=$ds=$i
|
||||
> $part-rang-$ds-$i
|
||||
|
||||
done
|
||||
done
|
||||
|
||||
Reference in New Issue
Block a user