diff --git a/.gitignore b/.gitignore index 713ab78..61ba594 100644 --- a/.gitignore +++ b/.gitignore @@ -46,6 +46,7 @@ bin/data2json bin/dates2json bin/flush bin/items2count +bin/rang bin/top20 bin/week2files 20?? diff --git a/Makefile b/Makefile index 0513cd9..ff35e3e 100644 --- a/Makefile +++ b/Makefile @@ -22,6 +22,7 @@ all: make bin/dates2json make bin/flush make bin/items2count + make bin/rang make bin/top20 make bin/week2files @@ -37,6 +38,9 @@ bin/flush: cmd/flush/*.go bin/items2count: cmd/items2count/*.go go build -o $@ $^ +bin/rang: cmd/rang/*.go + go build -o $@ $^ + bin/top20: cmd/top20/*.go go build -o $@ $^ diff --git a/cmd/rang/rang.go b/cmd/rang/rang.go new file mode 100644 index 0000000..daa3368 --- /dev/null +++ b/cmd/rang/rang.go @@ -0,0 +1,67 @@ +package main + +import ( + e "codeberg.org/pebbe/errors" + + "bufio" + "fmt" + "os" + "sort" + "strings" +) + +var ( + x = e.ExitErr +) + +type Item struct { + word string + count int +} + +func main() { + + counts := make(map[string]int) + + scanner := bufio.NewScanner(os.Stdin) + for scanner.Scan() { + word := strings.Split(scanner.Text(), "\t")[0] + counts[word] = counts[word] + 1 + } + x(scanner.Err()) + + items := make([]Item, 0) + for key, value := range counts { + items = append(items, Item{ + word: key, + count: value, + }) + } + + sort.Slice(items, func(a, b int) bool { + if items[a].count == items[b].count { + return items[a].word < items[b].word + } + return items[a].count > items[b].count + }) + + rang := 0 + prev := 0 + for _, item := range items { + if item.count < 2 { + break + } + if item.count != prev { + rang++ + prev = item.count + } + fmt.Printf("%d\t%s\n", rang, item.word) + } + +} + +/* + +alto 'fp://node[@pt="n"]' 'tt:%w\t%I' $files | sed -e 's/\.[0-9][0-9]*$//' | sort | uniq | rang + +*/ diff --git a/collect.sh b/collect.sh index 056ba1b..ac25118 100755 --- a/collect.sh +++ b/collect.sh @@ -163,6 +163,14 @@ do | sed -e 's/\([0-9]\) */\1\t/' | sort -f -k 2 | sort -n -r -k 1,1 -s \ > $part-nieuwe-adjww-extra-$ds-$i + # ranglijsten + + say $part-rang-noun=$ds=$i + alto \ + 'fp://node[@pt="n"]' 'tt:%w\t%I' $files \ + | sed -e 's/\.[0-9][0-9]*$//' | sort | uniq | rang \ + > $part-rang-noun=$ds=$i + done done