// This program reads one XML <i> item per line from standard input, counts
// how often each word (<w>) occurs, and prints every word that occurs more
// than once together with the tags (<t>) that accompany it most often.
package main

import (
	"bufio"
	"encoding/xml"
	"fmt"
	"os"
	"sort"
	"strings"

	e "codeberg.org/pebbe/errors"
	// "github.com/kr/pretty"
)

// Item is one input record: an <i> element holding a message (<m>), any
// number of tags (<t>) and a word (<w>).
type Item struct {
	XMLName xml.Name `xml:"i"`
	Msg     string   `xml:"m"`
	Tags    []string `xml:"t"`
	Word    string   `xml:"w"`
}

// Word accumulates the statistics for one distinct word.
type Word struct {
	word    string
	sortkey string                    // lower-cased word, used as secondary sort key
	count   int                       // number of items in which the word occurred
	tags    map[string]map[string]int // label -> tag -> number of occurrences
}

// Tag is a candidate tag for a word, with its count and its lower-cased
// sort key.
type Tag struct {
	tag     string
	sortkey string
	count   int
}

var (
	// x is shorthand for e.ExitErr and is applied to every error returned
	// while reading input.
	// NOTE(review): presumably it terminates the program on a non-nil
	// error; confirm against the errors package.
	x = e.ExitErr

	// words maps each word, spelled exactly as in the input, to its
	// statistics.
	words = make(map[string]*Word)

	// ignore lists the tags that are never recorded for a word.
	ignore = map[string]bool{
		"Algemeen":  true,
		"Artikelen": true,
		"Nieuws":    true,
		"Recensies": true,
	}
)

// main reads the items from standard input, gathers per-word counts and
// per-label tag counts, and prints one line per word that occurred more than
// once: count, word and tag list, separated by tabs. Lines are ordered by
// descending count, then by lower-cased word.
func main() {
	scanner := bufio.NewScanner(os.Stdin)
	for scanner.Scan() {
		var item Item
		x(xml.Unmarshal(scanner.Bytes(), &item))

		w, ok := words[item.Word]
		if !ok {
			w = &Word{
				word:    item.Word,
				sortkey: strings.ToLower(item.Word),
				tags:    make(map[string]map[string]int),
			}
			words[item.Word] = w
		}
		w.count++

		lbl := label(item.Msg)
		for _, tag := range item.Tags {
			// A tag identical to the word itself carries no information.
			if ignore[tag] || tag == item.Word {
				continue
			}
			counts := w.tags[lbl]
			if counts == nil {
				counts = make(map[string]int)
				w.tags[lbl] = counts
			}
			counts[tag]++
		}
	}
	x(scanner.Err())

	wordlist := make([]*Word, 0, len(words))
	for _, w := range words {
		if w.count > 1 {
			wordlist = append(wordlist, w)
		}
	}
	sort.Slice(wordlist, func(a, b int) bool {
		if wordlist[a].count != wordlist[b].count {
			return wordlist[a].count > wordlist[b].count
		}
		return wordlist[a].sortkey < wordlist[b].sortkey
	})

	for _, w := range wordlist {
		fmt.Printf("%6d\t%s\t%s\n", w.count, w.word, getTag(w.tags))
	}
}

// label returns the part of msg that precedes its first ".". A message
// without a "." is returned unchanged instead of causing a slice-bounds
// panic.
func label(msg string) string {
	if i := strings.Index(msg, "."); i >= 0 {
		return msg[:i]
	}
	return msg
}

// getTag condenses the per-label tag counts of one word into a
// comma-separated list.
//
// For every label only the tag or tags with the highest count are kept.
// Candidates whose lower-cased spelling is equal are then merged: their
// counts are added up and the spelling with the highest individual count
// (on a tie, the smallest spelling) is kept. Merged tags with a total count
// above 1 are returned, ordered by descending count and then by lower-cased
// spelling. The result is empty when no tag qualifies.
func getTag(tags map[string]map[string]int) string {
	var all []Tag
	for _, counts := range tags {
		best := 0
		for _, c := range counts {
			if c > best {
				best = c
			}
		}
		for tag, c := range counts {
			if c == best {
				all = append(all, Tag{tag: tag, sortkey: strings.ToLower(tag), count: c})
			}
		}
	}

	// Bring equal sort keys together before merging. Sorting by count first
	// would leave case variants with different counts apart, so they would
	// never be merged. Within a group the spelling to keep comes first.
	sort.Slice(all, func(a, b int) bool {
		if all[a].sortkey != all[b].sortkey {
			return all[a].sortkey < all[b].sortkey
		}
		if all[a].count != all[b].count {
			return all[a].count > all[b].count
		}
		return all[a].tag < all[b].tag
	})

	// Merge in place: merged never grows past the element being read.
	merged := all[:0]
	for _, t := range all {
		if n := len(merged); n > 0 && merged[n-1].sortkey == t.sortkey {
			merged[n-1].count += t.count
			continue
		}
		merged = append(merged, t)
	}

	// Sort keys are unique now, so count and sort key give a total order.
	sort.Slice(merged, func(a, b int) bool {
		if merged[a].count != merged[b].count {
			return merged[a].count > merged[b].count
		}
		return merged[a].sortkey < merged[b].sortkey
	})

	names := make([]string, 0, len(merged))
	for _, t := range merged {
		if t.count > 1 {
			names = append(names, t.tag)
		}
	}
	return strings.Join(names, ", ")
}