geen xquery meer gebruikt, te traag
This commit is contained in:
@@ -5,20 +5,13 @@ import (
|
||||
// "github.com/kr/pretty"
|
||||
|
||||
"bufio"
|
||||
"encoding/xml"
|
||||
"fmt"
|
||||
"os"
|
||||
"regexp"
|
||||
"sort"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// Item is the decoding target for a single XML <i> element.
type Item struct {
|
||||
XMLName xml.Name `xml:"i"` // element name: <i>
|
||||
Msg string `xml:"m"` // text of the <m> child element
|
||||
Tags []string `xml:"t"` // one entry per <t> child element
|
||||
Word string `xml:"w"` // text of the <w> child element
|
||||
}
|
||||
|
||||
type Word struct {
|
||||
word string
|
||||
sortkey string
|
||||
@@ -33,8 +26,10 @@ type Tag struct {
|
||||
}
|
||||
|
||||
var (
|
||||
x = e.ExitErr
|
||||
words = make(map[string]*Word)
|
||||
x = e.ExitErr
|
||||
words = make(map[string]*Word)
|
||||
reTag = regexp.MustCompile(`tag: "((?:\\.|[^\\"])*)"`)
|
||||
reUnquote = regexp.MustCompile(`\\.`)
|
||||
|
||||
ignore = map[string]bool{
|
||||
"Algemeen": true,
|
||||
@@ -48,26 +43,28 @@ func main() {
|
||||
|
||||
scanner := bufio.NewScanner(os.Stdin)
|
||||
for scanner.Scan() {
|
||||
var item Item
|
||||
line := scanner.Text()
|
||||
x(xml.Unmarshal([]byte(line), &item))
|
||||
w, ok := words[item.Word]
|
||||
aa := strings.Split(line, "\t")
|
||||
word := aa[0]
|
||||
tags := aa[1]
|
||||
lbl := aa[2]
|
||||
w, ok := words[word]
|
||||
if !ok {
|
||||
w = &Word{
|
||||
word: item.Word,
|
||||
sortkey: strings.ToLower(item.Word),
|
||||
word: word,
|
||||
sortkey: strings.ToLower(word),
|
||||
tags: make(map[string]map[string]int),
|
||||
}
|
||||
words[item.Word] = w
|
||||
words[word] = w
|
||||
}
|
||||
w.count++
|
||||
lbl := item.Msg[:strings.Index(item.Msg, ".")]
|
||||
for _, tag := range item.Tags {
|
||||
lbl = lbl[:strings.Index(lbl, ".")]
|
||||
for _, tag := range parseTags(tags) {
|
||||
if !ignore[tag] {
|
||||
if _, ok := w.tags[lbl]; !ok {
|
||||
w.tags[lbl] = make(map[string]int)
|
||||
}
|
||||
if tag != item.Word {
|
||||
if tag != word {
|
||||
w.tags[lbl][tag] = w.tags[lbl][tag] + 1
|
||||
}
|
||||
}
|
||||
@@ -95,6 +92,15 @@ func main() {
|
||||
|
||||
}
|
||||
|
||||
func parseTags(s string) []string {
|
||||
tags := make([]string, 0)
|
||||
aa := reTag.FindAllStringSubmatch(s, -1)
|
||||
for _, a := range aa {
|
||||
tags = append(tags, unquote(a[1]))
|
||||
}
|
||||
return tags
|
||||
}
|
||||
|
||||
func getTag(tags map[string]map[string]int) string {
|
||||
|
||||
all := make([]Tag, 0)
|
||||
@@ -155,3 +161,9 @@ func getTag(tags map[string]map[string]int) string {
|
||||
|
||||
return strings.Join(aa, ", ")
|
||||
}
|
||||
|
||||
func unquote(text string) string {
|
||||
return reUnquote.ReplaceAllStringFunc(text, func(s string) string {
|
||||
return s[1:]
|
||||
})
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user