// Files
// nlnieuws/oud/xquery/new2old.go
// 2026-05-29 12:22:57 +02:00
//
// 67 lines
// 1.2 KiB
// Go

package main
import (
e "codeberg.org/pebbe/errors"
"bufio"
"encoding/xml"
"fmt"
"os"
"regexp"
"strings"
)
// Item is one output record. It is marshalled as an <i> element whose
// children appear in field order: <m>, then one <t> per tag, then <w>.
type Item struct {
	// XMLName fixes the element name to "i".
	XMLName xml.Name `xml:"i"`

	// Msg is written as the <m> child element.
	Msg string `xml:"m"`

	// Tags is written as a sequence of <t> child elements, one per entry.
	Tags []string `xml:"t"`

	// Word is written as the <w> child element.
	Word string `xml:"w"`
}
var (
	// reTag matches one `tag: "..."` entry. The capture group holds the text
	// between the quotes, in which a backslash escapes the character that
	// follows it (so an escaped quote does not end the value).
	reTag = regexp.MustCompile(`tag: "((?:\\.|[^\\"])*)"`)

	// reUnquote matches a backslash together with the character after it.
	reUnquote = regexp.MustCompile(`\\.`)

	// x is a short alias for the error handler from the errors package.
	// NOTE(review): presumably it terminates the program when handed a
	// non-nil error; confirm against codeberg.org/pebbe/errors.
	x = e.ExitErr
)
func main() {
scanner := bufio.NewScanner(os.Stdin)
for scanner.Scan() {
line := scanner.Text()
aa := strings.Split(line, "\t")
item := Item{
// Msg: aa[2][:strings.LastIndex(aa[2], ".")],
Msg: aa[2],
Word: aa[0],
Tags: make([]string, 0),
}
for _, tag := range parseTags(aa[1]) {
item.Tags = append(item.Tags, tag)
}
b, err := xml.Marshal(item)
x(err)
fmt.Println(
strings.ReplaceAll(
strings.ReplaceAll(string(b), "'", "'"),
""", `"`))
}
x(scanner.Err())
}
// parseTags returns the value of every `tag: "..."` entry that reTag finds
// in s, in order of appearance, with backslash escapes removed by unquote.
// The result is never nil: input without tags yields an empty slice.
func parseTags(s string) []string {
	matches := reTag.FindAllStringSubmatch(s, -1)

	// make yields a non-nil slice even when matches is empty.
	tags := make([]string, len(matches))
	for i, m := range matches {
		// m[1] is the capture group: the text between the quotes.
		tags[i] = unquote(m[1])
	}
	return tags
}
// unquote removes one level of backslash escaping from text: each backslash
// that is followed by another character (a reUnquote match) is dropped and
// the following character is kept. A backslash with nothing after it is
// left untouched.
func unquote(text string) string {
	dropBackslash := func(escaped string) string {
		// escaped is the backslash plus the character it protects; the
		// backslash is a single byte, so slicing at 1 keeps that character
		// whole even when it is multi-byte.
		return escaped[1:]
	}
	return reUnquote.ReplaceAllStringFunc(text, dropBackslash)
}