// This program prints, as a single JSON document on standard output, a
// summary of the nlnieuws corpus for one ISO week and the period ending in
// that week.
//
// Usage: two arguments, "yyyy-ww" (year and week number) and the period
// length in weeks. For every source it reads up to maxRows rows from each
// frequency list under corpusDir/data and counts the documents found in the
// per-source directories under corpusDir.
package main

import (
	"bufio"
	"encoding/json"
	"fmt"
	"os"
	"strconv"
	"strings"
	"time"

	e "codeberg.org/pebbe/errors"
)

const (
	// corpusDir is the root directory of the corpus.
	corpusDir = "/net/corpora/nlnieuws"

	// maxRows is the maximum number of rows read from each frequency list.
	maxRows = 20
)

// Data is the top-level JSON document written by main.
type Data struct {
	Year       int            `json:"year"`    // year given on the command line
	Week       int            `json:"week"`    // week number given on the command line
	First      string         `json:"first"`   // formatted Monday of the requested week
	Last       string         `json:"last"`    // formatted Sunday of the requested week
	Period     int            `json:"period"`  // period length in weeks, as given on the command line
	Start      string         `json:"start"`   // formatted first day of the whole period
	Max        int            `json:"max"`     // largest value in Sources (never below 0)
	Sources    map[string]int `json:"sources"` // document count per source directory
	Algemeen   *Parts         `json:"Algemeen"`
	Groningen  *Parts         `json:"Groningen"`
	Amsterdam  *Parts         `json:"Amsterdam"`
	Literatuur *Parts         `json:"Literatuur"`
	Vlaanderen *Parts         `json:"Vlaanderen"`
}

// Parts holds the frequency lists of one source. Every row is
// {count, word, tags, lemma, postag}; the last three are empty strings when
// the input line does not contain them.
type Parts struct {
	NieuweNamen   [][5]any `json:"nieuwe namen"`
	NieuweWoorden [][5]any `json:"nieuwe woorden"`
	Personen      [][5]any `json:"personen"`
	AndereNamen   [][5]any `json:"andere namen"`
	Locaties      [][5]any `json:"locaties"`
	Organisaties  [][5]any `json:"organisaties"`
}

var (
	// sources maps a source name as used in the JSON output to the prefix
	// used in the data file names.
	sources = map[string]string{
		"Algemeen":   "algemeen",
		"Amsterdam":  "amsterdam",
		"Groningen":  "groningen",
		"Literatuur": "literatuur",
		"Vlaanderen": "vlaanderen",
	}

	// parts maps a list name as used in the JSON output to the middle part
	// and the suffix of the data file name.
	parts = map[string]struct {
		file   string
		suffix string
	}{
		"nieuwe namen":   {"nieuwe-namen", ".t20"},
		"nieuwe woorden": {"nieuwe-woorden-extra", ".t20"},
		"personen":       {"personen", ""},
		"andere namen":   {"overige-namen", ""},
		"locaties":       {"locaties", ""},
		"organisaties":   {"organisaties", ""},
	}

	// maanden holds the Dutch month names, indexed by time.Month; the "x"
	// at index 0 is padding because months start at 1.
	maanden = strings.Fields("x januari februari maart april mei juni juli augustus september oktober november december")

	// dagen holds the Dutch weekday names, indexed by time.Weekday.
	dagen = strings.Fields("zondag maandag dinsdag woensdag donderdag vrijdag zaterdag")

	// Command-line parameters, set once in main and read by the helpers.
	year int
	week int
	size int

	// x is the error handler used throughout. The code relies on it doing
	// nothing for a nil error and terminating the program otherwise
	// (NOTE(review): confirm against the pebbe/errors documentation).
	x = e.ExitErr
)

// main parses the command line, gathers all data and prints it as JSON.
func main() {
	// Guard the os.Args indexing below against missing arguments.
	if len(os.Args) < 3 {
		x(fmt.Errorf("ontbrekende argumenten, gebruik: yyyy-ww periode"))
	}

	aa := strings.Split(os.Args[1], "-")
	if len(aa) != 2 {
		x(fmt.Errorf("ongeldig argument, moet in formaat yyyy-dd zijn"))
	}

	var err error
	year, err = strconv.Atoi(aa[0])
	x(err)
	week, err = strconv.Atoi(aa[1])
	x(err)
	size, err = strconv.Atoi(os.Args[2])
	x(err)

	if year < 1000 || year > 9999 {
		x(fmt.Errorf("ongeldig year: %d", year))
	}
	if week < 1 || week > 53 {
		x(fmt.Errorf("ongeldige week: %d", week))
	}

	start, first, last, names := dates()
	maxCount, counts := makeCounts(names)

	data := &Data{
		Year:       year,
		Week:       week,
		First:      first,
		Last:       last,
		Period:     size,
		Start:      start,
		Max:        maxCount,
		Sources:    counts,
		Algemeen:   makeParts("Algemeen"),
		Groningen:  makeParts("Groningen"),
		Amsterdam:  makeParts("Amsterdam"),
		Literatuur: makeParts("Literatuur"),
		Vlaanderen: makeParts("Vlaanderen"),
	}

	b, err := json.Marshal(data)
	x(err)
	fmt.Println(string(b))
}

// makeParts loads all six frequency lists for the given source, which must
// be a key of sources.
func makeParts(source string) *Parts {
	return &Parts{
		NieuweNamen:   makeValues(source, "nieuwe namen"),
		NieuweWoorden: makeValues(source, "nieuwe woorden"),
		Personen:      makeValues(source, "personen"),
		AndereNamen:   makeValues(source, "andere namen"),
		Locaties:      makeValues(source, "locaties"),
		Organisaties:  makeValues(source, "organisaties"),
	}
}

// makeValues reads at most maxRows rows from the data file that belongs to
// source and part for the current year, week and size. Each input line is
// tab-separated: count, word and optionally tags, lemma and postag.
//
// Any I/O error, a non-numeric count or a line with fewer than two fields is
// passed to x.
func makeValues(source, part string) [][5]any {
	// Non-nil on purpose: an empty list must be encoded as [] and not null.
	rows := make([][5]any, 0, maxRows)

	p := parts[part]
	filename := fmt.Sprintf(corpusDir+"/data/%s-%s-%d-%02d-%d%s",
		sources[source], p.file, year, week, size, p.suffix)

	fp, err := os.Open(filename)
	x(err)
	// This function is called once per source and part; without the close
	// every call would leave a file descriptor open.
	defer fp.Close()

	scanner := bufio.NewScanner(fp)
	for lineno := 1; len(rows) < maxRows && scanner.Scan(); lineno++ {
		line := scanner.Text()
		fields := strings.Split(line, "\t")
		if len(fields) < 2 {
			// Report the malformed line instead of panicking on fields[1].
			x(fmt.Errorf("%s:%d: ongeldige regel: %q", filename, lineno, line))
		}

		count, err := strconv.Atoi(strings.TrimSpace(fields[0]))
		x(err)
		word := fields[1]

		var tags, lemma, postag string
		if len(fields) > 2 {
			tags = fields[2]
		}
		if len(fields) > 3 {
			lemma = fields[3]
		}
		if len(fields) > 4 {
			postag = fields[4]
		}

		rows = append(rows, [5]any{count, word, tags, lemma, postag})
	}
	x(scanner.Err())

	return rows
}

// makeCounts changes the working directory to corpusDir and, for every
// subdirectory whose name starts with an ASCII capital letter, counts the
// entries in the subdirectories listed in names: +1 for each name ending in
// ".xml", -1 for each name ending in ".skip".
//
// It returns the largest count (at least 0) and the count per directory.
func makeCounts(names []string) (int, map[string]int) {
	highest := 0
	counts := make(map[string]int)

	x(os.Chdir(corpusDir))
	entries, err := os.ReadDir(".")
	x(err)

	for _, entry := range entries {
		if !entry.IsDir() {
			continue
		}
		dir := entry.Name()
		if dir[0] < 'A' || dir[0] > 'Z' {
			continue
		}

		count := 0
		for _, name := range names {
			files, err := os.ReadDir(dir + "/" + name)
			if err != nil {
				// Subdirectories that cannot be read (for instance because
				// they do not exist) simply contribute nothing.
				continue
			}
			for _, f := range files {
				switch n := f.Name(); {
				case strings.HasSuffix(n, ".xml"):
					count++
				case strings.HasSuffix(n, ".skip"):
					count--
				}
			}
		}

		counts[dir] = count
		highest = max(highest, count)
	}

	return highest, counts
}

// dates derives the date range for the requested year, week and size.
//
// It returns the formatted first day of the period (start), the formatted
// Monday (first) and Sunday (last) of the requested week, and names: one
// "yyyy/ww" entry per week of the period followed by one "yyyy/mm/dd" entry
// per day of the period.
func dates() (start, first, last string, names []string) {
	// January 1st; noon keeps the date arithmetic away from day boundaries.
	t := time.Date(year, 1, 1, 12, 0, 0, 0, time.UTC)

	// Find the offset of the first Thursday of the year.
	day := int(t.Weekday()) // 0 = Sunday
	donderdag := 4 - day
	if donderdag < 0 {
		donderdag = donderdag + 7
	}

	// Move to the Monday before the first Thursday: donderdag - 3,
	// then to the requested week: 7 * (week - 1).
	t = t.AddDate(0, 0, donderdag-3+7*(week-1))
	t2 := t.AddDate(0, 0, 6)

	// The period consists of size weeks and ends with the requested week.
	tStart := t.AddDate(0, 0, (1-size)*7)

	names = make([]string, 0)

	t3 := tStart
	for range size {
		y, w := t3.ISOWeek()
		names = append(names, fmt.Sprintf("%d/%02d", y, w))
		t3 = t3.AddDate(0, 0, 7)
	}

	t3 = tStart
	for range 7 * size {
		names = append(names, fmt.Sprintf("%d/%02d/%02d", t3.Year(), t3.Month(), t3.Day()))
		t3 = t3.AddDate(0, 0, 1)
	}

	return makeDate(tStart), makeDate(t), makeDate(t2), names
}

// makeDate formats d as the first two letters of the Dutch weekday name, the
// day of the month, the first three letters of the Dutch month name and the
// year, for example "ma 1 jan 2024".
func makeDate(d time.Time) string {
	return fmt.Sprintf("%s %d %s %d", dagen[d.Weekday()][:2], d.Day(), maanden[int(d.Month())][:3], d.Year())
}