top20.go: aangepast aan grote reorganisatie

This commit is contained in:
Peter Kleiweg
2026-05-28 02:36:55 +02:00
parent 7f23212fc3
commit e53049e62f

View File

@@ -11,7 +11,7 @@ import (
// Package-level values used by main.
// NOTE(review): reFile is assigned twice below; this looks like the removed
// and the added line of a diff shown together — confirm only one remains in
// the real file.
var (
// x is a short alias for e.ExitErr; main calls it with error values.
// Presumably it aborts on a non-nil error — verify against package e.
x = e.ExitErr
// Matches text containing '2', three digits, '-', a digit 0-5 and one more
// digit. Group 1 is the text before that stamp, group 2 the stamp itself,
// group 3 the text after it (main appends ".t20" to group 3).
reFile = regexp.MustCompile(`(.*)(2[0-9][0-9][0-9]-[0-5][0-9])(.*)`)
// Same pattern, but with a literal '.' instead of '-' inside the stamp.
reFile = regexp.MustCompile(`(.*)(2[0-9][0-9][0-9]\.[0-5][0-9])(.*)`)
// seen is a set of strings; main marks the second tab-separated field of
// every line it scans from the matching ".t20" files.
seen = make(map[string]bool)
)
@@ -23,21 +23,30 @@ func main() {
suffix := m[3] + ".t20"
target := infile + ".t20"
x(os.Chdir("/net/corpora/nlnieuws/data"))
files, err := os.ReadDir(".")
dirs, err := os.ReadDir("..")
x(err)
for _, file := range files {
name := file.Name()
if strings.HasPrefix(name, prefix) && strings.HasSuffix(name, suffix) && name < target {
fp, err := os.Open(name)
x(err)
scanner := bufio.NewScanner(fp)
for scanner.Scan() {
seen[strings.Split(scanner.Text(), "\t")[1]] = true
for _, dir := range dirs {
if !dir.IsDir() {
continue
}
dirname := dir.Name()
if dirname[0] != '2' {
continue
}
files, err := os.ReadDir("../" + dirname)
x(err)
for _, file := range files {
name := file.Name()
if strings.HasPrefix(name, prefix) && strings.HasSuffix(name, suffix) && name < target {
fp, err := os.Open("../" + dirname + "/" + name)
x(err)
scanner := bufio.NewScanner(fp)
for scanner.Scan() {
seen[strings.Split(scanner.Text(), "\t")[1]] = true
}
x(scanner.Err())
x(fp.Close())
}
x(scanner.Err())
x(fp.Close())
}
}