From 14590570bacbc49b9595e0a1a5e3fef0ce37a8a0 Mon Sep 17 00:00:00 2001 From: Peter Kleiweg Date: Fri, 29 May 2026 17:22:10 +0200 Subject: [PATCH] fix voor zinnen aan elkaar --- AT5/cmd/xml2txt/xml2txt.go | 2 +- BuurtAdam/cmd/buurtadam/buurtadam.go | 2 ++ BuurtGrn/cmd/buurtgrn/buurtgrn.go | 2 ++ GG/cmd/gg/gg.go | 2 ++ HLN/cmd/hln/hln.go | 2 ++ LitNL/cmd/xml2txt/xml2txt.go | 2 +- NOS/cmd/json2txt/json2txt.go | 4 +++- NU/cmd/nu/nu.go | 19 ++++-------------- NieuwsNL/cmd/nieuwsnl/nieuwsnl.go | 2 ++ Oog/cmd/xml2txt/xml2txt.go | 2 +- Parool/cmd/parool/parool.go | 2 ++ RO/cmd/xml2txt/xml2txt.go | 2 +- RTVNoord/cmd/rtvnoord/rtvnoord.go | 6 ++---- Sargasso/cmd/xml2txt/xml2txt.go | 2 +- Sikkom/cmd/sikkom/sikkom.go | 2 ++ Tzum/cmd/xml2txt/xml2txt.go | 2 +- VRT/cmd/vrt/vrt.go | 2 ++ internal/util/util.go | 29 ++++++++++++++++++++++------ 18 files changed, 54 insertions(+), 32 deletions(-) diff --git a/AT5/cmd/xml2txt/xml2txt.go b/AT5/cmd/xml2txt/xml2txt.go index 3f26994..eb3548b 100644 --- a/AT5/cmd/xml2txt/xml2txt.go +++ b/AT5/cmd/xml2txt/xml2txt.go @@ -58,7 +58,7 @@ func main() { var item Item x(xml.Unmarshal(b, &item), filename) x(fp.WriteString(u.AddEnd(u.FixSpace(item.Title)))) - doc, err := gokogiri.ParseHtml([]byte(`` + item.Text + ``)) + doc, err := gokogiri.ParseHtml([]byte(`` + u.HtmlFixString(item.Text) + ``)) x(err) root := doc.Root() pp, err := root.Search(`//body/p | //body/h2`) diff --git a/BuurtAdam/cmd/buurtadam/buurtadam.go b/BuurtAdam/cmd/buurtadam/buurtadam.go index 8d7dee5..30ecb31 100644 --- a/BuurtAdam/cmd/buurtadam/buurtadam.go +++ b/BuurtAdam/cmd/buurtadam/buurtadam.go @@ -159,6 +159,8 @@ func doArticle(filename string, url string, title string, timestamp time.Time, n p(err) p(resp.Body.Close()) + body = u.HtmlFix(body) + doc, err := gokogiri.ParseHtml(body) p(err) diff --git a/BuurtGrn/cmd/buurtgrn/buurtgrn.go b/BuurtGrn/cmd/buurtgrn/buurtgrn.go index 3e27ac8..8bf4c62 100644 --- a/BuurtGrn/cmd/buurtgrn/buurtgrn.go +++ b/BuurtGrn/cmd/buurtgrn/buurtgrn.go @@ -158,6 +158,8 @@ func doArticle(filename string, url string, title string, timestamp time.Time, n p(err) p(resp.Body.Close()) + body = u.HtmlFix(body) + doc, err := gokogiri.ParseHtml(body) p(err) diff --git a/GG/cmd/gg/gg.go b/GG/cmd/gg/gg.go index 1a8c19a..7ed7e51 100644 --- a/GG/cmd/gg/gg.go +++ b/GG/cmd/gg/gg.go @@ -155,6 +155,8 @@ func doArticle(filename string, url string, title string, timestamp time.Time, n p(err) p(resp.Body.Close()) + body = u.HtmlFix(body) + doc, err := gokogiri.ParseHtml(body) p(err) diff --git a/HLN/cmd/hln/hln.go b/HLN/cmd/hln/hln.go index 30bc851..eea31b3 100644 --- a/HLN/cmd/hln/hln.go +++ b/HLN/cmd/hln/hln.go @@ -168,6 +168,8 @@ func doArticle(filename string, url string, title string, timestamp time.Time, n p(err) p(resp.Body.Close()) + body = u.HtmlFix(body) + /* s := string(body) ok = true diff --git a/LitNL/cmd/xml2txt/xml2txt.go b/LitNL/cmd/xml2txt/xml2txt.go index 45ca364..b55eb1a 100644 --- a/LitNL/cmd/xml2txt/xml2txt.go +++ b/LitNL/cmd/xml2txt/xml2txt.go @@ -63,7 +63,7 @@ func main() { x(fmt.Fprintf(fp, "##META text tag = %s\n", u.FixSpace(cat))) } x(fp.WriteString(u.AddEnd(u.FixSpace(item.Title)))) - doc, err := gokogiri.ParseHtml([]byte(`` + item.Text + ``)) + doc, err := gokogiri.ParseHtml([]byte(`` + u.HtmlFixString(item.Text) + ``)) x(err) root := doc.Root() pp, err := root.Search(`//body//p`) diff --git a/NOS/cmd/json2txt/json2txt.go b/NOS/cmd/json2txt/json2txt.go index 36e377c..1b7b20c 100644 --- a/NOS/cmd/json2txt/json2txt.go +++ b/NOS/cmd/json2txt/json2txt.go @@ -69,7 +69,9 @@ func main() { x(fmt.Fprintf(fp, "##META text tag = %s\n", u.FixSpace(tag))) } x(fp.WriteString(u.AddEnd(u.FixSpace(item.Title)))) - x(fp.WriteString(u.FixSpace(item.Text))) + for _, line := range strings.SplitAfter(item.Text, "\n") { + x(fp.WriteString(u.AddEnd(u.FixSpace(line, true)))) + } x(fp.Close()) } } diff --git a/NU/cmd/nu/nu.go b/NU/cmd/nu/nu.go index de469a5..bfc5195 100644 --- a/NU/cmd/nu/nu.go +++ b/NU/cmd/nu/nu.go @@ -161,6 +161,8 @@ func doArticle(filename string, url string, timestamp time.Time, needUpdate bool p(err) p(resp.Body.Close()) + body = u.HtmlFix(body) + s := string(body) ok := true i1 := strings.Index(s, `