From 4b56c0cd708d017264be700444d5ef5986b22ff0 Mon Sep 17 00:00:00 2001 From: Peter Kleiweg Date: Sat, 23 May 2026 14:43:07 +0200 Subject: [PATCH] HLN, Parool: fix relatief xpath --- HLN/cmd/hln/hln.go | 12 ++++++------ Parool/cmd/parool/parool.go | 14 +++++++------- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/HLN/cmd/hln/hln.go b/HLN/cmd/hln/hln.go index e96c0a0..7cf4317 100644 --- a/HLN/cmd/hln/hln.go +++ b/HLN/cmd/hln/hln.go @@ -185,7 +185,7 @@ func doArticle(filename string, url string, timestamp time.Time, needUpdate bool article := articles[0] tags := make([]string, 0) - ell, err := article.Search(`//*[@data-content-type="LABEL"]`) + ell, err := article.Search(`.//*[@data-content-type="LABEL"]`) p(err) for _, el := range ell { s := strings.TrimSpace(el.Content()) @@ -196,7 +196,7 @@ func doArticle(filename string, url string, timestamp time.Time, needUpdate bool pars := make([]string, 0) - ell, err = article.Search(`//*[@data-content-type="TITLE"]`) + ell, err = article.Search(`.//*[@data-content-type="TITLE"]`) p(err) if len(ell) != 1 { _ = w(fmt.Errorf("found %d titles: %s", len(ell), url)) @@ -209,7 +209,7 @@ func doArticle(filename string, url string, timestamp time.Time, needUpdate bool } hasIntro := false - ell, err = article.Search(`//*[@data-content-type="INTRO"]`) + ell, err = article.Search(`.//*[@data-content-type="INTRO"]`) p(err) for _, el := range ell { s := strings.TrimSpace(el.Content()) @@ -222,17 +222,17 @@ func doArticle(filename string, url string, timestamp time.Time, needUpdate bool _ = w(fmt.Errorf("no intro: %s", url)) } - specials, err := article.Search(`//*[@data-content-type="GROUP"]`) + specials, err := article.Search(`.//*[@data-content-type="GROUP"]`) p(err) for _, special := range specials { special.Remove() } - other, err := article.Search(`//*[@data-content-type="PODCAST"]`) + other, err := article.Search(`.//*[@data-content-type="PODCAST"]`) p(err) hasOther := len(other) > 0 - ell, err = article.Search(`//*[@data-content-type="PARAGRAPH"]`) + ell, err = article.Search(`.//*[@data-content-type="PARAGRAPH"]`) p(err) if len(ell) == 0 && !hasOther && !hasIntro { _ = w(fmt.Errorf("no paragraphs: %s", url)) diff --git a/Parool/cmd/parool/parool.go b/Parool/cmd/parool/parool.go index c53b558..37105d0 100644 --- a/Parool/cmd/parool/parool.go +++ b/Parool/cmd/parool/parool.go @@ -179,7 +179,7 @@ func doArticle(filename string, url string, timestamp time.Time, needUpdate bool } article := articles[0] - live, err := article.Search(`//*[@data-test-id="live-blog-label"]`) + live, err := article.Search(`.//*[@data-test-id="live-blog-label"]`) p(err) if len(live) > 0 { fp, err := os.Create(filename + ".skip") @@ -189,7 +189,7 @@ func doArticle(filename string, url string, timestamp time.Time, needUpdate bool return true } - headers, err := article.Search(`//header`) + headers, err := article.Search(`.//header`) p(err) if len(headers) == 0 { _ = w(fmt.Errorf("no header: %s", url)) @@ -212,7 +212,7 @@ func doArticle(filename string, url string, timestamp time.Time, needUpdate bool header := headers[0] tags := make([]string, 0) - ell, err := header.Search(`//*[@data-test-id="article-label"]`) + ell, err := header.Search(`.//*[@data-test-id="article-label"]`) p(err) if len(ell) == 0 { _ = w(fmt.Errorf("no labels: %s", url)) @@ -226,7 +226,7 @@ func doArticle(filename string, url string, timestamp time.Time, needUpdate bool pars := make([]string, 0) - ell, err = header.Search(`//*[@data-test-id="article-title"]`) + ell, err = header.Search(`.//*[@data-test-id="article-title"]`) p(err) if len(ell) != 1 { _ = w(fmt.Errorf("found %d titles: %s", len(ell), url)) @@ -239,7 +239,7 @@ func doArticle(filename string, url string, timestamp time.Time, needUpdate bool } found := false - ell, err = header.Search(`//*[@data-test-id="header-intro"]`) + ell, err = header.Search(`.//*[@data-test-id="header-intro"]`) p(err) for _, el := range ell { s := strings.TrimSpace(el.Content()) @@ -252,13 +252,13 @@ func doArticle(filename string, url string, timestamp time.Time, needUpdate bool _ = w(fmt.Errorf("no intro: %s", url)) } - specials, err := article.Search(`//section//aside | //section//figure | //section//b`) + specials, err := article.Search(`.//section//aside | .//section//figure | .//section//b`) p(err) for _, special := range specials { special.Remove() } - ell, err = article.Search(`//section//*[@data-article-element-index]`) + ell, err = article.Search(`.//section//*[@data-article-element-index]`) p(err) if len(ell) == 0 { _ = w(fmt.Errorf("no elements: %s", url))