diff --git a/HLN/cmd/hln/hln.go b/HLN/cmd/hln/hln.go index eea31b3..6209cad 100644 --- a/HLN/cmd/hln/hln.go +++ b/HLN/cmd/hln/hln.go @@ -278,8 +278,8 @@ func doArticle(filename string, url string, title string, timestamp time.Time, n specials, err := article.Search(`.//*[@data-content-type="GROUP"]`) p(err) - for _, special := range specials { - special.Remove() + for i := len(specials) - 1; i >= 0; i-- { + specials[i].Remove() } other, err := article.Search(`.//*[@data-content-type="PODCAST"]`) diff --git a/Parool/cmd/parool/parool.go b/Parool/cmd/parool/parool.go index b446953..294ff4c 100644 --- a/Parool/cmd/parool/parool.go +++ b/Parool/cmd/parool/parool.go @@ -313,10 +313,10 @@ func doArticle(filename string, url string, title string, timestamp time.Time, n _ = w(fmt.Errorf("no intro: %s", url)) } - specials, err := article.Search(`.//section//aside | .//section//figure | .//section//b`) + specials, err := article.Search(`.//aside | .//figure | .//figcaption | .//section//b`) p(err) - for _, special := range specials { - special.Remove() + for i := len(specials) - 1; i >= 0; i-- { + specials[i].Remove() } ell, err = article.Search(`.//section//*[@data-article-element-index]`) diff --git a/RO/cmd/xml2txt/xml2txt.go b/RO/cmd/xml2txt/xml2txt.go index b7618eb..dbb72c3 100644 --- a/RO/cmd/xml2txt/xml2txt.go +++ b/RO/cmd/xml2txt/xml2txt.go @@ -69,8 +69,8 @@ func main() { root := doc.Root() divs, err := root.Search(`//div[@class="donatieformlinks"]`) x(err) - for _, div := range divs { - div.Remove() + for i := len(divs) - 1; i >= 0; i-- { + divs[i].Remove() } pp, err := root.Search(`//body//p[not(.//a[contains(@href,"reportersonline.nl/support")])]`) x(err) diff --git a/Volkskrant/cmd/volkskrant/volkskrant.go b/Volkskrant/cmd/volkskrant/volkskrant.go index 0e25ffe..f6b0b8a 100644 --- a/Volkskrant/cmd/volkskrant/volkskrant.go +++ b/Volkskrant/cmd/volkskrant/volkskrant.go @@ -315,10 +315,10 @@ func doArticle(filename string, url string, title string, timestamp time.Time, n _ = w(fmt.Errorf("no intro: %s", url)) } - specials, err := article.Search(`.//section//aside | .//section//figure | .//section//b`) + specials, err := article.Search(`.//aside | .//figure | .//figcaption | .//section//b`) p(err) - for _, special := range specials { - special.Remove() + for i := len(specials) - 1; i >= 0; i-- { + specials[i].Remove() } ell, err = article.Search(`.//section//*[@data-article-element-index]`)