HLN: ontbrekende paragrafen

This commit is contained in:
Peter Kleiweg
2026-05-23 13:49:39 +02:00
parent 0a43773ec8
commit c0335f5b57

View File

@@ -208,17 +208,17 @@ func doArticle(filename string, url string, timestamp time.Time, needUpdate bool
 }
 }
-found := false
+hasIntro := false
 ell, err = article.Search(`//*[@data-content-type="INTRO"]`)
 p(err)
 for _, el := range ell {
 s := strings.TrimSpace(el.Content())
 if s != "" {
 pars = append(pars, s)
-found = true
+hasIntro = true
 }
 }
-if !found {
+if !hasIntro {
 _ = w(fmt.Errorf("no intro: %s", url))
 }
@@ -234,7 +234,7 @@ func doArticle(filename string, url string, timestamp time.Time, needUpdate bool
 ell, err = article.Search(`//*[@data-content-type="PARAGRAPH"]`)
 p(err)
-if len(ell) == 0 && !hasOther {
+if len(ell) == 0 && !hasOther && !hasIntro {
 _ = w(fmt.Errorf("no paragraphs: %s", url))
 fp, err := os.Create(filename + ".err")
@@ -252,16 +252,16 @@ func doArticle(filename string, url string, timestamp time.Time, needUpdate bool
 return false
 }
-found = false
+hasPar := false
 for _, el := range ell {
 s := strings.TrimSpace(el.Content())
 if s != "" {
 pars = append(pars, s)
-found = true
+hasPar = true
 }
 }
-if !found {
-if !hasOther {
+if !hasPar {
+if !hasOther && !hasIntro {
 _ = w(fmt.Errorf("no text, skipping: %s", url))
 }
 fp, err := os.Create(filename + ".skip")