HLN: ontbrekende paragrafen

This commit is contained in:
Peter Kleiweg
2026-05-23 13:49:39 +02:00
parent 0a43773ec8
commit c0335f5b57

View File

@@ -208,17 +208,17 @@ func doArticle(filename string, url string, timestamp time.Time, needUpdate bool
} }
} }
found := false hasIntro := false
ell, err = article.Search(`//*[@data-content-type="INTRO"]`) ell, err = article.Search(`//*[@data-content-type="INTRO"]`)
p(err) p(err)
for _, el := range ell { for _, el := range ell {
s := strings.TrimSpace(el.Content()) s := strings.TrimSpace(el.Content())
if s != "" { if s != "" {
pars = append(pars, s) pars = append(pars, s)
found = true hasIntro = true
} }
} }
if !found { if !hasIntro {
_ = w(fmt.Errorf("no intro: %s", url)) _ = w(fmt.Errorf("no intro: %s", url))
} }
@@ -234,7 +234,7 @@ func doArticle(filename string, url string, timestamp time.Time, needUpdate bool
ell, err = article.Search(`//*[@data-content-type="PARAGRAPH"]`) ell, err = article.Search(`//*[@data-content-type="PARAGRAPH"]`)
p(err) p(err)
if len(ell) == 0 && !hasOther { if len(ell) == 0 && !hasOther && !hasIntro {
_ = w(fmt.Errorf("no paragraphs: %s", url)) _ = w(fmt.Errorf("no paragraphs: %s", url))
fp, err := os.Create(filename + ".err") fp, err := os.Create(filename + ".err")
@@ -252,16 +252,16 @@ func doArticle(filename string, url string, timestamp time.Time, needUpdate bool
return false return false
} }
found = false hasPar := false
for _, el := range ell { for _, el := range ell {
s := strings.TrimSpace(el.Content()) s := strings.TrimSpace(el.Content())
if s != "" { if s != "" {
pars = append(pars, s) pars = append(pars, s)
found = true hasPar = true
} }
} }
if !found { if !hasPar {
if !hasOther { if !hasOther && !hasIntro {
_ = w(fmt.Errorf("no text, skipping: %s", url)) _ = w(fmt.Errorf("no text, skipping: %s", url))
} }
fp, err := os.Create(filename + ".skip") fp, err := os.Create(filename + ".skip")