diff --git a/HLN/cmd/hln/hln.go b/HLN/cmd/hln/hln.go index 21c7a75..e96c0a0 100644 --- a/HLN/cmd/hln/hln.go +++ b/HLN/cmd/hln/hln.go @@ -208,17 +208,17 @@ func doArticle(filename string, url string, timestamp time.Time, needUpdate bool } } - found := false + hasIntro := false ell, err = article.Search(`//*[@data-content-type="INTRO"]`) p(err) for _, el := range ell { s := strings.TrimSpace(el.Content()) if s != "" { pars = append(pars, s) - found = true + hasIntro = true } } - if !found { + if !hasIntro { _ = w(fmt.Errorf("no intro: %s", url)) } @@ -234,7 +234,7 @@ func doArticle(filename string, url string, timestamp time.Time, needUpdate bool ell, err = article.Search(`//*[@data-content-type="PARAGRAPH"]`) p(err) - if len(ell) == 0 && !hasOther { + if len(ell) == 0 && !hasOther && !hasIntro { _ = w(fmt.Errorf("no paragraphs: %s", url)) fp, err := os.Create(filename + ".err") @@ -252,16 +252,16 @@ func doArticle(filename string, url string, timestamp time.Time, needUpdate bool return false } - found = false + hasPar := false for _, el := range ell { s := strings.TrimSpace(el.Content()) if s != "" { pars = append(pars, s) - found = true + hasPar = true } } - if !found { - if !hasOther { + if !hasPar { + if !hasOther && !hasIntro { _ = w(fmt.Errorf("no text, skipping: %s", url)) } fp, err := os.Create(filename + ".skip")