diff --git a/HLN/cmd/hln/hln.go b/HLN/cmd/hln/hln.go index 3f809d4..21c7a75 100644 --- a/HLN/cmd/hln/hln.go +++ b/HLN/cmd/hln/hln.go @@ -228,9 +228,13 @@ func doArticle(filename string, url string, timestamp time.Time, needUpdate bool special.Remove() } + other, err := article.Search(`//*[@data-content-type="PODCAST"]`) + p(err) + hasOther := len(other) > 0 + ell, err = article.Search(`//*[@data-content-type="PARAGRAPH"]`) p(err) - if len(ell) == 0 { + if len(ell) == 0 && !hasOther { _ = w(fmt.Errorf("no paragraphs: %s", url)) fp, err := os.Create(filename + ".err") @@ -257,7 +261,9 @@ func doArticle(filename string, url string, timestamp time.Time, needUpdate bool } } if !found { - _ = w(fmt.Errorf("no text, skipping: %s", url)) + if !hasOther { + _ = w(fmt.Errorf("no text, skipping: %s", url)) + } fp, err := os.Create(filename + ".skip") p(fp.WriteString(url + "\n")) p(err)