HLN: ontbrekende paragrafen
This commit is contained in:
@@ -208,17 +208,17 @@ func doArticle(filename string, url string, timestamp time.Time, needUpdate bool
|
||||
}
|
||||
}
|
||||
|
||||
found := false
|
||||
hasIntro := false
|
||||
ell, err = article.Search(`//*[@data-content-type="INTRO"]`)
|
||||
p(err)
|
||||
for _, el := range ell {
|
||||
s := strings.TrimSpace(el.Content())
|
||||
if s != "" {
|
||||
pars = append(pars, s)
|
||||
found = true
|
||||
hasIntro = true
|
||||
}
|
||||
}
|
||||
if !found {
|
||||
if !hasIntro {
|
||||
_ = w(fmt.Errorf("no intro: %s", url))
|
||||
}
|
||||
|
||||
@@ -234,7 +234,7 @@ func doArticle(filename string, url string, timestamp time.Time, needUpdate bool
|
||||
|
||||
ell, err = article.Search(`//*[@data-content-type="PARAGRAPH"]`)
|
||||
p(err)
|
||||
if len(ell) == 0 && !hasOther {
|
||||
if len(ell) == 0 && !hasOther && !hasIntro {
|
||||
_ = w(fmt.Errorf("no paragraphs: %s", url))
|
||||
|
||||
fp, err := os.Create(filename + ".err")
|
||||
@@ -252,16 +252,16 @@ func doArticle(filename string, url string, timestamp time.Time, needUpdate bool
|
||||
return false
|
||||
}
|
||||
|
||||
found = false
|
||||
hasPar := false
|
||||
for _, el := range ell {
|
||||
s := strings.TrimSpace(el.Content())
|
||||
if s != "" {
|
||||
pars = append(pars, s)
|
||||
found = true
|
||||
hasPar = true
|
||||
}
|
||||
}
|
||||
if !found {
|
||||
if !hasOther {
|
||||
if !hasPar {
|
||||
if !hasOther && !hasIntro {
|
||||
_ = w(fmt.Errorf("no text, skipping: %s", url))
|
||||
}
|
||||
fp, err := os.Create(filename + ".skip")
|
||||
|
||||
Reference in New Issue
Block a user