HLN: ontbrekende paragrafen
This commit is contained in:
@@ -208,17 +208,17 @@ func doArticle(filename string, url string, timestamp time.Time, needUpdate bool
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
found := false
|
hasIntro := false
|
||||||
ell, err = article.Search(`//*[@data-content-type="INTRO"]`)
|
ell, err = article.Search(`//*[@data-content-type="INTRO"]`)
|
||||||
p(err)
|
p(err)
|
||||||
for _, el := range ell {
|
for _, el := range ell {
|
||||||
s := strings.TrimSpace(el.Content())
|
s := strings.TrimSpace(el.Content())
|
||||||
if s != "" {
|
if s != "" {
|
||||||
pars = append(pars, s)
|
pars = append(pars, s)
|
||||||
found = true
|
hasIntro = true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if !found {
|
if !hasIntro {
|
||||||
_ = w(fmt.Errorf("no intro: %s", url))
|
_ = w(fmt.Errorf("no intro: %s", url))
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -234,7 +234,7 @@ func doArticle(filename string, url string, timestamp time.Time, needUpdate bool
|
|||||||
|
|
||||||
ell, err = article.Search(`//*[@data-content-type="PARAGRAPH"]`)
|
ell, err = article.Search(`//*[@data-content-type="PARAGRAPH"]`)
|
||||||
p(err)
|
p(err)
|
||||||
if len(ell) == 0 && !hasOther {
|
if len(ell) == 0 && !hasOther && !hasIntro {
|
||||||
_ = w(fmt.Errorf("no paragraphs: %s", url))
|
_ = w(fmt.Errorf("no paragraphs: %s", url))
|
||||||
|
|
||||||
fp, err := os.Create(filename + ".err")
|
fp, err := os.Create(filename + ".err")
|
||||||
@@ -252,16 +252,16 @@ func doArticle(filename string, url string, timestamp time.Time, needUpdate bool
|
|||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
found = false
|
hasPar := false
|
||||||
for _, el := range ell {
|
for _, el := range ell {
|
||||||
s := strings.TrimSpace(el.Content())
|
s := strings.TrimSpace(el.Content())
|
||||||
if s != "" {
|
if s != "" {
|
||||||
pars = append(pars, s)
|
pars = append(pars, s)
|
||||||
found = true
|
hasPar = true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if !found {
|
if !hasPar {
|
||||||
if !hasOther {
|
if !hasOther && !hasIntro {
|
||||||
_ = w(fmt.Errorf("no text, skipping: %s", url))
|
_ = w(fmt.Errorf("no text, skipping: %s", url))
|
||||||
}
|
}
|
||||||
fp, err := os.Create(filename + ".skip")
|
fp, err := os.Create(filename + ".skip")
|
||||||
|
|||||||
Reference in New Issue
Block a user