HLN, Parool: fix relatief xpath
This commit is contained in:
@@ -185,7 +185,7 @@ func doArticle(filename string, url string, timestamp time.Time, needUpdate bool
|
|||||||
article := articles[0]
|
article := articles[0]
|
||||||
|
|
||||||
tags := make([]string, 0)
|
tags := make([]string, 0)
|
||||||
ell, err := article.Search(`//*[@data-content-type="LABEL"]`)
|
ell, err := article.Search(`.//*[@data-content-type="LABEL"]`)
|
||||||
p(err)
|
p(err)
|
||||||
for _, el := range ell {
|
for _, el := range ell {
|
||||||
s := strings.TrimSpace(el.Content())
|
s := strings.TrimSpace(el.Content())
|
||||||
@@ -196,7 +196,7 @@ func doArticle(filename string, url string, timestamp time.Time, needUpdate bool
|
|||||||
|
|
||||||
pars := make([]string, 0)
|
pars := make([]string, 0)
|
||||||
|
|
||||||
ell, err = article.Search(`//*[@data-content-type="TITLE"]`)
|
ell, err = article.Search(`.//*[@data-content-type="TITLE"]`)
|
||||||
p(err)
|
p(err)
|
||||||
if len(ell) != 1 {
|
if len(ell) != 1 {
|
||||||
_ = w(fmt.Errorf("found %d titles: %s", len(ell), url))
|
_ = w(fmt.Errorf("found %d titles: %s", len(ell), url))
|
||||||
@@ -209,7 +209,7 @@ func doArticle(filename string, url string, timestamp time.Time, needUpdate bool
|
|||||||
}
|
}
|
||||||
|
|
||||||
hasIntro := false
|
hasIntro := false
|
||||||
ell, err = article.Search(`//*[@data-content-type="INTRO"]`)
|
ell, err = article.Search(`.//*[@data-content-type="INTRO"]`)
|
||||||
p(err)
|
p(err)
|
||||||
for _, el := range ell {
|
for _, el := range ell {
|
||||||
s := strings.TrimSpace(el.Content())
|
s := strings.TrimSpace(el.Content())
|
||||||
@@ -222,17 +222,17 @@ func doArticle(filename string, url string, timestamp time.Time, needUpdate bool
|
|||||||
_ = w(fmt.Errorf("no intro: %s", url))
|
_ = w(fmt.Errorf("no intro: %s", url))
|
||||||
}
|
}
|
||||||
|
|
||||||
specials, err := article.Search(`//*[@data-content-type="GROUP"]`)
|
specials, err := article.Search(`.//*[@data-content-type="GROUP"]`)
|
||||||
p(err)
|
p(err)
|
||||||
for _, special := range specials {
|
for _, special := range specials {
|
||||||
special.Remove()
|
special.Remove()
|
||||||
}
|
}
|
||||||
|
|
||||||
other, err := article.Search(`//*[@data-content-type="PODCAST"]`)
|
other, err := article.Search(`.//*[@data-content-type="PODCAST"]`)
|
||||||
p(err)
|
p(err)
|
||||||
hasOther := len(other) > 0
|
hasOther := len(other) > 0
|
||||||
|
|
||||||
ell, err = article.Search(`//*[@data-content-type="PARAGRAPH"]`)
|
ell, err = article.Search(`.//*[@data-content-type="PARAGRAPH"]`)
|
||||||
p(err)
|
p(err)
|
||||||
if len(ell) == 0 && !hasOther && !hasIntro {
|
if len(ell) == 0 && !hasOther && !hasIntro {
|
||||||
_ = w(fmt.Errorf("no paragraphs: %s", url))
|
_ = w(fmt.Errorf("no paragraphs: %s", url))
|
||||||
|
|||||||
@@ -179,7 +179,7 @@ func doArticle(filename string, url string, timestamp time.Time, needUpdate bool
|
|||||||
}
|
}
|
||||||
article := articles[0]
|
article := articles[0]
|
||||||
|
|
||||||
live, err := article.Search(`//*[@data-test-id="live-blog-label"]`)
|
live, err := article.Search(`.//*[@data-test-id="live-blog-label"]`)
|
||||||
p(err)
|
p(err)
|
||||||
if len(live) > 0 {
|
if len(live) > 0 {
|
||||||
fp, err := os.Create(filename + ".skip")
|
fp, err := os.Create(filename + ".skip")
|
||||||
@@ -189,7 +189,7 @@ func doArticle(filename string, url string, timestamp time.Time, needUpdate bool
|
|||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
headers, err := article.Search(`//header`)
|
headers, err := article.Search(`.//header`)
|
||||||
p(err)
|
p(err)
|
||||||
if len(headers) == 0 {
|
if len(headers) == 0 {
|
||||||
_ = w(fmt.Errorf("no header: %s", url))
|
_ = w(fmt.Errorf("no header: %s", url))
|
||||||
@@ -212,7 +212,7 @@ func doArticle(filename string, url string, timestamp time.Time, needUpdate bool
|
|||||||
header := headers[0]
|
header := headers[0]
|
||||||
|
|
||||||
tags := make([]string, 0)
|
tags := make([]string, 0)
|
||||||
ell, err := header.Search(`//*[@data-test-id="article-label"]`)
|
ell, err := header.Search(`.//*[@data-test-id="article-label"]`)
|
||||||
p(err)
|
p(err)
|
||||||
if len(ell) == 0 {
|
if len(ell) == 0 {
|
||||||
_ = w(fmt.Errorf("no labels: %s", url))
|
_ = w(fmt.Errorf("no labels: %s", url))
|
||||||
@@ -226,7 +226,7 @@ func doArticle(filename string, url string, timestamp time.Time, needUpdate bool
|
|||||||
|
|
||||||
pars := make([]string, 0)
|
pars := make([]string, 0)
|
||||||
|
|
||||||
ell, err = header.Search(`//*[@data-test-id="article-title"]`)
|
ell, err = header.Search(`.//*[@data-test-id="article-title"]`)
|
||||||
p(err)
|
p(err)
|
||||||
if len(ell) != 1 {
|
if len(ell) != 1 {
|
||||||
_ = w(fmt.Errorf("found %d titles: %s", len(ell), url))
|
_ = w(fmt.Errorf("found %d titles: %s", len(ell), url))
|
||||||
@@ -239,7 +239,7 @@ func doArticle(filename string, url string, timestamp time.Time, needUpdate bool
|
|||||||
}
|
}
|
||||||
|
|
||||||
found := false
|
found := false
|
||||||
ell, err = header.Search(`//*[@data-test-id="header-intro"]`)
|
ell, err = header.Search(`.//*[@data-test-id="header-intro"]`)
|
||||||
p(err)
|
p(err)
|
||||||
for _, el := range ell {
|
for _, el := range ell {
|
||||||
s := strings.TrimSpace(el.Content())
|
s := strings.TrimSpace(el.Content())
|
||||||
@@ -252,13 +252,13 @@ func doArticle(filename string, url string, timestamp time.Time, needUpdate bool
|
|||||||
_ = w(fmt.Errorf("no intro: %s", url))
|
_ = w(fmt.Errorf("no intro: %s", url))
|
||||||
}
|
}
|
||||||
|
|
||||||
specials, err := article.Search(`//section//aside | //section//figure | //section//b`)
|
specials, err := article.Search(`.//section//aside | .//section//figure | .//section//b`)
|
||||||
p(err)
|
p(err)
|
||||||
for _, special := range specials {
|
for _, special := range specials {
|
||||||
special.Remove()
|
special.Remove()
|
||||||
}
|
}
|
||||||
|
|
||||||
ell, err = article.Search(`//section//*[@data-article-element-index]`)
|
ell, err = article.Search(`.//section//*[@data-article-element-index]`)
|
||||||
p(err)
|
p(err)
|
||||||
if len(ell) == 0 {
|
if len(ell) == 0 {
|
||||||
_ = w(fmt.Errorf("no elements: %s", url))
|
_ = w(fmt.Errorf("no elements: %s", url))
|
||||||
|
|||||||
Reference in New Issue
Block a user