update Parool
This commit is contained in:
@@ -182,6 +182,9 @@ func doArticle(filename string, url string, timestamp time.Time, needUpdate bool
|
|||||||
tags := make([]string, 0)
|
tags := make([]string, 0)
|
||||||
ell, err := article.Search(`//header//*[@data-test-id="article-label"]`)
|
ell, err := article.Search(`//header//*[@data-test-id="article-label"]`)
|
||||||
p(err)
|
p(err)
|
||||||
|
if len(ell) == 0 {
|
||||||
|
_ = w(fmt.Errorf("no labels: %s", url))
|
||||||
|
}
|
||||||
for _, el := range ell {
|
for _, el := range ell {
|
||||||
s := strings.TrimSpace(el.Content())
|
s := strings.TrimSpace(el.Content())
|
||||||
if s != "" {
|
if s != "" {
|
||||||
@@ -189,11 +192,13 @@ func doArticle(filename string, url string, timestamp time.Time, needUpdate bool
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fouten := make([]string, 0)
|
|
||||||
pars := make([]string, 0)
|
pars := make([]string, 0)
|
||||||
|
|
||||||
ell, err = article.Search(`//header//*[@data-test-id="article-title"]`)
|
ell, err = article.Search(`//header//*[@data-test-id="article-title"]`)
|
||||||
p(err)
|
p(err)
|
||||||
|
if len(ell) != 1 {
|
||||||
|
_ = w(fmt.Errorf("found %d titles: %s", len(ell), url))
|
||||||
|
}
|
||||||
for _, el := range ell {
|
for _, el := range ell {
|
||||||
s := strings.TrimSpace(el.Content())
|
s := strings.TrimSpace(el.Content())
|
||||||
if s != "" {
|
if s != "" {
|
||||||
@@ -212,8 +217,7 @@ func doArticle(filename string, url string, timestamp time.Time, needUpdate bool
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
if !found {
|
if !found {
|
||||||
fouten = append(fouten, fmt.Sprintf("no heading: %s\n", url))
|
_ = w(fmt.Errorf("no intro: %s", url))
|
||||||
_ = w(fmt.Errorf("no heading: %s", url))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
specials, err := article.Search(`//section//aside | //section//figure | //section//b`)
|
specials, err := article.Search(`//section//aside | //section//figure | //section//b`)
|
||||||
@@ -222,27 +226,14 @@ func doArticle(filename string, url string, timestamp time.Time, needUpdate bool
|
|||||||
special.Remove()
|
special.Remove()
|
||||||
}
|
}
|
||||||
|
|
||||||
found = false
|
|
||||||
ell, err = article.Search(`//section//*[@data-article-element-index]`)
|
ell, err = article.Search(`//section//*[@data-article-element-index]`)
|
||||||
p(err)
|
p(err)
|
||||||
for _, el := range ell {
|
if len(ell) == 0 {
|
||||||
s := strings.TrimSpace(el.Content())
|
_ = w(fmt.Errorf("no elements: %s", url))
|
||||||
if s != "" {
|
|
||||||
pars = append(pars, s)
|
|
||||||
found = true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if !found {
|
|
||||||
fouten = append(fouten, fmt.Sprintf("no text: %s\n", url))
|
|
||||||
_ = w(fmt.Errorf("no text: %s", url))
|
|
||||||
}
|
|
||||||
|
|
||||||
if len(fouten) > 0 {
|
|
||||||
fp, err := os.Create(filename + ".err")
|
fp, err := os.Create(filename + ".err")
|
||||||
p(err)
|
p(err)
|
||||||
for _, fout := range fouten {
|
p(fmt.Fprintf(fp, "no elements: %s\n", url))
|
||||||
p(fp.WriteString(fout))
|
|
||||||
}
|
|
||||||
p(fp.Close())
|
p(fp.Close())
|
||||||
p(os.Chtimes(filename+".err", timestamp, timestamp))
|
p(os.Chtimes(filename+".err", timestamp, timestamp))
|
||||||
|
|
||||||
@@ -255,6 +246,30 @@ func doArticle(filename string, url string, timestamp time.Time, needUpdate bool
|
|||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
found = false
|
||||||
|
for _, el := range ell {
|
||||||
|
s := strings.TrimSpace(el.Content())
|
||||||
|
if s != "" {
|
||||||
|
pars = append(pars, s)
|
||||||
|
found = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if !found {
|
||||||
|
_ = w(fmt.Errorf("no text, skipping: %s", url))
|
||||||
|
fp, err := os.Create(filename + ".skip")
|
||||||
|
p(fp.WriteString(url + "\n"))
|
||||||
|
p(err)
|
||||||
|
p(os.Chtimes(filename+".skip", timestamp, timestamp))
|
||||||
|
|
||||||
|
fp, err = os.Create(filename + ".html")
|
||||||
|
p(err)
|
||||||
|
p(fp.Write(body))
|
||||||
|
p(fp.Close())
|
||||||
|
p(os.Chtimes(filename+".html", timestamp, timestamp))
|
||||||
|
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
fp, err := os.Create(filename + ".txt")
|
fp, err := os.Create(filename + ".txt")
|
||||||
p(err)
|
p(err)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user