cats en tags
This commit is contained in:
@@ -57,7 +57,7 @@ func main() {
|
||||
var item Item
|
||||
x(xml.Unmarshal(b, &item))
|
||||
for _, cat := range item.Cats {
|
||||
x(fmt.Fprintf(fp, "##META text cat = %s\n", fixSpace(cat)))
|
||||
x(fmt.Fprintf(fp, "##META text tag = %s\n", fixSpace(cat)))
|
||||
}
|
||||
x(fp.WriteString(addEnd(fixSpace(item.Title))))
|
||||
doc, err := gokogiri.ParseHtml([]byte(`<html><body>` + item.Text + `</body></html>`))
|
||||
|
||||
@@ -58,6 +58,9 @@ Alpino -flag treebank xml debug=1 end_hook=xml user_max=900000 -parse < $corpus.
|
||||
cd xml
|
||||
alto -o $corpus.data.dz *.xml 2> /dev/null
|
||||
|
||||
# telling per bericht, niet per zin
|
||||
/net/corpora/nlnieuws/namen.sh -x 10 -s $corpus.data.dz > $corpus.tag.txt
|
||||
|
||||
cd ../..
|
||||
rm -fr out
|
||||
|
||||
|
||||
Reference in New Issue
Block a user