cats en tags
This commit is contained in:
@@ -151,7 +151,9 @@ func main() {
|
||||
}
|
||||
}
|
||||
|
||||
func doArticle(filename string, url string, title string, tags []string, labels []string, timestamp time.Time, needUpdate bool) bool {
|
||||
// Nstag -> tag
|
||||
// Nslabeltag -> cat
|
||||
func doArticle(filename string, url string, title string, tags []string, cats []string, timestamp time.Time, needUpdate bool) bool {
|
||||
if exists(filename + ".skip") {
|
||||
return true
|
||||
}
|
||||
@@ -236,18 +238,18 @@ func doArticle(filename string, url string, title string, tags []string, labels
|
||||
}
|
||||
}
|
||||
|
||||
if len(tags) == 0 {
|
||||
if len(cats) == 0 {
|
||||
p(fmt.Fprintln(&buf, "##META text cat ="))
|
||||
} else {
|
||||
for _, tag := range tags {
|
||||
p(fmt.Fprintf(&buf, "##META text cat = %s\n", fixSpace(tag)))
|
||||
for _, cat := range cats {
|
||||
p(fmt.Fprintf(&buf, "##META text cat = %s\n", fixSpace(cat)))
|
||||
}
|
||||
}
|
||||
if len(labels) == 0 {
|
||||
p(fmt.Fprintln(&buf, "##META text label ="))
|
||||
if len(tags) == 0 {
|
||||
p(fmt.Fprintln(&buf, "##META text tag ="))
|
||||
} else {
|
||||
for _, label := range labels {
|
||||
p(fmt.Fprintf(&buf, "##META text label = %s\n", fixSpace(label)))
|
||||
for _, tag := range tags {
|
||||
p(fmt.Fprintf(&buf, "##META text tag = %s\n", fixSpace(tag)))
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -56,6 +56,10 @@ Alpino -flag treebank xml debug=1 end_hook=xml user_max=900000 -parse < $corpus.
|
||||
cd xml
|
||||
alto -o $corpus.data.dz *.xml 2> /dev/null
|
||||
|
||||
# telling per bericht, niet per zin
|
||||
/net/corpora/nlnieuws/namen.sh -x 9 -s $corpus.data.dz > $corpus.cat.txt
|
||||
/net/corpora/nlnieuws/namen.sh -x 10 -s $corpus.data.dz > $corpus.tag.txt
|
||||
|
||||
cd ../..
|
||||
rm -fr out
|
||||
|
||||
|
||||
Reference in New Issue
Block a user