Patch overview (free text before the first "diff --git" header; not part of any hunk).

This patch touches three files:

  * .gitignore
      Adds one ignore entry, "tmp", after the existing "data" entry.

  * xquery/howto (new file, 12 lines)
      Working notes. The first line is a shell pipeline that runs "alto" over
      *.data.dz with the query file given as tq:nieuwe_namen.xq and writes the
      sorted, de-duplicated output to items.txt.
      The Dutch sentence "voor elk item dit bijwerken:" means
      "update this for each item:". It is followed by a Go-style sketch: a
      struct Item with an int "count" and a "tags" map from string to int,
      plus a map from string to Item.

  * xquery/nieuwe_namen.xq (new file, 20 lines)
      XQuery FLWOR expression ("nieuwe namen" is Dutch for "new names").
      It iterates over every //node that satisfies all of:
        - either @cat="mwu" with a child node whose @pt="spec",
          or the node has @pt, has some attribute equal to "eigen",
          and does not have @rel="mwp";
        - not @his="normal";
        - not (@his_1="decap" or @his_1="0").
      For each match the return clause contains, in order:
        - /alpino_ds/sentence/@sentid with its final ".suffix" removed
          (regex "\.[^.]*$" replaced by the empty string);
        - each @value of /alpino_ds/metadata/meta[@name="cat"];
        - each @value of /alpino_ds/metadata/meta[@name="tag"];
        - the @word attribute values found under the matched node ($x//@word);
        - a trailing string literal.
      The block between "(:" and ":)" at the end is an XQuery comment holding
      two alternative sentid expressions (raw and suffix-stripped).

NOTE(review): the enclosed expressions "{ ... }" in the return clause appear
without any surrounding element constructors, which is not valid XQuery on its
own. Presumably XML tags were stripped when this patch was extracted; confirm
against the file in the repository before applying.

NOTE(review): leading indentation inside added lines is shown as a single
space; the original widths could not be recovered from this copy. TODO confirm.

diff --git a/.gitignore b/.gitignore
index 6754d6c..82e6053 100644
--- a/.gitignore
+++ b/.gitignore
@@ -32,3 +32,4 @@ bin/week2files
 20??
 corpus
 data
+tmp
diff --git a/xquery/howto b/xquery/howto
new file mode 100644
index 0000000..6dd4a53
--- /dev/null
+++ b/xquery/howto
@@ -0,0 +1,12 @@
+alto *.data.dz tq:nieuwe_namen.xq | sort | uniq > items.txt
+
+voor elk item dit bijwerken:
+
+ type Item struct {
+ count int
+ tags map[string]int
+ }
+
+ items := make(map[string]Item)
+
+
diff --git a/xquery/nieuwe_namen.xq b/xquery/nieuwe_namen.xq
new file mode 100644
index 0000000..cb48f45
--- /dev/null
+++ b/xquery/nieuwe_namen.xq
@@ -0,0 +1,20 @@
+for $x in //node[((@cat="mwu" and node[@pt="spec"]) or (@pt and @*="eigen" and not(@rel="mwp"))) and not(@his="normal") and not(@his_1="decap" or @his_1="0")]
+ return (
+ {replace(data(/alpino_ds/sentence/@sentid), "\.[^.]*$", "")}
+{
+for $i in data(/alpino_ds/metadata/meta[@name="cat"]/@value)
+ return {$i}
+}
+{
+for $i in data(/alpino_ds/metadata/meta[@name="tag"]/@value)
+ return {$i}
+}
+ { data($x//@word) }
+, ' ' )
+
+(:
+
+{ data(/alpino_ds/sentence/@sentid) }
+{ replace(data(/alpino_ds/sentence/@sentid), "\.[^.]*$", "") }
+
+:)