From b05ff1a4a96f98343db634345c0399fc77a2a7c1 Mon Sep 17 00:00:00 2001 From: Peter Kleiweg Date: Wed, 8 Apr 2026 20:24:52 +0200 Subject: [PATCH] xquery/nieuwe_namen.xq --- .gitignore | 1 + xquery/howto | 12 ++++++++++++ xquery/nieuwe_namen.xq | 20 ++++++++++++++++++++ 3 files changed, 33 insertions(+) create mode 100644 xquery/howto create mode 100644 xquery/nieuwe_namen.xq diff --git a/.gitignore b/.gitignore index 6754d6c..82e6053 100644 --- a/.gitignore +++ b/.gitignore @@ -32,3 +32,4 @@ bin/week2files 20?? corpus data +tmp diff --git a/xquery/howto b/xquery/howto new file mode 100644 index 0000000..6dd4a53 --- /dev/null +++ b/xquery/howto @@ -0,0 +1,12 @@ +alto *.data.dz tq:nieuwe_namen.xq | sort | uniq > items.txt + +voor elk item dit bijwerken: + + type Item struct { + count int + tags map[string]int + } + + items := make(map[string]Item) + + diff --git a/xquery/nieuwe_namen.xq b/xquery/nieuwe_namen.xq new file mode 100644 index 0000000..cb48f45 --- /dev/null +++ b/xquery/nieuwe_namen.xq @@ -0,0 +1,20 @@ +for $x in //node[((@cat="mwu" and node[@pt="spec"]) or (@pt and @*="eigen" and not(@rel="mwp"))) and not(@his="normal") and not(@his_1="decap" or @his_1="0")] + return ( + {replace(data(/alpino_ds/sentence/@sentid), "\.[^.]*$", "")} +{ +for $i in data(/alpino_ds/metadata/meta[@name="cat"]/@value) + return {$i} +} +{ +for $i in data(/alpino_ds/metadata/meta[@name="tag"]/@value) + return {$i} +} + { data($x//@word) } +, ' ' ) + +(: + +{ data(/alpino_ds/sentence/@sentid) } +{ replace(data(/alpino_ds/sentence/@sentid), "\.[^.]*$", "") } + +:)