grote reorganisatie:

- HLN, NOS, NU, VRT: per week -> per dag
- yyyy-ww -> yyyy.ww
- yyyy*  -> yyyy/yyyy*
etc
This commit is contained in:
Peter Kleiweg
2026-05-27 22:42:03 +02:00
parent e430ff576b
commit 5c651387af
46 changed files with 328 additions and 227 deletions

View File

@@ -77,7 +77,7 @@ func main() {
}
p(err)
year, week := t.ISOWeek()
dirname := fmt.Sprintf("/net/corpora/nlnieuws/AT5/%d/%02d", year, week)
dirname := fmt.Sprintf("/net/corpora/nlnieuws/AT5/%d/w%02d", year, week)
if exists(dirname + "/lock") {
continue
}

View File

@@ -22,7 +22,7 @@ type Item struct {
var (
x = e.ExitErr
reYearWeek = regexp.MustCompile(`^2[0-9][0-9][0-9]-[0-5][0-9]$`)
reYearWeek = regexp.MustCompile(`^2[0-9][0-9][0-9]\.[0-5][0-9]$`)
)
func main() {
@@ -31,16 +31,16 @@ func main() {
switch len(os.Args) {
case 1:
year, week := time.Now().AddDate(0, 0, -7).ISOWeek()
ds = fmt.Sprintf("%d-%02d", year, week)
ds = fmt.Sprintf("%d.%02d", year, week)
case 2:
if !reYearWeek.MatchString(os.Args[1]) {
x(fmt.Errorf("arg must be yyyy-ww"))
x(fmt.Errorf("arg must be yyyy.ww"))
}
ds = os.Args[1]
default:
x(fmt.Errorf("too many arguments"))
}
dp := ds[:4] + "/" + ds[5:]
dp := ds[:4] + "/w" + ds[5:]
x(os.Chdir("/net/corpora/nlnieuws/AT5/" + dp))
x(os.MkdirAll("out", 0777))

View File

@@ -2,17 +2,20 @@
set -e
BASE=/net/corpora/nlnieuws
PART=$BASE/AT5
unset CDPATH
PATH=/net/corpora/nlnieuws/bin:/net/aps/bin:$PATH
PATH=$PART:$BASE/bin:$BASE:/net/aps/bin:$PATH
export TZ=Europe/Amsterdam
. /net/aps/etc/alpino-activate.sh > /dev/null
if [ "$1" = "" ]
then
ds=`date -d -7days +%G-%V`
ds=`date -d -7days +%G.%V`
else
case "$1" in
2[0-9][0-9][0-9]-[0-5][0-9])
2[0-9][0-9][0-9].[0-5][0-9])
ds=$1
;;
*)
@@ -22,11 +25,13 @@ else
esac
fi
dp=${ds//-//}
year=${ds%.*}
week=${ds#*.}
dp=$year/w$week
corpus=$PART/corpus/$year/$ds
mkdir -p $PART/corpus/$year
corpus=/net/corpora/nlnieuws/AT5/corpus/$ds
cd /net/corpora/nlnieuws/AT5/$dp
cd $PART/$dp
ln -s lock.$$ lock
if [ "`readlink lock`" != lock.$$ ]
@@ -38,7 +43,7 @@ fi
rm -fr out
mkdir out
../../xml2txt $ds
xml2txt $ds
rm -f $corpus.lines
for i in out/*.txt
@@ -53,7 +58,7 @@ cd out
mkdir xml
Alpino -flag treebank xml debug=1 end_hook=xml user_max=900000 -parse < $corpus.lines 2> $corpus.log
../../../metadata
metadata
cd xml
rm -f $corpus.data.dz $corpus.index

View File

@@ -95,7 +95,7 @@ func main() {
}
p(err)
year, week := t.ISOWeek()
dirname := fmt.Sprintf("/net/corpora/nlnieuws/BuurtAdam/%d/%02d", year, week)
dirname := fmt.Sprintf("/net/corpora/nlnieuws/BuurtAdam/%d/w%02d", year, week)
if exists(dirname + "/lock") {
continue
}

View File

@@ -2,17 +2,20 @@
set -e
BASE=/net/corpora/nlnieuws
PART=$BASE/BuurtAdam
unset CDPATH
PATH=/net/corpora/nlnieuws/bin:/net/aps/bin:$PATH
PATH=$PART:$BASE/bin:$BASE:/net/aps/bin:$PATH
export TZ=Europe/Amsterdam
. /net/aps/etc/alpino-activate.sh > /dev/null
if [ "$1" = "" ]
then
ds=`date -d -7days +%G-%V`
ds=`date -d -7days +%G.%V`
else
case "$1" in
2[0-9][0-9][0-9]-[0-5][0-9])
2[0-9][0-9][0-9].[0-5][0-9])
ds=$1
;;
*)
@@ -22,11 +25,13 @@ else
esac
fi
dp=${ds//-//}
year=${ds%.*}
week=${ds#*.}
dp=$year/w$week
corpus=$PART/corpus/$year/$ds
mkdir -p $PART/corpus/$year
corpus=/net/corpora/nlnieuws/BuurtAdam/corpus/$ds
cd /net/corpora/nlnieuws/BuurtAdam/$dp
cd $PART/$dp
ln -s lock.$$ lock
if [ "`readlink lock`" != lock.$$ ]
@@ -51,14 +56,14 @@ cd out
mkdir xml
Alpino -flag treebank xml debug=1 end_hook=xml user_max=900000 -parse < $corpus.lines 2> $corpus.log
../../../metadata
metadata
cd xml
rm -f $corpus.data.dz $corpus.index
alto -q -o $corpus.data.dz *.xml
# telling per bericht, niet per zin
/net/corpora/nlnieuws/namen.sh -x T -s $corpus.data.dz > $corpus.tag.txt
query.sh -x T -s $corpus.data.dz > $corpus.tag.txt
cd ../..
rm -fr out

View File

@@ -94,7 +94,7 @@ func main() {
}
p(err)
year, week := t.ISOWeek()
dirname := fmt.Sprintf("/net/corpora/nlnieuws/BuurtGrn/%d/%02d", year, week)
dirname := fmt.Sprintf("/net/corpora/nlnieuws/BuurtGrn/%d/w%02d", year, week)
if exists(dirname + "/lock") {
continue
}

View File

@@ -2,17 +2,20 @@
set -e
BASE=/net/corpora/nlnieuws
PART=$BASE/BuurtGrn
unset CDPATH
PATH=/net/corpora/nlnieuws/bin:/net/aps/bin:$PATH
PATH=$PART:$BASE/bin:$BASE:/net/aps/bin:$PATH
export TZ=Europe/Amsterdam
. /net/aps/etc/alpino-activate.sh > /dev/null
if [ "$1" = "" ]
then
ds=`date -d -7days +%G-%V`
ds=`date -d -7days +%G.%V`
else
case "$1" in
2[0-9][0-9][0-9]-[0-5][0-9])
2[0-9][0-9][0-9].[0-5][0-9])
ds=$1
;;
*)
@@ -22,11 +25,13 @@ else
esac
fi
dp=${ds//-//}
year=${ds%.*}
week=${ds#*.}
dp=$year/w$week
corpus=$PART/corpus/$year/$ds
mkdir -p $PART/corpus/$year
corpus=/net/corpora/nlnieuws/BuurtGrn/corpus/$ds
cd /net/corpora/nlnieuws/BuurtGrn/$dp
cd $PART/$dp
ln -s lock.$$ lock
if [ "`readlink lock`" != lock.$$ ]
@@ -51,14 +56,14 @@ cd out
mkdir xml
Alpino -flag treebank xml debug=1 end_hook=xml user_max=900000 -parse < $corpus.lines 2> $corpus.log
../../../metadata
metadata
cd xml
rm -f $corpus.data.dz $corpus.index
alto -q -o $corpus.data.dz *.xml
# telling per bericht, niet per zin
/net/corpora/nlnieuws/namen.sh -x T -s $corpus.data.dz > $corpus.tag.txt
query.sh -x T -s $corpus.data.dz > $corpus.tag.txt
cd ../..
rm -fr out

View File

@@ -95,7 +95,7 @@ func main() {
}
p(err)
year, week := t.ISOWeek()
dirname := fmt.Sprintf("/net/corpora/nlnieuws/GG/%d/%02d", year, week)
dirname := fmt.Sprintf("/net/corpora/nlnieuws/GG/%d/w%02d", year, week)
if exists(dirname + "/lock") {
continue
}

View File

@@ -2,17 +2,20 @@
set -e
BASE=/net/corpora/nlnieuws
PART=$BASE/GG
unset CDPATH
PATH=/net/corpora/nlnieuws/bin:/net/aps/bin:$PATH
PATH=$PART:$BASE/bin:$BASE:/net/aps/bin:$PATH
export TZ=Europe/Amsterdam
. /net/aps/etc/alpino-activate.sh > /dev/null
if [ "$1" = "" ]
then
ds=`date -d -7days +%G-%V`
ds=`date -d -7days +%G.%V`
else
case "$1" in
2[0-9][0-9][0-9]-[0-5][0-9])
2[0-9][0-9][0-9].[0-5][0-9])
ds=$1
;;
*)
@@ -22,11 +25,13 @@ else
esac
fi
dp=${ds//-//}
year=${ds%.*}
week=${ds#*.}
dp=$year/w$week
corpus=$PART/corpus/$year/$ds
mkdir -p $PART/corpus/$year
corpus=/net/corpora/nlnieuws/GG/corpus/$ds
cd /net/corpora/nlnieuws/GG/$dp
cd $PART/$dp
ln -s lock.$$ lock
if [ "`readlink lock`" != lock.$$ ]
@@ -51,7 +56,7 @@ cd out
mkdir xml
Alpino -flag treebank xml debug=1 end_hook=xml user_max=900000 -parse < $corpus.lines 2> $corpus.log
../../../metadata
metadata
cd xml
rm -f $corpus.data.dz $corpus.index

View File

@@ -101,8 +101,7 @@ func main() {
t, err = time.Parse(time.RFC1123, item.PubDate)
}
p(err)
year, week := t.ISOWeek()
dirname := fmt.Sprintf("/net/corpora/nlnieuws/HLN/%d/%02d", year, week)
dirname := fmt.Sprintf("/net/corpora/nlnieuws/HLN/%d/%02d/%02d", t.Year(), int(t.Month()), t.Day())
if exists(dirname + "/lock") {
continue
}

View File

@@ -2,17 +2,20 @@
set -e
BASE=/net/corpora/nlnieuws
PART=$BASE/HLN
unset CDPATH
PATH=/net/corpora/nlnieuws/bin:/net/aps/bin:$PATH
PATH=$PART:$BASE/bin:$BASE:/net/aps/bin:$PATH
export TZ=Europe/Amsterdam
. /net/aps/etc/alpino-activate.sh > /dev/null
if [ "$1" = "" ]
then
ds=`date -d -7days +%G-%V`
ds=`date -d -2days +%Y-%m-%d`
else
case "$1" in
2[0-9][0-9][0-9]-[0-5][0-9])
2[0-9][0-9][0-9]-[01][0-9]-[0-3][0-9])
ds=$1
;;
*)
@@ -23,10 +26,11 @@ else
fi
dp=${ds//-//}
year=${ds%%-*}
corpus=$PART/corpus/$year/$ds
mkdir -p $PART/corpus/$year
corpus=/net/corpora/nlnieuws/HLN/corpus/$ds
cd /net/corpora/nlnieuws/HLN/$dp
cd $PART/$dp
ln -s lock.$$ lock
if [ "`readlink lock`" != lock.$$ ]
@@ -51,14 +55,14 @@ cd out
mkdir xml
Alpino -flag treebank xml debug=1 end_hook=xml user_max=900000 -parse < $corpus.lines 2> $corpus.log
../../../metadata
metadata
cd xml
rm -f $corpus.data.dz $corpus.index
alto -q -o $corpus.data.dz *.xml
# telling per bericht, niet per zin
/net/corpora/nlnieuws/namen.sh -x T -s $corpus.data.dz > $corpus.tag.txt
query.sh -x T -s $corpus.data.dz > $corpus.tag.txt
cd ../..
rm -fr out

View File

@@ -77,7 +77,7 @@ func main() {
}
p(err)
year, week := t.ISOWeek()
dirname := fmt.Sprintf("/net/corpora/nlnieuws/LitNL/%d/%02d", year, week)
dirname := fmt.Sprintf("/net/corpora/nlnieuws/LitNL/%d/w%02d", year, week)
if exists(dirname + "/lock") {
continue
}

View File

@@ -24,7 +24,7 @@ var (
w = e.WarnErr
x = e.ExitErr
reYearWeek = regexp.MustCompile(`^2[0-9][0-9][0-9]-[0-5][0-9]$`)
reYearWeek = regexp.MustCompile(`^2[0-9][0-9][0-9]\.[0-5][0-9]$`)
)
func main() {
@@ -33,16 +33,16 @@ func main() {
switch len(os.Args) {
case 1:
year, week := time.Now().AddDate(0, 0, -7).ISOWeek()
ds = fmt.Sprintf("%d-%02d", year, week)
ds = fmt.Sprintf("%d.%02d", year, week)
case 2:
if !reYearWeek.MatchString(os.Args[1]) {
x(fmt.Errorf("arg must be yyyy-ww"))
x(fmt.Errorf("arg must be yyyy.ww"))
}
ds = os.Args[1]
default:
x(fmt.Errorf("too many arguments"))
}
dp := ds[:4] + "/" + ds[5:]
dp := ds[:4] + "/w" + ds[5:]
x(os.Chdir("/net/corpora/nlnieuws/LitNL/" + dp))
x(os.MkdirAll("out", 0777))

View File

@@ -2,17 +2,20 @@
set -e
BASE=/net/corpora/nlnieuws
PART=$BASE/LitNL
unset CDPATH
PATH=/net/corpora/nlnieuws/bin:/net/aps/bin:$PATH
PATH=$PART:$BASE/bin:$BASE:/net/aps/bin:$PATH
export TZ=Europe/Amsterdam
. /net/aps/etc/alpino-activate.sh > /dev/null
if [ "$1" = "" ]
then
ds=`date -d -7days +%G-%V`
ds=`date -d -7days +%G.%V`
else
case "$1" in
2[0-9][0-9][0-9]-[0-5][0-9])
2[0-9][0-9][0-9].[0-5][0-9])
ds=$1
;;
*)
@@ -22,11 +25,13 @@ else
esac
fi
dp=${ds//-//}
year=${ds%.*}
week=${ds#*.}
dp=$year/w$week
corpus=$PART/corpus/$year/$ds
mkdir -p $PART/corpus/$year
corpus=/net/corpora/nlnieuws/LitNL/corpus/$ds
cd /net/corpora/nlnieuws/LitNL/$dp
cd $PART/$dp
ln -s lock.$$ lock
if [ "`readlink lock`" != lock.$$ ]
@@ -38,7 +43,7 @@ fi
rm -fr out
mkdir out
../../xml2txt $ds
xml2txt $ds
rm -f $corpus.lines
for i in out/*.txt
@@ -53,14 +58,14 @@ cd out
mkdir xml
Alpino -flag treebank xml debug=1 end_hook=xml user_max=900000 -parse < $corpus.lines 2> $corpus.log
../../../metadata
metadata
cd xml
rm -f $corpus.data.dz $corpus.index
alto -q -o $corpus.data.dz *.xml
# telling per bericht, niet per zin
/net/corpora/nlnieuws/namen.sh -x T -s $corpus.data.dz > $corpus.tag.txt
query.sh -x T -s $corpus.data.dz > $corpus.tag.txt
cd ../..
rm -fr out

View File

@@ -28,7 +28,7 @@ type Item struct {
var (
x = e.ExitErr
reYearWeek = regexp.MustCompile(`^2[0-9][0-9][0-9]-[0-5][0-9]$`)
reYearWeek = regexp.MustCompile(`^2[0-9][0-9][0-9]-[01][0-9]-[0-3][0-9]$`)
)
func main() {
@@ -36,17 +36,17 @@ func main() {
var ds string
switch len(os.Args) {
case 1:
year, week := time.Now().AddDate(0, 0, -7).ISOWeek()
ds = fmt.Sprintf("%d-%02d", year, week)
t := time.Now().AddDate(0, 0, -2)
ds = fmt.Sprintf("%d-%02d-%02d", t.Year(), int(t.Month()), t.Day())
case 2:
if !reYearWeek.MatchString(os.Args[1]) {
x(fmt.Errorf("arg must be yyyy-ww"))
x(fmt.Errorf("arg must be yyyy-mm-dd"))
}
ds = os.Args[1]
default:
x(fmt.Errorf("too many arguments"))
}
dp := ds[:4] + "/" + ds[5:]
dp := strings.ReplaceAll(ds, "-", "/")
x(os.Chdir("/net/corpora/nlnieuws/NOS/" + dp))
x(os.MkdirAll("out", 0777))

View File

@@ -94,8 +94,7 @@ func main() {
}
}
p(err)
year, week := t.ISOWeek()
dirname := fmt.Sprintf("/net/corpora/nlnieuws/NOS/%d/%02d", year, week)
dirname := fmt.Sprintf("/net/corpora/nlnieuws/NOS/%d/%02d/%02d", t.Year(), int(t.Month()), t.Day())
if exists(dirname + "/lock") {
continue
}

View File

@@ -2,17 +2,20 @@
set -e
BASE=/net/corpora/nlnieuws
PART=$BASE/NOS
unset CDPATH
PATH=/net/corpora/nlnieuws/bin:/net/aps/bin:$PATH
PATH=$PART:$BASE/bin:$BASE:/net/aps/bin:$PATH
export TZ=Europe/Amsterdam
. /net/aps/etc/alpino-activate.sh > /dev/null
if [ "$1" = "" ]
then
ds=`date -d -7days +%G-%V`
ds=`date -d -2days +%Y-%m-%d`
else
case "$1" in
2[0-9][0-9][0-9]-[0-5][0-9])
2[0-9][0-9][0-9]-[01][0-9]-[0-3][0-9])
ds=$1
;;
*)
@@ -23,10 +26,11 @@ else
fi
dp=${ds//-//}
year=${ds%%-*}
corpus=$PART/corpus/$year/$ds
mkdir -p $PART/corpus/$year
corpus=/net/corpora/nlnieuws/NOS/corpus/$ds
cd /net/corpora/nlnieuws/NOS/$dp
cd $PART/$dp
ln -s lock.$$ lock
if [ "`readlink lock`" != lock.$$ ]
@@ -38,7 +42,7 @@ fi
rm -fr out
mkdir out
../../json2txt $ds
json2txt $ds
rm -f $corpus.lines
for i in out/*.txt
@@ -53,15 +57,15 @@ cd out
mkdir xml
Alpino -flag treebank xml debug=1 end_hook=xml user_max=900000 -parse < $corpus.lines 2> $corpus.log
../../../metadata
metadata
cd xml
rm -f $corpus.data.dz $corpus.index
alto -q -o $corpus.data.dz *.xml
# telling per bericht, niet per zin
/net/corpora/nlnieuws/namen.sh -x C -s $corpus.data.dz > $corpus.cat.txt
/net/corpora/nlnieuws/namen.sh -x T -s $corpus.data.dz > $corpus.tag.txt
query.sh -x C -s $corpus.data.dz > $corpus.cat.txt
query.sh -x T -s $corpus.data.dz > $corpus.tag.txt
cd ../..
rm -fr out

View File

@@ -98,8 +98,7 @@ func main() {
t, err = time.Parse(time.RFC1123, item.PubDate)
}
p(err)
year, week := t.ISOWeek()
dirname := fmt.Sprintf("/net/corpora/nlnieuws/NU/%d/%02d", year, week)
dirname := fmt.Sprintf("/net/corpora/nlnieuws/NU/%d/%02d/%02d", t.Year(), int(t.Month()), t.Day())
if exists(dirname + "/lock") {
continue
}

View File

@@ -2,17 +2,20 @@
set -e
BASE=/net/corpora/nlnieuws
PART=$BASE/NU
unset CDPATH
PATH=/net/corpora/nlnieuws/bin:/net/aps/bin:$PATH
PATH=$PART:$BASE/bin:$BASE:/net/aps/bin:$PATH
export TZ=Europe/Amsterdam
. /net/aps/etc/alpino-activate.sh > /dev/null
if [ "$1" = "" ]
then
ds=`date -d -7days +%G-%V`
ds=`date -d -2days +%Y-%m-%d`
else
case "$1" in
2[0-9][0-9][0-9]-[0-5][0-9])
2[0-9][0-9][0-9]-[01][0-9]-[0-3][0-9])
ds=$1
;;
*)
@@ -23,10 +26,11 @@ else
fi
dp=${ds//-//}
year=${ds%%-*}
corpus=$PART/corpus/$year/$ds
mkdir -p $PART/corpus/$year
corpus=/net/corpora/nlnieuws/NU/corpus/$ds
cd /net/corpora/nlnieuws/NU/$dp
cd $PART/$dp
ln -s lock.$$ lock
if [ "`readlink lock`" != lock.$$ ]
@@ -51,14 +55,14 @@ cd out
mkdir xml
Alpino -flag treebank xml debug=1 end_hook=xml user_max=900000 -parse < $corpus.lines 2> $corpus.log
../../../metadata
metadata
cd xml
rm -f $corpus.data.dz $corpus.index
alto -q -o $corpus.data.dz *.xml
# telling per bericht, niet per zin
/net/corpora/nlnieuws/namen.sh -x T -s $corpus.data.dz > $corpus.tag.txt
query.sh -x T -s $corpus.data.dz > $corpus.tag.txt
cd ../..
rm -fr out

View File

@@ -2,8 +2,11 @@
set -e
BASE=/net/corpora/nlnieuws
PART=$BASE/NieuwsNL
unset CDPATH
PATH=/net/corpora/nlnieuws/bin:/net/aps/bin:$PATH
PATH=$PART:$BASE/bin:$BASE:/net/aps/bin:$PATH
export TZ=Europe/Amsterdam
. /net/aps/etc/alpino-activate.sh > /dev/null
@@ -25,10 +28,11 @@ else
fi
dp=${ds//-//}
year=${ds%%-*}
corpus=$PART/corpus/$year/$ds
mkdir -p $PART/corpus/$year
corpus=/net/corpora/nlnieuws/NieuwsNL/corpus/$ds
cd /net/corpora/nlnieuws/NieuwsNL/$dp
cd $PART/$dp
ln -s lock.$$ lock
if [ "`readlink lock`" != lock.$$ ]
@@ -53,14 +57,14 @@ cd out
mkdir xml
Alpino -flag treebank xml debug=1 end_hook=xml user_max=900000 -parse < $corpus.lines 2> $corpus.log
../../../../metadata
metadata
cd xml
rm -f $corpus.data.dz $corpus.index
alto -q -o $corpus.data.dz *.xml
# telling per bericht, niet per zin
/net/corpora/nlnieuws/namen.sh -x T -s $corpus.data.dz > $corpus.tag.txt
query.sh -x T -s $corpus.data.dz > $corpus.tag.txt
cd ../..
rm -fr out

View File

@@ -77,7 +77,7 @@ func main() {
}
p(err)
year, week := t.ISOWeek()
dirname := fmt.Sprintf("/net/corpora/nlnieuws/Oog/%d/%02d", year, week)
dirname := fmt.Sprintf("/net/corpora/nlnieuws/Oog/%d/w%02d", year, week)
if exists(dirname + "/lock") {
continue
}

View File

@@ -23,7 +23,7 @@ type Item struct {
var (
x = e.ExitErr
reYearWeek = regexp.MustCompile(`^2[0-9][0-9][0-9]-[0-5][0-9]$`)
reYearWeek = regexp.MustCompile(`^2[0-9][0-9][0-9]\.[0-5][0-9]$`)
)
func main() {
@@ -32,16 +32,16 @@ func main() {
switch len(os.Args) {
case 1:
year, week := time.Now().AddDate(0, 0, -7).ISOWeek()
ds = fmt.Sprintf("%d-%02d", year, week)
ds = fmt.Sprintf("%d.%02d", year, week)
case 2:
if !reYearWeek.MatchString(os.Args[1]) {
x(fmt.Errorf("arg must be yyyy-ww"))
x(fmt.Errorf("arg must be yyyy.ww"))
}
ds = os.Args[1]
default:
x(fmt.Errorf("too many arguments"))
}
dp := ds[:4] + "/" + ds[5:]
dp := ds[:4] + "/w" + ds[5:]
x(os.Chdir("/net/corpora/nlnieuws/Oog/" + dp))
x(os.MkdirAll("out", 0777))

View File

@@ -2,17 +2,20 @@
set -e
BASE=/net/corpora/nlnieuws
PART=$BASE/Oog
unset CDPATH
PATH=/net/corpora/nlnieuws/bin:/net/aps/bin:$PATH
PATH=$PART:$BASE/bin:$BASE:/net/aps/bin:$PATH
export TZ=Europe/Amsterdam
. /net/aps/etc/alpino-activate.sh > /dev/null
if [ "$1" = "" ]
then
ds=`date -d -7days +%G-%V`
ds=`date -d -7days +%G.%V`
else
case "$1" in
2[0-9][0-9][0-9]-[0-5][0-9])
2[0-9][0-9][0-9].[0-5][0-9])
ds=$1
;;
*)
@@ -22,11 +25,13 @@ else
esac
fi
dp=${ds//-//}
year=${ds%.*}
week=${ds#*.}
dp=$year/w$week
corpus=$PART/corpus/$year/$ds
mkdir -p $PART/corpus/$year
corpus=/net/corpora/nlnieuws/Oog/corpus/$ds
cd /net/corpora/nlnieuws/Oog/$dp
cd $PART/$dp
ln -s lock.$$ lock
if [ "`readlink lock`" != lock.$$ ]
@@ -38,7 +43,7 @@ fi
rm -fr out
mkdir out
../../xml2txt $ds
xml2txt $ds
rm -f $corpus.lines
for i in out/*.txt
@@ -53,14 +58,14 @@ cd out
mkdir xml
Alpino -flag treebank xml debug=1 end_hook=xml user_max=900000 -parse < $corpus.lines 2> $corpus.log
../../../metadata
metadata
cd xml
rm -f $corpus.data.dz $corpus.index
alto -q -o $corpus.data.dz *.xml
# telling per bericht, niet per zin
/net/corpora/nlnieuws/namen.sh -x T -s $corpus.data.dz > $corpus.tag.txt
query.sh -x T -s $corpus.data.dz > $corpus.tag.txt
cd ../..
rm -fr out

View File

@@ -102,7 +102,7 @@ func main() {
}
p(err)
year, week := t.ISOWeek()
dirname := fmt.Sprintf("/net/corpora/nlnieuws/Parool/%d/%02d", year, week)
dirname := fmt.Sprintf("/net/corpora/nlnieuws/Parool/%d/w%02d", year, week)
if exists(dirname + "/lock") {
continue
}

View File

@@ -2,17 +2,20 @@
set -e
BASE=/net/corpora/nlnieuws
PART=$BASE/Parool
unset CDPATH
PATH=/net/corpora/nlnieuws/bin:/net/aps/bin:$PATH
PATH=$PART:$BASE/bin:$BASE:/net/aps/bin:$PATH
export TZ=Europe/Amsterdam
. /net/aps/etc/alpino-activate.sh > /dev/null
if [ "$1" = "" ]
then
ds=`date -d -7days +%G-%V`
ds=`date -d -7days +%G.%V`
else
case "$1" in
2[0-9][0-9][0-9]-[0-5][0-9])
2[0-9][0-9][0-9].[0-5][0-9])
ds=$1
;;
*)
@@ -22,11 +25,13 @@ else
esac
fi
dp=${ds//-//}
year=${ds%.*}
week=${ds#*.}
dp=$year/w$week
corpus=$PART/corpus/$year/$ds
mkdir -p $PART/corpus/$year
corpus=/net/corpora/nlnieuws/Parool/corpus/$ds
cd /net/corpora/nlnieuws/Parool/$dp
cd $PART/$dp
ln -s lock.$$ lock
if [ "`readlink lock`" != lock.$$ ]
@@ -51,14 +56,14 @@ cd out
mkdir xml
Alpino -flag treebank xml debug=1 end_hook=xml user_max=900000 -parse < $corpus.lines 2> $corpus.log
../../../metadata
metadata
cd xml
rm -f $corpus.data.dz $corpus.index
alto -q -o $corpus.data.dz *.xml
# telling per bericht, niet per zin
/net/corpora/nlnieuws/namen.sh -x T -s $corpus.data.dz > $corpus.tag.txt
query.sh -x T -s $corpus.data.dz > $corpus.tag.txt
cd ../..
rm -fr out

View File

@@ -77,7 +77,7 @@ func main() {
}
p(err)
year, week := t.ISOWeek()
dirname := fmt.Sprintf("/net/corpora/nlnieuws/RO/%d/%02d", year, week)
dirname := fmt.Sprintf("/net/corpora/nlnieuws/RO/%d/w%02d", year, week)
if exists(dirname + "/lock") {
continue
}

View File

@@ -26,7 +26,7 @@ var (
x = e.ExitErr
w = e.WarnErr
reYearWeek = regexp.MustCompile(`^2[0-9][0-9][0-9]-[0-5][0-9]$`)
reYearWeek = regexp.MustCompile(`^2[0-9][0-9][0-9]\.[0-5][0-9]$`)
)
func main() {
@@ -38,16 +38,16 @@ func main() {
switch len(os.Args) {
case 1:
year, week := time.Now().AddDate(0, 0, -7).ISOWeek()
ds = fmt.Sprintf("%d-%02d", year, week)
ds = fmt.Sprintf("%d.%02d", year, week)
case 2:
if !reYearWeek.MatchString(os.Args[1]) {
x(fmt.Errorf("arg must be yyyy-ww"))
x(fmt.Errorf("arg must be yyyy.ww"))
}
ds = os.Args[1]
default:
x(fmt.Errorf("too many arguments"))
}
dp := ds[:4] + "/" + ds[5:]
dp := ds[:4] + "/w" + ds[5:]
x(os.Chdir("/net/corpora/nlnieuws/RO/" + dp))
x(os.MkdirAll("out", 0777))

View File

@@ -2,17 +2,20 @@
set -e
BASE=/net/corpora/nlnieuws
PART=$BASE/RO
unset CDPATH
PATH=/net/corpora/nlnieuws/bin:/net/aps/bin:$PATH
PATH=$PART:$BASE/bin:$BASE:/net/aps/bin:$PATH
export TZ=Europe/Amsterdam
. /net/aps/etc/alpino-activate.sh > /dev/null
if [ "$1" = "" ]
then
ds=`date -d -7days +%G-%V`
ds=`date -d -7days +%G.%V`
else
case "$1" in
2[0-9][0-9][0-9]-[0-5][0-9])
2[0-9][0-9][0-9].[0-5][0-9])
ds=$1
;;
*)
@@ -22,11 +25,13 @@ else
esac
fi
dp=${ds//-//}
year=${ds%.*}
week=${ds#*.}
dp=$year/w$week
corpus=$PART/corpus/$year/$ds
mkdir -p $PART/corpus/$year
corpus=/net/corpora/nlnieuws/RO/corpus/$ds
cd /net/corpora/nlnieuws/RO/$dp
cd $PART/$dp
ln -s lock.$$ lock
if [ "`readlink lock`" != lock.$$ ]
@@ -38,7 +43,7 @@ fi
rm -fr out
mkdir out
../../xml2txt $ds
xml2txt $ds
rm -f $corpus.lines
for i in out/*.txt
@@ -53,14 +58,14 @@ cd out
mkdir xml
Alpino -flag treebank xml debug=1 end_hook=xml user_max=900000 -parse < $corpus.lines 2> $corpus.log
../../../metadata
metadata
cd xml
rm -f $corpus.data.dz $corpus.index
alto -q -o $corpus.data.dz *.xml
# telling per bericht, niet per zin
/net/corpora/nlnieuws/namen.sh -x T -s $corpus.data.dz > $corpus.tag.txt
query.sh -x T -s $corpus.data.dz > $corpus.tag.txt
cd ../..
rm -fr out

View File

@@ -101,7 +101,7 @@ func main() {
}
p(err)
year, week := t.ISOWeek()
dirname := fmt.Sprintf("/net/corpora/nlnieuws/RTVNoord/%d/%02d", year, week)
dirname := fmt.Sprintf("/net/corpora/nlnieuws/RTVNoord/%d/w%02d", year, week)
if exists(dirname + "/lock") {
continue
}

View File

@@ -2,17 +2,20 @@
set -e
BASE=/net/corpora/nlnieuws
PART=$BASE/RTVNoord
unset CDPATH
PATH=/net/corpora/nlnieuws/bin:/net/aps/bin:$PATH
PATH=$PART:$BASE/bin:$BASE:/net/aps/bin:$PATH
export TZ=Europe/Amsterdam
. /net/aps/etc/alpino-activate.sh > /dev/null
if [ "$1" = "" ]
then
ds=`date -d -7days +%G-%V`
ds=`date -d -7days +%G.%V`
else
case "$1" in
2[0-9][0-9][0-9]-[0-5][0-9])
2[0-9][0-9][0-9].[0-5][0-9])
ds=$1
;;
*)
@@ -22,11 +25,13 @@ else
esac
fi
dp=${ds//-//}
year=${ds%.*}
week=${ds#*.}
dp=$year/w$week
corpus=$PART/corpus/$year/$ds
mkdir -p $PART/corpus/$year
corpus=/net/corpora/nlnieuws/RTVNoord/corpus/$ds
cd /net/corpora/nlnieuws/RTVNoord/$dp
cd $PART/$dp
ln -s lock.$$ lock
if [ "`readlink lock`" != lock.$$ ]
@@ -51,15 +56,15 @@ cd out
mkdir xml
Alpino -flag treebank xml debug=1 end_hook=xml user_max=900000 -parse < $corpus.lines 2> $corpus.log
../../../metadata
metadata
cd xml
rm -f $corpus.data.dz $corpus.index
alto -q -o $corpus.data.dz *.xml
# telling per bericht, niet per zin
/net/corpora/nlnieuws/namen.sh -x C -s $corpus.data.dz > $corpus.cat.txt
/net/corpora/nlnieuws/namen.sh -x T -s $corpus.data.dz > $corpus.tag.txt
query.sh -x C -s $corpus.data.dz > $corpus.cat.txt
query.sh -x T -s $corpus.data.dz > $corpus.tag.txt
cd ../..
rm -fr out

View File

@@ -77,7 +77,7 @@ func main() {
}
p(err)
year, week := t.ISOWeek()
dirname := fmt.Sprintf("/net/corpora/nlnieuws/Sargasso/%d/%02d", year, week)
dirname := fmt.Sprintf("/net/corpora/nlnieuws/Sargasso/%d/w%02d", year, week)
if exists(dirname + "/lock") {
continue
}

View File

@@ -23,7 +23,7 @@ type Item struct {
var (
x = e.ExitErr
reYearWeek = regexp.MustCompile(`^2[0-9][0-9][0-9]-[0-5][0-9]$`)
reYearWeek = regexp.MustCompile(`^2[0-9][0-9][0-9]\.[0-5][0-9]$`)
)
func main() {
@@ -32,16 +32,16 @@ func main() {
switch len(os.Args) {
case 1:
year, week := time.Now().AddDate(0, 0, -7).ISOWeek()
ds = fmt.Sprintf("%d-%02d", year, week)
ds = fmt.Sprintf("%d.%02d", year, week)
case 2:
if !reYearWeek.MatchString(os.Args[1]) {
x(fmt.Errorf("arg must be yyyy-ww"))
x(fmt.Errorf("arg must be yyyy.ww"))
}
ds = os.Args[1]
default:
x(fmt.Errorf("too many arguments"))
}
dp := ds[:4] + "/" + ds[5:]
dp := ds[:4] + "/w" + ds[5:]
x(os.Chdir("/net/corpora/nlnieuws/Sargasso/" + dp))
x(os.MkdirAll("out", 0777))

View File

@@ -2,17 +2,20 @@
set -e
BASE=/net/corpora/nlnieuws
PART=$BASE/Sargasso
unset CDPATH
PATH=/net/corpora/nlnieuws/bin:/net/aps/bin:$PATH
PATH=$PART:$BASE/bin:$BASE:/net/aps/bin:$PATH
export TZ=Europe/Amsterdam
. /net/aps/etc/alpino-activate.sh > /dev/null
if [ "$1" = "" ]
then
ds=`date -d -7days +%G-%V`
ds=`date -d -7days +%G.%V`
else
case "$1" in
2[0-9][0-9][0-9]-[0-5][0-9])
2[0-9][0-9][0-9].[0-5][0-9])
ds=$1
;;
*)
@@ -22,11 +25,13 @@ else
esac
fi
dp=${ds//-//}
year=${ds%.*}
week=${ds#*.}
dp=$year/w$week
corpus=$PART/corpus/$year/$ds
mkdir -p $PART/corpus/$year
corpus=/net/corpora/nlnieuws/Sargasso/corpus/$ds
cd /net/corpora/nlnieuws/Sargasso/$dp
cd $PART/$dp
ln -s lock.$$ lock
if [ "`readlink lock`" != lock.$$ ]
@@ -38,7 +43,7 @@ fi
rm -fr out
mkdir out
../../xml2txt $ds
xml2txt $ds
rm -f $corpus.lines
for i in out/*.txt
@@ -53,14 +58,14 @@ cd out
mkdir xml
Alpino -flag treebank xml debug=1 end_hook=xml user_max=900000 -parse < $corpus.lines 2> $corpus.log
../../../metadata
metadata
cd xml
rm -f $corpus.data.dz $corpus.index
alto -q -o $corpus.data.dz *.xml
# telling per bericht, niet per zin
/net/corpora/nlnieuws/namen.sh -x T -s $corpus.data.dz > $corpus.tag.txt
query.sh -x T -s $corpus.data.dz > $corpus.tag.txt
cd ../..
rm -fr out

View File

@@ -90,7 +90,7 @@ func main() {
}
p(err)
year, week := t.ISOWeek()
dirname := fmt.Sprintf("/net/corpora/nlnieuws/Sikkom/%d/%02d", year, week)
dirname := fmt.Sprintf("/net/corpora/nlnieuws/Sikkom/%d/w%02d", year, week)
if exists(dirname + "/lock") {
continue
}

View File

@@ -2,17 +2,20 @@
set -e
BASE=/net/corpora/nlnieuws
PART=$BASE/Sikkom
unset CDPATH
PATH=/net/corpora/nlnieuws/bin:/net/aps/bin:$PATH
PATH=$PART:$BASE/bin:$BASE:/net/aps/bin:$PATH
export TZ=Europe/Amsterdam
. /net/aps/etc/alpino-activate.sh > /dev/null
if [ "$1" = "" ]
then
ds=`date -d -7days +%G-%V`
ds=`date -d -7days +%G.%V`
else
case "$1" in
2[0-9][0-9][0-9]-[0-5][0-9])
2[0-9][0-9][0-9].[0-5][0-9])
ds=$1
;;
*)
@@ -22,11 +25,13 @@ else
esac
fi
dp=${ds//-//}
year=${ds%.*}
week=${ds#*.}
dp=$year/w$week
corpus=$PART/corpus/$year/$ds
mkdir -p $PART/corpus/$year
corpus=/net/corpora/nlnieuws/Sikkom/corpus/$ds
cd /net/corpora/nlnieuws/Sikkom/$dp
cd $PART/$dp
ln -s lock.$$ lock
if [ "`readlink lock`" != lock.$$ ]
@@ -51,7 +56,7 @@ cd out
mkdir xml
Alpino -flag treebank xml debug=1 end_hook=xml user_max=900000 -parse < $corpus.lines 2> $corpus.log
../../../metadata
metadata
cd xml
rm -f $corpus.data.dz $corpus.index

View File

@@ -77,7 +77,7 @@ func main() {
}
p(err)
year, week := t.ISOWeek()
dirname := fmt.Sprintf("/net/corpora/nlnieuws/Tzum/%d/%02d", year, week)
dirname := fmt.Sprintf("/net/corpora/nlnieuws/Tzum/%d/w%02d", year, week)
if exists(dirname + "/lock") {
continue
}

View File

@@ -23,7 +23,7 @@ type Item struct {
var (
x = e.ExitErr
reYearWeek = regexp.MustCompile(`^2[0-9][0-9][0-9]-[0-5][0-9]$`)
reYearWeek = regexp.MustCompile(`^2[0-9][0-9][0-9]\.[0-5][0-9]$`)
)
func main() {
@@ -32,16 +32,16 @@ func main() {
switch len(os.Args) {
case 1:
year, week := time.Now().AddDate(0, 0, -7).ISOWeek()
ds = fmt.Sprintf("%d-%02d", year, week)
ds = fmt.Sprintf("%d.%02d", year, week)
case 2:
if !reYearWeek.MatchString(os.Args[1]) {
x(fmt.Errorf("arg must be yyyy-ww"))
x(fmt.Errorf("arg must be yyyy.ww"))
}
ds = os.Args[1]
default:
x(fmt.Errorf("too many arguments"))
}
dp := ds[:4] + "/" + ds[5:]
dp := ds[:4] + "/w" + ds[5:]
x(os.Chdir("/net/corpora/nlnieuws/Tzum/" + dp))
x(os.MkdirAll("out", 0777))

View File

@@ -2,17 +2,20 @@
set -e
BASE=/net/corpora/nlnieuws
PART=$BASE/Tzum
unset CDPATH
PATH=/net/corpora/nlnieuws/bin:/net/aps/bin:$PATH
PATH=$PART:$BASE/bin:$BASE:/net/aps/bin:$PATH
export TZ=Europe/Amsterdam
. /net/aps/etc/alpino-activate.sh > /dev/null
if [ "$1" = "" ]
then
ds=`date -d -7days +%G-%V`
ds=`date -d -7days +%G.%V`
else
case "$1" in
2[0-9][0-9][0-9]-[0-5][0-9])
2[0-9][0-9][0-9].[0-5][0-9])
ds=$1
;;
*)
@@ -22,11 +25,13 @@ else
esac
fi
dp=${ds//-//}
year=${ds%.*}
week=${ds#*.}
dp=$year/w$week
corpus=$PART/corpus/$year/$ds
mkdir -p $PART/corpus/$year
corpus=/net/corpora/nlnieuws/Tzum/corpus/$ds
cd /net/corpora/nlnieuws/Tzum/$dp
cd $PART/$dp
ln -s lock.$$ lock
if [ "`readlink lock`" != lock.$$ ]
@@ -38,7 +43,7 @@ fi
rm -fr out
mkdir out
../../xml2txt $ds
xml2txt $ds
rm -f $corpus.lines
for i in out/*.txt
@@ -53,14 +58,14 @@ cd out
mkdir xml
Alpino -flag treebank xml debug=1 end_hook=xml user_max=900000 -parse < $corpus.lines 2> $corpus.log
../../../metadata
metadata
cd xml
rm -f $corpus.data.dz $corpus.index
alto -q -o $corpus.data.dz *.xml
# telling per bericht, niet per zin
/net/corpora/nlnieuws/namen.sh -x T -s $corpus.data.dz > $corpus.tag.txt
query.sh -x T -s $corpus.data.dz > $corpus.tag.txt
cd ../..
rm -fr out

View File

@@ -110,8 +110,7 @@ func main() {
if t2.After(t) {
t = t2
}
year, week := t.ISOWeek()
dirname := fmt.Sprintf("/net/corpora/nlnieuws/VRT/%d/%02d", year, week)
dirname := fmt.Sprintf("/net/corpora/nlnieuws/VRT/%d/%02d/%02d", t.Year(), int(t.Month()), t.Day())
if exists(dirname + "/lock") {
continue
}

View File

@@ -2,17 +2,20 @@
set -e
BASE=/net/corpora/nlnieuws
PART=$BASE/VRT
unset CDPATH
PATH=/net/corpora/nlnieuws/bin:/net/aps/bin:$PATH
PATH=$PART:$BASE/bin:$BASE:/net/aps/bin:$PATH
export TZ=Europe/Amsterdam
. /net/aps/etc/alpino-activate.sh > /dev/null
if [ "$1" = "" ]
then
ds=`date -d -7days +%G-%V`
ds=`date -d -2days +%Y-%m-%d`
else
case "$1" in
2[0-9][0-9][0-9]-[0-5][0-9])
2[0-9][0-9][0-9]-[01][0-9]-[0-3][0-9])
ds=$1
;;
*)
@@ -23,10 +26,11 @@ else
fi
dp=${ds//-//}
year=${ds%%-*}
corpus=$PART/corpus/$year/$ds
mkdir -p $PART/corpus/$year
corpus=/net/corpora/nlnieuws/VRT/corpus/$ds
cd /net/corpora/nlnieuws/VRT/$dp
cd $PART/$dp
ln -s lock.$$ lock
if [ "`readlink lock`" != lock.$$ ]
@@ -51,15 +55,15 @@ cd out
mkdir xml
Alpino -flag treebank xml debug=1 end_hook=xml user_max=900000 -parse < $corpus.lines 2> $corpus.log
../../../metadata
metadata
cd xml
rm -f $corpus.data.dz $corpus.index
alto -q -o $corpus.data.dz *.xml
# telling per bericht, niet per zin
/net/corpora/nlnieuws/namen.sh -x C -s $corpus.data.dz > $corpus.cat.txt
/net/corpora/nlnieuws/namen.sh -x T -s $corpus.data.dz > $corpus.tag.txt
query.sh -x C -s $corpus.data.dz > $corpus.cat.txt
query.sh -x T -s $corpus.data.dz > $corpus.tag.txt
cd ../..
rm -fr out

View File

@@ -70,9 +70,9 @@ var (
func main() {
aa := strings.Split(os.Args[1], "-")
aa := strings.Split(os.Args[1], ".")
if len(aa) != 2 {
x(fmt.Errorf("ongeldig argument, moet in formaat yyyy-dd zijn"))
x(fmt.Errorf("ongeldig argument, moet in formaat yyyy.dd zijn"))
}
var err error
@@ -128,7 +128,8 @@ func makeParts(source string) *Parts {
func makeValues(source, part string) [][5]any {
v := make([][5]any, 0)
filename := fmt.Sprintf("/net/corpora/nlnieuws/data/%s-%s-%d-%02d-%d%s",
filename := fmt.Sprintf("/net/corpora/nlnieuws/data/%d/%s-%s-%d-%02d-%d%s",
year,
sources[source],
parts[part].file,
year,
@@ -226,7 +227,7 @@ func dates() (start, first, last string, names []string) {
t3 := tStart
for range size {
y, w := t3.ISOWeek()
names = append(names, fmt.Sprintf("%d/%02d", y, w))
names = append(names, fmt.Sprintf("%d/w%02d", y, w))
t3 = t3.AddDate(0, 0, 7)
}
t3 = tStart

View File

@@ -26,13 +26,24 @@ var (
func main() {
files, err := os.ReadDir("/net/corpora/nlnieuws/data/json")
dirs, err := os.ReadDir("/net/corpora/nlnieuws/data/json")
x(err)
for _, dir := range dirs {
if !dir.IsDir() {
continue
}
dirname := dir.Name()
if dirname[0] != '2' {
continue
}
files, err := os.ReadDir("/net/corpora/nlnieuws/data/json/" + dirname)
x(err)
for _, file := range files {
filename := file.Name()
if strings.HasPrefix(filename, "DATA-") && strings.HasSuffix(filename, "-4.json") {
addWeek(filename[5:12])
for _, file := range files {
filename := file.Name()
if strings.HasPrefix(filename, "DATA-") && strings.HasSuffix(filename, "-4.json") {
addWeek(filename[5:12])
}
}
}

View File

@@ -15,9 +15,9 @@ var (
)
func main() {
aa := strings.Split(os.Args[1], "-")
aa := strings.Split(os.Args[1], ".")
if len(aa) != 2 {
x(fmt.Errorf("ongeldig argument, moet in formaat yyyy-dd zijn"))
x(fmt.Errorf("ongeldig argument, moet in formaat yyyy.dd zijn"))
}
year, err := strconv.Atoi(aa[0])
@@ -58,7 +58,7 @@ func main() {
fmt.Print(" -or")
}
y, w := t2.ISOWeek()
fmt.Printf(" -name %d-%02d.data.dz", y, w)
fmt.Printf(" -name %d.%02d.data.dz", y, w)
t2 = t2.AddDate(0, 0, 7)
}

View File

@@ -22,10 +22,10 @@ say () {
if [ "$1" = "" ]
then
ds=`date -d -7days +%G-%V`
ds=`date -d -7days +%G.%V`
else
case "$1" in
2[0-9][0-9][0-9]-[0-5][0-9])
2[0-9][0-9][0-9].[0-5][0-9])
ds=$1
;;
*)
@@ -35,7 +35,11 @@ else
esac
fi
cd /net/corpora/nlnieuws/data
year=${ds%%.*}
mkdir -p /net/corpora/nlnieuws/data/$year
mkdir -p /net/corpora/nlnieuws/data/json/$year
cd /net/corpora/nlnieuws/data/$year
declare -A parts
#parts[alles]='.'
@@ -68,7 +72,7 @@ do
for i in 1 4
do
files=$(find .. $(week2files $ds $i) | grep -E "$regex") || true
files=$(find ../.. $(week2files $ds $i) | grep -E "$regex") || true
if [ -z "$files" ]
then
continue
@@ -161,10 +165,10 @@ do
done
done
data2json $ds 1 > json/DATA-$ds-1.json
data2json $ds 4 > json/DATA-$ds-4.json
dates2json > json/index1.json
dates2json > json/index4.json
data2json $ds 1 > ../json/$year/DATA-$ds-1.json
data2json $ds 4 > ../json/$year/DATA-$ds-4.json
dates2json > ../json/index1.json
dates2json > ../json/index4.json
# rechten bijwerken
chmod -R g+w /net/corpora/nlnieuws

View File

@@ -17,8 +17,11 @@ var (
func AddEnd(s string) string {
s = strings.TrimSpace(s)
if s == "" || reEOL.MatchString(s) {
return s
if s == "" {
return ""
}
if reEOL.MatchString(s) {
return s + "\n"
}
return s + ".\n"
}

View File

@@ -133,7 +133,9 @@ function makeTD(title, values) {
async function loadSource(source, week) {
if (!data.has(week)) {
data[week] = await getJSON('DATA-' + week + '-4.json')
data[week] = await getJSON(
week.substring(0, 4) + '/DATA-' + week + '-4.json',
)
}
idSubtitle.innerHTML = source + ' — t/m ' + data[week].last
@@ -154,7 +156,9 @@ async function loadSource(source, week) {
async function loadPart(part, week) {
if (!data.has(week)) {
data[week] = await getJSON('DATA-' + week + '-4.json')
data[week] = await getJSON(
week.substring(0, 4) + '/DATA-' + week + '-4.json',
)
}
idSubtitle.innerHTML = part + ' — t/m ' + data[week].last
@@ -185,7 +189,9 @@ async function loadWeken(source, part) {
if (i < dates.length) {
var week = dates[i].week
if (!data.has(week)) {
data[week] = await getJSON('DATA-' + week + '-4.json')
data[week] = await getJSON(
week.substring(0, 4) + '/DATA-' + week + '-4.json',
)
}
var values = data[week][source][part]
tr.appendChild(makeTD('t/m ' + data[week].last, values))