grote reorganisatie:
- HLN, NOS, NU, VRT: per week -> per dag
- yyyy-ww -> yyyy.ww
- yyyy* -> yyyy/yyyy*
- etc.
This commit is contained in:
collect.sh (20 changed lines)
@@ -22,10 +22,10 @@ say () {
 
 if [ "$1" = "" ]
 then
-ds=`date -d -7days +%G-%V`
+ds=`date -d -7days +%G.%V`
 else
 case "$1" in
-2[0-9][0-9][0-9]-[0-5][0-9])
+2[0-9][0-9][0-9].[0-5][0-9])
 ds=$1
 ;;
 *)
@@ -35,7 +35,11 @@ else
 esac
 fi
 
-cd /net/corpora/nlnieuws/data
+year=${ds%%.*}
+
+mkdir -p /net/corpora/nlnieuws/data/$year
+mkdir -p /net/corpora/nlnieuws/data/json/$year
+cd /net/corpora/nlnieuws/data/$year
 
 declare -A parts
 #parts[alles]='.'
@@ -68,7 +72,7 @@ do
 
 for i in 1 4
 do
-files=$(find .. $(week2files $ds $i) | grep -E "$regex") || true
+files=$(find ../.. $(week2files $ds $i) | grep -E "$regex") || true
 if [ -z "$files" ]
 then
 continue
@@ -161,10 +165,10 @@ do
 done
 done
 
-data2json $ds 1 > json/DATA-$ds-1.json
-data2json $ds 4 > json/DATA-$ds-4.json
-dates2json > json/index1.json
-dates2json > json/index4.json
+data2json $ds 1 > ../json/$year/DATA-$ds-1.json
+data2json $ds 4 > ../json/$year/DATA-$ds-4.json
+dates2json > ../json/index1.json
+dates2json > ../json/index4.json
 
 # rechten bijwerken
 chmod -R g+w /net/corpora/nlnieuws
Reference in New Issue
Block a user