Commit validated bk, 118922 Feb 7

2023-02-07 16:49:16 +01:00
parent 2271c24b19
commit 32ca21ab60
21 changed files with 1946 additions and 77 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -1,7 +1 @@
-.Rproj.user
-.Rhistory
-.RData
-.Ruserdata
-*.Rproj
-.DS_Store
-bk_2019.rds
+copyFromNetwork.sh
--- a/anonymize.R
+++ b/anonymize.R
@@ -0,0 +1,56 @@
+require(tidyverse, quietly = T)
+require(lubridate, quietly = T)
+require(readr)
+require(uuid, quietly = T)
+
+pinWidth = 7
+
+intTopin = function(x) {
+  return(formatC(x, width = 7, format = "d", flag = "0"))
+}
+
+# tPinMapping = tibble(origpin = intTopin(0), 
+#                     uupin = '78c3fe82-9343-4aa8-8b06-afc6da79a39a')
+source('config.R', echo = F)
+tPinMapping = read_rds(pinMappingDictFilename)
+
+getUupin = function(pin) {
+  if (length(pin) == 1) {
+    if (class(pin) == 'character') {
+      if (pin %in% tPinMapping$origpin) {
+        return((tPinMapping %>% filter(origpin == pin) %>% pull(uupin)))
+      } else {
+        if (str_length(pin) == pinWidth) {
+          newUupin = UUIDgenerate()
+          tPinMapping <<- tPinMapping %>% 
+            add_row(origpin = pin, uupin = newUupin)
+          tPinMapping %>% write_rds('data/pinMapping.rds', compress = 'gz')
+          return(newUupin)
+        } else  {
+          stop(paste0('pin width <> ', pinWidth))
+        }
+      }
+    } else {
+      stop(paste0('class of pin should be character, but is ', class(pin)))
+    }
+  } else {
+    stop('Please supply exactly 1 pin')
+  }
+}
+
+getOrigpin = function(uuId) {
+  tPinMapping %>% filter(uupin == uuId) %>% pull(origpin)
+}
+
+anonymizeRds = function(x, ymdStartDate, ymdEndDate, suffix) {
+  y = x %>%
+    rowwise() %>%
+    mutate(patientid = getUupin(patientid)) %>%
+    ungroup() %>%
+    mutate(birth_date = ymd('1970-01-01'))
+
+  filename = paste0('data/bk_', ymdStartDate, '_', ymdEndDate, '_', suffix, '.rds')
+  if (!file.exists(filename)) {
+    write_rds(y, file = filename)
+  }
+}
--- a/config.R
+++ b/config.R
@@ -0,0 +1,6 @@
+
+pinMappingDictFilename = '/share/mmbi/radar2/data/pinMapping.rds'
+
+# anonymized data for deployment
+radarDataFilename = 'data/radarData.rds' # '/share/mmbi/radar2/interfaces/4D_final_results/data/radarData.rds'
+
--- a/copyGlimsData.R
+++ b/copyGlimsData.R
@@ -0,0 +1,53 @@
+require(lubridate)
+require(tidyverse)
+
+fromBasePath = '/share/transport_4d'
+toBasePath = '/share/mmbi/radar2/data/archive'
+
+currDay = today()
+
+getTraverseDirectory = function(aDate) {
+  dirPattern = format(aDate, format = '%Y-%m')
+  dirsGlimsExport = list.dirs(path = fromBasePath, full.names = F)
+  
+  return(dirsGlimsExport[which(str_detect(dirsGlimsExport, dirPattern))])
+  
+}
+
+getFilesToCopy = function(aDate, aDir) {
+  filesGlimsExport = list.files(aDir)
+  
+  filePattern = format(aDate, format = '%y%m%d')
+  
+  return(filesGlimsExport[which(str_detect(filesGlimsExport, filePattern))])
+}
+
+copyFiles = function(aDate, overwrite = F, verbose = T) {
+  traverseDir = getTraverseDirectory(aDate)
+  fromDir = file.path(fromBasePath, traverseDir)
+  toDir   = file.path(toBasePath, traverseDir)
+  filesToCopy = getFilesToCopy(aDate, fromDir)
+  if (verbose)
+    cat(paste0('\nCopying from ', fromDir, ' => ', toDir, ' ..\n'))
+  for (f in filesToCopy) {
+    copySuccess = file.copy(from = file.path(fromDir, f), 
+                            to = file.path(toDir, f),
+                            overwrite = overwrite,
+                            copy.mode = T, 
+                            copy.date = T)
+    cat(paste0(f, '\11', copySuccess, '\n'))
+  }
+}
+
+copyFilesDateRange = function(nDays) {
+  ds = today() - nDays
+  d8s = ds + 1:nDays
+  for (d in 1:length(d8s)) {
+    d8 = d8s[d]
+    cat(paste0('copyFiles(', d8, ')\n'))
+    copyFiles(d8, verbose = F)
+  }
+}
+
+
+copyFilesDateRange(nDays = 3)
--- a/createPatches.R
+++ b/createPatches.R
@@ -0,0 +1,22 @@
+require(tidyverse)
+require(lubridate)
+
+source('globalData.R')
+source('interfaces.R') # readGlimsArchive()
+
+
+createPatches = function(tPeriods, dataPath, tFiles, tFileOvw) {
+  for (m in 1:nrow(tPeriods)) {
+    patch = readGlimsArchive(
+      ymdStartDate = tPeriods$startDate[m],
+      ymdEndDate = tPeriods$endDate[m],
+      readPreprocessed = F,
+      tFiles = tFiles,
+      tFileOvw = tFileOvw
+    )
+    filename = paste0(dataPath,'/bk_',
+                      tPeriods$startDate[m], '_', 
+                      tPeriods$endDate[m], '.rds')
+    write_rds(patch, filename, compress = 'gz')
+  }
+}
--- a/executeRead.R
+++ b/executeRead.R
@@ -0,0 +1,68 @@
+source('interfaces.R')
+source('createPatches.R')
+source('stitchArchive.R')
+
+dataPath = 'data'
+inspectRead = 10
+inspectReason = 'test selection of max IsolNr (only). mo and RIS can be NA'
+
+reads = read_rds(file.path(dataPath, 'reads.rds'))
+reads = reads %>% add_row(read = inspectRead, desription = inspectReason)
+write_rds(reads, file.path(dataPath, 'reads.rds'))
+
+tFiles = read_rds(file.path(dataPath, 'tFilesInfo.rds'))
+tFileOvw = read_rds(file.path(dataPath, 'tFileOvw.rds'))
+
+updateFileOvw = F
+if (updateFileOvw) {
+  # on 2022-10-20 tFiles was up-to-date, due to the daily update.
+  tFiles = read_rds(file.path(dataPath, 'tFilesInfo.rds'))
+  # However, tFileOvw somehow wasn't
+  
+  # get the last date that is present in the data sofar
+  maxDate = tFileOvw %>% pull(fileDate) %>% max(na.rm = T)
+  # catch up with today
+  nDaysToUpdate = difftime(today(), maxDate) %>% as.numeric()
+  daysToUpdate = (maxDate + 1:nDaysToUpdate)
+  
+  tInfoDays = tFiles %>% filter(fileDate %in% daysToUpdate)
+  tFileOvwToday = deriveFileOverview(tInfoDays)
+  a = assert_that(nrow(tFileOvwToday) > 0, 
+                  msg = 'derivederiveFileOverview() failed')
+  
+  tFileOvw = tFileOvw %>% ungroup() %>% 
+    add_row(tFileOvwToday) %>% 
+    filter(!is.na(fileDate))
+  
+  # write_rds(tFiles, file.path(dataPath, 'tFilesInfo.rds')) Was up-to-date on 2022-10-20
+  write_rds(tFileOvw, file.path(dataPath, 'tFileOvw.rds'))
+}
+
+# define the patch periods
+tPeriods = tribble(
+  ~startDate,        ~endDate,
+  ymd('2022-01-01'), ymd('2022-02-01') - 1,
+  ymd('2022-02-01'), ymd('2022-03-01') - 1
+  # ymd('2022-03-01'), ymd('2022-04-01') - 1,
+  # ymd('2022-04-01'), ymd('2022-05-01') - 1,
+  # ymd('2022-05-01'), ymd('2022-06-01') - 1,
+  # ymd('2022-06-01'), ymd('2022-07-01') - 1,
+  # ymd('2022-07-01'), ymd('2022-08-01') - 1,
+  # ymd('2022-08-01'), ymd('2022-09-01') - 1,
+  # ymd('2022-09-01'), ymd('2022-10-01') - 1,
+  # ymd('2022-10-01'), ymd('2022-10-20') - 1
+  # ymd('2022-09-01'), ymd('2022-10-20')
+)
+
+createPatches(tPeriods, dataPath, tFiles, tFileOvw)
+# stitch together patches created by createPatches.R
+# readAllPatches() assumes that all bk_*.rds files are in the 'data' path and 
+# that they all need to be read for stitching
+tPatches = readAllPatches()# %>% filter(from >= '2022-01-01', to <= (ymd('2022-05-01') - 1))
+minFrom = min(tPatches$from, na.rm = T)
+maxTo   = max(tPatches$to, na.rm = T)
+cat(paste0('Stitching patches: ', minFrom, ' - ', maxTo, '\n'))
+tStitched = stitchPatches(tPatches, dataPath = 'data', reqColumns = abAbbrevs)
+tStitched %>% write_rds(file = paste0('data/read_', inspectRead, '_', minFrom, '_', 
+                                      maxTo, '_x', today(), '.rds'),
+                        compress = 'gz')
--- a/global.R
+++ b/global.R
@@ -18,7 +18,7 @@ library(cleaner)
 library(scales)
 library(ggiraph)

-specialties <- read_csv("specialties.csv", lazy = FALSE)
+source('interfaces.R')

 trsns <- read_tsv("translations.txt", lazy = FALSE)
 # to do: make system settings dependent
@@ -30,6 +30,25 @@ trnslt <- function(english, new_lang = "nl", file = trsns) {
  }
 }

+pathogens <- read_csv("data/grouping microorganisms.csv")
+
+# created by calling createAbMapping()
+tAntibiotics = readxl::read_xlsx('data/ab_mapping.xlsx') %>% 
+  select(Mnemonic, Naam, `EARS-Net.Mnemonic`, `EARS-Net.Name`)
+
+# radar_data <- readGlimsArchive(ymdStartDate = ymdStartDate,
+#                                ymdEndDate = ymdEndDate, 
+#                                readPreprocessed = T)
+radar_data <- readGlimsFinalResults()
+
+if (!'CRO' %in% colnames(radar_data))
+  radar_data <- radar_data %>% add_column('CRO' = '')
+
+if (!'OXA' %in% colnames(radar_data))
+  radar_data <- radar_data %>% add_column('OXA' = '')
+
+specialties <- read_csv("data/specialties.csv", lazy = FALSE)
+
 specialties <- specialties %>%
  mutate(specialty_shiny = if_else(specialty != "Outpatients", paste0(specialty, " (", str_replace_all(specialism, ";", ", "), ")"), "Outpatients"),
         department = if_else(is.na(department), "NO DEPARTMENT GIVEN", department))
@@ -38,9 +57,6 @@ specialties <- separate_rows(specialties, specialism, sep = ";")

 specialties <- separate_rows(specialties, department, sep = ";")

-pathogens <- read_csv("grouping microorganisms.csv")
-
-radar_data <- readRDS("bk_2019.rds")

 radar_data <- radar_data %>% 
  left_join(specialties) %>% 
@@ -50,10 +66,10 @@ radar_data <- radar_data %>%
         FLC = OXA) %>% 
  left_join(pathogens) 

-# lab_ab <- c("CFCC", "CFCL", "CFOC", "CFSC", "CFTC", "COCL", "GEHI", "QUDA")
+lab_ab <- c("CFCC", "CFCL", "CFOC", "CFSC", "CFTC", "COCL", "GEHI", "QUDA")

 radar_data <- radar_data %>%
-  # select_at(vars(!contains(lab_ab))) %>% 
+  select_at(vars(!contains(lab_ab))) %>%
  rename_if(is.rsi, function(x) ifelse(is.na(suppressWarnings(as.ab(x))),
                                       x,
                                       suppressWarnings(as.ab(x)))) %>% 
--- a/globalData.R
+++ b/globalData.R
@@ -0,0 +1,12 @@
+abAbbrevs = c('AMB', 'AMC', 'AMK', 'AMP', 'AMX', 'ANI', 'ATM', 'AZA', 'CAS', 'CAZ', 'CCV', 
+              'CHL', 'CIP', 'CIX', 'CLF', 'CLI', 'COL', 'CPC', 'CRO', 'CTC', 'CTX', 'CXM', 
+              'CZA', 'CZO', 'CZT', 'DAP', 'DOX', 'ERY', 'FCT', 'FEP', 'FLC', 'FLU', 
+              'FOS', 'FOX', 'FOX1', 'FUS', 'GEN', 'GRI', 'IPM', 'ITR', 'KAN', 'LNZ', 
+              'LVX', 'MCZ', 'MEM', 'MFX', 'MIF', 'MNO', 'MTR', 'MUP', 'NAL', 'NIT', 
+              'NOR', 'PEN', 'PIP', 'PLB', 'POS', 'QDA', 'RIF', 'SAM', 'STR1', 
+              'SXT', 'TEC', 'TEM', 'TGC', 'TMP', 'TOB', 'TRB', 'TZP', 'VAN', 'VOR')
+
+patchFirstColumns = c('sampleid', 'date', 'is_clinical', 'is_icu', 'is_outward', 
+                      'IsolNr', 'Materiaal', 'specimen_type', 'specimen_group', 
+                      'patientid', 'birth_date', 'department', 'type_dept', 
+                      'specialism', 'age', 'gender', 'MIC', 'RAP', 'mo')
--- a/interface_4D.R
+++ b/interface_4D.R
@@ -0,0 +1,206 @@
+# initially only interfacing 4D data
+
+require(assertthat, quietly = T)
+
+source('4D_archive.R')
+source('anonymize.R')
+source('globalData.R')
+
+
+readGlimsArchive = function(ymdStartDate, ymdEndDate, 
+                            readPreprocessed = T, 
+                            tFiles = NULL, tFileOvw = NULL) {
+  if (readPreprocessed) {
+    # return(read_rds('data/bk_archive_2014-08_2022-06_v2022-09-15_3.rds'))
+    # return(read_rds('data/total_2022-09-30.rds'))
+    return(read_rds('data/bk_2019.rds'))
+    # return(read_rds('data/bk_2022.09.01_2022.10.13.rds'))
+    # return(read_rds('data/archive_2022-01-01_2022-10-19_v2022-10-20_1530.rds')) # read_2_2022-10-20_1530
+    # return(read_rds('data/read_10_2022-01-01_2022-10-25_x2022-10-25.rds'))
+  } else {
+    assert_that(!is.null(tFiles), 
+                msg = 'readGlimsArchive(.., readPreprocessed = F, ..)')
+    tArchive = readArchive(
+      tFileInfo = tFiles %>% 
+        filter(
+          fileDate >= ymdStartDate,
+          fileDate <= ymdEndDate
+        ),
+      tFileOvw = tFileOvw,
+      createDebugList = F # save memory 
+    )
+    
+    bk = NULL
+    if (!is.null(tArchive)) {
+      radarData <- tArchive %>%
+        # work towards RadaR nomenclature
+        rename(sampleid = Monsternummer,
+               patientid = PIN.Lot,
+               department = ligafdeling,
+               specialism = B,
+               mo = MO) %>% 
+        # some processing towards RadaR format
+        mutate(
+          gender = Geslacht,
+          # case_when(Geslacht == 'M' ~ 'Male',
+          #                    Geslacht == 'V' ~ 'Female') %>% as.factor(),
+          type_dept = specialism,
+          birth_date = dmy(Geboortedatum),
+          age = trunc((birth_date %--% date) / years(1)),
+          Materiaal = replace_na(Materiaal, ''),
+          Materiaal = str_trim(Materiaal),
+          # Rapport = replace_na(Rapport, ''),
+          # Rapport = str_trim(Rapport),
+          specimen_group = 'bk',
+          specimen_type = '',
+          is_icu = is_icu(department),
+          is_clinical = is_clinical(department),
+          is_outward = is_outward(department)
+        ) %>%
+        # get AMR package (WHONET, EARS-Net std) mnemonics and names
+        left_join(tAntibiotics, by = c('AB' = 'Mnemonic')) %>%
+        rename(ab.amr = `EARS-Net.Mnemonic`) %>%
+        # get the fields used in RadaR only
+        select(
+          sampleid, date, IsolNr, Materiaal,
+          specimen_group, specimen_type, 
+          department, type_dept, specialism,
+          is_icu, is_clinical, is_outward,
+          birth_date, age, gender, patientid, mo, RIS, ab.amr, MIC, RAP
+        ) 
+      
+      # return(radarData)
+      
+      # these sub steps are for debugging purposes
+      z <- radarData %>% 
+        mutate(IsolNr = as.numeric(IsolNr)) %>% 
+        arrange(sampleid, department) %>%
+        group_by(sampleid) %>% 
+        # fill repeating data
+        fill(patientid, age, gender, birth_date, department, type_dept, specialism) %>%
+        filter(#!is.na(mo), 
+               #!is.na(RIS), 
+               IsolNr == max(IsolNr, na.rm = T)) %>% 
+        ungroup() %>%
+        distinct(sampleid, date, IsolNr, mo, ab.amr, .keep_all = T)
+        # # get distinctive rows, arrange not strictly necessary
+        # # arrange(sampleid, date, department, type_dept, specialism, 
+        # #         birth_date, age, gender, patientid, mo, ab.amr, RIS) %>% 
+        # # commented 2022-10-14 by GB in order to get the slicing done correctly
+        # # group_by(date, sampleid, IsolNr, department, type_dept, specialism, 
+        # #           birth_date, age, gender, patientid, mo, ab.amr) %>%
+        # group_by(date, sampleid, IsolNr, mo, ab.amr) %>% 
+        # slice(n()) %>% 
+        # ungroup() %>%
+        # distinct(date, sampleid, IsolNr, department, type_dept, specialism, 
+        #          birth_date, age, gender, patientid, mo, ab.amr, .keep_all = T)  %>%
+        # # filter only reported rows, with a micro-organism and RIS value
+        # filter(
+        #   !is.na(RAP)
+        #   # !is.na(mo),
+        #   # !is.na(RIS)
+        # )
+        
+      return(z)
+      
+      # pivot to wider format
+      bk <- z %>% 
+        filter(!is.na(RIS)) %>%
+        mutate(
+          RIS = as.rsi(RIS),
+          MIC = as.mic(MIC),
+          mo = as.mo(mo)) %>%
+        pivot_wider(names_from = ab.amr, values_from = RIS) %>%
+        select(order(colnames(.))) %>%
+        relocate(all_of(patchFirstColumns), .before = 1) 
+      
+      
+      # bk = bk %>%
+      #   mutate(
+      #     across( 
+      #       (which(colnames(.) == 'mo') + 1) : length(colnames(.)), 
+      #       as.rsi) 
+      #   )
+    }
+    return(bk)
+  }
+}
+
+
+readArchiveOfDay = function(tFiles, tFileOvw, ymdDate) {
+  readGlimsArchive(ymdStartDate = ymdDate,
+                   ymdEndDate   = ymdDate, 
+                   readPreprocessed = F, 
+                   tFiles = tFiles, 
+                   tFileOvw = tFileOvw)
+}
+
+readArchiveToday = function(tFiles, tFileOvw) {
+  readGlimsArchive(ymdStartDate = today(), 
+                   ymdEndDate   = today(), 
+                   readPreprocessed = F, 
+                   tFiles = tFiles, 
+                   tFileOvw = tFileOvw)
+}
+
+# anonymize the dataset
+if (F) {
+  nopd = readArchiveToday()
+  # nopd = nopd %>%
+  # filter(str_length(patientid) == 7) %>%
+  # rowwise() %>%
+  # mutate(patientid = getUupin(patientid)) %>%
+  # ungroup() %>%
+  # mutate(birth_date = ymd('1970-01-01'))
+  
+  
+  filename = paste0('data/bk_', ymdStartDate, '_', ymdEndDate, '.rds')
+  if (!file.exists(filename)) {
+    write_rds(nopd, file = filename)
+  }
+}
+
+# stich together processed parts of the archive
+if (F) {
+  tibble(name = list.files(path = 'data/', pattern = 'bk_.*_.*\\.rds'))
+}
+
+# testing the slicing
+if (F) {
+  ymdStartDate = ymd('2022-09-01')
+  
+  # find interesting cases
+  radarData %>%
+    arrange(sampleid, department) %>% 
+    group_by(sampleid) %>% 
+    fill(patientid, age, gender, birth_date, department, type_dept, specialism) %>% 
+    ungroup() %>% 
+    group_by(date, sampleid, IsolNr, mo, ab.amr) %>% 
+    # slice(n()) %>% 
+    ungroup() %>%
+    select(1:3, 13:ncol(.)) %>% 
+    filter(ab.amr %in% c('CAZ', 'CTX')) %>% filter(!is.na(mo)) %>% count(date, sampleid, IsolNr, mo, ab.amr) %>% filter(n>1)
+  
+  # show without slicing last row
+  radarData %>%
+    arrange(sampleid, department) %>% 
+    group_by(sampleid) %>% 
+    fill(patientid, age, gender, birth_date, department, type_dept, specialism) %>% 
+    ungroup() %>% 
+    group_by(date, sampleid, IsolNr, mo, ab.amr) %>% 
+    # slice(n()) %>% 
+    select(1:3, 13:ncol(.)) %>% 
+    filter(sampleid == '22360599331', ab.amr %in% c('CAZ', 'CTX'))
+  
+  # show with slicing last row
+  radarData %>%
+    arrange(sampleid, department) %>% 
+    group_by(sampleid) %>% 
+    fill(patientid, age, gender, birth_date, department, type_dept, specialism) %>% 
+    ungroup() %>% 
+    group_by(date, sampleid, IsolNr, mo, ab.amr) %>% 
+    slice(n()) %>% 
+    select(1:3, 13:ncol(.)) %>% 
+    filter(sampleid == '22360599331', ab.amr %in% c('CAZ', 'CTX'))
+  
+}
--- a/interface_4D_final_results.R
+++ b/interface_4D_final_results.R
@@ -0,0 +1,307 @@
+suppressMessages({
+  require(tidyverse)
+  require(lubridate)
+  require(vroom)
+  require(writexl)
+  require(AMR)
+})
+
+
+minMemoryAsap = F
+
+
+# Rapport ab: 
+#   altijd wel hebben, 
+# # = niet gerapporteerd
+# 
+# Rap st: 
+#   # = niet gerapporteerd, niet opnemen als niet gerapporteerd
+# niet gerapporteerd kan fout zijn
+  
+dataPath = 'interfaces/4D_final_results/data'
+# Antibiotica data
+fnAB = 'AB-2021-22.csv'
+# Microorganism data
+fnST = 'ST-Alles.csv'
+# Monster data
+fnMS = 'MSv2-2022-tm10.csv'
+# Isolaattesten data
+fnTS = 'TS-Alles-221130-092256.csv'
+# Tests data
+fnTM = 'TM-2022-tm10.csv'
+
+source('globalData.R')
+source('interfaces.R')
+
+# temporarily put here for dev
+tAntibiotics = readxl::read_xlsx('data/ab_mapping.xlsx') %>% 
+  select(Mnemonic, Naam, `EARS-Net.Mnemonic`, `EARS-Net.Name`)
+
+
+tAB = vroom(file = file.path(dataPath, fnAB), show_col_types = F)
+tST = vroom(file = file.path(dataPath, fnST), show_col_types = F)
+tMS = vroom(file = file.path(dataPath, fnMS), skip = 5, show_col_types = F)
+tTS = vroom(file = file.path(dataPath, fnTS), show_col_types = F)
+tTM = vroom(file = file.path(dataPath, fnTM), show_col_types = F)
+
+tAB = tAB %>%
+  filter(Monsternummer %>% str_sub(1, 2) == '22') %>%
+  mutate(Monsternummer = as.character(Monsternummer))
+
+# gerapporteerde isolaten
+tST = tST %>%
+  filter(
+    Monsternummer %>% str_sub(1, 2) == '22',
+    # '#' = niet gerapporteerd, deze niet includeren
+    # in ST betekent dit: alleen NA wel includeren
+    is.na(Rap) # = gerapporteert
+  ) %>%
+  select(-`UMCG 4D Uniekmaker Specimen - IsolNr`) %>%
+  mutate(Monsternummer = as.character(Monsternummer))
+
+tMS = tMS %>%
+  filter(
+    Monsternr %>% str_sub(1, 2) == '22',
+    ObjType != 'L'
+    # Materiaal == 'MP_BLOED' # 118922, 2023-02-07
+  ) %>%
+  rename(Monsternummer = Monsternr,
+         patientid = `PIN-lot`) %>%
+  select(-`Materiaal Variabelen`, ObjType, Lot, Kamer) %>%
+  mutate(date = dmy_hms(paste(OntvOLabDt, as.character(Tijd)))) %>%
+  select(-OntvOLabDt, -Tijd) %>%
+  mutate(Monsternummer = as.character(Monsternummer))
+
+tTS = tTS %>%
+  mutate(Monsternummer = as.character(Monsternummer))
+
+# MS monsters die geen isolaat hebben zijn negatief
+# vandaar nu de right_join
+# Vervolgens moeten uit de MS file alleen de kweekbepalingen worden geincludeerd
+
+x = tMS %>% 
+  mutate(Monsternummer = str_trim(Monsternummer)) %>%
+  left_join(tST, by = c('Monsternummer')) %>%
+  left_join(tAB, by = c('Monsternummer', 'Isolnr' = 'IsolNr')) %>%
+  left_join(tTS, 
+            by = c('Monsternummer', 'Isolnr' = 'Isolaatnr'),
+            suffix = c('.MSxSTxAB', '.TS_IsolaatTest'))
+
+if (minMemoryAsap) {
+  # save memory
+  rm(tAB, tST, tMS)
+}
+
+x = x %>%
+  # select(-`UMCG 4D Uniekmaker Specimen - IsolNr`) %>%
+  mutate(year = str_sub(Monsternummer, 1, 2) %>% as.numeric(),
+         weeknr = str_sub(Monsternummer, 3, 4) %>% as.numeric()) %>%
+  # smaller subset in order to keep the first explorations workable
+  filter(year == 22) 
+         # weeknr >= 41) 
+  
+s = x %>% 
+  # filter(Materiaal == 'MP_BLOED') %>% # 118922, 2023-02-07
+  # fields expected by RadaR:
+  #
+  # sampleid <chr>, date <date>, specimen_group <chr>, specimen_type <chr>, 
+  # department <chr>, type_dept <fct>, specialism <chr>, 
+  # is_icu <lgl>, is_clinical <lgl>, is_outward <lgl>, 
+  # age <int>, gender <fct>, patientid <chr>, mo <mo>, 
+  # 
+  # align column names
+  rename(
+    sampleid = Monsternummer,
+    # mo = MO, # later by as.mo(MO)
+    rap.st = Rap,
+    rap.ab = Rapport.MSxSTxAB,
+    department = LigAfd,
+    specialism = Spec,
+    age = Leeftijd,
+    gender = Geslacht
+  ) %>%
+  # add yet missing fields
+  add_column(
+    specimen_group = 'bk',
+    specimen_type = '',
+    type_dept = '' # needed?
+  ) %>%
+  mutate(
+    is_icu = is_icu(department), 
+    is_clinical = is_clinical(department),
+    is_outward = is_outward(department),
+  ) %>%
+  relocate(rap.ab, .after = rap.st) %>%
+  relocate(c(year, weeknr, date), .after = sampleid) %>%
+  # get AMR package (WHONET, EARS-Net std) mnemonics and names
+  left_join(tAntibiotics, by = c('AB' = 'Mnemonic')) %>%
+  rename(
+    AB_WHO = `EARS-Net.Mnemonic`
+  ) %>%
+  select(-AB)
+
+if (minMemoryAsap) {
+  # save memory
+  rm(x)
+}
+
+# TODO: find diff in multiple rows per sampleid that yield <list> io <rsi>
+if (F) {
+  s %>% filter(sampleid == '22010009431') %>% 
+    select(sampleid, AB_WHO, RIS, MIC, MO) %>% 
+    mutate(
+      RIS = as.rsi(RIS),
+      MIC = as.mic(MIC),
+      mo = as.mo(MO)) %>% 
+    pivot_wider(names_from = AB_WHO, values_from = RIS)
+  # diff: rap.ab, Etst, Naam, AB_WHO, `EARS-Net.Name`
+}
+
+x = s %>%
+  select(-`EARS-Net.Name`, -Naam,
+         # door de volgende waarden ontstaan dubbelingen in radarData, daarom
+         # tijdelijk wegfilteren. maar gaat later wel nodig zijn
+         -Rapport.TS_IsolaatTest, -Waarde.TS_IsolaatTest, -isolaattest,
+         -MIC, -Etst, -Diff, -rap.ab) %>%
+  # RIS values of 'V' are a pain in the neck
+  mutate(RIS = replace(RIS, RIS == 'V', NA)) %>%
+  group_by(sampleid, Isolnr, MO, AB_WHO) %>% 
+  # mark rows to be deleted
+  mutate(deleteRow = n() > 1 & is.na(RIS) & !is.na(MO)) %>% 
+  ungroup() %>%
+  # delete the rows that are marked with deleteRow
+  filter(!deleteRow) %>% 
+  # remove column deleteRow
+  select(-deleteRow) %>%
+  # all multiple values should be absent now, so get distinct rows
+  distinct(sampleid, Isolnr, AB_WHO, RIS, .keep_all = T)
+
+y = x %>% 
+  pivot_wider(
+    id_cols = c(sampleid, Isolnr, MO),
+    names_from = AB_WHO, 
+    values_from = RIS
+  )
+
+radarData = x %>%
+  # filter(!is.na(AB_WHO)) %>%
+  mutate(
+    RIS = as.rsi(RIS),
+    # MIC = as.mic(MIC),
+    mo = as.mo(MO)) %>%
+  pivot_wider(names_from = AB_WHO, values_from = RIS) %>%
+  # pivot_wider(names_from = Rapport.TS_IsolaatTest,
+  #             values_from = Waarde.TS_IsolaatTest)
+  select(order(colnames(.))) %>%
+  relocate(
+    all_of(
+      c(
+        'sampleid', 'Isolnr', 'patientid', 'year', 'weeknr', 'date', 'MMBProc',
+        'department', 'type_dept', 'specialism', 'is_clinical', 'is_icu', 'is_outward', 
+        'specimen_type', 'specimen_group', 
+        'rap.st', 
+        # 'rap.ab', 
+        'age', 'gender', 
+        # 'Diff', 'Etst', 
+        'Waarde.MSxSTxAB', 
+        # 'Waarde.TS_IsolaatTest', 
+        # 'MIC', 
+        'MO', 'mo', 'Pos'
+      )
+    ), 
+    .before = 1
+  ) %>% 
+  select(-`NA`)
+
+if (minMemoryAsap) {
+  # save memory
+  rm(s)
+}
+
+radarData = radarData %>%
+  filter(date >= '2022-01-01', date <= '2022-09-30')
+
+
+if (F) {
+  # TODO je verwacht meer Neg dan Pos 
+  # DONE hiervoor tMS left_join tST ipv andersom
+  radarData %>% filter(!is.na(Pos), MO == 'mrsa') %>% count(patientid)
+  radarData %>% filter(!is.na(Pos), MO == 'esccol') %>% count(patientid)
+  radarData %>% filter(!is.na(Pos), MO == 'pseaer') %>% count(patientid)
+  radarData %>% filter(!is.na(Pos), MO == 'klepne') %>% count(patientid)
+  radarData %>% filter(!is.na(Pos), MO == 'canalb') %>% count(patientid)
+  radarData %>% filter(!is.na(Pos), MO == 'enbcco') %>% count(patientid)
+  
+  radarData %>% filter(isolaattest == 'ESBL', Waarde.TS_IsolaatTest == '+', MO == 'enbcco') %>% count(patientid)
+  radarData %>% filter(isolaattest == 'ESBL', Waarde.TS_IsolaatTest == '+', MO == 'esccol') %>% count(patientid)
+  radarData %>% filter(isolaattest == 'ESBL', Waarde.TS_IsolaatTest == '+', MO == 'klepne') %>% count(patientid)
+  radarData %>% filter(MO == 'klepne', GEN == 'R') %>% count(patientid)
+  radarData %>% filter(MO == 'klepne', CIP == 'R') %>% count(patientid)
+  radarData %>% filter(MO == 'klepne', MEM == 'R') %>% count(patientid)
+  radarData %>% filter(MO == 'staaur', CLI == 'R') %>% count(patientid)
+
+  radarData %>% filter(is.na(MO), is.na(rap.st))
+  
+  # rijen voor pat 8000005 in MS file: 36
+  tMS %>% filter(patientid == '8000005') %>% count(Monsternummer)
+  # rijen voor pat 8000005 in ST file: 23
+  tST %>% filter(Monsternummer %in% (tMS %>% filter(patientid == '8000005') %>% pull(Monsternummer) %>% unique())) %>% group_by(Monsternummer) %>% mutate(n = n()) %>% print(n=25)
+  # 22 * 36 = 792
+  
+  
+  
+  x %>% filter(sampleid == '22070019631') %>% select(-year, -weeknr, -date, - patientid, -age, -ObjType, -Lot, -gender, -PC, -department, -Type, -Kamer, -Isolnr, MO, -Waarde.MSxSTxAB, -Pos, -rap.st, -MIC, -Diff, -isolaattest, -Waarde.TS_IsolaatTest, -Rapport.TS_IsolaatTest, -Afname, -is_icu, -is_clinical, -is_outward) %>% print(n=38)
+  
+  x %>% filter(sampleid == '22070019631', Isolnr == 3) %>%
+    # door deze waarden ontstaan dubbelingen in radarData, daarom
+    # tijdelijk wegfilteren. maar gaat later wel nodig zijn
+    # select(-MIC, -Etst, -Diff, -rap.ab) %>% 
+    pivot_wider(names_from = AB_WHO, values_from = RIS) %>% view()
+    # pivot_wider(names_from = Rapport.TS_IsolaatTest, 
+    #             values_from = Waarde.TS_IsolaatTest) %>% view()
+
+  listMultiValCols = function(x) {
+    z = x
+    colns = colnames(z)
+    collens = c()
+    for (c in 1:length(colns)) { 
+      collens = c(collens, 
+                  z %>% 
+                    group_by(sampleid, Isolnr) %>% 
+                    pull(colns[c]) %>% unique() %>% length()) 
+    }
+    multValCols = colns[which(collens != 1)]
+    return(z %>% select(sampleid, patientid, multValCols, RIS))
+  }
+  
+  # dit AB levert nog steeds een List
+  x %>% filter(AB_WHO == 'CAZ') %>% count(sampleid, Isolnr, AB_WHO) %>% filter(n>1)
+  
+  x %>% 
+    filter(
+      sampleid == '22070547731' %>% str_trim(), 
+      AB_WHO == 'CAZ',
+      RIS != 'V'
+    ) %>% 
+    select(sampleid, Isolnr, AB_WHO, RIS)
+
+  key = c('sampleid', 'Isolnr')
+  y %>% filter(!is.na(FOX))%>% select(key, FOX)
+    
+    x %>% 
+      select(sampleid, Isolnr, AB_WHO, RIS) %>% 
+      pivot_wider(id_cols = c(sampleid, Isolnr), 
+                  names_from = AB_WHO, 
+                  values_from = RIS) %>% 
+      select(1:5)
+    
+    
+  selectRelCols = function(x, s = c()) {
+    x %>% select(sampleid, Isolnr, MO, AB_WHO, RIS, all_of(s))
+  }
+}
+
+
+# vroom_write(x = x, 
+#             file = file.path(getwd(), dataPath, 'ST x AB_2022.csv'), 
+#             delim = ';')
--- a/interfaces.R
+++ b/interfaces.R
@@ -0,0 +1,66 @@
+suppressMessages({
+  require(tidyverse)
+  require(AMR)
+})
+
+source('config.R')
+source('anonymize.R')
+
+
+# moved from 4D_archive.R
+createAbMapping = function() {
+  options(AMR_locale = 'nl')
+  abOpm = readxl::read_xlsx('data/ab_glims_2022-09-02.xlsx', 
+                            sheet = 1)
+  ab2 = readxl::read_xlsx('ab_glims_2022-09-02.xlsx', 
+                          sheet = 'codes uit Glims 2022-09-02')
+  
+  # first try to retrieve the AB from Naam
+  ab2 = ab2 %>%
+    select(Mnemonic, Naam, Toelichting) %>%
+    mutate(Naam = str_replace(Naam, 'clavulaanzuur', 'clavulanic acid'),
+           `EARS-Net.Mnemonic` = as.ab(Naam),
+           `EARS-Net.Name` = ab_name(Naam)) %>%
+    left_join(abOpm %>% select(name4d, Opm.Greetje), by = c('Mnemonic' = 'name4d'))
+  
+  # secondly, try to retrieve the AB from Mnemonic
+  ab2 = ab2 %>%
+    mutate(`EARS-Net.Mnemonic` = replace(`EARS-Net.Mnemonic`, 
+                                         is.na(`EARS-Net.Mnemonic`), 
+                                         as.ab(Mnemonic)),
+           `EARS-Net.Name` = replace(`EARS-Net.Name`, 
+                                     is.na(`EARS-Net.Name`), ab_name(Mnemonic)))
+}
+
+is_icu = function(departmentName) {
+  if (!is_empty(departmentName)) {
+    departmentName %in% 
+      c('KINN', 'KINZ', 'KICW', 'KICN', 'KINW', # NICU IC
+        'KICG', 'KICR', 'KICB', 'KICK',         # Kinder IC
+        'THIC', 'ICV2',                         # old and new name
+        'NCIC', 'ICV1',                         # old and new name
+        'E1IC', 'ICV3',                         # old and new name
+        'CICA', 'CICB', 'CICD', 'CHIC', 'ICV4' # old and new name
+      )
+  } else {
+    F
+  }
+}
+
+is_clinical = function(departmentName) {
+  is_icu(departmentName) |
+    str_ends(departmentName, 'VA') |
+    departmentName %in% 
+    c('L1SC', 'CIVZ', 'NEUC', 'L1KV', 'OZOK', 'BOA0', 'C1CC', 'BOB0')
+} 
+
+is_outward = function(departmentName) {
+  str_ends(departmentName, 'P') |
+    str_detect(departmentName, 'DB')
+}
+
+readGlimsFinalResults = function() {
+  cat(paste0('Reading ', radarDataFilename, '\n'))
+  return(read_rds(radarDataFilename))
+}
+
--- a/interfaces/4D/src/4D_archive.R
+++ b/interfaces/4D/src/4D_archive.R
@@ -0,0 +1,299 @@
+require(tidyverse)
+if (!require(lubridate)) {
+  install.packages('lubridate')
+  require(lubridate)
+}
+require(crayon)
+require(stringr)
+
+# TODO: check wat ProcessAndMergeDailyFiles ingaat: Monsternummer is.character?
+# TODO: min # rows = bijv. 10 i.v.m. privacy
+# TODO: 2 versies: MMBI & 'Algemeen'
+
+memSize = function() {
+  result = tibble(object='', size=0, sizeMB=0)
+  objs = ls(envir = globalenv())
+  for (i in 1:length(objs) ) { 
+    objName = objs[i]
+    sizeBytes = as.numeric(object.size(get(objName)))
+    result = result %>%
+      add_row(object = objName, size = sizeBytes, sizeMB = sizeBytes / 1024^2)
+  }
+  result = result %>%
+    arrange(desc(size))
+  return(list(result, sum(result$sizeMB)))
+}
+
+gatherAbIds = function() {
+  archFolders = list.dirs(file.path(dataFolder, 'archive'), full.names = F)
+  archFolders = archFolders[str_length(archFolders) == 26]
+  for (archFolder in archFolders) {
+    folder = archFolder
+    abFiles = list.files(file.path(dataFolder, 'archive', folder), pattern = 'AB-*')
+    for (abFileName in abFiles) {
+      abFile = read_4D_file(file.path(dataFolder, 'archive', folder, abFileName))
+      abNamesDict = tibble(name4d = abFile %>% pull(AB) %>% unique())
+      newNamesDict = abNamesDict %>% filter(!name4d %in% a$name4d, !is.na(name4d)) 
+      tNewNames = tibble(name4d = newNamesDict$name4d, 
+                         nameAMR = as.ab(newNamesDict$name4d), 
+                         nameAMRlong = ab_name(newNamesDict$name4d))
+      a = a %>% add_row(tNewNames)
+    }
+  }
+  return(a)
+}
+
+source('4D_day.R')
+
+useCachedFileInfo = T
+
+archivePath = './data/archive'
+folders = list.files(path = archivePath, pattern = 'ImportedFromGlimms-*', full.names = T)
+
+addFileInfo = function(x, filePath, fileName) {
+  mtime = file.mtime(filePath)
+  
+  x = x %>%
+    add_row(filePath = filePath, 
+            fileName = fileName,
+            fileType = str_sub(fileName, 1, 2),
+            fileDate = fileName %>% str_sub(4, 9) %>% ymd(),
+            modified = mtime, 
+            # mdate = as.Date(ymd_hms(modified)),
+            size = file.size(filePath)) %>%
+    group_by(fileDate) %>%
+    mutate(nFilesPerDate = n()) %>%
+    ungroup() %>%
+    arrange(desc(fileDate))
+  
+  return(x)
+}
+
+deriveFileOverview = function(tFiles) {
+  tFiles %>% 
+    group_by(fileDate) %>% 
+    mutate(AB = cur_data() %>% 
+             filter(str_sub(fileName, 1, 2) == 'AB') %>% 
+             nrow(),
+           MS = cur_data() %>% 
+             filter(str_sub(fileName, 1, 2) == 'MS') %>% 
+             nrow(),
+           ST = cur_data() %>% 
+             filter(str_sub(fileName, 1, 2) == 'ST') %>% 
+             nrow(),
+           TM = cur_data() %>% 
+             filter(str_sub(fileName, 1, 2) == 'TM') %>% 
+             nrow(),
+           TS = cur_data() %>% 
+             filter(str_sub(fileName, 1, 2) == 'TS') %>% 
+             nrow()
+    ) %>%
+    slice(1) %>%
+    select(fileDate, AB, MS, ST, TM, TS)
+}
+
+getFilesInfo = function() {
+  tFiles = tibble(filePath = 'deleteme', 
+                  fileName = '', 
+                  fileType = '',
+                  fileDate = ymd('2022-01-01'),
+                  modified = ymd('2022-01-01'),
+                  size = 0
+  )
+  for (folder in folders) {
+    # if (folder == 'ImportedFromGlimms-2014-09') {
+    folderPath = folder #file.path(archivePath, folder)
+    files = list.files(folderPath, pattern = '*')
+    for (file in files) {
+      filePath = file.path(folderPath, file)
+      tFiles = tFiles %>% 
+        addFileInfo(filePath = filePath, fileName = file)
+    }
+  }
+  tFiles = tFiles %>%
+    mutate(mdate = as.Date(ymd_hms(modified))) %>%
+    filter(filePath != 'deleteme') %>%
+    arrange(fileDate, fileName)
+  tFiles = tFiles %>%
+    left_join(tFiles %>% count(fileDate, name = 'nFilesPerDate'), by = 'fileDate')
+  
+  return(tFiles)
+}
+
+addFileInfoOfDay = function(x, folders, ymdDate, overwrite = F) {
+  datePattern = format(ymdDate, format = '%y%m%d')
+  dirPattern = format(today(), format = '%Y-%m')
+  folder = folders[which(folders %>% str_detect(dirPattern))]
+  dayFiles = list.files(path = folder, pattern = datePattern)
+  for (fileName in dayFiles) {
+    cat(paste0('Processing ', fileName, '\n'))
+    x = x %>% addFileInfo(filePath = file.path(folder, fileName),
+                          fileName = fileName)
+  }
+  
+  return(x)
+}
+
+addFileInfoOfToday = function(tFiles, folders) {
+  browser()
+  dirPattern = format(today(), format = '%Y-%m')
+  folder = folders[which(folders %>% str_detect(dirPattern))]
+  tFiles = tFiles %>% 
+    addFileInfoOfDay(folderPath = file.path(archivePath, folder),
+                     ymdDate = today(),
+                     overwrite = F)
+  return(tFiles)
+}
+
+plotFileSize = function(tFiles) {
+  tFiles %>%
+    filter(fileDate > ymd('2014-10-01'),
+           fileDate <= ymd('2014-10-31'),
+           fileType != 'Ee') %>%
+    mutate(dayOfWeek = wday(fileDate),
+           dayOfWeekName = weekdays(fileDate),
+           size = size / (1024)
+    ) %>%
+    ggplot(aes(x = dayOfWeek, y = size, group = dayOfWeek)) +
+    geom_boxplot(aes(fill = fileType)) +
+    scale_y_continuous(limits = c(0, 1250)) +
+    facet_wrap(facets = c(~fileType)) +
+    labs(y = 'size [KB]') +
+    theme_light() +
+    theme(
+      plot.title = element_text(face = 'bold')
+    )
+}
+
+if (useCachedFileInfo) {
+  tFiles = read_rds('./data/tFiles_2022-10-13.rds')
+  tFileOvw = read_rds('./data/tFileOvw_2022-10-13.rds')
+} else {
+  tFiles = getFilesInfo()
+  tFileOvw = deriveFileOverview(tFiles)
+
+  write_rds(tFiles, paste0('./data/tFiles_', today(), '.rds'))
+  write_rds(tFileOvw, paste0('./data/tFileOvw_', today(), '.rds'))
+}
+
+readDailyFile = function(tFiles, date, prefix) {
+  result = NULL
+  fileNames = tFiles %>%
+    filter(fileDate == date, 
+           fileType == prefix) %>%
+    pull(filePath)
+  if (length(fileNames) > 0) {
+    for (f in 1:length(fileNames)) {
+      fileName = fileNames[f]
+      cat(silver(paste0('Reading ', fileName)), '\n')
+      fileContent = read_4D_file(fileName)
+      if (f == 1) {
+        result = fileContent
+      } else  {
+        result = result %>% 
+          add_row(fileContent)
+      }
+    }
+  }
+  return(result)
+}
+
+readDailyMSFile = function(tFiles, date) {
+  result = NULL
+  fileNames = tFiles %>%
+    filter(fileDate == date, 
+           fileType == 'MS') %>%
+    pull(filePath)
+  if (length(fileNames) > 0) {
+    for (f in 1:length(fileNames)) {
+      fileName = fileNames[f]
+      cat(silver(paste0('Reading ', fileName)), '\n')
+      msFileContent = read_4D_MS_file(fileName)
+      if (f == 1) {
+        result = msFileContent
+      } else  {
+        result = result %>% 
+          add_row(msFileContent)
+      }
+    }
+  }
+  return(result)
+  
+}
+
+readDailyFiles = function(tFiles, date) {
+  abFile = readDailyFile(tFiles, date, 'AB')
+  msFile = readDailyMSFile(tFiles, date)
+  stFile = readDailyFile(tFiles, date, 'ST')
+  tmFile = readDailyFile(tFiles, date, 'TM')
+  tsFile = readDailyFile(tFiles, date, 'TS')
+  
+  return(list(abFile, msFile, stFile, tmFile, tsFile))
+}
+
+readArchive = function(tFileInfo, tFileOvw, createDebugList = FALSE) {
+  tMergeDates = NULL
+  tsDates = NULL
+  tMergeDatesDebugList = list()
+  dates = tFileInfo %>% filter(!is.na(fileDate)) %>% pull(fileDate) %>% unique()
+  for (d in 1:length(dates)) {
+    currDate = dates[d]
+    cat(paste0('\nd = ', d, ', date: ', currDate, '\n'))
+    print(tFileOvw %>% filter(fileDate == currDate))
+    
+    dateFiles = readDailyFiles(tFileInfo, currDate)
+    
+    if (!is.null(dateFiles[[1]])) {
+      processed = ProcessAndMergeDailyFiles(
+        dateFiles[[1]] %>% select(-...15), 
+        dateFiles[[2]], 
+        dateFiles[[3]], 
+        dateFiles[[4]], 
+        dateFiles[[5]]
+      )
+      
+      if (!is.null(processed$merged)) {
+        tMergeDate = processed$merged %>% # abFile, stFile and msFile joined
+          mutate(date = currDate, .before = 1)
+        if (createDebugList) tMergeDatesDebugList[[d]] = tMergeDate
+        
+        # separate tables of (ab|st|ms|ts|tm)File of the current day can be found 
+        # in processed$(ab|st|ms|ts|tm)File, for instance:
+        if (!is.null(processed$tsFile)) {
+          tsDate = processed$tsFile %>%
+            mutate(date = currDate, .before = 1)
+        }
+        
+        if (is.null(tMergeDates)) {
+          if (nrow(tMergeDate) > 0) {
+            tMergeDates = tMergeDate
+          }
+          if (exists('tsDate')) {
+            if (nrow(tsDate) > 0) {
+              tsDates = tsDate
+            }
+          }
+        } else {
+          if (nrow(tMergeDates) > 0) {
+            tMergeDates = tMergeDates %>% add_row(tMergeDate)
+          }
+          if (exists('tsDates')) {
+            if (!is.null(tsDates)) {
+              if (nrow(tsDates)) {
+                tsDates = tsDates %>% add_row(tsDate)
+              }
+            }
+          }
+        }
+        tMergeDates = tMergeDates %>% 
+          mutate(Monsternummer = str_trim(Monsternummer))
+      }
+    }
+  }
+  
+  if (createDebugList) {
+    return(list(tMergeDates, tMergeDatesDebugList))
+  } else {
+    return(tMergeDates)
+  }
+}
--- a/interfaces/4D/src/4D_day.R
+++ b/interfaces/4D/src/4D_day.R
@@ -0,0 +1,268 @@
+# initially only interfacing 4D data
+
+# TODO loggen zoveel mogelijk over ontvangen data (nieuwe GLIMS data)
+# TODO pushen R scripts naar git server waar Gerhard en Kornelis bij kunnen
+
+# require(stringr)
+require(tidyverse)
+require(AMR)
+
+dataFolder = './data'
+
+get_most_recent_4D_file = function(path, prefix) {
+  files = list.files(dataFolder, paste0('^', prefix, '*'))
+  tFiles = tibble(name = files, 
+                  mtime = file.mtime(file.path(dataFolder, files))) %>%
+    arrange(desc(mtime))
+  
+  tmpFilename = tFiles$name[1]
+  
+  return(tmpFilename)
+}
+
+read_4D_file = function(path) {
+  tFile = read_tsv(path, col_types = paste0(rep('c', 30), collapse = ''))
+  
+  # remove comment lines
+  if (nrow(tFile) > 0) {
+    tFile = tFile %>%
+      slice( seq(4, nrow(tFile)) ) %>% 
+      filter(str_sub(ID, 1, 3) != '___')
+  }
+  
+  if (colnames(tFile)[1] == 'X1') {
+    if (str_detect(path, 'AB-')) {
+      tFile = tibble(ID = '0', 
+                     Monsternummer = '', 
+                     IsolNr = '', 
+                     AB = '', 
+                     V = '',
+                     PrefMIC ='',
+                     MIC = '',
+                     D = '',
+                     DIF = '',
+                     PrefEtest = '',
+                     E = '',
+                     ETEST = '',
+                     RIS = '',
+                     RAP = '',
+                     ...15 = '')
+    } else if (str_detect(path, 'MS-')) {
+      tFile = tibble(UMCG.Monster= '0',
+                     PIN.Lot= '0',
+                     TypeObject= '0',
+                     Lot= '0',Echtgenoot= '0',
+                     AchternNaam= '0',
+                     Geboortedatum= '0',
+                     Geslacht= '0',
+                     OntvOLabDatum= '0',
+                     Tijdstip.materiaal.afgenomen= '0',
+                     PC= '0',
+                     Aanvrager= '0',
+                     ligafdeling= '0',
+                     A= '0',
+                     B= '0',
+                     C= '0',
+                     Materiaal= '0',
+                     Materiaal.variabelen= '0',
+                     X = F)
+      } else if (str_detect(path, 'ST-')) {
+      tFile = tibble(ID = '0', 
+                     Monsternummer = '', 
+                     MMBProc = '',
+                     Isolnr = '', 
+                     MO = '', 
+                     Waarde = '', 
+                     Pos = '',
+                     Rap = '')
+    } else if (str_detect(path, 'TM-')) {
+      tFile = tibble(ID = '0', 
+                     Monsternummer = '', 
+                     Test = '',
+                     Uitslag = '')
+    } else if (str_detect(path, 'TS-')) {
+      tFile = tibble(ID = '0', 
+                     Monsternummer = '', 
+                     Isolnr = '', 
+                     Test = '', 
+                     Waarde = '', 
+                     Rapport = '')
+    }
+  }
+  if ('ID' %in% colnames(tFile)) {
+    tFile = tFile %>%
+      filter(ID != '0')
+  } else if ('UMCG.Monster' %in% colnames(tFile)) {
+    tFile = tFile %>%
+      filter(UMCG.Monster != '0')
+  }
+  
+  return(tFile)
+}
+
+read_4D_TM_file = function(path) {
+  tmFilename = str_remove(path, paste0(dirname(path), '/'))
+  tmFileL = readLines(path)
+  tmFileL = tmFileL[c(5:length(tmFileL))]
+  for (l in 1:length(tmFileL)) {
+    if (str_count(tmFileL[l], '\t') == 3) {
+      tmFileL = paste0(tmFileL, '\t')
+    }
+  }
+  newTmFilename = file.path(dirname(path), 
+                            tmFilename %>% str_replace('TM', 'bTM'))
+  if (file.exists(newTmFilename)) file.remove(newTmFilename)
+  writeLines(tmFileL, newTmFilename)
+  
+  tFile = read_tsv(newTmFilename, 
+                   col_names = c('ID','Monsternummer','Test','Status.test.tm','Uitslag'), 
+                   col_types = 'ccccc')
+  
+  # remove comment lines
+  tFile = tFile %>%
+    slice(c(5:(tFile %>% nrow()))) %>%
+    mutate(
+      Monsternummer = as.character(Monsternummer)
+    )
+  return(tFile)
+}
+
+get_most_recent_4D_MS_file = function(path) {
+  # get files
+  msFiles = list.files(path, '^MS*')
+  # create table with modification datetimes, for selection of the most recent one
+  tmsFiles = tibble(name = msFiles,
+                    mtime = file.mtime(file.path(path, msFiles))) %>%
+    arrange(desc(mtime))
+  # get the name of the most recently modified file
+  msFilename = tmsFiles$name[1]
+  
+  return(msFilename)
+}
+
+read_4D_MS_file = function(path) {
+  # print(path)
+  msFilename = str_remove(path, paste0(dirname(path), '/'))
+  # read content
+  msFileL = readLines(path)
+  # remove lines with repeated '_', useful for the human eye only
+  msFileL = msFileL[str_sub(msFileL, 1, 5) != '_____']
+  # replace (real) header
+  msHeader = readLines(file.path(dataFolder, 'headerMS.txt'))
+  msFileL[1] = paste0(msHeader, '\t')
+  # replace tabs within a date time combination with a space in line 2 and further,
+  # for example: replace '29/09/2021	14:05' by '29/09/2021 14:05'
+  #                                 ↑ = '\t'               ↑ = ' '
+  for (i in 2:length(msFileL)) {
+    msFileL[i] = msFileL[i] %>% str_replace_all('(?<=[:digit:]{4})\t(?=[:digit:]{2}:[:digit:]{2})', ' ')
+  }
+  # write to new machine readable file
+  newMsFilename = file.path(dirname(path), 
+                            msFilename %>% str_replace('MS', 'bMS'))
+  if (file.exists(newMsFilename)) file.remove(newMsFilename)
+  writeLines(msFileL, newMsFilename)
+  # finally: read the machine readable file
+  result = read.delim(file = newMsFilename, sep = '\t') %>%
+    as_tibble() %>%
+    mutate(
+      UMCG.Monster = as.character(UMCG.Monster),
+      PIN.Lot = as.character(PIN.Lot),
+      Aanvrager = as.character(Aanvrager),
+      C = as.character(C)
+    )
+  # Hurray!
+  return(result)
+}
+
+# abFile = read_4D_file(dataFolder, 'AB')
+# msFile = read_4D_MS_file(dataFolder)
+# stFile = read_4D_file(dataFolder, 'ST')
+# tmFile = read_4D_file(dataFolder, 'TM')
+# tsFile = read_4D_file(dataFolder, 'TS')
+
+ProcessAndMergeDailyFiles = function(abFile, msFile, stFile, tmFile, tsFile) {
+  if (!is.null(abFile))
+    abFile = abFile %>%
+      filter(!is.na(RAP)) #%>%
+  # mutate(
+  #   #Monsternummer = as.character(Monsternummer)
+  #   AB.amr = AMR::ab_property(AB, 'ab') %>% as.character()
+  # )
+  
+  if (!is.null(stFile))
+    stFile = stFile %>%
+      filter(!is.na(Rap)) %>%
+      # mutate(Monsternummer = as.character(Monsternummer) %>% str_trim()) %>%
+      rename(IsolNr = Isolnr)
+  
+  if (!is.null(msFile))
+    msFile = msFile %>%
+      rename(Monsternummer = UMCG.Monster) %>%
+      # mutate(Monsternummer = as.character(Monsternummer) %>% str_trim()) %>%
+      select(-X)
+  
+  # if (!is.null(tmFile))
+  # tmFile = tmFile %>%
+  #   mutate(Monsternummer = as.character(Monsternummer) %>% str_trim())
+  
+  if (!is.null(tsFile))
+    tsFile = tsFile %>%
+      filter(!is.na(Rapport))
+  #   mutate(Monsternummer = as.character(Monsternummer) %>% str_trim())
+  
+  if (!is.null(abFile) & !is.null(stFile))
+    t_ab_st = abFile %>%
+      full_join(stFile, 
+                by = c('Monsternummer', 'IsolNr'), 
+                suffix = c('.ab', '.st'))
+  
+  if (exists('t_ab_st')) {
+    if (!is.null(t_ab_st) & !is.null(msFile)) {
+      t_ab_st_ms = t_ab_st %>%
+        full_join(msFile, by = 'Monsternummer') %>%
+        rename(Waarde.st = Waarde)
+      
+      return(
+        list(
+          merged = t_ab_st_ms,
+          abFile = abFile,
+          stFile = stFile,
+          msFile = msFile,
+          tsFile = tsFile,
+          tmFile = tmFile
+        )
+      )
+      
+      # t_ab_st_ms_ts = t_ab_st_ms %>%
+      #   full_join(tsFile, by =  c('Monsternummer', 'IsolNr' = 'Isolnr'))
+      # 
+      # t_ab_st_ms_ts_tm = t_ab_st_ms_ts %>%
+      #   add_column(Uitslag = NA,
+      #              is_sampleResult = F) %>%
+      #   add_row(Monsternummer = tmFile$Monsternummer,
+      #           Test = tmFile$Test,
+      #           Uitslag = tmFile$Uitslag,
+      #           is_sampleResult = T)
+      # 
+      # t_ab_st_ms_ts_tm = t_ab_st_ms_ts_tm %>%
+      #   rename(Waarde.ts = Waarde) %>%
+      #   filter(!Test %in% c('no_filter_yet'))
+      # 
+      # # if ('X' %in% colnames(t_ab_st_ms_ts_tm)) {
+      # #   t_ab_st_ms_ts_tm = t_ab_st_ms_ts_tm %>%
+      # #     select(-'X')
+      # #   print('Dit statement is voor debugging')
+      # # }
+    }
+  }
+  return(
+    list(
+      merged = NULL,
+      abFile = abFile,
+      stFile = stFile,
+      msFile = msFile,
+      tsFile = tsFile,
+      tmFile = tmFile
+    )
+  )
+}
--- a/interfaces/4D/src/debug
+++ b/interfaces/4D/src/debug
@@ -0,0 +1,26 @@
+require(tidyverse)
+require(readr)
+
+v171 = read_rds('v171.rds')
+v180 = read_rds('v180.rds')
+
+new_v180 = v180 %>% anti_join(v171 %>% left_join(v180))
+
+source('interfaces.R')
+
+d = readGlimsArchive(NULL, NULL, T, NULL, NULL)
+d %>% filter(str_starts(mo, 'B_ES')) %>% count(mo, CXM) 
+# deze aantallen zie ik niet terug in wat de applicatie presenteert:
+# Ceftazidim: R (705), SI (2705), totaal dus 3410
+
+# misschien dat data_select() voor andere aantallen zorgt?
+data_select = d %>% 
+  # filter(specialty_shiny %in% input$specialtyInput & department %in% input$departmentInput) %>%
+  filter_first_isolate(col_patient_id = "patientid", episode_days = 30) %>% 
+  mutate(mo = as.mo(mo, Becker = TRUE))
+
+data_select %>% filter(str_starts(mo, 'B_ES')) %>% count(mo, CXM) 
+
+# Ja dus!
+
+
--- a/interfaces/4D/src/debug_inspect
+++ b/interfaces/4D/src/debug_inspect
@@ -0,0 +1,37 @@
+require(tidyverse)
+
+inspectRead = 10
+dataPath = 'data'
+
+abNames = read_rds('data/abNames.rds')
+
+filename = list.files(dataPath, paste0('read_', inspectRead, '_*'))
+d = read_rds(file.path(dataPath, filename)) #%>%
+  # left_join(abNames, by = c('ab.amr' = 'nameAMR'))
+# list a sampleid that occurs more often but not too often
+examples = d %>% count(sampleid) %>% arrange(desc(n)) %>% filter(n < 50)
+
+view_d = d %>% 
+  select(sampleid, date, IsolNr, MIC, mo, RIS, ab.amr, RAP) %>% #, name4d, nameAMRlong) %>%
+  mutate(asmo = suppressWarnings(as.mo(mo))) %>%
+  arrange(sampleid, IsolNr) #%>%
+  # filter(sampleid == examples$sampleid[i])
+
+has_ecoli = view_d %>% filter(str_starts(asmo, 'B_ES'))
+examples_having_ecoli = has_ecoli %>% 
+  filter(sampleid %in% examples$sampleid) %>%
+  pull(sampleid) %>%
+  unique()
+
+e = 2
+view(view_d %>% 
+       filter(sampleid == examples_having_ecoli[e]) %>%
+       mutate(IsolNr = as.numeric(IsolNr)) %>%
+       arrange(IsolNr),# %>%
+       #distinct(sampleid, date, IsolNr, mo, ab.amr, .keep_all = T), 
+     title = 'example')
+
+if (F) {
+  view(view_d)
+  view_d %>% filter(!is.na(mo)) %>% pull(mo) %>% unique()
+}
--- a/reads.R
+++ b/reads.R
@@ -0,0 +1,7 @@
+require(tidyverse)
+
+reads = tribble(
+  ~read, ~desription, 
+  3, 'returned z',
+  4, 'returned radarData'
+)
--- a/server.R
+++ b/server.R
@@ -2,18 +2,24 @@

 server <- function(input, output, session) {
  
+  hideTab(inputId = 'RadaRtabs', target = 'Data')
+  
+  output$disclaimerText = renderText('you must confirm understanding the disclaimer')
  
  # define data selection ---------------------------------------------------
  
-  
  data_select <- reactive({if (input$box2.6_first != 365) {
    radar_data %>% 
-      filter(specialty_shiny %in% input$specialtyInput & department %in% input$departmentInput) %>%
+      filter(specialty_shiny %in% input$specialtyInput & 
+               department %in% input$departmentInput &
+               Materiaal %in% input$materialInput) %>%
      filter_first_isolate(col_patient_id = "patientid", episode_days = input$box2.6_first) %>% 
      mutate(mo = as.mo(mo, Becker = TRUE))
  } else {
    radar_data_first %>% 
-      filter(specialty_shiny %in% input$specialtyInput & department %in% input$departmentInput)
+      filter(specialty_shiny %in% input$specialtyInput & 
+               department %in% input$departmentInput &
+               Materiaal %in% input$materialInput)
  }
  })
  
@@ -39,12 +45,16 @@ server <- function(input, output, session) {
  
  observe({
    x <- input$specialtyInput
-    if (!is.null(x)) {
-      update_departments <-
-        radar_data %>% filter(specialty_shiny %in% input$specialtyInput)
-      x <- sort(unique(update_departments$department))
-    }
-    else {
+    if (input$allDepartments) {
+      if (!is.null(x)) {
+        update_departments <-
+          radar_data %>% filter(specialty_shiny %in% input$specialtyInput)
+        x <- sort(unique(update_departments$department))
+      }
+      else {
+        x <- character(0)
+      }
+    } else {
      x <- character(0)
    }
    updateCheckboxGroupInput(
@@ -56,6 +66,12 @@ server <- function(input, output, session) {
    )
  })
  
+  observeEvent(input$btnDisclaimerUnderstood, {
+    if (input$cbDisclaimerRead == T) {
+      showTab(inputId = 'RadaRtabs', target = 'Data', select = T)
+    }
+  })
+  
  
  # sidebar hover -----------------------------------------------------------
  
@@ -104,7 +120,8 @@ server <- function(input, output, session) {
            color = "#d33724",
            size = 0.7
          )
-        ),tabPanel(
+        ),
+        tabPanel(
          title = "Isolates (and pathogens) detected",
          div(
            style = "position: absolute; left: 0.5em; bottom: 0.5em;",
@@ -312,7 +329,9 @@ server <- function(input, output, session) {
  pos_neg_plot <- reactive({
    
    pos_neg_plot <- radar_data %>% 
-      filter(specialty_shiny %in% input$specialtyInput & department %in% input$departmentInput) %>% 
+      filter(specialty_shiny %in% input$specialtyInput & 
+               department %in% input$departmentInput &
+               Materiaal %in% input$materialInput) %>% 
      group_by_at(input$box2.1_group) %>% 
      summarise(patients = n_distinct(patientid),
                total = n_distinct(sampleid),
@@ -413,7 +432,6 @@ server <- function(input, output, session) {
        facet_wrap(input$box2.3_group)
      }} +
      NULL
-    
  }) 
  
  output$patho_plot <- renderGirafe({
@@ -443,7 +461,7 @@ server <- function(input, output, session) {
      group_by_at(input$box2.2_group) %>% 
      summarise("14 days" = sum(first_14, na.rm = TRUE),
                "30 days" = sum(first_30, na.rm = TRUE),
-                "60 days" = sum(first_60, na.rm = TRUE))
+                "60 days" = sum(first_60, na.rm = TRUE)) %>%
      pivot_longer(cols = c("14 days", "30 days", "60 days"))
    
  })
@@ -479,7 +497,7 @@ server <- function(input, output, session) {
  
  output$box1 <- renderUI({
    div(
-      style = "position: relative",
+      style = "position: relative;",
      tabBox(
        id = "box1",
        width = NULL,
@@ -687,6 +705,8 @@ server <- function(input, output, session) {
        }}
    }
    
+    cat('Implicit join in get_resistance_df(..) @server.R:692\n')
+    
    get_resistance_df(data_select(),
                      mo_selected = mo_selected,
                      abx = if (mo_genus(mo_selected) %in% mo_genus(c("E. coli",
@@ -728,9 +748,12 @@ server <- function(input, output, session) {
        theme_void()
    } else {
      
-      ggplot(isolate_prop_data(), aes(value, reorder(antibiotic, value_r), fill = factor(interpretation, levels = c("SI", "R")),
-                                      tooltip = paste0("R = ", value_r, " (", percent_r, ")\n", "SI = ", value_si, " (", percent_si, ")")
-      )) + 
+      ggplot(isolate_prop_data(), 
+             aes(value, reorder(antibiotic, value_r), 
+                 fill = factor(interpretation, levels = c("SI", "R")),
+                 tooltip = paste0("R = ", value_r, " (", percent_r, ")\n", 
+                                  "SI = ", value_si, " (", percent_si, ")")
+             )) + 
        
        geom_col_interactive(colour = "black", position = "fill") +
        scale_fill_manual(limits = c("R", "SI"),
@@ -993,14 +1016,18 @@ server <- function(input, output, session) {
    # isolate_prop_data() %>% 
    if (input$box2 == "Positive & negative cultures") {
      all <- radar_data %>% 
-        filter(specialty_shiny %in% input$specialtyInput & department %in% input$departmentInput) %>% 
+        filter(specialty_shiny %in% input$specialtyInput & 
+                 department %in% input$departmentInput &
+                 Materiaal %in% input$materialInput) %>% 
        rename(group = input$box2.1_group) %>% 
        filter(!is.na(group)) %>% 
        distinct(patientid, .keep_all = TRUE) %>%
        count(group) %>%
        rename(n_all = n)
      pos <- radar_data %>% 
-        filter(specialty_shiny %in% input$specialtyInput & department %in% input$departmentInput) %>% 
+        filter(specialty_shiny %in% input$specialtyInput & 
+                 department %in% input$departmentInput &
+                 Materiaal %in% input$materialInput) %>% 
        rename(group = input$box2.1_group) %>% 
        group_by(group) %>%
        # group_by_at("gender") %>%
--- a/stitchArchive.R
+++ b/stitchArchive.R
@@ -0,0 +1,143 @@
+require(tidyverse)
+
+source('globalData.R')
+reqColumns = abAbbrevs
+
+patchFirstColumns = c('sampleid', 'date', 'is_clinical', 'is_icu', 'is_outward', 
+                      'IsolNr', 'Materiaal', 'specimen_type', 'specimen_group', 
+                      'patientid', 'birth_date', 'department', 'type_dept', 
+                      'specialism', 'age', 'gender', 'MIC', 'RAP', 'mo')
+
+readPatch = function(filename, reqColumns) {
+  patch = read_rds(filename)
+  for (c in 1:length(reqColumns)) {
+    if (!reqColumns[c] %in% colnames(patch)) { 
+      patch = patch %>% 
+        add_column(!!reqColumns[c] := as.rsi(NA)) %>%
+        select(order(colnames(.))) %>%
+        relocate(all_of(patchFirstColumns), .before = 1)
+    }
+  }
+  return(patch)
+}
+
+readAllPatches = function() {
+  tPatches = tibble(name = list.files(path = 'data', pattern = 'bk_.*\\.rds'))
+  
+  tPatches = tPatches %>% 
+    separate(name, into = c('prefix', 'from', 'tail'), sep = '_') %>%
+    separate(tail, into = c('to', 'ext'), sep = '\\.') %>%
+    filter(!is.na(to), !str_detect(to, '[:alpha:]')) %>%
+    mutate(name = paste0(prefix, '_', from, '_', to, '.', ext)) %>%
+    arrange(from, to)
+  
+  return(tPatches)
+}
+
+stitchPatches = function(tPatches, dataPath = 'data', reqColumns) {
+  for (i in 1:nrow(tPatches)) {
+    # patch = read_rds(file.path(dataPath, tPatches[i,]$name))
+    # for (c in 1:length(reqColumns)) {
+    #   if (!reqColumns[c] %in% colnames(patch)) { patch = patch %>% add_column(!!reqColumns[c] := is.rsi(NA)) }
+    # }
+    cat(paste0('  patch ', i, ': ', tPatches$from[i], ' ', tPatches$to[i], '\n'))
+    patch = readPatch(filename = file.path(dataPath, tPatches$name[i]), 
+                      reqColumns = reqColumns)
+    # print(colnames(patch))
+    if (i == 1) {
+      tStitched = patch
+    } else {
+      tStitched = tStitched %>% add_row(patch)
+    }
+  }
+  tStitched = tStitched %>% arrange(date, sampleid)
+  
+  return(tStitched)
+}
+
+appendDailyData = function(fnTotal, fnTotalSofar, fnDay, reqColumns) {
+  assert_that(file.exists(fnTotalSofar))#, 
+              # msg = paste0("File ", fnTotalSofar, " doesn't exist"))
+  assert_that(file.exists(fnDay))
+  
+  tTotalSofar = readPatch(fnTotalSofar, reqColumns = reqColumns)
+  tDay        = readPatch(fnDay, reqColumns = reqColumns)
+  
+  tTotal = tTotalSofar %>% add_row(tDay)
+  
+  write_rds(x = tTotal, file = fnTotal, compress = 'gz')
+  
+  return(tTotal)
+}
+
+manualStitch_2022_09_30 = function() {
+  a = 
+    mutate(RAP = as.character('#')) %>% # correct for accidental inclusion in as.rsi conversion
+    select(order(colnames(.))) %>%
+    relocate(c(sampleid, date, is_clinical, is_icu, is_outward, IsolNr,
+               Materiaal, specimen_type, specimen_group, uuid, patientid,
+               old_patientid, birth_date, old_birth_date, department,
+               type_dept, specialism, age, gender, MIC, RAP, mo), .before = 1)
+  # a = a %>% mutate(across(AMC:VOR, as.rsi)) was already done before
+  
+  b = readPatch('bk_2022-06-17_2022-09-29.rds', reqColumns) %>%
+    select(order(colnames(.))) %>%
+    relocate(c(sampleid, date, is_clinical, is_icu, is_outward, IsolNr,
+               Materiaal, specimen_type, specimen_group, patientid,
+               birth_date, department,
+               type_dept, specialism, age, gender, MIC, RAP, mo), .before = 1)
+  b = b %>% mutate(across(AMC:VOR, as.rsi))
+  
+  tTotal = a %>% 
+    add_row(b) %>%
+    mutate(MIC = as.mic(MIC),
+           mo = as.mo(mo))
+  # Warning: Problem with `mutate()` column `mo`.
+  # ℹ `mo = as.mo(mo)`.
+  # ℹ 
+  # in `as.mo()`: nine unique values (covering 0.0%) could not be coerced and were considered 'unknown': "}hemstre", "}Primair",
+  # "acgsch", "anrhyd", "bclmeg", "BK_Isolaat_AEF", "corjimi", "strsalg" and "vvStrpne".
+  # Use `mo_failures()` to review them. Edit the `allow_uncertain` argument if needed (see ?as.mo).
+  # You can also use your own reference data with set_mo_source() or directly, e.g.:
+  #   as.mo("mycode", reference_df = data.frame(own = "mycode", mo = "B_ESCHR_COLI"))
+  # mo_name("mycode", reference_df = data.frame(own = "mycode", mo = "B_ESCHR_COLI"))
+  # Warning message:
+  #   Problem with `mutate()` column `MIC`.
+  # ℹ `MIC = as.mic(MIC)`.
+  # ℹ in `as.mic()`: 3 results truncated (0%) that were invalid MICs: "?", "13" and "17090355431" 
+  
+  write_rds(x = tTotal, file = 'data/total_2022-09-29.rds', compress = 'gz')
+}
+
+lastLargePatchesStitching = function() manualStitch_2022_09_30()
+
+# > mo_uncertainties()
+# Matching scores (in blue) are based on pathogenicity in humans and the resemblance between the input and the full taxonomic name.
+# See `?mo_matching_score`.
+# 
+# "brelut" -> Brevundimonas lutea (B_BRVND_LUTE, 0.316)
+# Also matched: Brevibacterium luteolum (0.304)
+# 
+# "vvEncspp" -> Brevibacterium ravenspurgense (B_BCTRM_RVNS, 0.284)
+
+# read 1
+if (F) {
+  tPatches = readAllPatches()
+  tStitched = stitchPatches(tPatches)
+  tStitched %>% write_rds(file = 'data/bk_archive_2014-08_2022-06_v2022-09-15.rds',
+                          compress = 'gz')
+
+    x = read_rds('data/bk_archive_2014-08_2022-06_v2022-09-15.rds')
+  x = x %>% relocate(all_of(cns), .after = 'date') %>% 
+    relocate(specimen_group, .after = specimen_type) %>%
+    relocate(MIC, .after = mo)
+  y = x %>% mutate(across(AMB:DAP, as.rsi))
+  z = y %>% mutate(mo = as.mo(mo))
+  mo_failures = c('}AeNeSt', '}hemstre', '}nietfe', '}Primair', 'acgsan', 'acgsch', 'bclmeg', 'bctfae', 'BK_Isolaat_AEF', 'BK_Isolaat_ANF', 'BK_Isolaat_PEF', 'chcgle', 'corjimi', 'enbbug', 'encfcmva', 'Meng', 'psspsy', 'staaurar', 'stainp', 'strsalg', 'surwad', 'vvAeNeSt', 'vvAePoSt', 'vvMRSA')
+  
+  write_rds(z, 'data/bk_archive_2014-08_2022-06_v2022-09-15_3.rds', 
+            compress = 'gz')
+  write_rds(z %>% filter(date >= '2021-07-01'), 'data/bk_archive_2021-07_2022-06_v2022-09-23_3.rds', 
+            compress = 'gz')
+}
+
--- a/ui.R
+++ b/ui.R
@@ -1,4 +1,12 @@

+jsc <- '
+$(document).ready(function () {
+  $("#allInput").parent().parent().parent().parent().css({ "width": "600px" });
+  $("#specialtyInput").css({ "width": "590px" });
+  $("#materialInput").parent().parent().css({ "width": "200px" });
+  $("#materialInput").css({ "width": "190px" });
+});
+'

 ui <- dashboardPage(
  title = "RadaR2",
@@ -7,7 +15,7 @@ ui <- dashboardPage(

  dashboardHeader(
    title = span(img(src = "radar.png", height = 30), strong("RadaR2")),
-    titleWidth = 500,
+    titleWidth = 358,

    tags$li(
      a(
@@ -23,29 +31,62 @@ ui <- dashboardPage(

  # SIDEBAR -----------------------------------------------------------------

-  dashboardSidebar(disable = TRUE,
-                   sidebarMenu(id = "sidebar_id",
-                               menuItem(icon = icon("check-square"),
-                                        materialSwitch(
-                                          inputId = "allInput",
-                                          label = trnslt("All specialties selected"),
-                                          value = TRUE, 
-                                          status = "danger")),
-                     menuItem(icon = icon("user-nurse"), 
-                              checkboxGroupInput(inputId = "specialtyInput", 
-                                                 label = trnslt("Select specialties"),
-                                                 choices = sort(unique(radar_data$specialty_shiny)))),
-                     menuItem(icon = icon("hospital"),
-                              checkboxGroupInput(inputId = "departmentInput", 
-                                                 label = trnslt("Select departments"),
-                                                 choices = sort(unique(radar_data$department))))
-                     )
-                   ),
+  dashboardSidebar(
+    disable = TRUE,
+    width = 400,
+    sidebarMenu(
+      id = "sidebar_id",
+      menuItem(
+        icon = icon("user-nurse"),
+        div(
+          materialSwitch(
+            inputId = "allInput",
+            label = trnslt("All specialties selected"),
+            value = TRUE,
+            status = "danger"
+          ),
+          checkboxGroupInput(
+            # tags$head(tags$style("width: 400px; max-width: 400px;")),
+            inputId = "specialtyInput",
+            label = trnslt("Select specialties"),
+            choices = sort(unique(radar_data$specialty_shiny)),
+          )
+        )
+      ),
+      menuItem(
+        icon = icon("hospital"),
+        div(
+          materialSwitch(
+            inputId = "allDepartments",
+            label = trnslt("All deparmtents selected"),
+            value = TRUE,
+            status = "danger"
+          ),
+          checkboxGroupInput(
+            inputId = "departmentInput",
+            label = trnslt("Select departments"),
+            choices = sort(unique(radar_data$department))
+          )
+        )
+      ),
+      menuItem(
+        icon = icon("flask"),
+        div(
+          checkboxGroupInput(
+            inputId = "materialInput",
+            label = trnslt("Select material"),
+            choices = sort(unique(radar_data$Materiaal))
+          )
+        )
+      )
+    )
+  ),

  
  # BODY --------------------------------------------------------------------
  
  dashboardBody(
+    tags$head(tags$script(jsc)),
    
    # fluidRow(
    #   column = 12,
@@ -59,24 +100,62 @@ ui <- dashboardPage(
      ".nav-tabs {background-color: white;}
      .nav-tabs-custom .nav-tabs li.active:hover a, .nav-tabs-custom .nav-tabs li.active a {background-color: transparent; border-color: transparent;}
      .nav-tabs-custom .nav-tabs li.active {border-top-color: black;}"),
-      div(
-        id = "resistance", 
-        column(
-          width = 6,
-          uiOutput("box2")
-
+      tabBox(
+        id = 'RadaRtabs',
+        width = '100%',
+        # column(width = 6, 'Instructie'),
+        # column(width = 6, 'Disclaimer, blabla blab la bla lbal bla lbal blablal', style="height: 300px;")
+        tabPanel(
+          'Start', 
+          div(
+            h3('Disclaimer'), 
+            paste('blabla bla bla, legal legal legal.',
+                  'Ok, someone needs to write some wise stuff here.',
+                  "Like only use for surveillance, not intended for ",
+                  "individual patients' clinical support, etc. etc. ",
+                  "Yadda yadda yadda"),
+            hr()
+            ),
+          fluidRow(
+            column(2, checkboxInput('cbDisclaimerRead', "I understand"), 
+                   style = 'margin-left: 10px;'), 
+            column(2, actionButton('btnDisclaimerUnderstood', 
+                                 label = 'confirm', 
+                                 icon = icon("legal", class = "opt")))
+          ),
+          fluidRow(
+            hr()
+          ),
+          div(
+            h3('Instruction'), 
+            tags$ul(tags$li('do this'), 
+                    tags$li('do that'), 
+                    tags$li('do such'), 
+                    tags$li('do so'))
+          ),
        ),
-        column(
-          width = 6,
-          uiOutput("box1")
-        ),
-        column(
-          width = 6,
-          uiOutput("box3")
-        ),
-        column(
-          width = 6,
-          uiOutput("box4")
+        tabPanel(
+          'Data', 
+          style = "width: 100%;",
+          div(
+            id = "resistance", 
+            column(
+              width = 6,
+              uiOutput("box2")
+            ),
+            column(
+              width = 6,
+              uiOutput("box1")
+            ),
+            column(
+              width = 6,
+              uiOutput("box3")
+            ),
+            column(
+              width = 6,
+              uiOutput("box4")
+            )
+          )
        )
      )
    )
--- a/updateGlimsData.R
+++ b/updateGlimsData.R
@@ -0,0 +1,82 @@
+require(readr)
+require(lubridate)
+
+cat(paste0('\14\n', now(), '\n'))
+
+oldDir = getwd()
+basePath = '/share/mmbi/radar2'
+setwd(basePath)
+
+source('interfaces.R')
+source('stitchArchive.R')
+source('4D_archive.R')
+
+
+dataPath = 'data'
+archivePath = 'data/archive'
+
+tFiles = read_rds(file.path(dataPath, 'tFilesInfo.rds'))
+tFileOvw = read_rds(file.path(dataPath, 'tFileOvw.rds'))
+
+if (today() >= '2022-09-30') {
+  require(assertthat, quietly = T)
+  
+  # get the last date that is present in the data sofar
+  maxDate = tFiles %>% pull(fileDate) %>% max(na.rm = T)
+  # catch up with today
+  nDaysToUpdate = difftime(today(), maxDate) %>% as.numeric()
+  daysToUpdate = (maxDate + 1:nDaysToUpdate)
+  for (d in 1:nDaysToUpdate) {
+    currDate = daysToUpdate[d]
+    # aDate is converted to Excel style numeric format with origin '1970-01-01'
+    # currDate = as.Date(aDate, origin = lubridate::origin)
+    tFiles = tFiles %>% 
+      filter(!is.na(fileDate)) %>% 
+      addFileInfoOfDay(folders = folders, 
+                       ymdDate = currDate)
+    tInfoDay = tFiles %>% filter(fileDate == currDate)
+    a = assert_that(nrow(tInfoDay) > 0, 
+                    msg = paste0('tInfoDay has 0 rows: no files present for ', 
+                                 currDate, '?'))
+    
+  }
+  
+  tInfoDays = tFiles %>% filter(fileDate %in% daysToUpdate)
+  tFileOvwToday = deriveFileOverview(tInfoDays)
+  a = assert_that(nrow(tFileOvwToday) > 0, 
+                  msg = 'derivederiveFileOverview() failed')
+  
+  tFileOvw = tFileOvw %>% ungroup() %>% 
+    add_row(tFileOvwToday) %>% 
+    filter(!is.na(fileDate))
+  
+  write_rds(tFiles, file.path(dataPath, 'tFilesInfo.rds'))
+  write_rds(tFileOvw, file.path(dataPath, 'tFileOvw.rds'))
+  
+  # write GLIMMS data per date in 'daysToUpdate' and add it to the data sofar
+  for (d in 1:nDaysToUpdate) {
+    currDate = daysToUpdate[d]
+    tGlimmsOfDay = readArchiveOfDay(tFiles, tFileOvw, ymdDate = currDate)
+    a = assert_that(nrow(tGlimmsOfDay) > 0, 
+                    msg = paste0('readArchiveDay() yielded empty result for ', 
+                                 currDate))
+    filename = paste0(dataPath,'/bk_', currDate, '_', currDate, '.rds')
+    write_rds(tGlimmsOfDay, filename, compress = 'gz')
+    
+    filenameTotalSofar = paste0(dataPath, '/total_', currDate - 1, '.rds')
+    filenameTotal      = paste0(dataPath, '/total_', currDate, '.rds')
+    filenameOfDay      = filename
+    
+    tTotal = appendDailyData(fnTotal      = filenameTotal, 
+                             fnTotalSofar = filenameTotalSofar, 
+                             fnDay        = filenameOfDay, 
+                             reqColumns   = abAbbrevs)
+    # nNewRows = tTotal %>% filter(date == today()) %>% nrow()
+    
+    # write_rds(tTotal, file = filenameTotal, compress = 'gz')
+  }
+}
+
+
+
+setwd(oldDir)
--- a/validatie.R
+++ b/validatie.R
@@ -0,0 +1,95 @@
+source('global.R')
+
+if (F)  {
+  
+# CoNS
+  radar_data_first %>% 
+    mutate(f = mo_name(mo)) %>% 
+    filter(f == 'Coagulase-negative Staphylococcus (CoNS)') %>% 
+    pull(MO) %>% 
+    unique() %>% 
+    mo_name()
+  # versus
+  radar_data %>% 
+    mutate(f = mo_name(mo)) %>% 
+    filter(f == 'Coagulase-negative Staphylococcus (CoNS)') %>% 
+    pull(MO) %>% 
+    unique() %>% 
+    mo_name()
+  # conclusie: Becker = TRUE maakt het verschil
+
+  
+#  Fluclox onder SA
+radar_data_first %>% 
+  select(sampleid, patientid, Isolnr, date, MO, mo, FLC) %>% 
+  filter(!is.na(FLC), MO == 'staaur') %>% count(patientid, Isolnr, FLC)
+  # conclusie: 212 samples, 142 patienten
+  
+radar_data_first %>% 
+  select(sampleid, patientid, Isolnr, date, MO, mo) %>% 
+  filter(MO == 'staaur') %>% 
+  count(patientid)
+# 93 samples, 91 patienten
+
+countSamplesPatients = function(moName4d) {
+  nSamples = radar_data_first %>% 
+    select(sampleid, patientid, Isolnr, date, MO, mo) %>% 
+    filter(MO == moName4d) %>% 
+    count(sampleid) %>% 
+    nrow()
+  nPatients = radar_data_first %>% 
+    select(sampleid, patientid, Isolnr, date, MO, mo) %>% 
+    filter(MO == moName4d) %>% 
+    count(patientid) %>% 
+    nrow()
+  return(tribble(~mo4d, ~nSamples, ~nPatients,
+                 moName4d, nSamples, nPatients))
+}
+
+t = countSamplesPatients('staaur') %>%
+  add_row(countSamplesPatients('esccol')) %>%
+  add_row(countSamplesPatients('klepne')) %>%
+  add_row(countSamplesPatients('encfcl')) %>%
+  add_row(countSamplesPatients('enbcco')) %>%
+  add_row(countSamplesPatients('strmgr')) %>%
+  add_row(countSamplesPatients('pseaer')) %>%
+  mutate(mo_name = mo_name(mo4d)) %>%
+  relocate(mo_name, .after = mo4d)
+}
+
+
+if (F) {
+  
+allTrue = function(x) { identical(x, rep(T, length(x))) }
+
+allVal = function(x, val) { identical(x, rep(val, length(x))) }
+
+whereVal = function(x, val) { which(x == val) }
+
+whereAll = function(x, val) {
+  rx = which(x %>% map_lgl(~ allVal(., val))) %>% as.numeric() 
+  ry = which(z %>% 
+               rowwise() %>% 
+               mutate(skdsjui3irskjdf = allVal(c_across(), val)) %>% 
+               pull(skdsjui3irskjdf) 
+  ) %>% as.numeric()
+  return(list(cols = colnames(x)[rx], x = rx, y = ry))
+}
+
+whereAny = function(x, val) {
+  rx = which(x %>% map_lgl(~any(. == val))) %>% as.numeric()
+  ry = which(x %>%
+               rowwise() %>%
+               mutate(skdsjui3irskjdf = any(c_across() == val)) %>% 
+               pull(skdsjui3irskjdf)) %>% as.numeric()
+  return(list(cols = colnames(x)[rx], x = rx, y = ry))
+}
+
+whereR = radar_data %>% 
+  filter(MO == 'mrsa') %>% 
+  select(is.rsi) %>% 
+  mutate(across(everything(), as.character)) %>% 
+  whereAny('R')
+
+
+}