Commit validated bk, 118922 Feb 7

2023-02-07 16:49:16 +01:00
parent 2271c24b19
commit 32ca21ab60
21 changed files with 1946 additions and 77 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -1,7 +1 @@
-.Rproj.user
+copyFromNetwork.sh
 .Rhistory
 .RData
 .Ruserdata
 *.Rproj
 .DS_Store
 bk_2019.rds
--- a/anonymize.R
+++ b/anonymize.R
@@ -0,0 +1,56 @@
 require(tidyverse, quietly = T)
 require(lubridate, quietly = T)
 require(readr)
 require(uuid, quietly = T)
 pinWidth = 7
 intTopin = function(x) {
  return(formatC(x, width = 7, format = "d", flag = "0"))
 }
 # tPinMapping = tibble(origpin = intTopin(0), 
 #                     uupin = '78c3fe82-9343-4aa8-8b06-afc6da79a39a')
 source('config.R', echo = F)
 tPinMapping = read_rds(pinMappingDictFilename)
 getUupin = function(pin) {
  if (length(pin) == 1) {
    if (class(pin) == 'character') {
      if (pin %in% tPinMapping$origpin) {
        return((tPinMapping %>% filter(origpin == pin) %>% pull(uupin)))
      } else {
        if (str_length(pin) == pinWidth) {
          newUupin = UUIDgenerate()
          tPinMapping <<- tPinMapping %>% 
            add_row(origpin = pin, uupin = newUupin)
          tPinMapping %>% write_rds('data/pinMapping.rds', compress = 'gz')
          return(newUupin)
        } else  {
          stop(paste0('pin width <> ', pinWidth))
        }
      }
    } else {
      stop(paste0('class of pin should be character, but is ', class(pin)))
    }
  } else {
    stop('Please supply exactly 1 pin')
  }
 }
 getOrigpin = function(uuId) {
  tPinMapping %>% filter(uupin == uuId) %>% pull(origpin)
 }
 anonymizeRds = function(x, ymdStartDate, ymdEndDate, suffix) {
  y = x %>%
    rowwise() %>%
    mutate(patientid = getUupin(patientid)) %>%
    ungroup() %>%
    mutate(birth_date = ymd('1970-01-01'))
  filename = paste0('data/bk_', ymdStartDate, '_', ymdEndDate, '_', suffix, '.rds')
  if (!file.exists(filename)) {
    write_rds(y, file = filename)
  }
 }
--- a/config.R
+++ b/config.R
@@ -0,0 +1,6 @@
 pinMappingDictFilename = '/share/mmbi/radar2/data/pinMapping.rds'
 # anonymized data for deployment
 radarDataFilename = 'data/radarData.rds' # '/share/mmbi/radar2/interfaces/4D_final_results/data/radarData.rds'
--- a/copyGlimsData.R
+++ b/copyGlimsData.R
@@ -0,0 +1,53 @@
 require(lubridate)
 require(tidyverse)
 fromBasePath = '/share/transport_4d'
 toBasePath = '/share/mmbi/radar2/data/archive'
 currDay = today()
 getTraverseDirectory = function(aDate) {
  dirPattern = format(aDate, format = '%Y-%m')
  dirsGlimsExport = list.dirs(path = fromBasePath, full.names = F)
  return(dirsGlimsExport[which(str_detect(dirsGlimsExport, dirPattern))])
 }
 getFilesToCopy = function(aDate, aDir) {
  filesGlimsExport = list.files(aDir)
  filePattern = format(aDate, format = '%y%m%d')
  return(filesGlimsExport[which(str_detect(filesGlimsExport, filePattern))])
 }
 copyFiles = function(aDate, overwrite = F, verbose = T) {
  traverseDir = getTraverseDirectory(aDate)
  fromDir = file.path(fromBasePath, traverseDir)
  toDir   = file.path(toBasePath, traverseDir)
  filesToCopy = getFilesToCopy(aDate, fromDir)
  if (verbose)
    cat(paste0('\nCopying from ', fromDir, ' => ', toDir, ' ..\n'))
  for (f in filesToCopy) {
    copySuccess = file.copy(from = file.path(fromDir, f), 
                            to = file.path(toDir, f),
                            overwrite = overwrite,
                            copy.mode = T, 
                            copy.date = T)
    cat(paste0(f, '\11', copySuccess, '\n'))
  }
 }
 copyFilesDateRange = function(nDays) {
  ds = today() - nDays
  d8s = ds + 1:nDays
  for (d in 1:length(d8s)) {
    d8 = d8s[d]
    cat(paste0('copyFiles(', d8, ')\n'))
    copyFiles(d8, verbose = F)
  }
 }
 copyFilesDateRange(nDays = 3)
--- a/createPatches.R
+++ b/createPatches.R
@@ -0,0 +1,22 @@
 require(tidyverse)
 require(lubridate)
 source('globalData.R')
 source('interfaces.R') # readGlimsArchive()
 createPatches = function(tPeriods, dataPath, tFiles, tFileOvw) {
  for (m in 1:nrow(tPeriods)) {
    patch = readGlimsArchive(
      ymdStartDate = tPeriods$startDate[m],
      ymdEndDate = tPeriods$endDate[m],
      readPreprocessed = F,
      tFiles = tFiles,
      tFileOvw = tFileOvw
    )
    filename = paste0(dataPath,'/bk_',
                      tPeriods$startDate[m], '_', 
                      tPeriods$endDate[m], '.rds')
    write_rds(patch, filename, compress = 'gz')
  }
 }
--- a/executeRead.R
+++ b/executeRead.R
@@ -0,0 +1,68 @@
 source('interfaces.R')
 source('createPatches.R')
 source('stitchArchive.R')
 dataPath = 'data'
 inspectRead = 10
 inspectReason = 'test selection of max IsolNr (only). mo and RIS can be NA'
 reads = read_rds(file.path(dataPath, 'reads.rds'))
 reads = reads %>% add_row(read = inspectRead, desription = inspectReason)
 write_rds(reads, file.path(dataPath, 'reads.rds'))
 tFiles = read_rds(file.path(dataPath, 'tFilesInfo.rds'))
 tFileOvw = read_rds(file.path(dataPath, 'tFileOvw.rds'))
 updateFileOvw = F
 if (updateFileOvw) {
  # on 2022-10-20 tFiles was up-to-date, due to the daily update.
  tFiles = read_rds(file.path(dataPath, 'tFilesInfo.rds'))
  # However, tFileOvw somehow wasn't
  # get the last date that is present in the data sofar
  maxDate = tFileOvw %>% pull(fileDate) %>% max(na.rm = T)
  # catch up with today
  nDaysToUpdate = difftime(today(), maxDate) %>% as.numeric()
  daysToUpdate = (maxDate + 1:nDaysToUpdate)
  tInfoDays = tFiles %>% filter(fileDate %in% daysToUpdate)
  tFileOvwToday = deriveFileOverview(tInfoDays)
  a = assert_that(nrow(tFileOvwToday) > 0, 
                  msg = 'derivederiveFileOverview() failed')
  tFileOvw = tFileOvw %>% ungroup() %>% 
    add_row(tFileOvwToday) %>% 
    filter(!is.na(fileDate))
  # write_rds(tFiles, file.path(dataPath, 'tFilesInfo.rds')) Was up-to-date on 2022-10-20
  write_rds(tFileOvw, file.path(dataPath, 'tFileOvw.rds'))
 }
 # define the patch periods
 tPeriods = tribble(
  ~startDate,        ~endDate,
  ymd('2022-01-01'), ymd('2022-02-01') - 1,
  ymd('2022-02-01'), ymd('2022-03-01') - 1
  # ymd('2022-03-01'), ymd('2022-04-01') - 1,
  # ymd('2022-04-01'), ymd('2022-05-01') - 1,
  # ymd('2022-05-01'), ymd('2022-06-01') - 1,
  # ymd('2022-06-01'), ymd('2022-07-01') - 1,
  # ymd('2022-07-01'), ymd('2022-08-01') - 1,
  # ymd('2022-08-01'), ymd('2022-09-01') - 1,
  # ymd('2022-09-01'), ymd('2022-10-01') - 1,
  # ymd('2022-10-01'), ymd('2022-10-20') - 1
  # ymd('2022-09-01'), ymd('2022-10-20')
 )
 createPatches(tPeriods, dataPath, tFiles, tFileOvw)
 # stitch together patches created by createPatches.R
 # readAllPatches() assumes that all bk_*.rds files are in the 'data' path and 
 # that they all need to be read for stitching
 tPatches = readAllPatches()# %>% filter(from >= '2022-01-01', to <= (ymd('2022-05-01') - 1))
 minFrom = min(tPatches$from, na.rm = T)
 maxTo   = max(tPatches$to, na.rm = T)
 cat(paste0('Stitching patches: ', minFrom, ' - ', maxTo, '\n'))
 tStitched = stitchPatches(tPatches, dataPath = 'data', reqColumns = abAbbrevs)
 tStitched %>% write_rds(file = paste0('data/read_', inspectRead, '_', minFrom, '_', 
                                      maxTo, '_x', today(), '.rds'),
                        compress = 'gz')
--- a/global.R
+++ b/global.R
@@ -18,7 +18,7 @@ library(cleaner)
 library(scales)
 library(ggiraph)
-specialties <- read_csv("specialties.csv", lazy = FALSE)
+source('interfaces.R')
 trsns <- read_tsv("translations.txt", lazy = FALSE)
 # to do: make system settings dependent
@@ -30,6 +30,25 @@ trnslt <- function(english, new_lang = "nl", file = trsns) {
  }
 }
 pathogens <- read_csv("data/grouping microorganisms.csv")
 # created by calling createAbMapping()
 tAntibiotics = readxl::read_xlsx('data/ab_mapping.xlsx') %>% 
  select(Mnemonic, Naam, `EARS-Net.Mnemonic`, `EARS-Net.Name`)
 # radar_data <- readGlimsArchive(ymdStartDate = ymdStartDate,
 #                                ymdEndDate = ymdEndDate, 
 #                                readPreprocessed = T)
 radar_data <- readGlimsFinalResults()
 if (!'CRO' %in% colnames(radar_data))
  radar_data <- radar_data %>% add_column('CRO' = '')
 if (!'OXA' %in% colnames(radar_data))
  radar_data <- radar_data %>% add_column('OXA' = '')
 specialties <- read_csv("data/specialties.csv", lazy = FALSE)
 specialties <- specialties %>%
  mutate(specialty_shiny = if_else(specialty != "Outpatients", paste0(specialty, " (", str_replace_all(specialism, ";", ", "), ")"), "Outpatients"),
         department = if_else(is.na(department), "NO DEPARTMENT GIVEN", department))
@@ -38,9 +57,6 @@ specialties <- separate_rows(specialties, specialism, sep = ";")
 specialties <- separate_rows(specialties, department, sep = ";")
 pathogens <- read_csv("grouping microorganisms.csv")
 radar_data <- readRDS("bk_2019.rds")
 radar_data <- radar_data %>% 
  left_join(specialties) %>% 
@@ -50,10 +66,10 @@ radar_data <- radar_data %>%
         FLC = OXA) %>% 
  left_join(pathogens) 
-# lab_ab <- c("CFCC", "CFCL", "CFOC", "CFSC", "CFTC", "COCL", "GEHI", "QUDA")
+lab_ab <- c("CFCC", "CFCL", "CFOC", "CFSC", "CFTC", "COCL", "GEHI", "QUDA")
 radar_data <- radar_data %>%
-  # select_at(vars(!contains(lab_ab))) %>% 
+  select_at(vars(!contains(lab_ab))) %>%
  rename_if(is.rsi, function(x) ifelse(is.na(suppressWarnings(as.ab(x))),
                                       x,
                                       suppressWarnings(as.ab(x)))) %>% 
--- a/globalData.R
+++ b/globalData.R
@@ -0,0 +1,12 @@
 abAbbrevs = c('AMB', 'AMC', 'AMK', 'AMP', 'AMX', 'ANI', 'ATM', 'AZA', 'CAS', 'CAZ', 'CCV', 
              'CHL', 'CIP', 'CIX', 'CLF', 'CLI', 'COL', 'CPC', 'CRO', 'CTC', 'CTX', 'CXM', 
              'CZA', 'CZO', 'CZT', 'DAP', 'DOX', 'ERY', 'FCT', 'FEP', 'FLC', 'FLU', 
              'FOS', 'FOX', 'FOX1', 'FUS', 'GEN', 'GRI', 'IPM', 'ITR', 'KAN', 'LNZ', 
              'LVX', 'MCZ', 'MEM', 'MFX', 'MIF', 'MNO', 'MTR', 'MUP', 'NAL', 'NIT', 
              'NOR', 'PEN', 'PIP', 'PLB', 'POS', 'QDA', 'RIF', 'SAM', 'STR1', 
              'SXT', 'TEC', 'TEM', 'TGC', 'TMP', 'TOB', 'TRB', 'TZP', 'VAN', 'VOR')
 patchFirstColumns = c('sampleid', 'date', 'is_clinical', 'is_icu', 'is_outward', 
                      'IsolNr', 'Materiaal', 'specimen_type', 'specimen_group', 
                      'patientid', 'birth_date', 'department', 'type_dept', 
                      'specialism', 'age', 'gender', 'MIC', 'RAP', 'mo')
--- a/interface_4D.R
+++ b/interface_4D.R
@@ -0,0 +1,206 @@
 # initially only interfacing 4D data
 require(assertthat, quietly = T)
 source('4D_archive.R')
 source('anonymize.R')
 source('globalData.R')
 readGlimsArchive = function(ymdStartDate, ymdEndDate, 
                            readPreprocessed = T, 
                            tFiles = NULL, tFileOvw = NULL) {
  if (readPreprocessed) {
    # return(read_rds('data/bk_archive_2014-08_2022-06_v2022-09-15_3.rds'))
    # return(read_rds('data/total_2022-09-30.rds'))
    return(read_rds('data/bk_2019.rds'))
    # return(read_rds('data/bk_2022.09.01_2022.10.13.rds'))
    # return(read_rds('data/archive_2022-01-01_2022-10-19_v2022-10-20_1530.rds')) # read_2_2022-10-20_1530
    # return(read_rds('data/read_10_2022-01-01_2022-10-25_x2022-10-25.rds'))
  } else {
    assert_that(!is.null(tFiles), 
                msg = 'readGlimsArchive(.., readPreprocessed = F, ..)')
    tArchive = readArchive(
      tFileInfo = tFiles %>% 
        filter(
          fileDate >= ymdStartDate,
          fileDate <= ymdEndDate
        ),
      tFileOvw = tFileOvw,
      createDebugList = F # save memory 
    )
    bk = NULL
    if (!is.null(tArchive)) {
      radarData <- tArchive %>%
        # work towards RadaR nomenclature
        rename(sampleid = Monsternummer,
               patientid = PIN.Lot,
               department = ligafdeling,
               specialism = B,
               mo = MO) %>% 
        # some processing towards RadaR format
        mutate(
          gender = Geslacht,
          # case_when(Geslacht == 'M' ~ 'Male',
          #                    Geslacht == 'V' ~ 'Female') %>% as.factor(),
          type_dept = specialism,
          birth_date = dmy(Geboortedatum),
          age = trunc((birth_date %--% date) / years(1)),
          Materiaal = replace_na(Materiaal, ''),
          Materiaal = str_trim(Materiaal),
          # Rapport = replace_na(Rapport, ''),
          # Rapport = str_trim(Rapport),
          specimen_group = 'bk',
          specimen_type = '',
          is_icu = is_icu(department),
          is_clinical = is_clinical(department),
          is_outward = is_outward(department)
        ) %>%
        # get AMR package (WHONET, EARS-Net std) mnemonics and names
        left_join(tAntibiotics, by = c('AB' = 'Mnemonic')) %>%
        rename(ab.amr = `EARS-Net.Mnemonic`) %>%
        # get the fields used in RadaR only
        select(
          sampleid, date, IsolNr, Materiaal,
          specimen_group, specimen_type, 
          department, type_dept, specialism,
          is_icu, is_clinical, is_outward,
          birth_date, age, gender, patientid, mo, RIS, ab.amr, MIC, RAP
        ) 
      # return(radarData)
      # these sub steps are for debugging purposes
      z <- radarData %>% 
        mutate(IsolNr = as.numeric(IsolNr)) %>% 
        arrange(sampleid, department) %>%
        group_by(sampleid) %>% 
        # fill repeating data
        fill(patientid, age, gender, birth_date, department, type_dept, specialism) %>%
        filter(#!is.na(mo), 
               #!is.na(RIS), 
               IsolNr == max(IsolNr, na.rm = T)) %>% 
        ungroup() %>%
        distinct(sampleid, date, IsolNr, mo, ab.amr, .keep_all = T)
        # # get distinctive rows, arrange not strictly necessary
        # # arrange(sampleid, date, department, type_dept, specialism, 
        # #         birth_date, age, gender, patientid, mo, ab.amr, RIS) %>% 
        # # commented 2022-10-14 by GB in order to get the slicing done correctly
        # # group_by(date, sampleid, IsolNr, department, type_dept, specialism, 
        # #           birth_date, age, gender, patientid, mo, ab.amr) %>%
        # group_by(date, sampleid, IsolNr, mo, ab.amr) %>% 
        # slice(n()) %>% 
        # ungroup() %>%
        # distinct(date, sampleid, IsolNr, department, type_dept, specialism, 
        #          birth_date, age, gender, patientid, mo, ab.amr, .keep_all = T)  %>%
        # # filter only reported rows, with a micro-organism and RIS value
        # filter(
        #   !is.na(RAP)
        #   # !is.na(mo),
        #   # !is.na(RIS)
        # )
      return(z)
      # pivot to wider format
      bk <- z %>% 
        filter(!is.na(RIS)) %>%
        mutate(
          RIS = as.rsi(RIS),
          MIC = as.mic(MIC),
          mo = as.mo(mo)) %>%
        pivot_wider(names_from = ab.amr, values_from = RIS) %>%
        select(order(colnames(.))) %>%
        relocate(all_of(patchFirstColumns), .before = 1) 
      # bk = bk %>%
      #   mutate(
      #     across( 
      #       (which(colnames(.) == 'mo') + 1) : length(colnames(.)), 
      #       as.rsi) 
      #   )
    }
    return(bk)
  }
 }
 readArchiveOfDay = function(tFiles, tFileOvw, ymdDate) {
  readGlimsArchive(ymdStartDate = ymdDate,
                   ymdEndDate   = ymdDate, 
                   readPreprocessed = F, 
                   tFiles = tFiles, 
                   tFileOvw = tFileOvw)
 }
 readArchiveToday = function(tFiles, tFileOvw) {
  readGlimsArchive(ymdStartDate = today(), 
                   ymdEndDate   = today(), 
                   readPreprocessed = F, 
                   tFiles = tFiles, 
                   tFileOvw = tFileOvw)
 }
 # anonymize the dataset
 if (F) {
  nopd = readArchiveToday()
  # nopd = nopd %>%
  # filter(str_length(patientid) == 7) %>%
  # rowwise() %>%
  # mutate(patientid = getUupin(patientid)) %>%
  # ungroup() %>%
  # mutate(birth_date = ymd('1970-01-01'))
  filename = paste0('data/bk_', ymdStartDate, '_', ymdEndDate, '.rds')
  if (!file.exists(filename)) {
    write_rds(nopd, file = filename)
  }
 }
 # stich together processed parts of the archive
 if (F) {
  tibble(name = list.files(path = 'data/', pattern = 'bk_.*_.*\\.rds'))
 }
 # testing the slicing
 if (F) {
  ymdStartDate = ymd('2022-09-01')
  # find interesting cases
  radarData %>%
    arrange(sampleid, department) %>% 
    group_by(sampleid) %>% 
    fill(patientid, age, gender, birth_date, department, type_dept, specialism) %>% 
    ungroup() %>% 
    group_by(date, sampleid, IsolNr, mo, ab.amr) %>% 
    # slice(n()) %>% 
    ungroup() %>%
    select(1:3, 13:ncol(.)) %>% 
    filter(ab.amr %in% c('CAZ', 'CTX')) %>% filter(!is.na(mo)) %>% count(date, sampleid, IsolNr, mo, ab.amr) %>% filter(n>1)
  # show without slicing last row
  radarData %>%
    arrange(sampleid, department) %>% 
    group_by(sampleid) %>% 
    fill(patientid, age, gender, birth_date, department, type_dept, specialism) %>% 
    ungroup() %>% 
    group_by(date, sampleid, IsolNr, mo, ab.amr) %>% 
    # slice(n()) %>% 
    select(1:3, 13:ncol(.)) %>% 
    filter(sampleid == '22360599331', ab.amr %in% c('CAZ', 'CTX'))
  # show with slicing last row
  radarData %>%
    arrange(sampleid, department) %>% 
    group_by(sampleid) %>% 
    fill(patientid, age, gender, birth_date, department, type_dept, specialism) %>% 
    ungroup() %>% 
    group_by(date, sampleid, IsolNr, mo, ab.amr) %>% 
    slice(n()) %>% 
    select(1:3, 13:ncol(.)) %>% 
    filter(sampleid == '22360599331', ab.amr %in% c('CAZ', 'CTX'))
 }
--- a/interface_4D_final_results.R
+++ b/interface_4D_final_results.R
@@ -0,0 +1,307 @@
 suppressMessages({
  require(tidyverse)
  require(lubridate)
  require(vroom)
  require(writexl)
  require(AMR)
 })
 minMemoryAsap = F
 # Rapport ab: 
 #   altijd wel hebben, 
 # # = niet gerapporteerd
 # 
 # Rap st: 
 #   # = niet gerapporteerd, niet opnemen als niet gerapporteerd
 # niet gerapporteerd kan fout zijn
 dataPath = 'interfaces/4D_final_results/data'
 # Antibiotica data
 fnAB = 'AB-2021-22.csv'
 # Microorganism data
 fnST = 'ST-Alles.csv'
 # Monster data
 fnMS = 'MSv2-2022-tm10.csv'
 # Isolaattesten data
 fnTS = 'TS-Alles-221130-092256.csv'
 # Tests data
 fnTM = 'TM-2022-tm10.csv'
 source('globalData.R')
 source('interfaces.R')
 # temporarily put here for dev
 tAntibiotics = readxl::read_xlsx('data/ab_mapping.xlsx') %>% 
  select(Mnemonic, Naam, `EARS-Net.Mnemonic`, `EARS-Net.Name`)
 tAB = vroom(file = file.path(dataPath, fnAB), show_col_types = F)
 tST = vroom(file = file.path(dataPath, fnST), show_col_types = F)
 tMS = vroom(file = file.path(dataPath, fnMS), skip = 5, show_col_types = F)
 tTS = vroom(file = file.path(dataPath, fnTS), show_col_types = F)
 tTM = vroom(file = file.path(dataPath, fnTM), show_col_types = F)
 tAB = tAB %>%
  filter(Monsternummer %>% str_sub(1, 2) == '22') %>%
  mutate(Monsternummer = as.character(Monsternummer))
 # gerapporteerde isolaten
 tST = tST %>%
  filter(
    Monsternummer %>% str_sub(1, 2) == '22',
    # '#' = niet gerapporteerd, deze niet includeren
    # in ST betekent dit: alleen NA wel includeren
    is.na(Rap) # = gerapporteert
  ) %>%
  select(-`UMCG 4D Uniekmaker Specimen - IsolNr`) %>%
  mutate(Monsternummer = as.character(Monsternummer))
 tMS = tMS %>%
  filter(
    Monsternr %>% str_sub(1, 2) == '22',
    ObjType != 'L'
    # Materiaal == 'MP_BLOED' # 118922, 2023-02-07
  ) %>%
  rename(Monsternummer = Monsternr,
         patientid = `PIN-lot`) %>%
  select(-`Materiaal Variabelen`, ObjType, Lot, Kamer) %>%
  mutate(date = dmy_hms(paste(OntvOLabDt, as.character(Tijd)))) %>%
  select(-OntvOLabDt, -Tijd) %>%
  mutate(Monsternummer = as.character(Monsternummer))
 tTS = tTS %>%
  mutate(Monsternummer = as.character(Monsternummer))
 # MS monsters die geen isolaat hebben zijn negatief
 # vandaar nu de right_join
 # Vervolgens moeten uit de MS file alleen de kweekbepalingen worden geincludeerd
 x = tMS %>% 
  mutate(Monsternummer = str_trim(Monsternummer)) %>%
  left_join(tST, by = c('Monsternummer')) %>%
  left_join(tAB, by = c('Monsternummer', 'Isolnr' = 'IsolNr')) %>%
  left_join(tTS, 
            by = c('Monsternummer', 'Isolnr' = 'Isolaatnr'),
            suffix = c('.MSxSTxAB', '.TS_IsolaatTest'))
 if (minMemoryAsap) {
  # save memory
  rm(tAB, tST, tMS)
 }
 x = x %>%
  # select(-`UMCG 4D Uniekmaker Specimen - IsolNr`) %>%
  mutate(year = str_sub(Monsternummer, 1, 2) %>% as.numeric(),
         weeknr = str_sub(Monsternummer, 3, 4) %>% as.numeric()) %>%
  # smaller subset in order to keep the first explorations workable
  filter(year == 22) 
         # weeknr >= 41) 
 s = x %>% 
  # filter(Materiaal == 'MP_BLOED') %>% # 118922, 2023-02-07
  # fields expected by RadaR:
  #
  # sampleid <chr>, date <date>, specimen_group <chr>, specimen_type <chr>, 
  # department <chr>, type_dept <fct>, specialism <chr>, 
  # is_icu <lgl>, is_clinical <lgl>, is_outward <lgl>, 
  # age <int>, gender <fct>, patientid <chr>, mo <mo>, 
  # 
  # align column names
  rename(
    sampleid = Monsternummer,
    # mo = MO, # later by as.mo(MO)
    rap.st = Rap,
    rap.ab = Rapport.MSxSTxAB,
    department = LigAfd,
    specialism = Spec,
    age = Leeftijd,
    gender = Geslacht
  ) %>%
  # add yet missing fields
  add_column(
    specimen_group = 'bk',
    specimen_type = '',
    type_dept = '' # needed?
  ) %>%
  mutate(
    is_icu = is_icu(department), 
    is_clinical = is_clinical(department),
    is_outward = is_outward(department),
  ) %>%
  relocate(rap.ab, .after = rap.st) %>%
  relocate(c(year, weeknr, date), .after = sampleid) %>%
  # get AMR package (WHONET, EARS-Net std) mnemonics and names
  left_join(tAntibiotics, by = c('AB' = 'Mnemonic')) %>%
  rename(
    AB_WHO = `EARS-Net.Mnemonic`
  ) %>%
  select(-AB)
 if (minMemoryAsap) {
  # save memory
  rm(x)
 }
 # TODO: find diff in multiple rows per sampleid that yield <list> io <rsi>
 if (F) {
  s %>% filter(sampleid == '22010009431') %>% 
    select(sampleid, AB_WHO, RIS, MIC, MO) %>% 
    mutate(
      RIS = as.rsi(RIS),
      MIC = as.mic(MIC),
      mo = as.mo(MO)) %>% 
    pivot_wider(names_from = AB_WHO, values_from = RIS)
  # diff: rap.ab, Etst, Naam, AB_WHO, `EARS-Net.Name`
 }
 x = s %>%
  select(-`EARS-Net.Name`, -Naam,
         # door de volgende waarden ontstaan dubbelingen in radarData, daarom
         # tijdelijk wegfilteren. maar gaat later wel nodig zijn
         -Rapport.TS_IsolaatTest, -Waarde.TS_IsolaatTest, -isolaattest,
         -MIC, -Etst, -Diff, -rap.ab) %>%
  # RIS values of 'V' are a pain in the neck
  mutate(RIS = replace(RIS, RIS == 'V', NA)) %>%
  group_by(sampleid, Isolnr, MO, AB_WHO) %>% 
  # mark rows to be deleted
  mutate(deleteRow = n() > 1 & is.na(RIS) & !is.na(MO)) %>% 
  ungroup() %>%
  # delete the rows that are marked with deleteRow
  filter(!deleteRow) %>% 
  # remove column deleteRow
  select(-deleteRow) %>%
  # all multiple values should be absent now, so get distinct rows
  distinct(sampleid, Isolnr, AB_WHO, RIS, .keep_all = T)
 y = x %>% 
  pivot_wider(
    id_cols = c(sampleid, Isolnr, MO),
    names_from = AB_WHO, 
    values_from = RIS
  )
 radarData = x %>%
  # filter(!is.na(AB_WHO)) %>%
  mutate(
    RIS = as.rsi(RIS),
    # MIC = as.mic(MIC),
    mo = as.mo(MO)) %>%
  pivot_wider(names_from = AB_WHO, values_from = RIS) %>%
  # pivot_wider(names_from = Rapport.TS_IsolaatTest,
  #             values_from = Waarde.TS_IsolaatTest)
  select(order(colnames(.))) %>%
  relocate(
    all_of(
      c(
        'sampleid', 'Isolnr', 'patientid', 'year', 'weeknr', 'date', 'MMBProc',
        'department', 'type_dept', 'specialism', 'is_clinical', 'is_icu', 'is_outward', 
        'specimen_type', 'specimen_group', 
        'rap.st', 
        # 'rap.ab', 
        'age', 'gender', 
        # 'Diff', 'Etst', 
        'Waarde.MSxSTxAB', 
        # 'Waarde.TS_IsolaatTest', 
        # 'MIC', 
        'MO', 'mo', 'Pos'
      )
    ), 
    .before = 1
  ) %>% 
  select(-`NA`)
 if (minMemoryAsap) {
  # save memory
  rm(s)
 }
 radarData = radarData %>%
  filter(date >= '2022-01-01', date <= '2022-09-30')
 if (F) {
  # TODO je verwacht meer Neg dan Pos 
  # DONE hiervoor tMS left_join tST ipv andersom
  radarData %>% filter(!is.na(Pos), MO == 'mrsa') %>% count(patientid)
  radarData %>% filter(!is.na(Pos), MO == 'esccol') %>% count(patientid)
  radarData %>% filter(!is.na(Pos), MO == 'pseaer') %>% count(patientid)
  radarData %>% filter(!is.na(Pos), MO == 'klepne') %>% count(patientid)
  radarData %>% filter(!is.na(Pos), MO == 'canalb') %>% count(patientid)
  radarData %>% filter(!is.na(Pos), MO == 'enbcco') %>% count(patientid)
  radarData %>% filter(isolaattest == 'ESBL', Waarde.TS_IsolaatTest == '+', MO == 'enbcco') %>% count(patientid)
  radarData %>% filter(isolaattest == 'ESBL', Waarde.TS_IsolaatTest == '+', MO == 'esccol') %>% count(patientid)
  radarData %>% filter(isolaattest == 'ESBL', Waarde.TS_IsolaatTest == '+', MO == 'klepne') %>% count(patientid)
  radarData %>% filter(MO == 'klepne', GEN == 'R') %>% count(patientid)
  radarData %>% filter(MO == 'klepne', CIP == 'R') %>% count(patientid)
  radarData %>% filter(MO == 'klepne', MEM == 'R') %>% count(patientid)
  radarData %>% filter(MO == 'staaur', CLI == 'R') %>% count(patientid)
  radarData %>% filter(is.na(MO), is.na(rap.st))
  # rijen voor pat 8000005 in MS file: 36
  tMS %>% filter(patientid == '8000005') %>% count(Monsternummer)
  # rijen voor pat 8000005 in ST file: 23
  tST %>% filter(Monsternummer %in% (tMS %>% filter(patientid == '8000005') %>% pull(Monsternummer) %>% unique())) %>% group_by(Monsternummer) %>% mutate(n = n()) %>% print(n=25)
  # 22 * 36 = 792
  x %>% filter(sampleid == '22070019631') %>% select(-year, -weeknr, -date, - patientid, -age, -ObjType, -Lot, -gender, -PC, -department, -Type, -Kamer, -Isolnr, MO, -Waarde.MSxSTxAB, -Pos, -rap.st, -MIC, -Diff, -isolaattest, -Waarde.TS_IsolaatTest, -Rapport.TS_IsolaatTest, -Afname, -is_icu, -is_clinical, -is_outward) %>% print(n=38)
  x %>% filter(sampleid == '22070019631', Isolnr == 3) %>%
    # door deze waarden ontstaan dubbelingen in radarData, daarom
    # tijdelijk wegfilteren. maar gaat later wel nodig zijn
    # select(-MIC, -Etst, -Diff, -rap.ab) %>% 
    pivot_wider(names_from = AB_WHO, values_from = RIS) %>% view()
    # pivot_wider(names_from = Rapport.TS_IsolaatTest, 
    #             values_from = Waarde.TS_IsolaatTest) %>% view()
  listMultiValCols = function(x) {
    z = x
    colns = colnames(z)
    collens = c()
    for (c in 1:length(colns)) { 
      collens = c(collens, 
                  z %>% 
                    group_by(sampleid, Isolnr) %>% 
                    pull(colns[c]) %>% unique() %>% length()) 
    }
    multValCols = colns[which(collens != 1)]
    return(z %>% select(sampleid, patientid, multValCols, RIS))
  }
  # dit AB levert nog steeds een List
  x %>% filter(AB_WHO == 'CAZ') %>% count(sampleid, Isolnr, AB_WHO) %>% filter(n>1)
  x %>% 
    filter(
      sampleid == '22070547731' %>% str_trim(), 
      AB_WHO == 'CAZ',
      RIS != 'V'
    ) %>% 
    select(sampleid, Isolnr, AB_WHO, RIS)
  key = c('sampleid', 'Isolnr')
  y %>% filter(!is.na(FOX))%>% select(key, FOX)
    x %>% 
      select(sampleid, Isolnr, AB_WHO, RIS) %>% 
      pivot_wider(id_cols = c(sampleid, Isolnr), 
                  names_from = AB_WHO, 
                  values_from = RIS) %>% 
      select(1:5)
  selectRelCols = function(x, s = c()) {
    x %>% select(sampleid, Isolnr, MO, AB_WHO, RIS, all_of(s))
  }
 }
 # vroom_write(x = x, 
 #             file = file.path(getwd(), dataPath, 'ST x AB_2022.csv'), 
 #             delim = ';')
--- a/interfaces.R
+++ b/interfaces.R
@@ -0,0 +1,66 @@
 suppressMessages({
  require(tidyverse)
  require(AMR)
 })
 source('config.R')
 source('anonymize.R')
 # moved from 4D_archive.R
 createAbMapping = function() {
  options(AMR_locale = 'nl')
  abOpm = readxl::read_xlsx('data/ab_glims_2022-09-02.xlsx', 
                            sheet = 1)
  ab2 = readxl::read_xlsx('ab_glims_2022-09-02.xlsx', 
                          sheet = 'codes uit Glims 2022-09-02')
  # first try to retrieve the AB from Naam
  ab2 = ab2 %>%
    select(Mnemonic, Naam, Toelichting) %>%
    mutate(Naam = str_replace(Naam, 'clavulaanzuur', 'clavulanic acid'),
           `EARS-Net.Mnemonic` = as.ab(Naam),
           `EARS-Net.Name` = ab_name(Naam)) %>%
    left_join(abOpm %>% select(name4d, Opm.Greetje), by = c('Mnemonic' = 'name4d'))
  # secondly, try to retrieve the AB from Mnemonic
  ab2 = ab2 %>%
    mutate(`EARS-Net.Mnemonic` = replace(`EARS-Net.Mnemonic`, 
                                         is.na(`EARS-Net.Mnemonic`), 
                                         as.ab(Mnemonic)),
           `EARS-Net.Name` = replace(`EARS-Net.Name`, 
                                     is.na(`EARS-Net.Name`), ab_name(Mnemonic)))
 }
 is_icu = function(departmentName) {
  if (!is_empty(departmentName)) {
    departmentName %in% 
      c('KINN', 'KINZ', 'KICW', 'KICN', 'KINW', # NICU IC
        'KICG', 'KICR', 'KICB', 'KICK',         # Kinder IC
        'THIC', 'ICV2',                         # old and new name
        'NCIC', 'ICV1',                         # old and new name
        'E1IC', 'ICV3',                         # old and new name
        'CICA', 'CICB', 'CICD', 'CHIC', 'ICV4' # old and new name
      )
  } else {
    F
  }
 }
 is_clinical = function(departmentName) {
  is_icu(departmentName) |
    str_ends(departmentName, 'VA') |
    departmentName %in% 
    c('L1SC', 'CIVZ', 'NEUC', 'L1KV', 'OZOK', 'BOA0', 'C1CC', 'BOB0')
 } 
 is_outward = function(departmentName) {
  str_ends(departmentName, 'P') |
    str_detect(departmentName, 'DB')
 }
 readGlimsFinalResults = function() {
  cat(paste0('Reading ', radarDataFilename, '\n'))
  return(read_rds(radarDataFilename))
 }
--- a/interfaces/4D/src/4D_archive.R
+++ b/interfaces/4D/src/4D_archive.R
@@ -0,0 +1,299 @@
 require(tidyverse)
 if (!require(lubridate)) {
  install.packages('lubridate')
  require(lubridate)
 }
 require(crayon)
 require(stringr)
 # TODO: check wat ProcessAndMergeDailyFiles ingaat: Monsternummer is.character?
 # TODO: min # rows = bijv. 10 i.v.m. privacy
 # TODO: 2 versies: MMBI & 'Algemeen'
 memSize = function() {
  result = tibble(object='', size=0, sizeMB=0)
  objs = ls(envir = globalenv())
  for (i in 1:length(objs) ) { 
    objName = objs[i]
    sizeBytes = as.numeric(object.size(get(objName)))
    result = result %>%
      add_row(object = objName, size = sizeBytes, sizeMB = sizeBytes / 1024^2)
  }
  result = result %>%
    arrange(desc(size))
  return(list(result, sum(result$sizeMB)))
 }
 gatherAbIds = function() {
  archFolders = list.dirs(file.path(dataFolder, 'archive'), full.names = F)
  archFolders = archFolders[str_length(archFolders) == 26]
  for (archFolder in archFolders) {
    folder = archFolder
    abFiles = list.files(file.path(dataFolder, 'archive', folder), pattern = 'AB-*')
    for (abFileName in abFiles) {
      abFile = read_4D_file(file.path(dataFolder, 'archive', folder, abFileName))
      abNamesDict = tibble(name4d = abFile %>% pull(AB) %>% unique())
      newNamesDict = abNamesDict %>% filter(!name4d %in% a$name4d, !is.na(name4d)) 
      tNewNames = tibble(name4d = newNamesDict$name4d, 
                         nameAMR = as.ab(newNamesDict$name4d), 
                         nameAMRlong = ab_name(newNamesDict$name4d))
      a = a %>% add_row(tNewNames)
    }
  }
  return(a)
 }
 source('4D_day.R')
 useCachedFileInfo = T
 archivePath = './data/archive'
 folders = list.files(path = archivePath, pattern = 'ImportedFromGlimms-*', full.names = T)
 addFileInfo = function(x, filePath, fileName) {
  mtime = file.mtime(filePath)
  x = x %>%
    add_row(filePath = filePath, 
            fileName = fileName,
            fileType = str_sub(fileName, 1, 2),
            fileDate = fileName %>% str_sub(4, 9) %>% ymd(),
            modified = mtime, 
            # mdate = as.Date(ymd_hms(modified)),
            size = file.size(filePath)) %>%
    group_by(fileDate) %>%
    mutate(nFilesPerDate = n()) %>%
    ungroup() %>%
    arrange(desc(fileDate))
  return(x)
 }
 deriveFileOverview = function(tFiles) {
  tFiles %>% 
    group_by(fileDate) %>% 
    mutate(AB = cur_data() %>% 
             filter(str_sub(fileName, 1, 2) == 'AB') %>% 
             nrow(),
           MS = cur_data() %>% 
             filter(str_sub(fileName, 1, 2) == 'MS') %>% 
             nrow(),
           ST = cur_data() %>% 
             filter(str_sub(fileName, 1, 2) == 'ST') %>% 
             nrow(),
           TM = cur_data() %>% 
             filter(str_sub(fileName, 1, 2) == 'TM') %>% 
             nrow(),
           TS = cur_data() %>% 
             filter(str_sub(fileName, 1, 2) == 'TS') %>% 
             nrow()
    ) %>%
    slice(1) %>%
    select(fileDate, AB, MS, ST, TM, TS)
 }
 getFilesInfo = function() {
  tFiles = tibble(filePath = 'deleteme', 
                  fileName = '', 
                  fileType = '',
                  fileDate = ymd('2022-01-01'),
                  modified = ymd('2022-01-01'),
                  size = 0
  )
  for (folder in folders) {
    # if (folder == 'ImportedFromGlimms-2014-09') {
    folderPath = folder #file.path(archivePath, folder)
    files = list.files(folderPath, pattern = '*')
    for (file in files) {
      filePath = file.path(folderPath, file)
      tFiles = tFiles %>% 
        addFileInfo(filePath = filePath, fileName = file)
    }
  }
  tFiles = tFiles %>%
    mutate(mdate = as.Date(ymd_hms(modified))) %>%
    filter(filePath != 'deleteme') %>%
    arrange(fileDate, fileName)
  tFiles = tFiles %>%
    left_join(tFiles %>% count(fileDate, name = 'nFilesPerDate'), by = 'fileDate')
  return(tFiles)
 }
 addFileInfoOfDay = function(x, folders, ymdDate, overwrite = F) {
  datePattern = format(ymdDate, format = '%y%m%d')
  dirPattern = format(today(), format = '%Y-%m')
  folder = folders[which(folders %>% str_detect(dirPattern))]
  dayFiles = list.files(path = folder, pattern = datePattern)
  for (fileName in dayFiles) {
    cat(paste0('Processing ', fileName, '\n'))
    x = x %>% addFileInfo(filePath = file.path(folder, fileName),
                          fileName = fileName)
  }
  return(x)
 }
 addFileInfoOfToday = function(tFiles, folders) {
  browser()
  dirPattern = format(today(), format = '%Y-%m')
  folder = folders[which(folders %>% str_detect(dirPattern))]
  tFiles = tFiles %>% 
    addFileInfoOfDay(folderPath = file.path(archivePath, folder),
                     ymdDate = today(),
                     overwrite = F)
  return(tFiles)
 }
 plotFileSize = function(tFiles) {
  tFiles %>%
    filter(fileDate > ymd('2014-10-01'),
           fileDate <= ymd('2014-10-31'),
           fileType != 'Ee') %>%
    mutate(dayOfWeek = wday(fileDate),
           dayOfWeekName = weekdays(fileDate),
           size = size / (1024)
    ) %>%
    ggplot(aes(x = dayOfWeek, y = size, group = dayOfWeek)) +
    geom_boxplot(aes(fill = fileType)) +
    scale_y_continuous(limits = c(0, 1250)) +
    facet_wrap(facets = c(~fileType)) +
    labs(y = 'size [KB]') +
    theme_light() +
    theme(
      plot.title = element_text(face = 'bold')
    )
 }
 if (useCachedFileInfo) {
  tFiles = read_rds('./data/tFiles_2022-10-13.rds')
  tFileOvw = read_rds('./data/tFileOvw_2022-10-13.rds')
 } else {
  tFiles = getFilesInfo()
  tFileOvw = deriveFileOverview(tFiles)
  write_rds(tFiles, paste0('./data/tFiles_', today(), '.rds'))
  write_rds(tFileOvw, paste0('./data/tFileOvw_', today(), '.rds'))
 }
 readDailyFile = function(tFiles, date, prefix) {
  result = NULL
  fileNames = tFiles %>%
    filter(fileDate == date, 
           fileType == prefix) %>%
    pull(filePath)
  if (length(fileNames) > 0) {
    for (f in 1:length(fileNames)) {
      fileName = fileNames[f]
      cat(silver(paste0('Reading ', fileName)), '\n')
      fileContent = read_4D_file(fileName)
      if (f == 1) {
        result = fileContent
      } else  {
        result = result %>% 
          add_row(fileContent)
      }
    }
  }
  return(result)
 }
 readDailyMSFile = function(tFiles, date) {
  result = NULL
  fileNames = tFiles %>%
    filter(fileDate == date, 
           fileType == 'MS') %>%
    pull(filePath)
  if (length(fileNames) > 0) {
    for (f in 1:length(fileNames)) {
      fileName = fileNames[f]
      cat(silver(paste0('Reading ', fileName)), '\n')
      msFileContent = read_4D_MS_file(fileName)
      if (f == 1) {
        result = msFileContent
      } else  {
        result = result %>% 
          add_row(msFileContent)
      }
    }
  }
  return(result)
 }
 readDailyFiles = function(tFiles, date) {
  abFile = readDailyFile(tFiles, date, 'AB')
  msFile = readDailyMSFile(tFiles, date)
  stFile = readDailyFile(tFiles, date, 'ST')
  tmFile = readDailyFile(tFiles, date, 'TM')
  tsFile = readDailyFile(tFiles, date, 'TS')
  return(list(abFile, msFile, stFile, tmFile, tsFile))
 }
 readArchive = function(tFileInfo, tFileOvw, createDebugList = FALSE) {
  tMergeDates = NULL
  tsDates = NULL
  tMergeDatesDebugList = list()
  dates = tFileInfo %>% filter(!is.na(fileDate)) %>% pull(fileDate) %>% unique()
  for (d in 1:length(dates)) {
    currDate = dates[d]
    cat(paste0('\nd = ', d, ', date: ', currDate, '\n'))
    print(tFileOvw %>% filter(fileDate == currDate))
    dateFiles = readDailyFiles(tFileInfo, currDate)
    if (!is.null(dateFiles[[1]])) {
      processed = ProcessAndMergeDailyFiles(
        dateFiles[[1]] %>% select(-...15), 
        dateFiles[[2]], 
        dateFiles[[3]], 
        dateFiles[[4]], 
        dateFiles[[5]]
      )
      if (!is.null(processed$merged)) {
        tMergeDate = processed$merged %>% # abFile, stFile and msFile joined
          mutate(date = currDate, .before = 1)
        if (createDebugList) tMergeDatesDebugList[[d]] = tMergeDate
        # separate tables of (ab|st|ms|ts|tm)File of the current day can be found 
        # in processed$(ab|st|ms|ts|tm)File, for instance:
        if (!is.null(processed$tsFile)) {
          tsDate = processed$tsFile %>%
            mutate(date = currDate, .before = 1)
        }
        if (is.null(tMergeDates)) {
          if (nrow(tMergeDate) > 0) {
            tMergeDates = tMergeDate
          }
          if (exists('tsDate')) {
            if (nrow(tsDate) > 0) {
              tsDates = tsDate
            }
          }
        } else {
          if (nrow(tMergeDates) > 0) {
            tMergeDates = tMergeDates %>% add_row(tMergeDate)
          }
          if (exists('tsDates')) {
            if (!is.null(tsDates)) {
              if (nrow(tsDates)) {
                tsDates = tsDates %>% add_row(tsDate)
              }
            }
          }
        }
        tMergeDates = tMergeDates %>% 
          mutate(Monsternummer = str_trim(Monsternummer))
      }
    }
  }
  if (createDebugList) {
    return(list(tMergeDates, tMergeDatesDebugList))
  } else {
    return(tMergeDates)
  }
 }
--- a/interfaces/4D/src/4D_day.R
+++ b/interfaces/4D/src/4D_day.R
@@ -0,0 +1,268 @@
 # initially only interfacing 4D data
 # TODO loggen zoveel mogelijk over ontvangen data (nieuwe GLIMS data)
 # TODO pushen R scripts naar git server waar Gerhard en Kornelis bij kunnen
 # require(stringr)
 require(tidyverse)
 require(AMR)
 dataFolder = './data'
 get_most_recent_4D_file = function(path, prefix) {
  files = list.files(dataFolder, paste0('^', prefix, '*'))
  tFiles = tibble(name = files, 
                  mtime = file.mtime(file.path(dataFolder, files))) %>%
    arrange(desc(mtime))
  tmpFilename = tFiles$name[1]
  return(tmpFilename)
 }
 read_4D_file = function(path) {
  tFile = read_tsv(path, col_types = paste0(rep('c', 30), collapse = ''))
  # remove comment lines
  if (nrow(tFile) > 0) {
    tFile = tFile %>%
      slice( seq(4, nrow(tFile)) ) %>% 
      filter(str_sub(ID, 1, 3) != '___')
  }
  if (colnames(tFile)[1] == 'X1') {
    if (str_detect(path, 'AB-')) {
      tFile = tibble(ID = '0', 
                     Monsternummer = '', 
                     IsolNr = '', 
                     AB = '', 
                     V = '',
                     PrefMIC ='',
                     MIC = '',
                     D = '',
                     DIF = '',
                     PrefEtest = '',
                     E = '',
                     ETEST = '',
                     RIS = '',
                     RAP = '',
                     ...15 = '')
    } else if (str_detect(path, 'MS-')) {
      tFile = tibble(UMCG.Monster= '0',
                     PIN.Lot= '0',
                     TypeObject= '0',
                     Lot= '0',Echtgenoot= '0',
                     AchternNaam= '0',
                     Geboortedatum= '0',
                     Geslacht= '0',
                     OntvOLabDatum= '0',
                     Tijdstip.materiaal.afgenomen= '0',
                     PC= '0',
                     Aanvrager= '0',
                     ligafdeling= '0',
                     A= '0',
                     B= '0',
                     C= '0',
                     Materiaal= '0',
                     Materiaal.variabelen= '0',
                     X = F)
      } else if (str_detect(path, 'ST-')) {
      tFile = tibble(ID = '0', 
                     Monsternummer = '', 
                     MMBProc = '',
                     Isolnr = '', 
                     MO = '', 
                     Waarde = '', 
                     Pos = '',
                     Rap = '')
    } else if (str_detect(path, 'TM-')) {
      tFile = tibble(ID = '0', 
                     Monsternummer = '', 
                     Test = '',
                     Uitslag = '')
    } else if (str_detect(path, 'TS-')) {
      tFile = tibble(ID = '0', 
                     Monsternummer = '', 
                     Isolnr = '', 
                     Test = '', 
                     Waarde = '', 
                     Rapport = '')
    }
  }
  if ('ID' %in% colnames(tFile)) {
    tFile = tFile %>%
      filter(ID != '0')
  } else if ('UMCG.Monster' %in% colnames(tFile)) {
    tFile = tFile %>%
      filter(UMCG.Monster != '0')
  }
  return(tFile)
 }
 read_4D_TM_file = function(path) {
  tmFilename = str_remove(path, paste0(dirname(path), '/'))
  tmFileL = readLines(path)
  tmFileL = tmFileL[c(5:length(tmFileL))]
  for (l in 1:length(tmFileL)) {
    if (str_count(tmFileL[l], '\t') == 3) {
      tmFileL = paste0(tmFileL, '\t')
    }
  }
  newTmFilename = file.path(dirname(path), 
                            tmFilename %>% str_replace('TM', 'bTM'))
  if (file.exists(newTmFilename)) file.remove(newTmFilename)
  writeLines(tmFileL, newTmFilename)
  tFile = read_tsv(newTmFilename, 
                   col_names = c('ID','Monsternummer','Test','Status.test.tm','Uitslag'), 
                   col_types = 'ccccc')
  # remove comment lines
  tFile = tFile %>%
    slice(c(5:(tFile %>% nrow()))) %>%
    mutate(
      Monsternummer = as.character(Monsternummer)
    )
  return(tFile)
 }
 get_most_recent_4D_MS_file = function(path) {
  # get files
  msFiles = list.files(path, '^MS*')
  # create table with modification datetimes, for selection of the most recent one
  tmsFiles = tibble(name = msFiles,
                    mtime = file.mtime(file.path(path, msFiles))) %>%
    arrange(desc(mtime))
  # get the name of the most recently modified file
  msFilename = tmsFiles$name[1]
  return(msFilename)
 }
 read_4D_MS_file = function(path) {
  # print(path)
  msFilename = str_remove(path, paste0(dirname(path), '/'))
  # read content
  msFileL = readLines(path)
  # remove lines with repeated '_', useful for the human eye only
  msFileL = msFileL[str_sub(msFileL, 1, 5) != '_____']
  # replace (real) header
  msHeader = readLines(file.path(dataFolder, 'headerMS.txt'))
  msFileL[1] = paste0(msHeader, '\t')
  # replace tabs within a date time combination with a space in line 2 and further,
  # for example: replace '29/09/2021	14:05' by '29/09/2021 14:05'
  #                                 ↑ = '\t'               ↑ = ' '
  for (i in 2:length(msFileL)) {
    msFileL[i] = msFileL[i] %>% str_replace_all('(?<=[:digit:]{4})\t(?=[:digit:]{2}:[:digit:]{2})', ' ')
  }
  # write to new machine readable file
  newMsFilename = file.path(dirname(path), 
                            msFilename %>% str_replace('MS', 'bMS'))
  if (file.exists(newMsFilename)) file.remove(newMsFilename)
  writeLines(msFileL, newMsFilename)
  # finally: read the machine readable file
  result = read.delim(file = newMsFilename, sep = '\t') %>%
    as_tibble() %>%
    mutate(
      UMCG.Monster = as.character(UMCG.Monster),
      PIN.Lot = as.character(PIN.Lot),
      Aanvrager = as.character(Aanvrager),
      C = as.character(C)
    )
  # Hurray!
  return(result)
 }
 # abFile = read_4D_file(dataFolder, 'AB')
 # msFile = read_4D_MS_file(dataFolder)
 # stFile = read_4D_file(dataFolder, 'ST')
 # tmFile = read_4D_file(dataFolder, 'TM')
 # tsFile = read_4D_file(dataFolder, 'TS')
 ProcessAndMergeDailyFiles = function(abFile, msFile, stFile, tmFile, tsFile) {
  if (!is.null(abFile))
    abFile = abFile %>%
      filter(!is.na(RAP)) #%>%
  # mutate(
  #   #Monsternummer = as.character(Monsternummer)
  #   AB.amr = AMR::ab_property(AB, 'ab') %>% as.character()
  # )
  if (!is.null(stFile))
    stFile = stFile %>%
      filter(!is.na(Rap)) %>%
      # mutate(Monsternummer = as.character(Monsternummer) %>% str_trim()) %>%
      rename(IsolNr = Isolnr)
  if (!is.null(msFile))
    msFile = msFile %>%
      rename(Monsternummer = UMCG.Monster) %>%
      # mutate(Monsternummer = as.character(Monsternummer) %>% str_trim()) %>%
      select(-X)
  # if (!is.null(tmFile))
  # tmFile = tmFile %>%
  #   mutate(Monsternummer = as.character(Monsternummer) %>% str_trim())
  if (!is.null(tsFile))
    tsFile = tsFile %>%
      filter(!is.na(Rapport))
  #   mutate(Monsternummer = as.character(Monsternummer) %>% str_trim())
  if (!is.null(abFile) & !is.null(stFile))
    t_ab_st = abFile %>%
      full_join(stFile, 
                by = c('Monsternummer', 'IsolNr'), 
                suffix = c('.ab', '.st'))
  if (exists('t_ab_st')) {
    if (!is.null(t_ab_st) & !is.null(msFile)) {
      t_ab_st_ms = t_ab_st %>%
        full_join(msFile, by = 'Monsternummer') %>%
        rename(Waarde.st = Waarde)
      return(
        list(
          merged = t_ab_st_ms,
          abFile = abFile,
          stFile = stFile,
          msFile = msFile,
          tsFile = tsFile,
          tmFile = tmFile
        )
      )
      # t_ab_st_ms_ts = t_ab_st_ms %>%
      #   full_join(tsFile, by =  c('Monsternummer', 'IsolNr' = 'Isolnr'))
      # 
      # t_ab_st_ms_ts_tm = t_ab_st_ms_ts %>%
      #   add_column(Uitslag = NA,
      #              is_sampleResult = F) %>%
      #   add_row(Monsternummer = tmFile$Monsternummer,
      #           Test = tmFile$Test,
      #           Uitslag = tmFile$Uitslag,
      #           is_sampleResult = T)
      # 
      # t_ab_st_ms_ts_tm = t_ab_st_ms_ts_tm %>%
      #   rename(Waarde.ts = Waarde) %>%
      #   filter(!Test %in% c('no_filter_yet'))
      # 
      # # if ('X' %in% colnames(t_ab_st_ms_ts_tm)) {
      # #   t_ab_st_ms_ts_tm = t_ab_st_ms_ts_tm %>%
      # #     select(-'X')
      # #   print('Dit statement is voor debugging')
      # # }
    }
  }
  return(
    list(
      merged = NULL,
      abFile = abFile,
      stFile = stFile,
      msFile = msFile,
      tsFile = tsFile,
      tmFile = tmFile
    )
  )
 }
--- a/interfaces/4D/src/debug
+++ b/interfaces/4D/src/debug
@@ -0,0 +1,26 @@
 require(tidyverse)
 require(readr)
 v171 = read_rds('v171.rds')
 v180 = read_rds('v180.rds')
 new_v180 = v180 %>% anti_join(v171 %>% left_join(v180))
 source('interfaces.R')
 d = readGlimsArchive(NULL, NULL, T, NULL, NULL)
 d %>% filter(str_starts(mo, 'B_ES')) %>% count(mo, CXM) 
 # deze aantallen zie ik niet terug in wat de applicatie presenteert:
 # Ceftazidim: R (705), SI (2705), totaal dus 3410
 # misschien dat data_select() voor andere aantallen zorgt?
 data_select = d %>% 
  # filter(specialty_shiny %in% input$specialtyInput & department %in% input$departmentInput) %>%
  filter_first_isolate(col_patient_id = "patientid", episode_days = 30) %>% 
  mutate(mo = as.mo(mo, Becker = TRUE))
 data_select %>% filter(str_starts(mo, 'B_ES')) %>% count(mo, CXM) 
 # Ja dus!
--- a/interfaces/4D/src/debug_inspect
+++ b/interfaces/4D/src/debug_inspect
@@ -0,0 +1,37 @@
 require(tidyverse)
 inspectRead = 10
 dataPath = 'data'
 abNames = read_rds('data/abNames.rds')
 filename = list.files(dataPath, paste0('read_', inspectRead, '_*'))
 d = read_rds(file.path(dataPath, filename)) #%>%
  # left_join(abNames, by = c('ab.amr' = 'nameAMR'))
 # list a sampleid that occurs more often but not too often
 examples = d %>% count(sampleid) %>% arrange(desc(n)) %>% filter(n < 50)
 view_d = d %>% 
  select(sampleid, date, IsolNr, MIC, mo, RIS, ab.amr, RAP) %>% #, name4d, nameAMRlong) %>%
  mutate(asmo = suppressWarnings(as.mo(mo))) %>%
  arrange(sampleid, IsolNr) #%>%
  # filter(sampleid == examples$sampleid[i])
 has_ecoli = view_d %>% filter(str_starts(asmo, 'B_ES'))
 examples_having_ecoli = has_ecoli %>% 
  filter(sampleid %in% examples$sampleid) %>%
  pull(sampleid) %>%
  unique()
 e = 2
 view(view_d %>% 
       filter(sampleid == examples_having_ecoli[e]) %>%
       mutate(IsolNr = as.numeric(IsolNr)) %>%
       arrange(IsolNr),# %>%
       #distinct(sampleid, date, IsolNr, mo, ab.amr, .keep_all = T), 
     title = 'example')
 if (F) {
  view(view_d)
  view_d %>% filter(!is.na(mo)) %>% pull(mo) %>% unique()
 }
--- a/reads.R
+++ b/reads.R
@@ -0,0 +1,7 @@
 require(tidyverse)
 reads = tribble(
  ~read, ~desription, 
  3, 'returned z',
  4, 'returned radarData'
 )
--- a/server.R
+++ b/server.R
@@ -2,18 +2,24 @@
 server <- function(input, output, session) {
  hideTab(inputId = 'RadaRtabs', target = 'Data')
  output$disclaimerText = renderText('you must confirm understanding the disclaimer')
  # define data selection ---------------------------------------------------
  data_select <- reactive({if (input$box2.6_first != 365) {
    radar_data %>% 
-      filter(specialty_shiny %in% input$specialtyInput & department %in% input$departmentInput) %>%
+      filter(specialty_shiny %in% input$specialtyInput & 
               department %in% input$departmentInput &
               Materiaal %in% input$materialInput) %>%
      filter_first_isolate(col_patient_id = "patientid", episode_days = input$box2.6_first) %>% 
      mutate(mo = as.mo(mo, Becker = TRUE))
  } else {
    radar_data_first %>% 
-      filter(specialty_shiny %in% input$specialtyInput & department %in% input$departmentInput)
+      filter(specialty_shiny %in% input$specialtyInput & 
               department %in% input$departmentInput &
               Materiaal %in% input$materialInput)
  }
  })
@@ -39,6 +45,7 @@ server <- function(input, output, session) {
  observe({
    x <- input$specialtyInput
    if (input$allDepartments) {
      if (!is.null(x)) {
        update_departments <-
          radar_data %>% filter(specialty_shiny %in% input$specialtyInput)
@@ -47,6 +54,9 @@ server <- function(input, output, session) {
      else {
        x <- character(0)
      }
    } else {
      x <- character(0)
    }
    updateCheckboxGroupInput(
      session,
      inputId = "departmentInput",
@@ -56,6 +66,12 @@ server <- function(input, output, session) {
    )
  })
  observeEvent(input$btnDisclaimerUnderstood, {
    if (input$cbDisclaimerRead == T) {
      showTab(inputId = 'RadaRtabs', target = 'Data', select = T)
    }
  })
  # sidebar hover -----------------------------------------------------------
@@ -104,7 +120,8 @@ server <- function(input, output, session) {
            color = "#d33724",
            size = 0.7
          )
-        ),tabPanel(
+        ),
        tabPanel(
          title = "Isolates (and pathogens) detected",
          div(
            style = "position: absolute; left: 0.5em; bottom: 0.5em;",
@@ -312,7 +329,9 @@ server <- function(input, output, session) {
  pos_neg_plot <- reactive({
    pos_neg_plot <- radar_data %>% 
-      filter(specialty_shiny %in% input$specialtyInput & department %in% input$departmentInput) %>% 
+      filter(specialty_shiny %in% input$specialtyInput & 
               department %in% input$departmentInput &
               Materiaal %in% input$materialInput) %>% 
      group_by_at(input$box2.1_group) %>% 
      summarise(patients = n_distinct(patientid),
                total = n_distinct(sampleid),
@@ -413,7 +432,6 @@ server <- function(input, output, session) {
        facet_wrap(input$box2.3_group)
      }} +
      NULL
  }) 
  output$patho_plot <- renderGirafe({
@@ -443,7 +461,7 @@ server <- function(input, output, session) {
      group_by_at(input$box2.2_group) %>% 
      summarise("14 days" = sum(first_14, na.rm = TRUE),
                "30 days" = sum(first_30, na.rm = TRUE),
-                "60 days" = sum(first_60, na.rm = TRUE))
+                "60 days" = sum(first_60, na.rm = TRUE)) %>%
      pivot_longer(cols = c("14 days", "30 days", "60 days"))
  })
@@ -479,7 +497,7 @@ server <- function(input, output, session) {
  output$box1 <- renderUI({
    div(
-      style = "position: relative",
+      style = "position: relative;",
      tabBox(
        id = "box1",
        width = NULL,
@@ -687,6 +705,8 @@ server <- function(input, output, session) {
        }}
    }
    cat('Implicit join in get_resistance_df(..) @server.R:692\n')
    get_resistance_df(data_select(),
                      mo_selected = mo_selected,
                      abx = if (mo_genus(mo_selected) %in% mo_genus(c("E. coli",
@@ -728,8 +748,11 @@ server <- function(input, output, session) {
        theme_void()
    } else {
-      ggplot(isolate_prop_data(), aes(value, reorder(antibiotic, value_r), fill = factor(interpretation, levels = c("SI", "R")),
+      ggplot(isolate_prop_data(), 
-                                      tooltip = paste0("R = ", value_r, " (", percent_r, ")\n", "SI = ", value_si, " (", percent_si, ")")
+             aes(value, reorder(antibiotic, value_r), 
                 fill = factor(interpretation, levels = c("SI", "R")),
                 tooltip = paste0("R = ", value_r, " (", percent_r, ")\n", 
                                  "SI = ", value_si, " (", percent_si, ")")
             )) + 
        geom_col_interactive(colour = "black", position = "fill") +
@@ -993,14 +1016,18 @@ server <- function(input, output, session) {
    # isolate_prop_data() %>% 
    if (input$box2 == "Positive & negative cultures") {
      all <- radar_data %>% 
-        filter(specialty_shiny %in% input$specialtyInput & department %in% input$departmentInput) %>% 
+        filter(specialty_shiny %in% input$specialtyInput & 
                 department %in% input$departmentInput &
                 Materiaal %in% input$materialInput) %>% 
        rename(group = input$box2.1_group) %>% 
        filter(!is.na(group)) %>% 
        distinct(patientid, .keep_all = TRUE) %>%
        count(group) %>%
        rename(n_all = n)
      pos <- radar_data %>% 
-        filter(specialty_shiny %in% input$specialtyInput & department %in% input$departmentInput) %>% 
+        filter(specialty_shiny %in% input$specialtyInput & 
                 department %in% input$departmentInput &
                 Materiaal %in% input$materialInput) %>% 
        rename(group = input$box2.1_group) %>% 
        group_by(group) %>%
        # group_by_at("gender") %>%
--- a/stitchArchive.R
+++ b/stitchArchive.R
@@ -0,0 +1,143 @@
 require(tidyverse)
 source('globalData.R')
 reqColumns = abAbbrevs
 patchFirstColumns = c('sampleid', 'date', 'is_clinical', 'is_icu', 'is_outward', 
                      'IsolNr', 'Materiaal', 'specimen_type', 'specimen_group', 
                      'patientid', 'birth_date', 'department', 'type_dept', 
                      'specialism', 'age', 'gender', 'MIC', 'RAP', 'mo')
 readPatch = function(filename, reqColumns) {
  patch = read_rds(filename)
  for (c in 1:length(reqColumns)) {
    if (!reqColumns[c] %in% colnames(patch)) { 
      patch = patch %>% 
        add_column(!!reqColumns[c] := as.rsi(NA)) %>%
        select(order(colnames(.))) %>%
        relocate(all_of(patchFirstColumns), .before = 1)
    }
  }
  return(patch)
 }
 readAllPatches = function() {
  tPatches = tibble(name = list.files(path = 'data', pattern = 'bk_.*\\.rds'))
  tPatches = tPatches %>% 
    separate(name, into = c('prefix', 'from', 'tail'), sep = '_') %>%
    separate(tail, into = c('to', 'ext'), sep = '\\.') %>%
    filter(!is.na(to), !str_detect(to, '[:alpha:]')) %>%
    mutate(name = paste0(prefix, '_', from, '_', to, '.', ext)) %>%
    arrange(from, to)
  return(tPatches)
 }
 stitchPatches = function(tPatches, dataPath = 'data', reqColumns) {
  for (i in 1:nrow(tPatches)) {
    # patch = read_rds(file.path(dataPath, tPatches[i,]$name))
    # for (c in 1:length(reqColumns)) {
    #   if (!reqColumns[c] %in% colnames(patch)) { patch = patch %>% add_column(!!reqColumns[c] := is.rsi(NA)) }
    # }
    cat(paste0('  patch ', i, ': ', tPatches$from[i], ' ', tPatches$to[i], '\n'))
    patch = readPatch(filename = file.path(dataPath, tPatches$name[i]), 
                      reqColumns = reqColumns)
    # print(colnames(patch))
    if (i == 1) {
      tStitched = patch
    } else {
      tStitched = tStitched %>% add_row(patch)
    }
  }
  tStitched = tStitched %>% arrange(date, sampleid)
  return(tStitched)
 }
 appendDailyData = function(fnTotal, fnTotalSofar, fnDay, reqColumns) {
  assert_that(file.exists(fnTotalSofar))#, 
              # msg = paste0("File ", fnTotalSofar, " doesn't exist"))
  assert_that(file.exists(fnDay))
  tTotalSofar = readPatch(fnTotalSofar, reqColumns = reqColumns)
  tDay        = readPatch(fnDay, reqColumns = reqColumns)
  tTotal = tTotalSofar %>% add_row(tDay)
  write_rds(x = tTotal, file = fnTotal, compress = 'gz')
  return(tTotal)
 }
 manualStitch_2022_09_30 = function() {
  a = 
    mutate(RAP = as.character('#')) %>% # correct for accidental inclusion in as.rsi conversion
    select(order(colnames(.))) %>%
    relocate(c(sampleid, date, is_clinical, is_icu, is_outward, IsolNr,
               Materiaal, specimen_type, specimen_group, uuid, patientid,
               old_patientid, birth_date, old_birth_date, department,
               type_dept, specialism, age, gender, MIC, RAP, mo), .before = 1)
  # a = a %>% mutate(across(AMC:VOR, as.rsi)) was already done before
  b = readPatch('bk_2022-06-17_2022-09-29.rds', reqColumns) %>%
    select(order(colnames(.))) %>%
    relocate(c(sampleid, date, is_clinical, is_icu, is_outward, IsolNr,
               Materiaal, specimen_type, specimen_group, patientid,
               birth_date, department,
               type_dept, specialism, age, gender, MIC, RAP, mo), .before = 1)
  b = b %>% mutate(across(AMC:VOR, as.rsi))
  tTotal = a %>% 
    add_row(b) %>%
    mutate(MIC = as.mic(MIC),
           mo = as.mo(mo))
  # Warning: Problem with `mutate()` column `mo`.
  # ℹ `mo = as.mo(mo)`.
  # ℹ 
  # in `as.mo()`: nine unique values (covering 0.0%) could not be coerced and were considered 'unknown': "}hemstre", "}Primair",
  # "acgsch", "anrhyd", "bclmeg", "BK_Isolaat_AEF", "corjimi", "strsalg" and "vvStrpne".
  # Use `mo_failures()` to review them. Edit the `allow_uncertain` argument if needed (see ?as.mo).
  # You can also use your own reference data with set_mo_source() or directly, e.g.:
  #   as.mo("mycode", reference_df = data.frame(own = "mycode", mo = "B_ESCHR_COLI"))
  # mo_name("mycode", reference_df = data.frame(own = "mycode", mo = "B_ESCHR_COLI"))
  # Warning message:
  #   Problem with `mutate()` column `MIC`.
  # ℹ `MIC = as.mic(MIC)`.
  # ℹ in `as.mic()`: 3 results truncated (0%) that were invalid MICs: "?", "13" and "17090355431" 
  write_rds(x = tTotal, file = 'data/total_2022-09-29.rds', compress = 'gz')
 }
 lastLargePatchesStitching = function() manualStitch_2022_09_30()
 # > mo_uncertainties()
 # Matching scores (in blue) are based on pathogenicity in humans and the resemblance between the input and the full taxonomic name.
 # See `?mo_matching_score`.
 # 
 # "brelut" -> Brevundimonas lutea (B_BRVND_LUTE, 0.316)
 # Also matched: Brevibacterium luteolum (0.304)
 # 
 # "vvEncspp" -> Brevibacterium ravenspurgense (B_BCTRM_RVNS, 0.284)
 # read 1
 if (F) {
  tPatches = readAllPatches()
  tStitched = stitchPatches(tPatches)
  tStitched %>% write_rds(file = 'data/bk_archive_2014-08_2022-06_v2022-09-15.rds',
                          compress = 'gz')
    x = read_rds('data/bk_archive_2014-08_2022-06_v2022-09-15.rds')
  x = x %>% relocate(all_of(cns), .after = 'date') %>% 
    relocate(specimen_group, .after = specimen_type) %>%
    relocate(MIC, .after = mo)
  y = x %>% mutate(across(AMB:DAP, as.rsi))
  z = y %>% mutate(mo = as.mo(mo))
  mo_failures = c('}AeNeSt', '}hemstre', '}nietfe', '}Primair', 'acgsan', 'acgsch', 'bclmeg', 'bctfae', 'BK_Isolaat_AEF', 'BK_Isolaat_ANF', 'BK_Isolaat_PEF', 'chcgle', 'corjimi', 'enbbug', 'encfcmva', 'Meng', 'psspsy', 'staaurar', 'stainp', 'strsalg', 'surwad', 'vvAeNeSt', 'vvAePoSt', 'vvMRSA')
  write_rds(z, 'data/bk_archive_2014-08_2022-06_v2022-09-15_3.rds', 
            compress = 'gz')
  write_rds(z %>% filter(date >= '2021-07-01'), 'data/bk_archive_2021-07_2022-06_v2022-09-23_3.rds', 
            compress = 'gz')
 }
--- a/ui.R
+++ b/ui.R
@@ -1,4 +1,12 @@
 jsc <- '
 $(document).ready(function () {
  $("#allInput").parent().parent().parent().parent().css({ "width": "600px" });
  $("#specialtyInput").css({ "width": "590px" });
  $("#materialInput").parent().parent().css({ "width": "200px" });
  $("#materialInput").css({ "width": "190px" });
 });
 '
 ui <- dashboardPage(
  title = "RadaR2",
@@ -7,7 +15,7 @@ ui <- dashboardPage(
  dashboardHeader(
    title = span(img(src = "radar.png", height = 30), strong("RadaR2")),
-    titleWidth = 500,
+    titleWidth = 358,
    tags$li(
      a(
@@ -23,22 +31,54 @@ ui <- dashboardPage(
  # SIDEBAR -----------------------------------------------------------------
-  dashboardSidebar(disable = TRUE,
+  dashboardSidebar(
-                   sidebarMenu(id = "sidebar_id",
+    disable = TRUE,
-                               menuItem(icon = icon("check-square"),
+    width = 400,
    sidebarMenu(
      id = "sidebar_id",
      menuItem(
        icon = icon("user-nurse"),
        div(
          materialSwitch(
            inputId = "allInput",
            label = trnslt("All specialties selected"),
            value = TRUE,
-                                          status = "danger")),
+            status = "danger"
-                     menuItem(icon = icon("user-nurse"), 
+          ),
-                              checkboxGroupInput(inputId = "specialtyInput", 
+          checkboxGroupInput(
            # tags$head(tags$style("width: 400px; max-width: 400px;")),
            inputId = "specialtyInput",
            label = trnslt("Select specialties"),
-                                                 choices = sort(unique(radar_data$specialty_shiny)))),
+            choices = sort(unique(radar_data$specialty_shiny)),
-                     menuItem(icon = icon("hospital"),
+          )
-                              checkboxGroupInput(inputId = "departmentInput", 
+        )
      ),
      menuItem(
        icon = icon("hospital"),
        div(
          materialSwitch(
            inputId = "allDepartments",
            label = trnslt("All deparmtents selected"),
            value = TRUE,
            status = "danger"
          ),
          checkboxGroupInput(
            inputId = "departmentInput",
            label = trnslt("Select departments"),
-                                                 choices = sort(unique(radar_data$department))))
+            choices = sort(unique(radar_data$department))
          )
        )
      ),
      menuItem(
        icon = icon("flask"),
        div(
          checkboxGroupInput(
            inputId = "materialInput",
            label = trnslt("Select material"),
            choices = sort(unique(radar_data$Materiaal))
          )
        )
      )
    )
  ),
@@ -46,6 +86,7 @@ ui <- dashboardPage(
  # BODY --------------------------------------------------------------------
  dashboardBody(
    tags$head(tags$script(jsc)),
    # fluidRow(
    #   column = 12,
@@ -59,12 +100,48 @@ ui <- dashboardPage(
      ".nav-tabs {background-color: white;}
      .nav-tabs-custom .nav-tabs li.active:hover a, .nav-tabs-custom .nav-tabs li.active a {background-color: transparent; border-color: transparent;}
      .nav-tabs-custom .nav-tabs li.active {border-top-color: black;}"),
      tabBox(
        id = 'RadaRtabs',
        width = '100%',
        # column(width = 6, 'Instructie'),
        # column(width = 6, 'Disclaimer, blabla blab la bla lbal bla lbal blablal', style="height: 300px;")
        tabPanel(
          'Start', 
          div(
            h3('Disclaimer'), 
            paste('blabla bla bla, legal legal legal.',
                  'Ok, someone needs to write some wise stuff here.',
                  "Like only use for surveillance, not intended for ",
                  "individual patients' clinical support, etc. etc. ",
                  "Yadda yadda yadda"),
            hr()
            ),
          fluidRow(
            column(2, checkboxInput('cbDisclaimerRead', "I understand"), 
                   style = 'margin-left: 10px;'), 
            column(2, actionButton('btnDisclaimerUnderstood', 
                                 label = 'confirm', 
                                 icon = icon("legal", class = "opt")))
          ),
          fluidRow(
            hr()
          ),
          div(
            h3('Instruction'), 
            tags$ul(tags$li('do this'), 
                    tags$li('do that'), 
                    tags$li('do such'), 
                    tags$li('do so'))
          ),
        ),
        tabPanel(
          'Data', 
          style = "width: 100%;",
          div(
            id = "resistance", 
            column(
              width = 6,
              uiOutput("box2")
            ),
            column(
              width = 6,
@@ -81,4 +158,6 @@ ui <- dashboardPage(
          )
        )
      )
    )
  )
 )
--- a/updateGlimsData.R
+++ b/updateGlimsData.R
@@ -0,0 +1,82 @@
 require(readr)
 require(lubridate)
 cat(paste0('\14\n', now(), '\n'))
 oldDir = getwd()
 basePath = '/share/mmbi/radar2'
 setwd(basePath)
 source('interfaces.R')
 source('stitchArchive.R')
 source('4D_archive.R')
 dataPath = 'data'
 archivePath = 'data/archive'
 tFiles = read_rds(file.path(dataPath, 'tFilesInfo.rds'))
 tFileOvw = read_rds(file.path(dataPath, 'tFileOvw.rds'))
 if (today() >= '2022-09-30') {
  require(assertthat, quietly = T)
  # get the last date that is present in the data sofar
  maxDate = tFiles %>% pull(fileDate) %>% max(na.rm = T)
  # catch up with today
  nDaysToUpdate = difftime(today(), maxDate) %>% as.numeric()
  daysToUpdate = (maxDate + 1:nDaysToUpdate)
  for (d in 1:nDaysToUpdate) {
    currDate = daysToUpdate[d]
    # aDate is converted to Excel style numeric format with origin '1970-01-01'
    # currDate = as.Date(aDate, origin = lubridate::origin)
    tFiles = tFiles %>% 
      filter(!is.na(fileDate)) %>% 
      addFileInfoOfDay(folders = folders, 
                       ymdDate = currDate)
    tInfoDay = tFiles %>% filter(fileDate == currDate)
    a = assert_that(nrow(tInfoDay) > 0, 
                    msg = paste0('tInfoDay has 0 rows: no files present for ', 
                                 currDate, '?'))
  }
  tInfoDays = tFiles %>% filter(fileDate %in% daysToUpdate)
  tFileOvwToday = deriveFileOverview(tInfoDays)
  a = assert_that(nrow(tFileOvwToday) > 0, 
                  msg = 'derivederiveFileOverview() failed')
  tFileOvw = tFileOvw %>% ungroup() %>% 
    add_row(tFileOvwToday) %>% 
    filter(!is.na(fileDate))
  write_rds(tFiles, file.path(dataPath, 'tFilesInfo.rds'))
  write_rds(tFileOvw, file.path(dataPath, 'tFileOvw.rds'))
  # write GLIMMS data per date in 'daysToUpdate' and add it to the data sofar
  for (d in 1:nDaysToUpdate) {
    currDate = daysToUpdate[d]
    tGlimmsOfDay = readArchiveOfDay(tFiles, tFileOvw, ymdDate = currDate)
    a = assert_that(nrow(tGlimmsOfDay) > 0, 
                    msg = paste0('readArchiveDay() yielded empty result for ', 
                                 currDate))
    filename = paste0(dataPath,'/bk_', currDate, '_', currDate, '.rds')
    write_rds(tGlimmsOfDay, filename, compress = 'gz')
    filenameTotalSofar = paste0(dataPath, '/total_', currDate - 1, '.rds')
    filenameTotal      = paste0(dataPath, '/total_', currDate, '.rds')
    filenameOfDay      = filename
    tTotal = appendDailyData(fnTotal      = filenameTotal, 
                             fnTotalSofar = filenameTotalSofar, 
                             fnDay        = filenameOfDay, 
                             reqColumns   = abAbbrevs)
    # nNewRows = tTotal %>% filter(date == today()) %>% nrow()
    # write_rds(tTotal, file = filenameTotal, compress = 'gz')
  }
 }
 setwd(oldDir)
--- a/validatie.R
+++ b/validatie.R
@@ -0,0 +1,95 @@
 source('global.R')
 if (F)  {
 # CoNS
  radar_data_first %>% 
    mutate(f = mo_name(mo)) %>% 
    filter(f == 'Coagulase-negative Staphylococcus (CoNS)') %>% 
    pull(MO) %>% 
    unique() %>% 
    mo_name()
  # versus
  radar_data %>% 
    mutate(f = mo_name(mo)) %>% 
    filter(f == 'Coagulase-negative Staphylococcus (CoNS)') %>% 
    pull(MO) %>% 
    unique() %>% 
    mo_name()
  # conclusie: Becker = TRUE maakt het verschil
 #  Fluclox onder SA
 radar_data_first %>% 
  select(sampleid, patientid, Isolnr, date, MO, mo, FLC) %>% 
  filter(!is.na(FLC), MO == 'staaur') %>% count(patientid, Isolnr, FLC)
  # conclusie: 212 samples, 142 patienten
 radar_data_first %>% 
  select(sampleid, patientid, Isolnr, date, MO, mo) %>% 
  filter(MO == 'staaur') %>% 
  count(patientid)
 # 93 samples, 91 patienten
 countSamplesPatients = function(moName4d) {
  nSamples = radar_data_first %>% 
    select(sampleid, patientid, Isolnr, date, MO, mo) %>% 
    filter(MO == moName4d) %>% 
    count(sampleid) %>% 
    nrow()
  nPatients = radar_data_first %>% 
    select(sampleid, patientid, Isolnr, date, MO, mo) %>% 
    filter(MO == moName4d) %>% 
    count(patientid) %>% 
    nrow()
  return(tribble(~mo4d, ~nSamples, ~nPatients,
                 moName4d, nSamples, nPatients))
 }
 t = countSamplesPatients('staaur') %>%
  add_row(countSamplesPatients('esccol')) %>%
  add_row(countSamplesPatients('klepne')) %>%
  add_row(countSamplesPatients('encfcl')) %>%
  add_row(countSamplesPatients('enbcco')) %>%
  add_row(countSamplesPatients('strmgr')) %>%
  add_row(countSamplesPatients('pseaer')) %>%
  mutate(mo_name = mo_name(mo4d)) %>%
  relocate(mo_name, .after = mo4d)
 }
 if (F) {
 allTrue = function(x) { identical(x, rep(T, length(x))) }
 allVal = function(x, val) { identical(x, rep(val, length(x))) }
 whereVal = function(x, val) { which(x == val) }
 whereAll = function(x, val) {
  rx = which(x %>% map_lgl(~ allVal(., val))) %>% as.numeric() 
  ry = which(z %>% 
               rowwise() %>% 
               mutate(skdsjui3irskjdf = allVal(c_across(), val)) %>% 
               pull(skdsjui3irskjdf) 
  ) %>% as.numeric()
  return(list(cols = colnames(x)[rx], x = rx, y = ry))
 }
 whereAny = function(x, val) {
  rx = which(x %>% map_lgl(~any(. == val))) %>% as.numeric()
  ry = which(x %>%
               rowwise() %>%
               mutate(skdsjui3irskjdf = any(c_across() == val)) %>% 
               pull(skdsjui3irskjdf)) %>% as.numeric()
  return(list(cols = colnames(x)[rx], x = rx, y = ry))
 }
 whereR = radar_data %>% 
  filter(MO == 'mrsa') %>% 
  select(is.rsi) %>% 
  mutate(across(everything(), as.character)) %>% 
  whereAny('R')
 }