# initially only interfacing 4D data
#
# NOTE(review): besides assertthat, this file uses the pipe and verbs such as
# filter/mutate/fill/pivot_wider, dmy/today/ymd, read_rds/write_rds and
# as.rsi/as.mic/as.mo without attaching anything itself; presumably the
# packages providing them are attached by the caller or by one of the sourced
# files -- confirm.
require(assertthat, quietly = TRUE)
source('4D_archive.R')
source('anonymize.R')
source('globalData.R')

# Read GLIMS archive data, either from a preprocessed .rds snapshot or by
# parsing the archive files whose date falls inside the requested period.
#
# Arguments:
#   ymdStartDate     lower bound (inclusive), compared against tFiles$fileDate.
#                    Ignored when readPreprocessed is TRUE.
#   ymdEndDate       upper bound (inclusive), compared against tFiles$fileDate.
#                    Ignored when readPreprocessed is TRUE.
#   readPreprocessed when TRUE (default), return the content of the hard-coded
#                    snapshot 'data/bk_2019.rds' and do nothing else.
#   tFiles           table of archive files with (at least) a fileDate column;
#                    must not be NULL when readPreprocessed is FALSE.
#   tFileOvw         passed unchanged to readArchive().
#
# Value:
#   readPreprocessed = TRUE : whatever read_rds() returns for the snapshot.
#   readPreprocessed = FALSE: a long-format table (one row per sample /
#                    isolate / micro-organism / antibiotic combination), or
#                    NULL when readArchive() returns NULL.
#
# Raises: an assertthat error when readPreprocessed is FALSE and tFiles is NULL.
readGlimsArchive = function(ymdStartDate, ymdEndDate, readPreprocessed = TRUE,
                            tFiles = NULL, tFileOvw = NULL) {
  if (readPreprocessed) {
    # Exactly one snapshot is active; the alternatives are kept for reference.
    # return(read_rds('data/bk_archive_2014-08_2022-06_v2022-09-15_3.rds'))
    # return(read_rds('data/total_2022-09-30.rds'))
    return(read_rds('data/bk_2019.rds'))
    # return(read_rds('data/bk_2022.09.01_2022.10.13.rds'))
    # return(read_rds('data/archive_2022-01-01_2022-10-19_v2022-10-20_1530.rds')) # read_2_2022-10-20_1530
    # return(read_rds('data/read_10_2022-01-01_2022-10-25_x2022-10-25.rds'))
  } else {
    assert_that(!is.null(tFiles),
                msg = 'readGlimsArchive(.., readPreprocessed = F, ..)')

    # Only hand the files inside [ymdStartDate, ymdEndDate] to the reader.
    tArchive = readArchive(
      tFileInfo = tFiles %>%
        filter(
          fileDate >= ymdStartDate,
          fileDate <= ymdEndDate
        ),
      tFileOvw = tFileOvw,
      createDebugList = FALSE # save memory
    )

    bk = NULL
    if (!is.null(tArchive)) {
      radarData <- tArchive %>%
        # work towards RadaR nomenclature
        rename(
          sampleid = Monsternummer,
          patientid = PIN.Lot,
          department = ligafdeling,
          specialism = B,
          mo = MO
        ) %>%
        # some processing towards RadaR format
        mutate(
          gender = Geslacht,
          # case_when(Geslacht == 'M' ~ 'Male',
          #           Geslacht == 'V' ~ 'Female') %>% as.factor(),
          type_dept = specialism,
          birth_date = dmy(Geboortedatum),
          # age in whole years at `date`
          age = trunc((birth_date %--% date) / years(1)),
          Materiaal = replace_na(Materiaal, ''),
          Materiaal = str_trim(Materiaal),
          # Rapport = replace_na(Rapport, ''),
          # Rapport = str_trim(Rapport),
          specimen_group = 'bk',
          specimen_type = '',
          is_icu = is_icu(department),
          is_clinical = is_clinical(department),
          is_outward = is_outward(department)
        ) %>%
        # get AMR package (WHONET, EARS-Net std) mnemonics and names
        left_join(tAntibiotics, by = c('AB' = 'Mnemonic')) %>%
        rename(ab.amr = `EARS-Net.Mnemonic`) %>%
        # get the fields used in RadaR only
        select(
          sampleid, date, IsolNr, Materiaal, specimen_group, specimen_type,
          department, type_dept, specialism, is_icu, is_clinical,
          is_outward, birth_date, age, gender, patientid, mo, RIS, ab.amr,
          MIC, RAP
        )

      # return(radarData)

      # these sub steps are for debugging purposes
      z <- radarData %>%
        mutate(IsolNr = as.numeric(IsolNr)) %>%
        arrange(sampleid, department) %>%
        group_by(sampleid) %>%
        # fill repeating data
        fill(patientid, age, gender, birth_date,
             department, type_dept, specialism) %>%
        # Per sample keep only the rows of the highest isolate number; rows
        # whose IsolNr is NA do not satisfy the comparison and are dropped.
        filter(
          # !is.na(mo),
          # !is.na(RIS),
          IsolNr == max(IsolNr, na.rm = TRUE)
        ) %>%
        ungroup() %>%
        distinct(sampleid, date, IsolNr, mo, ab.amr, .keep_all = TRUE)
      # # get distinctive rows, arrange not strictly necessary
      # # arrange(sampleid, date, department, type_dept, specialism,
      # #         birth_date, age, gender, patientid, mo, ab.amr, RIS) %>%
      # # commented 2022-10-14 by GB in order to get the slicing done correctly
      # # group_by(date, sampleid, IsolNr, department, type_dept, specialism,
      # #          birth_date, age, gender, patientid, mo, ab.amr) %>%
      # group_by(date, sampleid, IsolNr, mo, ab.amr) %>%
      # slice(n()) %>%
      # ungroup() %>%
      # distinct(date, sampleid, IsolNr, department, type_dept, specialism,
      #          birth_date, age, gender, patientid, mo, ab.amr, .keep_all = T) %>%
      # # filter only reported rows, with a micro-organism and RIS value
      # filter(
      #   !is.na(RAP)
      #   # !is.na(mo),
      #   # !is.na(RIS)
      # )

      # NOTE(review): this early return makes everything below, up to the
      # closing brace of this `if`, unreachable. Callers therefore receive the
      # long-format table `z`, never the pivoted `bk`. Left untouched because
      # changing it would alter the shape of the returned data; presumably a
      # debugging leftover -- confirm which format is intended.
      return(z)

      # pivot to wider format
      bk <- z %>%
        filter(!is.na(RIS)) %>%
        mutate(
          RIS = as.rsi(RIS),
          MIC = as.mic(MIC),
          mo = as.mo(mo)) %>%
        pivot_wider(names_from = ab.amr, values_from = RIS) %>%
        select(order(colnames(.))) %>%
        relocate(all_of(patchFirstColumns), .before = 1)
      # bk = bk %>%
      #   mutate(
      #     across(
      #       (which(colnames(.) == 'mo') + 1) : length(colnames(.)),
      #       as.rsi)
      #   )
    }
    return(bk)
  }
}

# Read the archive for a single day: both period bounds are set to ymdDate
# and the preprocessed snapshot is bypassed.
readArchiveOfDay = function(tFiles, tFileOvw, ymdDate) {
  readGlimsArchive(ymdStartDate = ymdDate,
                   ymdEndDate = ymdDate,
                   readPreprocessed = FALSE,
                   tFiles = tFiles,
                   tFileOvw = tFileOvw)
}

# Read the archive for the current date (today()), bypassing the preprocessed
# snapshot.
readArchiveToday = function(tFiles, tFileOvw) {
  readGlimsArchive(ymdStartDate = today(),
                   ymdEndDate = today(),
                   readPreprocessed = FALSE,
                   tFiles = tFiles,
                   tFileOvw = tFileOvw)
}

# anonymize the dataset
# (disabled scratch code, never executed)
if (FALSE) {
  # NOTE(review): readArchiveToday() is called without tFiles / tFileOvw, and
  # ymdStartDate / ymdEndDate are not defined in this block; both need to be
  # supplied before this snippet can be run.
  nopd = readArchiveToday()
  # nopd = nopd %>%
  #   filter(str_length(patientid) == 7) %>%
  #   rowwise() %>%
  #   mutate(patientid = getUupin(patientid)) %>%
  #   ungroup() %>%
  #   mutate(birth_date = ymd('1970-01-01'))

  filename = paste0('data/bk_', ymdStartDate, '_', ymdEndDate, '.rds')
  # never overwrite an existing export
  if (!file.exists(filename)) {
    write_rds(nopd, file = filename)
  }
}

# stitch together processed parts of the archive
# (disabled scratch code, never executed)
if (FALSE) {
  tibble(name = list.files(path = 'data/', pattern = 'bk_.*_.*\\.rds'))
}

# testing the slicing
# (disabled scratch code, never executed; expects a `radarData` table to be
# present in the calling environment)
if (FALSE) {
  ymdStartDate = ymd('2022-09-01')

  # find interesting cases
  radarData %>%
    arrange(sampleid, department) %>%
    group_by(sampleid) %>%
    fill(patientid, age, gender, birth_date,
         department, type_dept, specialism) %>%
    ungroup() %>%
    group_by(date, sampleid, IsolNr, mo, ab.amr) %>%
    # slice(n()) %>%
    ungroup() %>%
    select(1:3, 13:ncol(.)) %>%
    filter(ab.amr %in% c('CAZ', 'CTX')) %>%
    filter(!is.na(mo)) %>%
    count(date, sampleid, IsolNr, mo, ab.amr) %>%
    filter(n>1)

  # show without slicing last row
  radarData %>%
    arrange(sampleid, department) %>%
    group_by(sampleid) %>%
    fill(patientid, age, gender, birth_date,
         department, type_dept, specialism) %>%
    ungroup() %>%
    group_by(date, sampleid, IsolNr, mo, ab.amr) %>%
    # slice(n()) %>%
    select(1:3, 13:ncol(.)) %>%
    filter(sampleid == '22360599331',
           ab.amr %in% c('CAZ', 'CTX'))

  # show with slicing last row
  radarData %>%
    arrange(sampleid, department) %>%
    group_by(sampleid) %>%
    fill(patientid, age, gender, birth_date,
         department, type_dept, specialism) %>%
    ungroup() %>%
    group_by(date, sampleid, IsolNr, mo, ab.amr) %>%
    slice(n()) %>%
    select(1:3, 13:ncol(.)) %>%
    filter(sampleid == '22360599331',
           ab.amr %in% c('CAZ', 'CTX'))
}