diff --git a/.gitignore b/.gitignore index f20a8b8..495a57e 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1 @@ -.Rproj.user -.Rhistory -.RData -.Ruserdata -*.Rproj -.DS_Store -bk_2019.rds +copyFromNetwork.sh diff --git a/anonymize.R b/anonymize.R new file mode 100644 index 0000000..95baecf --- /dev/null +++ b/anonymize.R @@ -0,0 +1,56 @@ +require(tidyverse, quietly = T) +require(lubridate, quietly = T) +require(readr) +require(uuid, quietly = T) + +pinWidth = 7 + +intTopin = function(x) { + return(formatC(x, width = 7, format = "d", flag = "0")) +} + +# tPinMapping = tibble(origpin = intTopin(0), +# uupin = '78c3fe82-9343-4aa8-8b06-afc6da79a39a') +source('config.R', echo = F) +tPinMapping = read_rds(pinMappingDictFilename) + +getUupin = function(pin) { + if (length(pin) == 1) { + if (class(pin) == 'character') { + if (pin %in% tPinMapping$origpin) { + return((tPinMapping %>% filter(origpin == pin) %>% pull(uupin))) + } else { + if (str_length(pin) == pinWidth) { + newUupin = UUIDgenerate() + tPinMapping <<- tPinMapping %>% + add_row(origpin = pin, uupin = newUupin) + tPinMapping %>% write_rds('data/pinMapping.rds', compress = 'gz') + return(newUupin) + } else { + stop(paste0('pin width <> ', pinWidth)) + } + } + } else { + stop(paste0('class of pin should be character, but is ', class(pin))) + } + } else { + stop('Please supply exactly 1 pin') + } +} + +getOrigpin = function(uuId) { + tPinMapping %>% filter(uupin == uuId) %>% pull(origpin) +} + +anonymizeRds = function(x, ymdStartDate, ymdEndDate, suffix) { + y = x %>% + rowwise() %>% + mutate(patientid = getUupin(patientid)) %>% + ungroup() %>% + mutate(birth_date = ymd('1970-01-01')) + + filename = paste0('data/bk_', ymdStartDate, '_', ymdEndDate, '_', suffix, '.rds') + if (!file.exists(filename)) { + write_rds(y, file = filename) + } +} \ No newline at end of file diff --git a/config.R b/config.R new file mode 100644 index 0000000..7a16b4a --- /dev/null +++ b/config.R @@ -0,0 +1,6 @@ + +pinMappingDictFilename = '/share/mmbi/radar2/data/pinMapping.rds' + +# anonymized data for deployment +radarDataFilename = 'data/radarData.rds' # '/share/mmbi/radar2/interfaces/4D_final_results/data/radarData.rds' + diff --git a/copyGlimsData.R b/copyGlimsData.R new file mode 100644 index 0000000..6077b82 --- /dev/null +++ b/copyGlimsData.R @@ -0,0 +1,53 @@ +require(lubridate) +require(tidyverse) + +fromBasePath = '/share/transport_4d' +toBasePath = '/share/mmbi/radar2/data/archive' + +currDay = today() + +getTraverseDirectory = function(aDate) { + dirPattern = format(aDate, format = '%Y-%m') + dirsGlimsExport = list.dirs(path = fromBasePath, full.names = F) + + return(dirsGlimsExport[which(str_detect(dirsGlimsExport, dirPattern))]) + +} + +getFilesToCopy = function(aDate, aDir) { + filesGlimsExport = list.files(aDir) + + filePattern = format(aDate, format = '%y%m%d') + + return(filesGlimsExport[which(str_detect(filesGlimsExport, filePattern))]) +} + +copyFiles = function(aDate, overwrite = F, verbose = T) { + traverseDir = getTraverseDirectory(aDate) + fromDir = file.path(fromBasePath, traverseDir) + toDir = file.path(toBasePath, traverseDir) + filesToCopy = getFilesToCopy(aDate, fromDir) + if (verbose) + cat(paste0('\nCopying from ', fromDir, ' => ', toDir, ' ..\n')) + for (f in filesToCopy) { + copySuccess = file.copy(from = file.path(fromDir, f), + to = file.path(toDir, f), + overwrite = overwrite, + copy.mode = T, + copy.date = T) + cat(paste0(f, '\11', copySuccess, '\n')) + } +} + +copyFilesDateRange = function(nDays) { + ds = today() - nDays + d8s = ds + 1:nDays + for (d in 1:length(d8s)) { + d8 = d8s[d] + cat(paste0('copyFiles(', d8, ')\n')) + copyFiles(d8, verbose = F) + } +} + + +copyFilesDateRange(nDays = 3) diff --git a/createPatches.R b/createPatches.R new file mode 100644 index 0000000..4569f76 --- /dev/null +++ b/createPatches.R @@ -0,0 +1,22 @@ +require(tidyverse) +require(lubridate) + +source('globalData.R') +source('interfaces.R') # readGlimsArchive() + + +createPatches = function(tPeriods, dataPath, tFiles, tFileOvw) { + for (m in 1:nrow(tPeriods)) { + patch = readGlimsArchive( + ymdStartDate = tPeriods$startDate[m], + ymdEndDate = tPeriods$endDate[m], + readPreprocessed = F, + tFiles = tFiles, + tFileOvw = tFileOvw + ) + filename = paste0(dataPath,'/bk_', + tPeriods$startDate[m], '_', + tPeriods$endDate[m], '.rds') + write_rds(patch, filename, compress = 'gz') + } +} diff --git a/executeRead.R b/executeRead.R new file mode 100644 index 0000000..9ac07ee --- /dev/null +++ b/executeRead.R @@ -0,0 +1,68 @@ +source('interfaces.R') +source('createPatches.R') +source('stitchArchive.R') + +dataPath = 'data' +inspectRead = 10 +inspectReason = 'test selection of max IsolNr (only). mo and RIS can be NA' + +reads = read_rds(file.path(dataPath, 'reads.rds')) +reads = reads %>% add_row(read = inspectRead, desription = inspectReason) +write_rds(reads, file.path(dataPath, 'reads.rds')) + +tFiles = read_rds(file.path(dataPath, 'tFilesInfo.rds')) +tFileOvw = read_rds(file.path(dataPath, 'tFileOvw.rds')) + +updateFileOvw = F +if (updateFileOvw) { + # on 2022-10-20 tFiles was up-to-date, due to the daily update. + tFiles = read_rds(file.path(dataPath, 'tFilesInfo.rds')) + # However, tFileOvw somehow wasn't + + # get the last date that is present in the data sofar + maxDate = tFileOvw %>% pull(fileDate) %>% max(na.rm = T) + # catch up with today + nDaysToUpdate = difftime(today(), maxDate) %>% as.numeric() + daysToUpdate = (maxDate + 1:nDaysToUpdate) + + tInfoDays = tFiles %>% filter(fileDate %in% daysToUpdate) + tFileOvwToday = deriveFileOverview(tInfoDays) + a = assert_that(nrow(tFileOvwToday) > 0, + msg = 'derivederiveFileOverview() failed') + + tFileOvw = tFileOvw %>% ungroup() %>% + add_row(tFileOvwToday) %>% + filter(!is.na(fileDate)) + + # write_rds(tFiles, file.path(dataPath, 'tFilesInfo.rds')) Was up-to-date on 2022-10-20 + write_rds(tFileOvw, file.path(dataPath, 'tFileOvw.rds')) +} + +# define the patch periods +tPeriods = tribble( + ~startDate, ~endDate, + ymd('2022-01-01'), ymd('2022-02-01') - 1, + ymd('2022-02-01'), ymd('2022-03-01') - 1 + # ymd('2022-03-01'), ymd('2022-04-01') - 1, + # ymd('2022-04-01'), ymd('2022-05-01') - 1, + # ymd('2022-05-01'), ymd('2022-06-01') - 1, + # ymd('2022-06-01'), ymd('2022-07-01') - 1, + # ymd('2022-07-01'), ymd('2022-08-01') - 1, + # ymd('2022-08-01'), ymd('2022-09-01') - 1, + # ymd('2022-09-01'), ymd('2022-10-01') - 1, + # ymd('2022-10-01'), ymd('2022-10-20') - 1 + # ymd('2022-09-01'), ymd('2022-10-20') +) + +createPatches(tPeriods, dataPath, tFiles, tFileOvw) +# stitch together patches created by createPatches.R +# readAllPatches() assumes that all bk_*.rds files are in the 'data' path and +# that they all need to be read for stitching +tPatches = readAllPatches()# %>% filter(from >= '2022-01-01', to <= (ymd('2022-05-01') - 1)) +minFrom = min(tPatches$from, na.rm = T) +maxTo = max(tPatches$to, na.rm = T) +cat(paste0('Stitching patches: ', minFrom, ' - ', maxTo, '\n')) +tStitched = stitchPatches(tPatches, dataPath = 'data', reqColumns = abAbbrevs) +tStitched %>% write_rds(file = paste0('data/read_', inspectRead, '_', minFrom, '_', + maxTo, '_x', today(), '.rds'), + compress = 'gz') diff --git a/global.R b/global.R index 5d1d596..61fd637 100644 --- a/global.R +++ b/global.R @@ -18,7 +18,7 @@ library(cleaner) library(scales) library(ggiraph) -specialties <- read_csv("specialties.csv", lazy = FALSE) +source('interfaces.R') trsns <- read_tsv("translations.txt", lazy = FALSE) # to do: make system settings dependent @@ -30,17 +30,33 @@ trnslt <- function(english, new_lang = "nl", file = trsns) { } } -specialties <- specialties %>% +pathogens <- read_csv("data/grouping microorganisms.csv") + +# created by calling createAbMapping() +tAntibiotics = readxl::read_xlsx('data/ab_mapping.xlsx') %>% + select(Mnemonic, Naam, `EARS-Net.Mnemonic`, `EARS-Net.Name`) + +# radar_data <- readGlimsArchive(ymdStartDate = ymdStartDate, +# ymdEndDate = ymdEndDate, +# readPreprocessed = T) +radar_data <- readGlimsFinalResults() + +if (!'CRO' %in% colnames(radar_data)) + radar_data <- radar_data %>% add_column('CRO' = '') + +if (!'OXA' %in% colnames(radar_data)) + radar_data <- radar_data %>% add_column('OXA' = '') + +specialties <- read_csv("data/specialties.csv", lazy = FALSE) + +specialties <- specialties %>% mutate(specialty_shiny = if_else(specialty != "Outpatients", paste0(specialty, " (", str_replace_all(specialism, ";", ", "), ")"), "Outpatients"), department = if_else(is.na(department), "NO DEPARTMENT GIVEN", department)) - + specialties <- separate_rows(specialties, specialism, sep = ";") specialties <- separate_rows(specialties, department, sep = ";") -pathogens <- read_csv("grouping microorganisms.csv") - -radar_data <- readRDS("bk_2019.rds") radar_data <- radar_data %>% left_join(specialties) %>% @@ -50,10 +66,10 @@ radar_data <- radar_data %>% FLC = OXA) %>% left_join(pathogens) -# lab_ab <- c("CFCC", "CFCL", "CFOC", "CFSC", "CFTC", "COCL", "GEHI", "QUDA") +lab_ab <- c("CFCC", "CFCL", "CFOC", "CFSC", "CFTC", "COCL", "GEHI", "QUDA") radar_data <- radar_data %>% - # select_at(vars(!contains(lab_ab))) %>% + select_at(vars(!contains(lab_ab))) %>% rename_if(is.rsi, function(x) ifelse(is.na(suppressWarnings(as.ab(x))), x, suppressWarnings(as.ab(x)))) %>% diff --git a/globalData.R b/globalData.R new file mode 100644 index 0000000..a34ac7a --- /dev/null +++ b/globalData.R @@ -0,0 +1,12 @@ +abAbbrevs = c('AMB', 'AMC', 'AMK', 'AMP', 'AMX', 'ANI', 'ATM', 'AZA', 'CAS', 'CAZ', 'CCV', + 'CHL', 'CIP', 'CIX', 'CLF', 'CLI', 'COL', 'CPC', 'CRO', 'CTC', 'CTX', 'CXM', + 'CZA', 'CZO', 'CZT', 'DAP', 'DOX', 'ERY', 'FCT', 'FEP', 'FLC', 'FLU', + 'FOS', 'FOX', 'FOX1', 'FUS', 'GEN', 'GRI', 'IPM', 'ITR', 'KAN', 'LNZ', + 'LVX', 'MCZ', 'MEM', 'MFX', 'MIF', 'MNO', 'MTR', 'MUP', 'NAL', 'NIT', + 'NOR', 'PEN', 'PIP', 'PLB', 'POS', 'QDA', 'RIF', 'SAM', 'STR1', + 'SXT', 'TEC', 'TEM', 'TGC', 'TMP', 'TOB', 'TRB', 'TZP', 'VAN', 'VOR') + +patchFirstColumns = c('sampleid', 'date', 'is_clinical', 'is_icu', 'is_outward', + 'IsolNr', 'Materiaal', 'specimen_type', 'specimen_group', + 'patientid', 'birth_date', 'department', 'type_dept', + 'specialism', 'age', 'gender', 'MIC', 'RAP', 'mo') diff --git a/interface_4D.R b/interface_4D.R new file mode 100644 index 0000000..7279508 --- /dev/null +++ b/interface_4D.R @@ -0,0 +1,206 @@ +# initially only interfacing 4D data + +require(assertthat, quietly = T) + +source('4D_archive.R') +source('anonymize.R') +source('globalData.R') + + +readGlimsArchive = function(ymdStartDate, ymdEndDate, + readPreprocessed = T, + tFiles = NULL, tFileOvw = NULL) { + if (readPreprocessed) { + # return(read_rds('data/bk_archive_2014-08_2022-06_v2022-09-15_3.rds')) + # return(read_rds('data/total_2022-09-30.rds')) + return(read_rds('data/bk_2019.rds')) + # return(read_rds('data/bk_2022.09.01_2022.10.13.rds')) + # return(read_rds('data/archive_2022-01-01_2022-10-19_v2022-10-20_1530.rds')) # read_2_2022-10-20_1530 + # return(read_rds('data/read_10_2022-01-01_2022-10-25_x2022-10-25.rds')) + } else { + assert_that(!is.null(tFiles), + msg = 'readGlimsArchive(.., readPreprocessed = F, ..)') + tArchive = readArchive( + tFileInfo = tFiles %>% + filter( + fileDate >= ymdStartDate, + fileDate <= ymdEndDate + ), + tFileOvw = tFileOvw, + createDebugList = F # save memory + ) + + bk = NULL + if (!is.null(tArchive)) { + radarData <- tArchive %>% + # work towards RadaR nomenclature + rename(sampleid = Monsternummer, + patientid = PIN.Lot, + department = ligafdeling, + specialism = B, + mo = MO) %>% + # some processing towards RadaR format + mutate( + gender = Geslacht, + # case_when(Geslacht == 'M' ~ 'Male', + # Geslacht == 'V' ~ 'Female') %>% as.factor(), + type_dept = specialism, + birth_date = dmy(Geboortedatum), + age = trunc((birth_date %--% date) / years(1)), + Materiaal = replace_na(Materiaal, ''), + Materiaal = str_trim(Materiaal), + # Rapport = replace_na(Rapport, ''), + # Rapport = str_trim(Rapport), + specimen_group = 'bk', + specimen_type = '', + is_icu = is_icu(department), + is_clinical = is_clinical(department), + is_outward = is_outward(department) + ) %>% + # get AMR package (WHONET, EARS-Net std) mnemonics and names + left_join(tAntibiotics, by = c('AB' = 'Mnemonic')) %>% + rename(ab.amr = `EARS-Net.Mnemonic`) %>% + # get the fields used in RadaR only + select( + sampleid, date, IsolNr, Materiaal, + specimen_group, specimen_type, + department, type_dept, specialism, + is_icu, is_clinical, is_outward, + birth_date, age, gender, patientid, mo, RIS, ab.amr, MIC, RAP + ) + + # return(radarData) + + # these sub steps are for debugging purposes + z <- radarData %>% + mutate(IsolNr = as.numeric(IsolNr)) %>% + arrange(sampleid, department) %>% + group_by(sampleid) %>% + # fill repeating data + fill(patientid, age, gender, birth_date, department, type_dept, specialism) %>% + filter(#!is.na(mo), + #!is.na(RIS), + IsolNr == max(IsolNr, na.rm = T)) %>% + ungroup() %>% + distinct(sampleid, date, IsolNr, mo, ab.amr, .keep_all = T) + # # get distinctive rows, arrange not strictly necessary + # # arrange(sampleid, date, department, type_dept, specialism, + # # birth_date, age, gender, patientid, mo, ab.amr, RIS) %>% + # # commented 2022-10-14 by GB in order to get the slicing done correctly + # # group_by(date, sampleid, IsolNr, department, type_dept, specialism, + # # birth_date, age, gender, patientid, mo, ab.amr) %>% + # group_by(date, sampleid, IsolNr, mo, ab.amr) %>% + # slice(n()) %>% + # ungroup() %>% + # distinct(date, sampleid, IsolNr, department, type_dept, specialism, + # birth_date, age, gender, patientid, mo, ab.amr, .keep_all = T) %>% + # # filter only reported rows, with a micro-organism and RIS value + # filter( + # !is.na(RAP) + # # !is.na(mo), + # # !is.na(RIS) + # ) + + return(z) + + # pivot to wider format + bk <- z %>% + filter(!is.na(RIS)) %>% + mutate( + RIS = as.rsi(RIS), + MIC = as.mic(MIC), + mo = as.mo(mo)) %>% + pivot_wider(names_from = ab.amr, values_from = RIS) %>% + select(order(colnames(.))) %>% + relocate(all_of(patchFirstColumns), .before = 1) + + + # bk = bk %>% + # mutate( + # across( + # (which(colnames(.) == 'mo') + 1) : length(colnames(.)), + # as.rsi) + # ) + } + return(bk) + } +} + + +readArchiveOfDay = function(tFiles, tFileOvw, ymdDate) { + readGlimsArchive(ymdStartDate = ymdDate, + ymdEndDate = ymdDate, + readPreprocessed = F, + tFiles = tFiles, + tFileOvw = tFileOvw) +} + +readArchiveToday = function(tFiles, tFileOvw) { + readGlimsArchive(ymdStartDate = today(), + ymdEndDate = today(), + readPreprocessed = F, + tFiles = tFiles, + tFileOvw = tFileOvw) +} + +# anonymize the dataset +if (F) { + nopd = readArchiveToday() + # nopd = nopd %>% + # filter(str_length(patientid) == 7) %>% + # rowwise() %>% + # mutate(patientid = getUupin(patientid)) %>% + # ungroup() %>% + # mutate(birth_date = ymd('1970-01-01')) + + + filename = paste0('data/bk_', ymdStartDate, '_', ymdEndDate, '.rds') + if (!file.exists(filename)) { + write_rds(nopd, file = filename) + } +} + +# stich together processed parts of the archive +if (F) { + tibble(name = list.files(path = 'data/', pattern = 'bk_.*_.*\\.rds')) +} + +# testing the slicing +if (F) { + ymdStartDate = ymd('2022-09-01') + + # find interesting cases + radarData %>% + arrange(sampleid, department) %>% + group_by(sampleid) %>% + fill(patientid, age, gender, birth_date, department, type_dept, specialism) %>% + ungroup() %>% + group_by(date, sampleid, IsolNr, mo, ab.amr) %>% + # slice(n()) %>% + ungroup() %>% + select(1:3, 13:ncol(.)) %>% + filter(ab.amr %in% c('CAZ', 'CTX')) %>% filter(!is.na(mo)) %>% count(date, sampleid, IsolNr, mo, ab.amr) %>% filter(n>1) + + # show without slicing last row + radarData %>% + arrange(sampleid, department) %>% + group_by(sampleid) %>% + fill(patientid, age, gender, birth_date, department, type_dept, specialism) %>% + ungroup() %>% + group_by(date, sampleid, IsolNr, mo, ab.amr) %>% + # slice(n()) %>% + select(1:3, 13:ncol(.)) %>% + filter(sampleid == '22360599331', ab.amr %in% c('CAZ', 'CTX')) + + # show with slicing last row + radarData %>% + arrange(sampleid, department) %>% + group_by(sampleid) %>% + fill(patientid, age, gender, birth_date, department, type_dept, specialism) %>% + ungroup() %>% + group_by(date, sampleid, IsolNr, mo, ab.amr) %>% + slice(n()) %>% + select(1:3, 13:ncol(.)) %>% + filter(sampleid == '22360599331', ab.amr %in% c('CAZ', 'CTX')) + +} \ No newline at end of file diff --git a/interface_4D_final_results.R b/interface_4D_final_results.R new file mode 100644 index 0000000..e23246b --- /dev/null +++ b/interface_4D_final_results.R @@ -0,0 +1,307 @@ +suppressMessages({ + require(tidyverse) + require(lubridate) + require(vroom) + require(writexl) + require(AMR) +}) + + +minMemoryAsap = F + + +# Rapport ab: +# altijd wel hebben, +# # = niet gerapporteerd +# +# Rap st: +# # = niet gerapporteerd, niet opnemen als niet gerapporteerd +# niet gerapporteerd kan fout zijn + +dataPath = 'interfaces/4D_final_results/data' +# Antibiotica data +fnAB = 'AB-2021-22.csv' +# Microorganism data +fnST = 'ST-Alles.csv' +# Monster data +fnMS = 'MSv2-2022-tm10.csv' +# Isolaattesten data +fnTS = 'TS-Alles-221130-092256.csv' +# Tests data +fnTM = 'TM-2022-tm10.csv' + +source('globalData.R') +source('interfaces.R') + +# temporarily put here for dev +tAntibiotics = readxl::read_xlsx('data/ab_mapping.xlsx') %>% + select(Mnemonic, Naam, `EARS-Net.Mnemonic`, `EARS-Net.Name`) + + +tAB = vroom(file = file.path(dataPath, fnAB), show_col_types = F) +tST = vroom(file = file.path(dataPath, fnST), show_col_types = F) +tMS = vroom(file = file.path(dataPath, fnMS), skip = 5, show_col_types = F) +tTS = vroom(file = file.path(dataPath, fnTS), show_col_types = F) +tTM = vroom(file = file.path(dataPath, fnTM), show_col_types = F) + +tAB = tAB %>% + filter(Monsternummer %>% str_sub(1, 2) == '22') %>% + mutate(Monsternummer = as.character(Monsternummer)) + +# gerapporteerde isolaten +tST = tST %>% + filter( + Monsternummer %>% str_sub(1, 2) == '22', + # '#' = niet gerapporteerd, deze niet includeren + # in ST betekent dit: alleen NA wel includeren + is.na(Rap) # = gerapporteert + ) %>% + select(-`UMCG 4D Uniekmaker Specimen - IsolNr`) %>% + mutate(Monsternummer = as.character(Monsternummer)) + +tMS = tMS %>% + filter( + Monsternr %>% str_sub(1, 2) == '22', + ObjType != 'L' + # Materiaal == 'MP_BLOED' # 118922, 2023-02-07 + ) %>% + rename(Monsternummer = Monsternr, + patientid = `PIN-lot`) %>% + select(-`Materiaal Variabelen`, ObjType, Lot, Kamer) %>% + mutate(date = dmy_hms(paste(OntvOLabDt, as.character(Tijd)))) %>% + select(-OntvOLabDt, -Tijd) %>% + mutate(Monsternummer = as.character(Monsternummer)) + +tTS = tTS %>% + mutate(Monsternummer = as.character(Monsternummer)) + +# MS monsters die geen isolaat hebben zijn negatief +# vandaar nu de right_join +# Vervolgens moeten uit de MS file alleen de kweekbepalingen worden geincludeerd + +x = tMS %>% + mutate(Monsternummer = str_trim(Monsternummer)) %>% + left_join(tST, by = c('Monsternummer')) %>% + left_join(tAB, by = c('Monsternummer', 'Isolnr' = 'IsolNr')) %>% + left_join(tTS, + by = c('Monsternummer', 'Isolnr' = 'Isolaatnr'), + suffix = c('.MSxSTxAB', '.TS_IsolaatTest')) + +if (minMemoryAsap) { + # save memory + rm(tAB, tST, tMS) +} + +x = x %>% + # select(-`UMCG 4D Uniekmaker Specimen - IsolNr`) %>% + mutate(year = str_sub(Monsternummer, 1, 2) %>% as.numeric(), + weeknr = str_sub(Monsternummer, 3, 4) %>% as.numeric()) %>% + # smaller subset in order to keep the first explorations workable + filter(year == 22) + # weeknr >= 41) + +s = x %>% + # filter(Materiaal == 'MP_BLOED') %>% # 118922, 2023-02-07 + # fields expected by RadaR: + # + # sampleid , date , specimen_group , specimen_type , + # department , type_dept , specialism , + # is_icu , is_clinical , is_outward , + # age , gender , patientid , mo , + # + # align column names + rename( + sampleid = Monsternummer, + # mo = MO, # later by as.mo(MO) + rap.st = Rap, + rap.ab = Rapport.MSxSTxAB, + department = LigAfd, + specialism = Spec, + age = Leeftijd, + gender = Geslacht + ) %>% + # add yet missing fields + add_column( + specimen_group = 'bk', + specimen_type = '', + type_dept = '' # needed? + ) %>% + mutate( + is_icu = is_icu(department), + is_clinical = is_clinical(department), + is_outward = is_outward(department), + ) %>% + relocate(rap.ab, .after = rap.st) %>% + relocate(c(year, weeknr, date), .after = sampleid) %>% + # get AMR package (WHONET, EARS-Net std) mnemonics and names + left_join(tAntibiotics, by = c('AB' = 'Mnemonic')) %>% + rename( + AB_WHO = `EARS-Net.Mnemonic` + ) %>% + select(-AB) + +if (minMemoryAsap) { + # save memory + rm(x) +} + +# TODO: find diff in multiple rows per sampleid that yield io +if (F) { + s %>% filter(sampleid == '22010009431') %>% + select(sampleid, AB_WHO, RIS, MIC, MO) %>% + mutate( + RIS = as.rsi(RIS), + MIC = as.mic(MIC), + mo = as.mo(MO)) %>% + pivot_wider(names_from = AB_WHO, values_from = RIS) + # diff: rap.ab, Etst, Naam, AB_WHO, `EARS-Net.Name` +} + +x = s %>% + select(-`EARS-Net.Name`, -Naam, + # door de volgende waarden ontstaan dubbelingen in radarData, daarom + # tijdelijk wegfilteren. maar gaat later wel nodig zijn + -Rapport.TS_IsolaatTest, -Waarde.TS_IsolaatTest, -isolaattest, + -MIC, -Etst, -Diff, -rap.ab) %>% + # RIS values of 'V' are a pain in the neck + mutate(RIS = replace(RIS, RIS == 'V', NA)) %>% + group_by(sampleid, Isolnr, MO, AB_WHO) %>% + # mark rows to be deleted + mutate(deleteRow = n() > 1 & is.na(RIS) & !is.na(MO)) %>% + ungroup() %>% + # delete the rows that are marked with deleteRow + filter(!deleteRow) %>% + # remove column deleteRow + select(-deleteRow) %>% + # all multiple values should be absent now, so get distinct rows + distinct(sampleid, Isolnr, AB_WHO, RIS, .keep_all = T) + +y = x %>% + pivot_wider( + id_cols = c(sampleid, Isolnr, MO), + names_from = AB_WHO, + values_from = RIS + ) + +radarData = x %>% + # filter(!is.na(AB_WHO)) %>% + mutate( + RIS = as.rsi(RIS), + # MIC = as.mic(MIC), + mo = as.mo(MO)) %>% + pivot_wider(names_from = AB_WHO, values_from = RIS) %>% + # pivot_wider(names_from = Rapport.TS_IsolaatTest, + # values_from = Waarde.TS_IsolaatTest) + select(order(colnames(.))) %>% + relocate( + all_of( + c( + 'sampleid', 'Isolnr', 'patientid', 'year', 'weeknr', 'date', 'MMBProc', + 'department', 'type_dept', 'specialism', 'is_clinical', 'is_icu', 'is_outward', + 'specimen_type', 'specimen_group', + 'rap.st', + # 'rap.ab', + 'age', 'gender', + # 'Diff', 'Etst', + 'Waarde.MSxSTxAB', + # 'Waarde.TS_IsolaatTest', + # 'MIC', + 'MO', 'mo', 'Pos' + ) + ), + .before = 1 + ) %>% + select(-`NA`) + +if (minMemoryAsap) { + # save memory + rm(s) +} + +radarData = radarData %>% + filter(date >= '2022-01-01', date <= '2022-09-30') + + +if (F) { + # TODO je verwacht meer Neg dan Pos + # DONE hiervoor tMS left_join tST ipv andersom + radarData %>% filter(!is.na(Pos), MO == 'mrsa') %>% count(patientid) + radarData %>% filter(!is.na(Pos), MO == 'esccol') %>% count(patientid) + radarData %>% filter(!is.na(Pos), MO == 'pseaer') %>% count(patientid) + radarData %>% filter(!is.na(Pos), MO == 'klepne') %>% count(patientid) + radarData %>% filter(!is.na(Pos), MO == 'canalb') %>% count(patientid) + radarData %>% filter(!is.na(Pos), MO == 'enbcco') %>% count(patientid) + + radarData %>% filter(isolaattest == 'ESBL', Waarde.TS_IsolaatTest == '+', MO == 'enbcco') %>% count(patientid) + radarData %>% filter(isolaattest == 'ESBL', Waarde.TS_IsolaatTest == '+', MO == 'esccol') %>% count(patientid) + radarData %>% filter(isolaattest == 'ESBL', Waarde.TS_IsolaatTest == '+', MO == 'klepne') %>% count(patientid) + radarData %>% filter(MO == 'klepne', GEN == 'R') %>% count(patientid) + radarData %>% filter(MO == 'klepne', CIP == 'R') %>% count(patientid) + radarData %>% filter(MO == 'klepne', MEM == 'R') %>% count(patientid) + radarData %>% filter(MO == 'staaur', CLI == 'R') %>% count(patientid) + + radarData %>% filter(is.na(MO), is.na(rap.st)) + + # rijen voor pat 8000005 in MS file: 36 + tMS %>% filter(patientid == '8000005') %>% count(Monsternummer) + # rijen voor pat 8000005 in ST file: 23 + tST %>% filter(Monsternummer %in% (tMS %>% filter(patientid == '8000005') %>% pull(Monsternummer) %>% unique())) %>% group_by(Monsternummer) %>% mutate(n = n()) %>% print(n=25) + # 22 * 36 = 792 + + + + x %>% filter(sampleid == '22070019631') %>% select(-year, -weeknr, -date, - patientid, -age, -ObjType, -Lot, -gender, -PC, -department, -Type, -Kamer, -Isolnr, MO, -Waarde.MSxSTxAB, -Pos, -rap.st, -MIC, -Diff, -isolaattest, -Waarde.TS_IsolaatTest, -Rapport.TS_IsolaatTest, -Afname, -is_icu, -is_clinical, -is_outward) %>% print(n=38) + + x %>% filter(sampleid == '22070019631', Isolnr == 3) %>% + # door deze waarden ontstaan dubbelingen in radarData, daarom + # tijdelijk wegfilteren. maar gaat later wel nodig zijn + # select(-MIC, -Etst, -Diff, -rap.ab) %>% + pivot_wider(names_from = AB_WHO, values_from = RIS) %>% view() + # pivot_wider(names_from = Rapport.TS_IsolaatTest, + # values_from = Waarde.TS_IsolaatTest) %>% view() + + listMultiValCols = function(x) { + z = x + colns = colnames(z) + collens = c() + for (c in 1:length(colns)) { + collens = c(collens, + z %>% + group_by(sampleid, Isolnr) %>% + pull(colns[c]) %>% unique() %>% length()) + } + multValCols = colns[which(collens != 1)] + return(z %>% select(sampleid, patientid, multValCols, RIS)) + } + + # dit AB levert nog steeds een List + x %>% filter(AB_WHO == 'CAZ') %>% count(sampleid, Isolnr, AB_WHO) %>% filter(n>1) + + x %>% + filter( + sampleid == '22070547731' %>% str_trim(), + AB_WHO == 'CAZ', + RIS != 'V' + ) %>% + select(sampleid, Isolnr, AB_WHO, RIS) + + key = c('sampleid', 'Isolnr') + y %>% filter(!is.na(FOX))%>% select(key, FOX) + + x %>% + select(sampleid, Isolnr, AB_WHO, RIS) %>% + pivot_wider(id_cols = c(sampleid, Isolnr), + names_from = AB_WHO, + values_from = RIS) %>% + select(1:5) + + + selectRelCols = function(x, s = c()) { + x %>% select(sampleid, Isolnr, MO, AB_WHO, RIS, all_of(s)) + } +} + + +# vroom_write(x = x, +# file = file.path(getwd(), dataPath, 'ST x AB_2022.csv'), +# delim = ';') diff --git a/interfaces.R b/interfaces.R new file mode 100644 index 0000000..b14022e --- /dev/null +++ b/interfaces.R @@ -0,0 +1,66 @@ +suppressMessages({ + require(tidyverse) + require(AMR) +}) + +source('config.R') +source('anonymize.R') + + +# moved from 4D_archive.R +createAbMapping = function() { + options(AMR_locale = 'nl') + abOpm = readxl::read_xlsx('data/ab_glims_2022-09-02.xlsx', + sheet = 1) + ab2 = readxl::read_xlsx('ab_glims_2022-09-02.xlsx', + sheet = 'codes uit Glims 2022-09-02') + + # first try to retrieve the AB from Naam + ab2 = ab2 %>% + select(Mnemonic, Naam, Toelichting) %>% + mutate(Naam = str_replace(Naam, 'clavulaanzuur', 'clavulanic acid'), + `EARS-Net.Mnemonic` = as.ab(Naam), + `EARS-Net.Name` = ab_name(Naam)) %>% + left_join(abOpm %>% select(name4d, Opm.Greetje), by = c('Mnemonic' = 'name4d')) + + # secondly, try to retrieve the AB from Mnemonic + ab2 = ab2 %>% + mutate(`EARS-Net.Mnemonic` = replace(`EARS-Net.Mnemonic`, + is.na(`EARS-Net.Mnemonic`), + as.ab(Mnemonic)), + `EARS-Net.Name` = replace(`EARS-Net.Name`, + is.na(`EARS-Net.Name`), ab_name(Mnemonic))) +} + +is_icu = function(departmentName) { + if (!is_empty(departmentName)) { + departmentName %in% + c('KINN', 'KINZ', 'KICW', 'KICN', 'KINW', # NICU IC + 'KICG', 'KICR', 'KICB', 'KICK', # Kinder IC + 'THIC', 'ICV2', # old and new name + 'NCIC', 'ICV1', # old and new name + 'E1IC', 'ICV3', # old and new name + 'CICA', 'CICB', 'CICD', 'CHIC', 'ICV4' # old and new name + ) + } else { + F + } +} + +is_clinical = function(departmentName) { + is_icu(departmentName) | + str_ends(departmentName, 'VA') | + departmentName %in% + c('L1SC', 'CIVZ', 'NEUC', 'L1KV', 'OZOK', 'BOA0', 'C1CC', 'BOB0') +} + +is_outward = function(departmentName) { + str_ends(departmentName, 'P') | + str_detect(departmentName, 'DB') +} + +readGlimsFinalResults = function() { + cat(paste0('Reading ', radarDataFilename, '\n')) + return(read_rds(radarDataFilename)) +} + diff --git a/interfaces/4D/src/4D_archive.R b/interfaces/4D/src/4D_archive.R new file mode 100644 index 0000000..77c60d0 --- /dev/null +++ b/interfaces/4D/src/4D_archive.R @@ -0,0 +1,299 @@ +require(tidyverse) +if (!require(lubridate)) { + install.packages('lubridate') + require(lubridate) +} +require(crayon) +require(stringr) + +# TODO: check wat ProcessAndMergeDailyFiles ingaat: Monsternummer is.character? +# TODO: min # rows = bijv. 10 i.v.m. privacy +# TODO: 2 versies: MMBI & 'Algemeen' + +memSize = function() { + result = tibble(object='', size=0, sizeMB=0) + objs = ls(envir = globalenv()) + for (i in 1:length(objs) ) { + objName = objs[i] + sizeBytes = as.numeric(object.size(get(objName))) + result = result %>% + add_row(object = objName, size = sizeBytes, sizeMB = sizeBytes / 1024^2) + } + result = result %>% + arrange(desc(size)) + return(list(result, sum(result$sizeMB))) +} + +gatherAbIds = function() { + archFolders = list.dirs(file.path(dataFolder, 'archive'), full.names = F) + archFolders = archFolders[str_length(archFolders) == 26] + for (archFolder in archFolders) { + folder = archFolder + abFiles = list.files(file.path(dataFolder, 'archive', folder), pattern = 'AB-*') + for (abFileName in abFiles) { + abFile = read_4D_file(file.path(dataFolder, 'archive', folder, abFileName)) + abNamesDict = tibble(name4d = abFile %>% pull(AB) %>% unique()) + newNamesDict = abNamesDict %>% filter(!name4d %in% a$name4d, !is.na(name4d)) + tNewNames = tibble(name4d = newNamesDict$name4d, + nameAMR = as.ab(newNamesDict$name4d), + nameAMRlong = ab_name(newNamesDict$name4d)) + a = a %>% add_row(tNewNames) + } + } + return(a) +} + +source('4D_day.R') + +useCachedFileInfo = T + +archivePath = './data/archive' +folders = list.files(path = archivePath, pattern = 'ImportedFromGlimms-*', full.names = T) + +addFileInfo = function(x, filePath, fileName) { + mtime = file.mtime(filePath) + + x = x %>% + add_row(filePath = filePath, + fileName = fileName, + fileType = str_sub(fileName, 1, 2), + fileDate = fileName %>% str_sub(4, 9) %>% ymd(), + modified = mtime, + # mdate = as.Date(ymd_hms(modified)), + size = file.size(filePath)) %>% + group_by(fileDate) %>% + mutate(nFilesPerDate = n()) %>% + ungroup() %>% + arrange(desc(fileDate)) + + return(x) +} + +deriveFileOverview = function(tFiles) { + tFiles %>% + group_by(fileDate) %>% + mutate(AB = cur_data() %>% + filter(str_sub(fileName, 1, 2) == 'AB') %>% + nrow(), + MS = cur_data() %>% + filter(str_sub(fileName, 1, 2) == 'MS') %>% + nrow(), + ST = cur_data() %>% + filter(str_sub(fileName, 1, 2) == 'ST') %>% + nrow(), + TM = cur_data() %>% + filter(str_sub(fileName, 1, 2) == 'TM') %>% + nrow(), + TS = cur_data() %>% + filter(str_sub(fileName, 1, 2) == 'TS') %>% + nrow() + ) %>% + slice(1) %>% + select(fileDate, AB, MS, ST, TM, TS) +} + +getFilesInfo = function() { + tFiles = tibble(filePath = 'deleteme', + fileName = '', + fileType = '', + fileDate = ymd('2022-01-01'), + modified = ymd('2022-01-01'), + size = 0 + ) + for (folder in folders) { + # if (folder == 'ImportedFromGlimms-2014-09') { + folderPath = folder #file.path(archivePath, folder) + files = list.files(folderPath, pattern = '*') + for (file in files) { + filePath = file.path(folderPath, file) + tFiles = tFiles %>% + addFileInfo(filePath = filePath, fileName = file) + } + } + tFiles = tFiles %>% + mutate(mdate = as.Date(ymd_hms(modified))) %>% + filter(filePath != 'deleteme') %>% + arrange(fileDate, fileName) + tFiles = tFiles %>% + left_join(tFiles %>% count(fileDate, name = 'nFilesPerDate'), by = 'fileDate') + + return(tFiles) +} + +addFileInfoOfDay = function(x, folders, ymdDate, overwrite = F) { + datePattern = format(ymdDate, format = '%y%m%d') + dirPattern = format(today(), format = '%Y-%m') + folder = folders[which(folders %>% str_detect(dirPattern))] + dayFiles = list.files(path = folder, pattern = datePattern) + for (fileName in dayFiles) { + cat(paste0('Processing ', fileName, '\n')) + x = x %>% addFileInfo(filePath = file.path(folder, fileName), + fileName = fileName) + } + + return(x) +} + +addFileInfoOfToday = function(tFiles, folders) { + browser() + dirPattern = format(today(), format = '%Y-%m') + folder = folders[which(folders %>% str_detect(dirPattern))] + tFiles = tFiles %>% + addFileInfoOfDay(folderPath = file.path(archivePath, folder), + ymdDate = today(), + overwrite = F) + return(tFiles) +} + +plotFileSize = function(tFiles) { + tFiles %>% + filter(fileDate > ymd('2014-10-01'), + fileDate <= ymd('2014-10-31'), + fileType != 'Ee') %>% + mutate(dayOfWeek = wday(fileDate), + dayOfWeekName = weekdays(fileDate), + size = size / (1024) + ) %>% + ggplot(aes(x = dayOfWeek, y = size, group = dayOfWeek)) + + geom_boxplot(aes(fill = fileType)) + + scale_y_continuous(limits = c(0, 1250)) + + facet_wrap(facets = c(~fileType)) + + labs(y = 'size [KB]') + + theme_light() + + theme( + plot.title = element_text(face = 'bold') + ) +} + +if (useCachedFileInfo) { + tFiles = read_rds('./data/tFiles_2022-10-13.rds') + tFileOvw = read_rds('./data/tFileOvw_2022-10-13.rds') +} else { + tFiles = getFilesInfo() + tFileOvw = deriveFileOverview(tFiles) + + write_rds(tFiles, paste0('./data/tFiles_', today(), '.rds')) + write_rds(tFileOvw, paste0('./data/tFileOvw_', today(), '.rds')) +} + +readDailyFile = function(tFiles, date, prefix) { + result = NULL + fileNames = tFiles %>% + filter(fileDate == date, + fileType == prefix) %>% + pull(filePath) + if (length(fileNames) > 0) { + for (f in 1:length(fileNames)) { + fileName = fileNames[f] + cat(silver(paste0('Reading ', fileName)), '\n') + fileContent = read_4D_file(fileName) + if (f == 1) { + result = fileContent + } else { + result = result %>% + add_row(fileContent) + } + } + } + return(result) +} + +readDailyMSFile = function(tFiles, date) { + result = NULL + fileNames = tFiles %>% + filter(fileDate == date, + fileType == 'MS') %>% + pull(filePath) + if (length(fileNames) > 0) { + for (f in 1:length(fileNames)) { + fileName = fileNames[f] + cat(silver(paste0('Reading ', fileName)), '\n') + msFileContent = read_4D_MS_file(fileName) + if (f == 1) { + result = msFileContent + } else { + result = result %>% + add_row(msFileContent) + } + } + } + return(result) + +} + +readDailyFiles = function(tFiles, date) { + abFile = readDailyFile(tFiles, date, 'AB') + msFile = readDailyMSFile(tFiles, date) + stFile = readDailyFile(tFiles, date, 'ST') + tmFile = readDailyFile(tFiles, date, 'TM') + tsFile = readDailyFile(tFiles, date, 'TS') + + return(list(abFile, msFile, stFile, tmFile, tsFile)) +} + +readArchive = function(tFileInfo, tFileOvw, createDebugList = FALSE) { + tMergeDates = NULL + tsDates = NULL + tMergeDatesDebugList = list() + dates = tFileInfo %>% filter(!is.na(fileDate)) %>% pull(fileDate) %>% unique() + for (d in 1:length(dates)) { + currDate = dates[d] + cat(paste0('\nd = ', d, ', date: ', currDate, '\n')) + print(tFileOvw %>% filter(fileDate == currDate)) + + dateFiles = readDailyFiles(tFileInfo, currDate) + + if (!is.null(dateFiles[[1]])) { + processed = ProcessAndMergeDailyFiles( + dateFiles[[1]] %>% select(-...15), + dateFiles[[2]], + dateFiles[[3]], + dateFiles[[4]], + dateFiles[[5]] + ) + + if (!is.null(processed$merged)) { + tMergeDate = processed$merged %>% # abFile, stFile and msFile joined + mutate(date = currDate, .before = 1) + if (createDebugList) tMergeDatesDebugList[[d]] = tMergeDate + + # separate tables of (ab|st|ms|ts|tm)File of the current day can be found + # in processed$(ab|st|ms|ts|tm)File, for instance: + if (!is.null(processed$tsFile)) { + tsDate = processed$tsFile %>% + mutate(date = currDate, .before = 1) + } + + if (is.null(tMergeDates)) { + if (nrow(tMergeDate) > 0) { + tMergeDates = tMergeDate + } + if (exists('tsDate')) { + if (nrow(tsDate) > 0) { + tsDates = tsDate + } + } + } else { + if (nrow(tMergeDates) > 0) { + tMergeDates = tMergeDates %>% add_row(tMergeDate) + } + if (exists('tsDates')) { + if (!is.null(tsDates)) { + if (nrow(tsDates)) { + tsDates = tsDates %>% add_row(tsDate) + } + } + } + } + tMergeDates = tMergeDates %>% + mutate(Monsternummer = str_trim(Monsternummer)) + } + } + } + + if (createDebugList) { + return(list(tMergeDates, tMergeDatesDebugList)) + } else { + return(tMergeDates) + } +} diff --git a/interfaces/4D/src/4D_day.R b/interfaces/4D/src/4D_day.R new file mode 100644 index 0000000..4766c54 --- /dev/null +++ b/interfaces/4D/src/4D_day.R @@ -0,0 +1,268 @@ +# initially only interfacing 4D data + +# TODO loggen zoveel mogelijk over ontvangen data (nieuwe GLIMS data) +# TODO pushen R scripts naar git server waar Gerhard en Kornelis bij kunnen + +# require(stringr) +require(tidyverse) +require(AMR) + +dataFolder = './data' + +get_most_recent_4D_file = function(path, prefix) { + files = list.files(dataFolder, paste0('^', prefix, '*')) + tFiles = tibble(name = files, + mtime = file.mtime(file.path(dataFolder, files))) %>% + arrange(desc(mtime)) + + tmpFilename = tFiles$name[1] + + return(tmpFilename) +} + +read_4D_file = function(path) { + tFile = read_tsv(path, col_types = paste0(rep('c', 30), collapse = '')) + + # remove comment lines + if (nrow(tFile) > 0) { + tFile = tFile %>% + slice( seq(4, nrow(tFile)) ) %>% + filter(str_sub(ID, 1, 3) != '___') + } + + if (colnames(tFile)[1] == 'X1') { + if (str_detect(path, 'AB-')) { + tFile = tibble(ID = '0', + Monsternummer = '', + IsolNr = '', + AB = '', + V = '', + PrefMIC ='', + MIC = '', + D = '', + DIF = '', + PrefEtest = '', + E = '', + ETEST = '', + RIS = '', + RAP = '', + ...15 = '') + } else if (str_detect(path, 'MS-')) { + tFile = tibble(UMCG.Monster= '0', + PIN.Lot= '0', + TypeObject= '0', + Lot= '0',Echtgenoot= '0', + AchternNaam= '0', + Geboortedatum= '0', + Geslacht= '0', + OntvOLabDatum= '0', + Tijdstip.materiaal.afgenomen= '0', + PC= '0', + Aanvrager= '0', + ligafdeling= '0', + A= '0', + B= '0', + C= '0', + Materiaal= '0', + Materiaal.variabelen= '0', + X = F) + } else if (str_detect(path, 'ST-')) { + tFile = tibble(ID = '0', + Monsternummer = '', + MMBProc = '', + Isolnr = '', + MO = '', + Waarde = '', + Pos = '', + Rap = '') + } else if (str_detect(path, 'TM-')) { + tFile = tibble(ID = '0', + Monsternummer = '', + Test = '', + Uitslag = '') + } else if (str_detect(path, 'TS-')) { + tFile = tibble(ID = '0', + Monsternummer = '', + Isolnr = '', + Test = '', + Waarde = '', + Rapport = '') + } + } + if ('ID' %in% colnames(tFile)) { + tFile = tFile %>% + filter(ID != '0') + } else if ('UMCG.Monster' %in% colnames(tFile)) { + tFile = tFile %>% + filter(UMCG.Monster != '0') + } + + return(tFile) +} + +read_4D_TM_file = function(path) { + tmFilename = str_remove(path, paste0(dirname(path), '/')) + tmFileL = readLines(path) + tmFileL = tmFileL[c(5:length(tmFileL))] + for (l in 1:length(tmFileL)) { + if (str_count(tmFileL[l], '\t') == 3) { + tmFileL = paste0(tmFileL, '\t') + } + } + newTmFilename = file.path(dirname(path), + tmFilename %>% str_replace('TM', 'bTM')) + if (file.exists(newTmFilename)) file.remove(newTmFilename) + writeLines(tmFileL, newTmFilename) + + tFile = read_tsv(newTmFilename, + col_names = c('ID','Monsternummer','Test','Status.test.tm','Uitslag'), + col_types = 'ccccc') + + # remove comment lines + tFile = tFile %>% + slice(c(5:(tFile %>% nrow()))) %>% + mutate( + Monsternummer = as.character(Monsternummer) + ) + return(tFile) +} + +get_most_recent_4D_MS_file = function(path) { + # get files + msFiles = list.files(path, '^MS*') + # create table with modification datetimes, for selection of the most recent one + tmsFiles = tibble(name = msFiles, + mtime = file.mtime(file.path(path, msFiles))) %>% + arrange(desc(mtime)) + # get the name of the most recently modified file + msFilename = tmsFiles$name[1] + + return(msFilename) +} + +read_4D_MS_file = function(path) { + # print(path) + msFilename = str_remove(path, paste0(dirname(path), '/')) + # read content + msFileL = readLines(path) + # remove lines with repeated '_', useful for the human eye only + msFileL = msFileL[str_sub(msFileL, 1, 5) != '_____'] + # replace (real) header + msHeader = readLines(file.path(dataFolder, 'headerMS.txt')) + msFileL[1] = paste0(msHeader, '\t') + # replace tabs within a date time combination with a space in line 2 and further, + # for example: replace '29/09/2021 14:05' by '29/09/2021 14:05' + # ↑ = '\t' ↑ = ' ' + for (i in 2:length(msFileL)) { + msFileL[i] = msFileL[i] %>% str_replace_all('(?<=[:digit:]{4})\t(?=[:digit:]{2}:[:digit:]{2})', ' ') + } + # write to new machine readable file + newMsFilename = file.path(dirname(path), + msFilename %>% str_replace('MS', 'bMS')) + if (file.exists(newMsFilename)) file.remove(newMsFilename) + writeLines(msFileL, newMsFilename) + # finally: read the machine readable file + result = read.delim(file = newMsFilename, sep = '\t') %>% + as_tibble() %>% + mutate( + UMCG.Monster = as.character(UMCG.Monster), + PIN.Lot = as.character(PIN.Lot), + Aanvrager = as.character(Aanvrager), + C = as.character(C) + ) + # Hurray! + return(result) +} + +# abFile = read_4D_file(dataFolder, 'AB') +# msFile = read_4D_MS_file(dataFolder) +# stFile = read_4D_file(dataFolder, 'ST') +# tmFile = read_4D_file(dataFolder, 'TM') +# tsFile = read_4D_file(dataFolder, 'TS') + +ProcessAndMergeDailyFiles = function(abFile, msFile, stFile, tmFile, tsFile) { + if (!is.null(abFile)) + abFile = abFile %>% + filter(!is.na(RAP)) #%>% + # mutate( + # #Monsternummer = as.character(Monsternummer) + # AB.amr = AMR::ab_property(AB, 'ab') %>% as.character() + # ) + + if (!is.null(stFile)) + stFile = stFile %>% + filter(!is.na(Rap)) %>% + # mutate(Monsternummer = as.character(Monsternummer) %>% str_trim()) %>% + rename(IsolNr = Isolnr) + + if (!is.null(msFile)) + msFile = msFile %>% + rename(Monsternummer = UMCG.Monster) %>% + # mutate(Monsternummer = as.character(Monsternummer) %>% str_trim()) %>% + select(-X) + + # if (!is.null(tmFile)) + # tmFile = tmFile %>% + # mutate(Monsternummer = as.character(Monsternummer) %>% str_trim()) + + if (!is.null(tsFile)) + tsFile = tsFile %>% + filter(!is.na(Rapport)) + # mutate(Monsternummer = as.character(Monsternummer) %>% str_trim()) + + if (!is.null(abFile) & !is.null(stFile)) + t_ab_st = abFile %>% + full_join(stFile, + by = c('Monsternummer', 'IsolNr'), + suffix = c('.ab', '.st')) + + if (exists('t_ab_st')) { + if (!is.null(t_ab_st) & !is.null(msFile)) { + t_ab_st_ms = t_ab_st %>% + full_join(msFile, by = 'Monsternummer') %>% + rename(Waarde.st = Waarde) + + return( + list( + merged = t_ab_st_ms, + abFile = abFile, + stFile = stFile, + msFile = msFile, + tsFile = tsFile, + tmFile = tmFile + ) + ) + + # t_ab_st_ms_ts = t_ab_st_ms %>% + # full_join(tsFile, by = c('Monsternummer', 'IsolNr' = 'Isolnr')) + # + # t_ab_st_ms_ts_tm = t_ab_st_ms_ts %>% + # add_column(Uitslag = NA, + # is_sampleResult = F) %>% + # add_row(Monsternummer = tmFile$Monsternummer, + # Test = tmFile$Test, + # Uitslag = tmFile$Uitslag, + # is_sampleResult = T) + # + # t_ab_st_ms_ts_tm = t_ab_st_ms_ts_tm %>% + # rename(Waarde.ts = Waarde) %>% + # filter(!Test %in% c('no_filter_yet')) + # + # # if ('X' %in% colnames(t_ab_st_ms_ts_tm)) { + # # t_ab_st_ms_ts_tm = t_ab_st_ms_ts_tm %>% + # # select(-'X') + # # print('Dit statement is voor debugging') + # # } + } + } + return( + list( + merged = NULL, + abFile = abFile, + stFile = stFile, + msFile = msFile, + tsFile = tsFile, + tmFile = tmFile + ) + ) +} diff --git a/interfaces/4D/src/debug weergave patho_plot.R b/interfaces/4D/src/debug weergave patho_plot.R new file mode 100644 index 0000000..128afac --- /dev/null +++ b/interfaces/4D/src/debug weergave patho_plot.R @@ -0,0 +1,26 @@ +require(tidyverse) +require(readr) + +v171 = read_rds('v171.rds') +v180 = read_rds('v180.rds') + +new_v180 = v180 %>% anti_join(v171 %>% left_join(v180)) + +source('interfaces.R') + +d = readGlimsArchive(NULL, NULL, T, NULL, NULL) +d %>% filter(str_starts(mo, 'B_ES')) %>% count(mo, CXM) +# deze aantallen zie ik niet terug in wat de applicatie presenteert: +# Ceftazidim: R (705), SI (2705), totaal dus 3410 + +# misschien dat data_select() voor andere aantallen zorgt? +data_select = d %>% + # filter(specialty_shiny %in% input$specialtyInput & department %in% input$departmentInput) %>% + filter_first_isolate(col_patient_id = "patientid", episode_days = 30) %>% + mutate(mo = as.mo(mo, Becker = TRUE)) + +data_select %>% filter(str_starts(mo, 'B_ES')) %>% count(mo, CXM) + +# Ja dus! + + diff --git a/interfaces/4D/src/debug_inspect for data validation.R b/interfaces/4D/src/debug_inspect for data validation.R new file mode 100644 index 0000000..1f3f9d9 --- /dev/null +++ b/interfaces/4D/src/debug_inspect for data validation.R @@ -0,0 +1,37 @@ +require(tidyverse) + +inspectRead = 10 +dataPath = 'data' + +abNames = read_rds('data/abNames.rds') + +filename = list.files(dataPath, paste0('read_', inspectRead, '_*')) +d = read_rds(file.path(dataPath, filename)) #%>% + # left_join(abNames, by = c('ab.amr' = 'nameAMR')) +# list a sampleid that occurs more often but not too often +examples = d %>% count(sampleid) %>% arrange(desc(n)) %>% filter(n < 50) + +view_d = d %>% + select(sampleid, date, IsolNr, MIC, mo, RIS, ab.amr, RAP) %>% #, name4d, nameAMRlong) %>% + mutate(asmo = suppressWarnings(as.mo(mo))) %>% + arrange(sampleid, IsolNr) #%>% + # filter(sampleid == examples$sampleid[i]) + +has_ecoli = view_d %>% filter(str_starts(asmo, 'B_ES')) +examples_having_ecoli = has_ecoli %>% + filter(sampleid %in% examples$sampleid) %>% + pull(sampleid) %>% + unique() + +e = 2 +view(view_d %>% + filter(sampleid == examples_having_ecoli[e]) %>% + mutate(IsolNr = as.numeric(IsolNr)) %>% + arrange(IsolNr),# %>% + #distinct(sampleid, date, IsolNr, mo, ab.amr, .keep_all = T), + title = 'example') + +if (F) { + view(view_d) + view_d %>% filter(!is.na(mo)) %>% pull(mo) %>% unique() +} diff --git a/reads.R b/reads.R new file mode 100644 index 0000000..b68ac96 --- /dev/null +++ b/reads.R @@ -0,0 +1,7 @@ +require(tidyverse) + +reads = tribble( + ~read, ~desription, + 3, 'returned z', + 4, 'returned radarData' +) diff --git a/server.R b/server.R index 67d5f4b..4cece77 100644 --- a/server.R +++ b/server.R @@ -2,18 +2,24 @@ server <- function(input, output, session) { + hideTab(inputId = 'RadaRtabs', target = 'Data') + + output$disclaimerText = renderText('you must confirm understanding the disclaimer') # define data selection --------------------------------------------------- - data_select <- reactive({if (input$box2.6_first != 365) { radar_data %>% - filter(specialty_shiny %in% input$specialtyInput & department %in% input$departmentInput) %>% + filter(specialty_shiny %in% input$specialtyInput & + department %in% input$departmentInput & + Materiaal %in% input$materialInput) %>% filter_first_isolate(col_patient_id = "patientid", episode_days = input$box2.6_first) %>% mutate(mo = as.mo(mo, Becker = TRUE)) } else { radar_data_first %>% - filter(specialty_shiny %in% input$specialtyInput & department %in% input$departmentInput) + filter(specialty_shiny %in% input$specialtyInput & + department %in% input$departmentInput & + Materiaal %in% input$materialInput) } }) @@ -39,12 +45,16 @@ server <- function(input, output, session) { observe({ x <- input$specialtyInput - if (!is.null(x)) { - update_departments <- - radar_data %>% filter(specialty_shiny %in% input$specialtyInput) - x <- sort(unique(update_departments$department)) - } - else { + if (input$allDepartments) { + if (!is.null(x)) { + update_departments <- + radar_data %>% filter(specialty_shiny %in% input$specialtyInput) + x <- sort(unique(update_departments$department)) + } + else { + x <- character(0) + } + } else { x <- character(0) } updateCheckboxGroupInput( @@ -56,6 +66,12 @@ server <- function(input, output, session) { ) }) + observeEvent(input$btnDisclaimerUnderstood, { + if (input$cbDisclaimerRead == T) { + showTab(inputId = 'RadaRtabs', target = 'Data', select = T) + } + }) + # sidebar hover ----------------------------------------------------------- @@ -82,7 +98,7 @@ server <- function(input, output, session) { choiceNames = c(trnslt("All"), trnslt("Year"), trnslt("Gender"), trnslt("Department"), trnslt("Specialty"), trnslt("Specialty code"), trnslt("ICU status"), trnslt("Clinical status"), trnslt("Outward status")), choiceValues = c("group_all", "year", "gender", "department", "specialty", "specialism", "is_icu", "is_clinical", "is_outward"), selected = "group_all", - direction = "vertical", + direction = "vertical", ), size = "xs", icon = icon("gear", class = "opt"), @@ -104,7 +120,8 @@ server <- function(input, output, session) { color = "#d33724", size = 0.7 ) - ),tabPanel( + ), + tabPanel( title = "Isolates (and pathogens) detected", div( style = "position: absolute; left: 0.5em; bottom: 0.5em;", @@ -312,7 +329,9 @@ server <- function(input, output, session) { pos_neg_plot <- reactive({ pos_neg_plot <- radar_data %>% - filter(specialty_shiny %in% input$specialtyInput & department %in% input$departmentInput) %>% + filter(specialty_shiny %in% input$specialtyInput & + department %in% input$departmentInput & + Materiaal %in% input$materialInput) %>% group_by_at(input$box2.1_group) %>% summarise(patients = n_distinct(patientid), total = n_distinct(sampleid), @@ -413,7 +432,6 @@ server <- function(input, output, session) { facet_wrap(input$box2.3_group) }} + NULL - }) output$patho_plot <- renderGirafe({ @@ -443,7 +461,7 @@ server <- function(input, output, session) { group_by_at(input$box2.2_group) %>% summarise("14 days" = sum(first_14, na.rm = TRUE), "30 days" = sum(first_30, na.rm = TRUE), - "60 days" = sum(first_60, na.rm = TRUE)) + "60 days" = sum(first_60, na.rm = TRUE)) %>% pivot_longer(cols = c("14 days", "30 days", "60 days")) }) @@ -479,7 +497,7 @@ server <- function(input, output, session) { output$box1 <- renderUI({ div( - style = "position: relative", + style = "position: relative;", tabBox( id = "box1", width = NULL, @@ -687,6 +705,8 @@ server <- function(input, output, session) { }} } + cat('Implicit join in get_resistance_df(..) @server.R:692\n') + get_resistance_df(data_select(), mo_selected = mo_selected, abx = if (mo_genus(mo_selected) %in% mo_genus(c("E. coli", @@ -728,9 +748,12 @@ server <- function(input, output, session) { theme_void() } else { - ggplot(isolate_prop_data(), aes(value, reorder(antibiotic, value_r), fill = factor(interpretation, levels = c("SI", "R")), - tooltip = paste0("R = ", value_r, " (", percent_r, ")\n", "SI = ", value_si, " (", percent_si, ")") - )) + + ggplot(isolate_prop_data(), + aes(value, reorder(antibiotic, value_r), + fill = factor(interpretation, levels = c("SI", "R")), + tooltip = paste0("R = ", value_r, " (", percent_r, ")\n", + "SI = ", value_si, " (", percent_si, ")") + )) + geom_col_interactive(colour = "black", position = "fill") + scale_fill_manual(limits = c("R", "SI"), @@ -993,14 +1016,18 @@ server <- function(input, output, session) { # isolate_prop_data() %>% if (input$box2 == "Positive & negative cultures") { all <- radar_data %>% - filter(specialty_shiny %in% input$specialtyInput & department %in% input$departmentInput) %>% + filter(specialty_shiny %in% input$specialtyInput & + department %in% input$departmentInput & + Materiaal %in% input$materialInput) %>% rename(group = input$box2.1_group) %>% filter(!is.na(group)) %>% distinct(patientid, .keep_all = TRUE) %>% count(group) %>% rename(n_all = n) pos <- radar_data %>% - filter(specialty_shiny %in% input$specialtyInput & department %in% input$departmentInput) %>% + filter(specialty_shiny %in% input$specialtyInput & + department %in% input$departmentInput & + Materiaal %in% input$materialInput) %>% rename(group = input$box2.1_group) %>% group_by(group) %>% # group_by_at("gender") %>% diff --git a/stitchArchive.R b/stitchArchive.R new file mode 100644 index 0000000..ef04333 --- /dev/null +++ b/stitchArchive.R @@ -0,0 +1,143 @@ +require(tidyverse) + +source('globalData.R') +reqColumns = abAbbrevs + +patchFirstColumns = c('sampleid', 'date', 'is_clinical', 'is_icu', 'is_outward', + 'IsolNr', 'Materiaal', 'specimen_type', 'specimen_group', + 'patientid', 'birth_date', 'department', 'type_dept', + 'specialism', 'age', 'gender', 'MIC', 'RAP', 'mo') + +readPatch = function(filename, reqColumns) { + patch = read_rds(filename) + for (c in 1:length(reqColumns)) { + if (!reqColumns[c] %in% colnames(patch)) { + patch = patch %>% + add_column(!!reqColumns[c] := as.rsi(NA)) %>% + select(order(colnames(.))) %>% + relocate(all_of(patchFirstColumns), .before = 1) + } + } + return(patch) +} + +readAllPatches = function() { + tPatches = tibble(name = list.files(path = 'data', pattern = 'bk_.*\\.rds')) + + tPatches = tPatches %>% + separate(name, into = c('prefix', 'from', 'tail'), sep = '_') %>% + separate(tail, into = c('to', 'ext'), sep = '\\.') %>% + filter(!is.na(to), !str_detect(to, '[:alpha:]')) %>% + mutate(name = paste0(prefix, '_', from, '_', to, '.', ext)) %>% + arrange(from, to) + + return(tPatches) +} + +stitchPatches = function(tPatches, dataPath = 'data', reqColumns) { + for (i in 1:nrow(tPatches)) { + # patch = read_rds(file.path(dataPath, tPatches[i,]$name)) + # for (c in 1:length(reqColumns)) { + # if (!reqColumns[c] %in% colnames(patch)) { patch = patch %>% add_column(!!reqColumns[c] := is.rsi(NA)) } + # } + cat(paste0(' patch ', i, ': ', tPatches$from[i], ' ', tPatches$to[i], '\n')) + patch = readPatch(filename = file.path(dataPath, tPatches$name[i]), + reqColumns = reqColumns) + # print(colnames(patch)) + if (i == 1) { + tStitched = patch + } else { + tStitched = tStitched %>% add_row(patch) + } + } + tStitched = tStitched %>% arrange(date, sampleid) + + return(tStitched) +} + +appendDailyData = function(fnTotal, fnTotalSofar, fnDay, reqColumns) { + assert_that(file.exists(fnTotalSofar))#, + # msg = paste0("File ", fnTotalSofar, " doesn't exist")) + assert_that(file.exists(fnDay)) + + tTotalSofar = readPatch(fnTotalSofar, reqColumns = reqColumns) + tDay = readPatch(fnDay, reqColumns = reqColumns) + + tTotal = tTotalSofar %>% add_row(tDay) + + write_rds(x = tTotal, file = fnTotal, compress = 'gz') + + return(tTotal) +} + +manualStitch_2022_09_30 = function() { + a = + mutate(RAP = as.character('#')) %>% # correct for accidental inclusion in as.rsi conversion + select(order(colnames(.))) %>% + relocate(c(sampleid, date, is_clinical, is_icu, is_outward, IsolNr, + Materiaal, specimen_type, specimen_group, uuid, patientid, + old_patientid, birth_date, old_birth_date, department, + type_dept, specialism, age, gender, MIC, RAP, mo), .before = 1) + # a = a %>% mutate(across(AMC:VOR, as.rsi)) was already done before + + b = readPatch('bk_2022-06-17_2022-09-29.rds', reqColumns) %>% + select(order(colnames(.))) %>% + relocate(c(sampleid, date, is_clinical, is_icu, is_outward, IsolNr, + Materiaal, specimen_type, specimen_group, patientid, + birth_date, department, + type_dept, specialism, age, gender, MIC, RAP, mo), .before = 1) + b = b %>% mutate(across(AMC:VOR, as.rsi)) + + tTotal = a %>% + add_row(b) %>% + mutate(MIC = as.mic(MIC), + mo = as.mo(mo)) + # Warning: Problem with `mutate()` column `mo`. + # ℹ `mo = as.mo(mo)`. + # ℹ + # in `as.mo()`: nine unique values (covering 0.0%) could not be coerced and were considered 'unknown': "}hemstre", "}Primair", + # "acgsch", "anrhyd", "bclmeg", "BK_Isolaat_AEF", "corjimi", "strsalg" and "vvStrpne". + # Use `mo_failures()` to review them. Edit the `allow_uncertain` argument if needed (see ?as.mo). + # You can also use your own reference data with set_mo_source() or directly, e.g.: + # as.mo("mycode", reference_df = data.frame(own = "mycode", mo = "B_ESCHR_COLI")) + # mo_name("mycode", reference_df = data.frame(own = "mycode", mo = "B_ESCHR_COLI")) + # Warning message: + # Problem with `mutate()` column `MIC`. + # ℹ `MIC = as.mic(MIC)`. + # ℹ in `as.mic()`: 3 results truncated (0%) that were invalid MICs: "?", "13" and "17090355431" + + write_rds(x = tTotal, file = 'data/total_2022-09-29.rds', compress = 'gz') +} + +lastLargePatchesStitching = function() manualStitch_2022_09_30() + +# > mo_uncertainties() +# Matching scores (in blue) are based on pathogenicity in humans and the resemblance between the input and the full taxonomic name. +# See `?mo_matching_score`. +# +# "brelut" -> Brevundimonas lutea (B_BRVND_LUTE, 0.316) +# Also matched: Brevibacterium luteolum (0.304) +# +# "vvEncspp" -> Brevibacterium ravenspurgense (B_BCTRM_RVNS, 0.284) + +# read 1 +if (F) { + tPatches = readAllPatches() + tStitched = stitchPatches(tPatches) + tStitched %>% write_rds(file = 'data/bk_archive_2014-08_2022-06_v2022-09-15.rds', + compress = 'gz') + + x = read_rds('data/bk_archive_2014-08_2022-06_v2022-09-15.rds') + x = x %>% relocate(all_of(cns), .after = 'date') %>% + relocate(specimen_group, .after = specimen_type) %>% + relocate(MIC, .after = mo) + y = x %>% mutate(across(AMB:DAP, as.rsi)) + z = y %>% mutate(mo = as.mo(mo)) + mo_failures = c('}AeNeSt', '}hemstre', '}nietfe', '}Primair', 'acgsan', 'acgsch', 'bclmeg', 'bctfae', 'BK_Isolaat_AEF', 'BK_Isolaat_ANF', 'BK_Isolaat_PEF', 'chcgle', 'corjimi', 'enbbug', 'encfcmva', 'Meng', 'psspsy', 'staaurar', 'stainp', 'strsalg', 'surwad', 'vvAeNeSt', 'vvAePoSt', 'vvMRSA') + + write_rds(z, 'data/bk_archive_2014-08_2022-06_v2022-09-15_3.rds', + compress = 'gz') + write_rds(z %>% filter(date >= '2021-07-01'), 'data/bk_archive_2021-07_2022-06_v2022-09-23_3.rds', + compress = 'gz') +} + diff --git a/ui.R b/ui.R index a6a7e20..29c381f 100644 --- a/ui.R +++ b/ui.R @@ -1,14 +1,22 @@ +jsc <- ' +$(document).ready(function () { + $("#allInput").parent().parent().parent().parent().css({ "width": "600px" }); + $("#specialtyInput").css({ "width": "590px" }); + $("#materialInput").parent().parent().css({ "width": "200px" }); + $("#materialInput").css({ "width": "190px" }); +}); +' ui <- dashboardPage( title = "RadaR2", - skin = "black", + skin = "black", # HEADER ------------------------------------------------------------------ - + dashboardHeader( title = span(img(src = "radar.png", height = 30), strong("RadaR2")), - titleWidth = 500, - + titleWidth = 358, + tags$li( a( strong(trnslt("About RadaR2")), @@ -20,32 +28,65 @@ ui <- dashboardPage( class = "dropdown", ) ), - + # SIDEBAR ----------------------------------------------------------------- - - dashboardSidebar(disable = TRUE, - sidebarMenu(id = "sidebar_id", - menuItem(icon = icon("check-square"), - materialSwitch( - inputId = "allInput", - label = trnslt("All specialties selected"), - value = TRUE, - status = "danger")), - menuItem(icon = icon("user-nurse"), - checkboxGroupInput(inputId = "specialtyInput", - label = trnslt("Select specialties"), - choices = sort(unique(radar_data$specialty_shiny)))), - menuItem(icon = icon("hospital"), - checkboxGroupInput(inputId = "departmentInput", - label = trnslt("Select departments"), - choices = sort(unique(radar_data$department)))) - ) - ), - + + dashboardSidebar( + disable = TRUE, + width = 400, + sidebarMenu( + id = "sidebar_id", + menuItem( + icon = icon("user-nurse"), + div( + materialSwitch( + inputId = "allInput", + label = trnslt("All specialties selected"), + value = TRUE, + status = "danger" + ), + checkboxGroupInput( + # tags$head(tags$style("width: 400px; max-width: 400px;")), + inputId = "specialtyInput", + label = trnslt("Select specialties"), + choices = sort(unique(radar_data$specialty_shiny)), + ) + ) + ), + menuItem( + icon = icon("hospital"), + div( + materialSwitch( + inputId = "allDepartments", + label = trnslt("All deparmtents selected"), + value = TRUE, + status = "danger" + ), + checkboxGroupInput( + inputId = "departmentInput", + label = trnslt("Select departments"), + choices = sort(unique(radar_data$department)) + ) + ) + ), + menuItem( + icon = icon("flask"), + div( + checkboxGroupInput( + inputId = "materialInput", + label = trnslt("Select material"), + choices = sort(unique(radar_data$Materiaal)) + ) + ) + ) + ) + ), + # BODY -------------------------------------------------------------------- dashboardBody( + tags$head(tags$script(jsc)), # fluidRow( # column = 12, @@ -59,24 +100,62 @@ ui <- dashboardPage( ".nav-tabs {background-color: white;} .nav-tabs-custom .nav-tabs li.active:hover a, .nav-tabs-custom .nav-tabs li.active a {background-color: transparent; border-color: transparent;} .nav-tabs-custom .nav-tabs li.active {border-top-color: black;}"), - div( - id = "resistance", - column( - width = 6, - uiOutput("box2") - + tabBox( + id = 'RadaRtabs', + width = '100%', + # column(width = 6, 'Instructie'), + # column(width = 6, 'Disclaimer, blabla blab la bla lbal bla lbal blablal', style="height: 300px;") + tabPanel( + 'Start', + div( + h3('Disclaimer'), + paste('blabla bla bla, legal legal legal.', + 'Ok, someone needs to write some wise stuff here.', + "Like only use for surveillance, not intended for ", + "individual patients' clinical support, etc. etc. ", + "Yadda yadda yadda"), + hr() + ), + fluidRow( + column(2, checkboxInput('cbDisclaimerRead', "I understand"), + style = 'margin-left: 10px;'), + column(2, actionButton('btnDisclaimerUnderstood', + label = 'confirm', + icon = icon("legal", class = "opt"))) + ), + fluidRow( + hr() + ), + div( + h3('Instruction'), + tags$ul(tags$li('do this'), + tags$li('do that'), + tags$li('do such'), + tags$li('do so')) + ), ), - column( - width = 6, - uiOutput("box1") - ), - column( - width = 6, - uiOutput("box3") - ), - column( - width = 6, - uiOutput("box4") + tabPanel( + 'Data', + style = "width: 100%;", + div( + id = "resistance", + column( + width = 6, + uiOutput("box2") + ), + column( + width = 6, + uiOutput("box1") + ), + column( + width = 6, + uiOutput("box3") + ), + column( + width = 6, + uiOutput("box4") + ) + ) ) ) ) diff --git a/updateGlimsData.R b/updateGlimsData.R new file mode 100644 index 0000000..14767d8 --- /dev/null +++ b/updateGlimsData.R @@ -0,0 +1,82 @@ +require(readr) +require(lubridate) + +cat(paste0('\14\n', now(), '\n')) + +oldDir = getwd() +basePath = '/share/mmbi/radar2' +setwd(basePath) + +source('interfaces.R') +source('stitchArchive.R') +source('4D_archive.R') + + +dataPath = 'data' +archivePath = 'data/archive' + +tFiles = read_rds(file.path(dataPath, 'tFilesInfo.rds')) +tFileOvw = read_rds(file.path(dataPath, 'tFileOvw.rds')) + +if (today() >= '2022-09-30') { + require(assertthat, quietly = T) + + # get the last date that is present in the data sofar + maxDate = tFiles %>% pull(fileDate) %>% max(na.rm = T) + # catch up with today + nDaysToUpdate = difftime(today(), maxDate) %>% as.numeric() + daysToUpdate = (maxDate + 1:nDaysToUpdate) + for (d in 1:nDaysToUpdate) { + currDate = daysToUpdate[d] + # aDate is converted to Excel style numeric format with origin '1970-01-01' + # currDate = as.Date(aDate, origin = lubridate::origin) + tFiles = tFiles %>% + filter(!is.na(fileDate)) %>% + addFileInfoOfDay(folders = folders, + ymdDate = currDate) + tInfoDay = tFiles %>% filter(fileDate == currDate) + a = assert_that(nrow(tInfoDay) > 0, + msg = paste0('tInfoDay has 0 rows: no files present for ', + currDate, '?')) + + } + + tInfoDays = tFiles %>% filter(fileDate %in% daysToUpdate) + tFileOvwToday = deriveFileOverview(tInfoDays) + a = assert_that(nrow(tFileOvwToday) > 0, + msg = 'derivederiveFileOverview() failed') + + tFileOvw = tFileOvw %>% ungroup() %>% + add_row(tFileOvwToday) %>% + filter(!is.na(fileDate)) + + write_rds(tFiles, file.path(dataPath, 'tFilesInfo.rds')) + write_rds(tFileOvw, file.path(dataPath, 'tFileOvw.rds')) + + # write GLIMMS data per date in 'daysToUpdate' and add it to the data sofar + for (d in 1:nDaysToUpdate) { + currDate = daysToUpdate[d] + tGlimmsOfDay = readArchiveOfDay(tFiles, tFileOvw, ymdDate = currDate) + a = assert_that(nrow(tGlimmsOfDay) > 0, + msg = paste0('readArchiveDay() yielded empty result for ', + currDate)) + filename = paste0(dataPath,'/bk_', currDate, '_', currDate, '.rds') + write_rds(tGlimmsOfDay, filename, compress = 'gz') + + filenameTotalSofar = paste0(dataPath, '/total_', currDate - 1, '.rds') + filenameTotal = paste0(dataPath, '/total_', currDate, '.rds') + filenameOfDay = filename + + tTotal = appendDailyData(fnTotal = filenameTotal, + fnTotalSofar = filenameTotalSofar, + fnDay = filenameOfDay, + reqColumns = abAbbrevs) + # nNewRows = tTotal %>% filter(date == today()) %>% nrow() + + # write_rds(tTotal, file = filenameTotal, compress = 'gz') + } +} + + + +setwd(oldDir) \ No newline at end of file diff --git a/validatie.R b/validatie.R new file mode 100644 index 0000000..a50e7b9 --- /dev/null +++ b/validatie.R @@ -0,0 +1,95 @@ +source('global.R') + +if (F) { + +# CoNS + radar_data_first %>% + mutate(f = mo_name(mo)) %>% + filter(f == 'Coagulase-negative Staphylococcus (CoNS)') %>% + pull(MO) %>% + unique() %>% + mo_name() + # versus + radar_data %>% + mutate(f = mo_name(mo)) %>% + filter(f == 'Coagulase-negative Staphylococcus (CoNS)') %>% + pull(MO) %>% + unique() %>% + mo_name() + # conclusie: Becker = TRUE maakt het verschil + + +# Fluclox onder SA +radar_data_first %>% + select(sampleid, patientid, Isolnr, date, MO, mo, FLC) %>% + filter(!is.na(FLC), MO == 'staaur') %>% count(patientid, Isolnr, FLC) + # conclusie: 212 samples, 142 patienten + +radar_data_first %>% + select(sampleid, patientid, Isolnr, date, MO, mo) %>% + filter(MO == 'staaur') %>% + count(patientid) +# 93 samples, 91 patienten + +countSamplesPatients = function(moName4d) { + nSamples = radar_data_first %>% + select(sampleid, patientid, Isolnr, date, MO, mo) %>% + filter(MO == moName4d) %>% + count(sampleid) %>% + nrow() + nPatients = radar_data_first %>% + select(sampleid, patientid, Isolnr, date, MO, mo) %>% + filter(MO == moName4d) %>% + count(patientid) %>% + nrow() + return(tribble(~mo4d, ~nSamples, ~nPatients, + moName4d, nSamples, nPatients)) +} + +t = countSamplesPatients('staaur') %>% + add_row(countSamplesPatients('esccol')) %>% + add_row(countSamplesPatients('klepne')) %>% + add_row(countSamplesPatients('encfcl')) %>% + add_row(countSamplesPatients('enbcco')) %>% + add_row(countSamplesPatients('strmgr')) %>% + add_row(countSamplesPatients('pseaer')) %>% + mutate(mo_name = mo_name(mo4d)) %>% + relocate(mo_name, .after = mo4d) +} + + +if (F) { + +allTrue = function(x) { identical(x, rep(T, length(x))) } + +allVal = function(x, val) { identical(x, rep(val, length(x))) } + +whereVal = function(x, val) { which(x == val) } + +whereAll = function(x, val) { + rx = which(x %>% map_lgl(~ allVal(., val))) %>% as.numeric() + ry = which(z %>% + rowwise() %>% + mutate(skdsjui3irskjdf = allVal(c_across(), val)) %>% + pull(skdsjui3irskjdf) + ) %>% as.numeric() + return(list(cols = colnames(x)[rx], x = rx, y = ry)) +} + +whereAny = function(x, val) { + rx = which(x %>% map_lgl(~any(. == val))) %>% as.numeric() + ry = which(x %>% + rowwise() %>% + mutate(skdsjui3irskjdf = any(c_across() == val)) %>% + pull(skdsjui3irskjdf)) %>% as.numeric() + return(list(cols = colnames(x)[rx], x = rx, y = ry)) +} + +whereR = radar_data %>% + filter(MO == 'mrsa') %>% + select(is.rsi) %>% + mutate(across(everything(), as.character)) %>% + whereAny('R') + + +} \ No newline at end of file