radar2/interface_4D.R
2023-02-07 16:49:16 +01:00

206 lines
6.9 KiB
R

# initially only interfacing 4D data
require(assertthat, quietly = T)
source('4D_archive.R')
source('anonymize.R')
source('globalData.R')
# Read GLIMS (4D) archive data and shape it towards the RadaR format.
#
# Two modes:
#   * readPreprocessed = TRUE  - return the contents of the RDS snapshot
#     `preprocessedFile`. The date range, tFiles and tFileOvw are not used.
#   * readPreprocessed = FALSE - read the archive files listed in `tFiles`
#     whose fileDate lies within [ymdStartDate, ymdEndDate] through
#     readArchive() and convert the result to a long-format table.
#
# Arguments:
#   ymdStartDate, ymdEndDate  inclusive bounds compared against the
#                             `fileDate` column of tFiles
#   readPreprocessed          select the mode described above
#   tFiles                    table with a `fileDate` column; must not be NULL
#                             when readPreprocessed is FALSE
#   tFileOvw                  passed unchanged to readArchive()
#   preprocessedFile          path of the RDS snapshot read when
#                             readPreprocessed is TRUE; the default is the
#                             path that used to be hard-coded here
#
# Value:
#   * the object stored in `preprocessedFile` (readPreprocessed = TRUE), or
#   * NULL when readArchive() returns NULL, or
#   * a long-format table holding, per sampleid, only the rows with the
#     highest IsolNr, de-duplicated on (sampleid, date, IsolNr, mo, ab.amr).
readGlimsArchive <- function(ymdStartDate, ymdEndDate,
                             readPreprocessed = TRUE,
                             tFiles = NULL, tFileOvw = NULL,
                             preprocessedFile = 'data/bk_2019.rds') {
  if (readPreprocessed) {
    # Snapshots that were used here before (pass one as `preprocessedFile`):
    #   data/bk_archive_2014-08_2022-06_v2022-09-15_3.rds
    #   data/total_2022-09-30.rds
    #   data/bk_2022.09.01_2022.10.13.rds
    #   data/archive_2022-01-01_2022-10-19_v2022-10-20_1530.rds  # read_2_2022-10-20_1530
    #   data/read_10_2022-01-01_2022-10-25_x2022-10-25.rds
    return(read_rds(preprocessedFile))
  }

  assert_that(!is.null(tFiles),
              msg = 'readGlimsArchive(.., readPreprocessed = F, ..)')

  tArchive <- readArchive(
    tFileInfo = tFiles %>%
      filter(
        fileDate >= ymdStartDate,
        fileDate <= ymdEndDate
      ),
    tFileOvw = tFileOvw,
    createDebugList = FALSE # save memory
  )

  # nothing was read for the requested period
  if (is.null(tArchive)) {
    return(NULL)
  }

  radarData <- tArchive %>%
    # work towards RadaR nomenclature
    rename(
      sampleid = Monsternummer,
      patientid = PIN.Lot,
      department = ligafdeling,
      specialism = B,
      mo = MO
    ) %>%
    # some processing towards RadaR format
    mutate(
      gender = Geslacht,
      # case_when(Geslacht == 'M' ~ 'Male',
      #           Geslacht == 'V' ~ 'Female') %>% as.factor(),
      type_dept = specialism,
      birth_date = dmy(Geboortedatum),
      # age in whole years at `date`
      age = trunc((birth_date %--% date) / years(1)),
      Materiaal = replace_na(Materiaal, ''),
      Materiaal = str_trim(Materiaal),
      # Rapport = replace_na(Rapport, ''),
      # Rapport = str_trim(Rapport),
      specimen_group = 'bk',
      specimen_type = '',
      is_icu = is_icu(department),
      is_clinical = is_clinical(department),
      is_outward = is_outward(department)
    ) %>%
    # get AMR package (WHONET, EARS-Net std) mnemonics and names
    left_join(tAntibiotics, by = c('AB' = 'Mnemonic')) %>%
    rename(ab.amr = `EARS-Net.Mnemonic`) %>%
    # get the fields used in RadaR only
    select(
      sampleid, date, IsolNr, Materiaal,
      specimen_group, specimen_type,
      department, type_dept, specialism,
      is_icu, is_clinical, is_outward,
      birth_date, age, gender, patientid, mo, RIS, ab.amr, MIC, RAP
    )

  # these sub steps are for debugging purposes
  longData <- radarData %>%
    mutate(IsolNr = as.numeric(IsolNr)) %>%
    arrange(sampleid, department) %>%
    group_by(sampleid) %>%
    # fill repeating data (downwards within each sample)
    fill(patientid, age, gender, birth_date, department, type_dept, specialism) %>%
    # keep only the rows of the highest isolate number of each sample;
    # filters on !is.na(mo) and !is.na(RIS) are intentionally not applied
    filter(IsolNr == max(IsolNr, na.rm = TRUE)) %>%
    ungroup() %>%
    distinct(sampleid, date, IsolNr, mo, ab.amr, .keep_all = TRUE)
  # # get distinctive rows, arrange not strictly necessary
  # # arrange(sampleid, date, department, type_dept, specialism,
  # #         birth_date, age, gender, patientid, mo, ab.amr, RIS) %>%
  # # commented 2022-10-14 by GB in order to get the slicing done correctly
  # # group_by(date, sampleid, IsolNr, department, type_dept, specialism,
  # #          birth_date, age, gender, patientid, mo, ab.amr) %>%
  # group_by(date, sampleid, IsolNr, mo, ab.amr) %>%
  # slice(n()) %>%
  # ungroup() %>%
  # distinct(date, sampleid, IsolNr, department, type_dept, specialism,
  #          birth_date, age, gender, patientid, mo, ab.amr, .keep_all = T) %>%
  # # filter only reported rows, with a micro-organism and RIS value
  # filter(
  #   !is.na(RAP)
  #   # !is.na(mo),
  #   # !is.na(RIS)
  # )

  # The pivot to wide format below used to sit behind an unconditional
  # return() and therefore never ran. It is kept here, disabled, so the
  # function keeps returning the long-format table callers receive today.
  # bk <- longData %>%
  #   filter(!is.na(RIS)) %>%
  #   mutate(
  #     RIS = as.rsi(RIS),
  #     MIC = as.mic(MIC),
  #     mo = as.mo(mo)) %>%
  #   pivot_wider(names_from = ab.amr, values_from = RIS) %>%
  #   select(order(colnames(.))) %>%
  #   relocate(all_of(patchFirstColumns), .before = 1)
  # bk = bk %>%
  #   mutate(
  #     across(
  #       (which(colnames(.) == 'mo') + 1) : length(colnames(.)),
  #       as.rsi)
  #   )
  longData
}
# Read the (non-preprocessed) archive for one single day.
#
# Arguments:
#   tFiles    file table handed on to readGlimsArchive()
#   tFileOvw  handed on unchanged to readGlimsArchive()
#   ymdDate   the day to read; used as both start and end of the date range
#
# Value: whatever readGlimsArchive() returns for that one-day range.
readArchiveOfDay <- function(tFiles, tFileOvw, ymdDate) {
  readGlimsArchive(
    ymdStartDate = ymdDate,
    ymdEndDate = ymdDate,
    # spelled out: `F` is an ordinary variable that can be reassigned
    readPreprocessed = FALSE,
    tFiles = tFiles,
    tFileOvw = tFileOvw
  )
}
# Read the (non-preprocessed) archive for the current day.
#
# Arguments:
#   tFiles    file table handed on to readGlimsArchive()
#   tFileOvw  handed on unchanged to readGlimsArchive()
#
# Value: whatever readGlimsArchive() returns for today's one-day range.
readArchiveToday <- function(tFiles, tFileOvw) {
  # Take the date once: two separate today() calls could straddle midnight
  # and yield a start date later than the end date.
  currentDay <- today()
  readGlimsArchive(
    ymdStartDate = currentDay,
    ymdEndDate = currentDay,
    readPreprocessed = FALSE,
    tFiles = tFiles,
    tFileOvw = tFileOvw
  )
}
# anonymize the dataset
# Disabled scratch code: the `if (F)` guard keeps it from running when the
# file is sourced. It reads today's archive and stores it as an RDS file
# under data/ unless a file with that name already exists.
# NOTE(review): as written this fails when enabled - readArchiveToday() is
# called without its tFiles / tFileOvw arguments, and ymdStartDate /
# ymdEndDate are not defined in this block (presumably expected to exist in
# the interactive session - TODO confirm).
if (F) {
nopd = readArchiveToday()
# The actual anonymization is commented out: it would replace patientid via
# getUupin() for 7-character ids and overwrite birth_date with 1970-01-01.
# nopd = nopd %>%
# filter(str_length(patientid) == 7) %>%
# rowwise() %>%
# mutate(patientid = getUupin(patientid)) %>%
# ungroup() %>%
# mutate(birth_date = ymd('1970-01-01'))
filename = paste0('data/bk_', ymdStartDate, '_', ymdEndDate, '.rds')
# never overwrite an existing snapshot
if (!file.exists(filename)) {
write_rds(nopd, file = filename)
}
}
# stitch together processed parts of the archive
# Disabled stub (`if (F)`): it only lists the files in data/ whose names
# match 'bk_.*_.*\\.rds' as a one-column tibble; the stitching itself is not
# implemented here.
if (F) {
tibble(name = list.files(path = 'data/', pattern = 'bk_.*_.*\\.rds'))
}
# testing the slicing
# Disabled scratch code (`if (F)`): three pipelines that compare the effect
# of keeping only the last row per (date, sampleid, IsolNr, mo, ab.amr).
# NOTE(review): `radarData` is only assigned inside readGlimsArchive() in this
# file, so it presumably has to exist in the interactive session - TODO
# confirm. select(1:3, 13:ncol(.)) picks columns by position and therefore
# depends on the column order of radarData.
if (F) {
# NOTE(review): ymdStartDate is assigned here but not used by the pipelines
# below.
ymdStartDate = ymd('2022-09-01')
# find interesting cases
# i.e. CAZ / CTX rows with a micro-organism that occur more than once per
# (date, sampleid, IsolNr, mo, ab.amr); the group_by() directly followed by
# ungroup() has no effect while slice(n()) is commented out
radarData %>%
arrange(sampleid, department) %>%
group_by(sampleid) %>%
fill(patientid, age, gender, birth_date, department, type_dept, specialism) %>%
ungroup() %>%
group_by(date, sampleid, IsolNr, mo, ab.amr) %>%
# slice(n()) %>%
ungroup() %>%
select(1:3, 13:ncol(.)) %>%
filter(ab.amr %in% c('CAZ', 'CTX')) %>% filter(!is.na(mo)) %>% count(date, sampleid, IsolNr, mo, ab.amr) %>% filter(n>1)
# show without slicing last row
# (the result is still grouped: there is no ungroup() after the group_by())
radarData %>%
arrange(sampleid, department) %>%
group_by(sampleid) %>%
fill(patientid, age, gender, birth_date, department, type_dept, specialism) %>%
ungroup() %>%
group_by(date, sampleid, IsolNr, mo, ab.amr) %>%
# slice(n()) %>%
select(1:3, 13:ncol(.)) %>%
filter(sampleid == '22360599331', ab.amr %in% c('CAZ', 'CTX'))
# show with slicing last row
# slice(n()) keeps the last row of every group
radarData %>%
arrange(sampleid, department) %>%
group_by(sampleid) %>%
fill(patientid, age, gender, birth_date, department, type_dept, specialism) %>%
ungroup() %>%
group_by(date, sampleid, IsolNr, mo, ab.amr) %>%
slice(n()) %>%
select(1:3, 13:ncol(.)) %>%
filter(sampleid == '22360599331', ab.amr %in% c('CAZ', 'CTX'))
}