206 lines
6.9 KiB
R
206 lines
6.9 KiB
R
|
# initially only interfacing 4D data
|
||
|
|
||
|
require(assertthat, quietly = T)
|
||
|
|
||
|
source('4D_archive.R')
|
||
|
source('anonymize.R')
|
||
|
source('globalData.R')
|
||
|
|
||
|
|
||
|
readGlimsArchive = function(ymdStartDate, ymdEndDate,
|
||
|
readPreprocessed = T,
|
||
|
tFiles = NULL, tFileOvw = NULL) {
|
||
|
if (readPreprocessed) {
|
||
|
# return(read_rds('data/bk_archive_2014-08_2022-06_v2022-09-15_3.rds'))
|
||
|
# return(read_rds('data/total_2022-09-30.rds'))
|
||
|
return(read_rds('data/bk_2019.rds'))
|
||
|
# return(read_rds('data/bk_2022.09.01_2022.10.13.rds'))
|
||
|
# return(read_rds('data/archive_2022-01-01_2022-10-19_v2022-10-20_1530.rds')) # read_2_2022-10-20_1530
|
||
|
# return(read_rds('data/read_10_2022-01-01_2022-10-25_x2022-10-25.rds'))
|
||
|
} else {
|
||
|
assert_that(!is.null(tFiles),
|
||
|
msg = 'readGlimsArchive(.., readPreprocessed = F, ..)')
|
||
|
tArchive = readArchive(
|
||
|
tFileInfo = tFiles %>%
|
||
|
filter(
|
||
|
fileDate >= ymdStartDate,
|
||
|
fileDate <= ymdEndDate
|
||
|
),
|
||
|
tFileOvw = tFileOvw,
|
||
|
createDebugList = F # save memory
|
||
|
)
|
||
|
|
||
|
bk = NULL
|
||
|
if (!is.null(tArchive)) {
|
||
|
radarData <- tArchive %>%
|
||
|
# work towards RadaR nomenclature
|
||
|
rename(sampleid = Monsternummer,
|
||
|
patientid = PIN.Lot,
|
||
|
department = ligafdeling,
|
||
|
specialism = B,
|
||
|
mo = MO) %>%
|
||
|
# some processing towards RadaR format
|
||
|
mutate(
|
||
|
gender = Geslacht,
|
||
|
# case_when(Geslacht == 'M' ~ 'Male',
|
||
|
# Geslacht == 'V' ~ 'Female') %>% as.factor(),
|
||
|
type_dept = specialism,
|
||
|
birth_date = dmy(Geboortedatum),
|
||
|
age = trunc((birth_date %--% date) / years(1)),
|
||
|
Materiaal = replace_na(Materiaal, ''),
|
||
|
Materiaal = str_trim(Materiaal),
|
||
|
# Rapport = replace_na(Rapport, ''),
|
||
|
# Rapport = str_trim(Rapport),
|
||
|
specimen_group = 'bk',
|
||
|
specimen_type = '',
|
||
|
is_icu = is_icu(department),
|
||
|
is_clinical = is_clinical(department),
|
||
|
is_outward = is_outward(department)
|
||
|
) %>%
|
||
|
# get AMR package (WHONET, EARS-Net std) mnemonics and names
|
||
|
left_join(tAntibiotics, by = c('AB' = 'Mnemonic')) %>%
|
||
|
rename(ab.amr = `EARS-Net.Mnemonic`) %>%
|
||
|
# get the fields used in RadaR only
|
||
|
select(
|
||
|
sampleid, date, IsolNr, Materiaal,
|
||
|
specimen_group, specimen_type,
|
||
|
department, type_dept, specialism,
|
||
|
is_icu, is_clinical, is_outward,
|
||
|
birth_date, age, gender, patientid, mo, RIS, ab.amr, MIC, RAP
|
||
|
)
|
||
|
|
||
|
# return(radarData)
|
||
|
|
||
|
# these sub steps are for debugging purposes
|
||
|
z <- radarData %>%
|
||
|
mutate(IsolNr = as.numeric(IsolNr)) %>%
|
||
|
arrange(sampleid, department) %>%
|
||
|
group_by(sampleid) %>%
|
||
|
# fill repeating data
|
||
|
fill(patientid, age, gender, birth_date, department, type_dept, specialism) %>%
|
||
|
filter(#!is.na(mo),
|
||
|
#!is.na(RIS),
|
||
|
IsolNr == max(IsolNr, na.rm = T)) %>%
|
||
|
ungroup() %>%
|
||
|
distinct(sampleid, date, IsolNr, mo, ab.amr, .keep_all = T)
|
||
|
# # get distinctive rows, arrange not strictly necessary
|
||
|
# # arrange(sampleid, date, department, type_dept, specialism,
|
||
|
# # birth_date, age, gender, patientid, mo, ab.amr, RIS) %>%
|
||
|
# # commented 2022-10-14 by GB in order to get the slicing done correctly
|
||
|
# # group_by(date, sampleid, IsolNr, department, type_dept, specialism,
|
||
|
# # birth_date, age, gender, patientid, mo, ab.amr) %>%
|
||
|
# group_by(date, sampleid, IsolNr, mo, ab.amr) %>%
|
||
|
# slice(n()) %>%
|
||
|
# ungroup() %>%
|
||
|
# distinct(date, sampleid, IsolNr, department, type_dept, specialism,
|
||
|
# birth_date, age, gender, patientid, mo, ab.amr, .keep_all = T) %>%
|
||
|
# # filter only reported rows, with a micro-organism and RIS value
|
||
|
# filter(
|
||
|
# !is.na(RAP)
|
||
|
# # !is.na(mo),
|
||
|
# # !is.na(RIS)
|
||
|
# )
|
||
|
|
||
|
return(z)
|
||
|
|
||
|
# pivot to wider format
|
||
|
bk <- z %>%
|
||
|
filter(!is.na(RIS)) %>%
|
||
|
mutate(
|
||
|
RIS = as.rsi(RIS),
|
||
|
MIC = as.mic(MIC),
|
||
|
mo = as.mo(mo)) %>%
|
||
|
pivot_wider(names_from = ab.amr, values_from = RIS) %>%
|
||
|
select(order(colnames(.))) %>%
|
||
|
relocate(all_of(patchFirstColumns), .before = 1)
|
||
|
|
||
|
|
||
|
# bk = bk %>%
|
||
|
# mutate(
|
||
|
# across(
|
||
|
# (which(colnames(.) == 'mo') + 1) : length(colnames(.)),
|
||
|
# as.rsi)
|
||
|
# )
|
||
|
}
|
||
|
return(bk)
|
||
|
}
|
||
|
}
|
||
|
|
||
|
|
||
|
readArchiveOfDay = function(tFiles, tFileOvw, ymdDate) {
|
||
|
readGlimsArchive(ymdStartDate = ymdDate,
|
||
|
ymdEndDate = ymdDate,
|
||
|
readPreprocessed = F,
|
||
|
tFiles = tFiles,
|
||
|
tFileOvw = tFileOvw)
|
||
|
}
|
||
|
|
||
|
readArchiveToday = function(tFiles, tFileOvw) {
|
||
|
readGlimsArchive(ymdStartDate = today(),
|
||
|
ymdEndDate = today(),
|
||
|
readPreprocessed = F,
|
||
|
tFiles = tFiles,
|
||
|
tFileOvw = tFileOvw)
|
||
|
}
|
||
|
|
||
|
# anonymize the dataset
|
||
|
if (F) {
|
||
|
nopd = readArchiveToday()
|
||
|
# nopd = nopd %>%
|
||
|
# filter(str_length(patientid) == 7) %>%
|
||
|
# rowwise() %>%
|
||
|
# mutate(patientid = getUupin(patientid)) %>%
|
||
|
# ungroup() %>%
|
||
|
# mutate(birth_date = ymd('1970-01-01'))
|
||
|
|
||
|
|
||
|
filename = paste0('data/bk_', ymdStartDate, '_', ymdEndDate, '.rds')
|
||
|
if (!file.exists(filename)) {
|
||
|
write_rds(nopd, file = filename)
|
||
|
}
|
||
|
}
|
||
|
|
||
|
# stich together processed parts of the archive
|
||
|
if (F) {
|
||
|
tibble(name = list.files(path = 'data/', pattern = 'bk_.*_.*\\.rds'))
|
||
|
}
|
||
|
|
||
|
# testing the slicing
|
||
|
if (F) {
|
||
|
ymdStartDate = ymd('2022-09-01')
|
||
|
|
||
|
# find interesting cases
|
||
|
radarData %>%
|
||
|
arrange(sampleid, department) %>%
|
||
|
group_by(sampleid) %>%
|
||
|
fill(patientid, age, gender, birth_date, department, type_dept, specialism) %>%
|
||
|
ungroup() %>%
|
||
|
group_by(date, sampleid, IsolNr, mo, ab.amr) %>%
|
||
|
# slice(n()) %>%
|
||
|
ungroup() %>%
|
||
|
select(1:3, 13:ncol(.)) %>%
|
||
|
filter(ab.amr %in% c('CAZ', 'CTX')) %>% filter(!is.na(mo)) %>% count(date, sampleid, IsolNr, mo, ab.amr) %>% filter(n>1)
|
||
|
|
||
|
# show without slicing last row
|
||
|
radarData %>%
|
||
|
arrange(sampleid, department) %>%
|
||
|
group_by(sampleid) %>%
|
||
|
fill(patientid, age, gender, birth_date, department, type_dept, specialism) %>%
|
||
|
ungroup() %>%
|
||
|
group_by(date, sampleid, IsolNr, mo, ab.amr) %>%
|
||
|
# slice(n()) %>%
|
||
|
select(1:3, 13:ncol(.)) %>%
|
||
|
filter(sampleid == '22360599331', ab.amr %in% c('CAZ', 'CTX'))
|
||
|
|
||
|
# show with slicing last row
|
||
|
radarData %>%
|
||
|
arrange(sampleid, department) %>%
|
||
|
group_by(sampleid) %>%
|
||
|
fill(patientid, age, gender, birth_date, department, type_dept, specialism) %>%
|
||
|
ungroup() %>%
|
||
|
group_by(date, sampleid, IsolNr, mo, ab.amr) %>%
|
||
|
slice(n()) %>%
|
||
|
select(1:3, 13:ncol(.)) %>%
|
||
|
filter(sampleid == '22360599331', ab.amr %in% c('CAZ', 'CTX'))
|
||
|
|
||
|
}
|