# radar2/interface_4D_final_results.R
#
# 308 lines
# 9.8 KiB
# R

suppressMessages({
require(tidyverse)
require(lubridate)
require(vroom)
require(writexl)
require(AMR)
})
# When TRUE, intermediate tables are removed with rm() as soon as the script
# no longer needs them.
minMemoryAsap <- FALSE

# Notes on the report flags in the exports:
#   Rapport (antibiotics): these rows are always wanted;
#                          '#' = not reported.
#   Rap (isolates):        '#' = not reported; do not include rows that are
#                          not reported.
#                          Caveat: "not reported" can itself be wrong.

# Directory that holds the export files
dataPath <- "interfaces/4D_final_results/data"

# Antibiotics data
fnAB <- "AB-2021-22.csv"
# Micro-organism data
fnST <- "ST-Alles.csv"
# Sample data
fnMS <- "MSv2-2022-tm10.csv"
# Isolate test data
fnTS <- "TS-Alles-221130-092256.csv"
# Test data
fnTM <- "TM-2022-tm10.csv"
source('globalData.R')
source('interfaces.R')
# Antibiotic mapping table; only the local mnemonic/name and the EARS-Net
# mnemonic/name columns are kept. Placed here temporarily during development.
tAntibiotics <- select(
  readxl::read_xlsx("data/ab_mapping.xlsx"),
  Mnemonic, Naam, `EARS-Net.Mnemonic`, `EARS-Net.Name`
)

# Read the export files from dataPath; column-type messages are switched off.
tAB <- file.path(dataPath, fnAB) %>% vroom(show_col_types = FALSE)
tST <- file.path(dataPath, fnST) %>% vroom(show_col_types = FALSE)
# The first 5 lines of the sample export are skipped.
tMS <- file.path(dataPath, fnMS) %>% vroom(skip = 5, show_col_types = FALSE)
tTS <- file.path(dataPath, fnTS) %>% vroom(show_col_types = FALSE)
tTM <- file.path(dataPath, fnTM) %>% vroom(show_col_types = FALSE)
# Antibiotic results: keep sample numbers starting with '22' and store the
# sample number as character so it can be used as a join key.
tAB <- tAB %>%
  filter(str_sub(Monsternummer, 1, 2) == "22") %>%
  mutate(Monsternummer = as.character(Monsternummer))
# Reported isolates.
# A '#' in `Rap` means "not reported" and those rows must not be included;
# for the ST table that comes down to keeping only rows where `Rap` is NA
# (= reported). Restricted to sample numbers starting with '22'.
tST <- tST %>%
  filter(
    str_sub(Monsternummer, 1, 2) == "22",
    is.na(Rap)
  ) %>%
  select(-`UMCG 4D Uniekmaker Specimen - IsolNr`) %>%
  mutate(Monsternummer = as.character(Monsternummer))
# Samples: keep sample numbers starting with '22', drop object type 'L',
# align key/column names with the other tables and build a single `date`
# date-time from the received date and time columns.
tMS <- tMS %>%
  filter(
    str_sub(Monsternr, 1, 2) == "22",
    ObjType != "L"
    # Materiaal == 'MP_BLOED' # 118922, 2023-02-07
  ) %>%
  rename(Monsternummer = Monsternr, patientid = `PIN-lot`) %>%
  # NOTE(review): because the first term is negative, the bare ObjType, Lot
  # and Kamer terms appear to have no effect (those columns stay in the
  # table) -- confirm whether they were meant to be dropped as well.
  select(-`Materiaal Variabelen`, ObjType, Lot, Kamer) %>%
  mutate(
    date = dmy_hms(paste(OntvOLabDt, as.character(Tijd))),
    Monsternummer = as.character(Monsternummer)
  ) %>%
  select(-OntvOLabDt, -Tijd)
# Isolate tests: sample number as character, matching the other tables.
tTS <- mutate(tTS, Monsternummer = as.character(Monsternummer))
# Samples (MS) without an isolate are negative, so the sample table is the
# left-hand side of the join chain and every sample is retained.
# Still to do: only the culture tests from the MS file should be included.
x <- tMS %>%
  mutate(Monsternummer = str_trim(Monsternummer)) %>%
  # isolates per sample
  left_join(tST, by = "Monsternummer") %>%
  # antibiotic results per sample/isolate
  left_join(tAB, by = c("Monsternummer", "Isolnr" = "IsolNr")) %>%
  # isolate tests per sample/isolate; clashing column names get a suffix
  # telling which side they came from
  left_join(
    tTS,
    by = c("Monsternummer", "Isolnr" = "Isolaatnr"),
    suffix = c(".MSxSTxAB", ".TS_IsolaatTest")
  )

if (minMemoryAsap) {
  # free memory: the source tables are merged into x
  rm(tAB, tST, tMS)
}
# Derive `year` (characters 1-2) and `weeknr` (characters 3-4) from the sample
# number and keep year 22 only: a smaller subset keeps the first explorations
# workable.
x <- x %>%
  # select(-`UMCG 4D Uniekmaker Specimen - IsolNr`) %>%
  mutate(
    year = as.numeric(str_sub(Monsternummer, 1, 2)),
    weeknr = as.numeric(str_sub(Monsternummer, 3, 4))
  ) %>%
  filter(year == 22)
# optional extra restriction used during exploration: weeknr >= 41
# Reshape the joined table towards the fields expected by RadaR:
#
#   sampleid <chr>, date <date>, specimen_group <chr>, specimen_type <chr>,
#   department <chr>, type_dept <fct>, specialism <chr>,
#   is_icu <lgl>, is_clinical <lgl>, is_outward <lgl>,
#   age <int>, gender <fct>, patientid <chr>, mo <mo>
s <- x %>%
  # filter(Materiaal == 'MP_BLOED') %>% # 118922, 2023-02-07
  # align column names
  rename(
    sampleid   = Monsternummer,
    # mo = MO, # later by as.mo(MO)
    rap.st     = Rap,
    rap.ab     = Rapport.MSxSTxAB,
    department = LigAfd,
    specialism = Spec,
    age        = Leeftijd,
    gender     = Geslacht
  ) %>%
  # fields that are not available yet get a constant placeholder
  add_column(
    specimen_group = "bk",
    specimen_type  = "",
    type_dept      = "" # needed?
  ) %>%
  # department flags, computed by helper functions of the same name
  mutate(
    is_icu      = is_icu(department),
    is_clinical = is_clinical(department),
    is_outward  = is_outward(department)
  ) %>%
  relocate(rap.ab, .after = rap.st) %>%
  relocate(c(year, weeknr, date), .after = sampleid) %>%
  # look up the AMR package (WHONET, EARS-Net standard) mnemonics and names
  left_join(tAntibiotics, by = c("AB" = "Mnemonic")) %>%
  rename(AB_WHO = `EARS-Net.Mnemonic`) %>%
  select(-AB)

if (minMemoryAsap) {
  # free memory: x has been superseded by s
  rm(x)
}
# TODO: find the difference between multiple rows per sampleid that make the
# pivot yield <list> columns instead of <rsi>
if (FALSE) {
  # disabled scratch code: inspect one sample in wide format
  s %>%
    filter(sampleid == "22010009431") %>%
    select(sampleid, AB_WHO, RIS, MIC, MO) %>%
    mutate(RIS = as.rsi(RIS), MIC = as.mic(MIC), mo = as.mo(MO)) %>%
    pivot_wider(names_from = AB_WHO, values_from = RIS)
  # differing columns: rap.ab, Etst, Naam, AB_WHO, `EARS-Net.Name`
}
# Long table with one RIS value per sample / isolate / antibiotic.
x <- s %>%
  select(
    -`EARS-Net.Name`, -Naam,
    # the following columns cause duplicate rows in radarData, so they are
    # dropped for now; they will be needed again later
    -Rapport.TS_IsolaatTest, -Waarde.TS_IsolaatTest, -isolaattest,
    -MIC, -Etst, -Diff, -rap.ab
  ) %>%
  # RIS values of 'V' are troublesome: treat them as missing
  mutate(RIS = replace(RIS, RIS == "V", NA)) %>%
  group_by(sampleid, Isolnr, MO, AB_WHO) %>%
  # within a sample/isolate/organism/antibiotic group that has several rows,
  # discard the rows without a RIS value when an organism is present;
  # every other row is kept
  filter(n() <= 1 | !is.na(RIS) | is.na(MO)) %>%
  ungroup() %>%
  # all multiple values should be gone by now, so reduce to distinct rows
  distinct(sampleid, Isolnr, AB_WHO, RIS, .keep_all = TRUE)
# Wide RIS table: one row per sample/isolate/organism, one column per
# antibiotic mnemonic.
y <- pivot_wider(
  x,
  id_cols = c(sampleid, Isolnr, MO),
  names_from = AB_WHO,
  values_from = RIS
)
# Wide table for RadaR: RIS values are converted with the AMR package and
# spread into one column per antibiotic mnemonic (AB_WHO); `mo` holds the
# AMR micro-organism code derived from MO.
radarData <- x %>%
  # filter(!is.na(AB_WHO)) %>%
  mutate(
    # NOTE(review): recent AMR releases rename as.rsi() to as.sir(); confirm
    # against the installed AMR version before upgrading the package.
    RIS = as.rsi(RIS),
    # MIC = as.mic(MIC),
    mo = as.mo(MO)
  ) %>%
  pivot_wider(names_from = AB_WHO, values_from = RIS) %>%
  # pivot_wider(names_from = Rapport.TS_IsolaatTest,
  #             values_from = Waarde.TS_IsolaatTest)
  # sort all columns alphabetically ...
  select(order(colnames(.))) %>%
  # ... then move the fixed, non-antibiotic columns to the front
  relocate(
    all_of(
      c(
        "sampleid", "Isolnr", "patientid", "year", "weeknr", "date", "MMBProc",
        "department", "type_dept", "specialism",
        "is_clinical", "is_icu", "is_outward",
        "specimen_type", "specimen_group",
        "rap.st",
        # 'rap.ab',
        "age", "gender",
        # 'Diff', 'Etst',
        "Waarde.MSxSTxAB",
        # 'Waarde.TS_IsolaatTest',
        # 'MIC',
        "MO", "mo", "Pos"
      )
    ),
    .before = 1
  ) %>%
  # Rows without an antibiotic mnemonic end up in a column literally named
  # "NA". Drop it when present; any_of() keeps this step from failing when
  # the data contain no such rows and the column was never created.
  select(-any_of("NA"))

if (minMemoryAsap) {
  # free memory: s is no longer needed
  rm(s)
}
# Keep samples dated from 2022-01-01 up to and including 2022-09-30.
# `date` is a date-time, so the bounds are checked on its calendar date:
# comparing it with the bare string '2022-09-30' means midnight at the start
# of that day (in the session's local time zone) and silently drops every
# sample dated later on 30 September.
radarData <- radarData %>%
  filter(
    as_date(date) >= ymd("2022-01-01"),
    as_date(date) <= ymd("2022-09-30")
  )
# Ad-hoc checks used while developing this interface; the block is disabled
# and is never executed when the script runs.
# NOTE(review): several statements below appear to reference columns that are
# dropped earlier in this script (e.g. isolaattest, Waarde.TS_IsolaatTest,
# MIC, Diff); they will need adjusting before they can be run again.
if (FALSE) {
  # TODO: more Neg than Pos results are expected
  # DONE: solved by left-joining tST onto tMS instead of the other way around
  radarData %>% filter(!is.na(Pos), MO == "mrsa") %>% count(patientid)
  radarData %>% filter(!is.na(Pos), MO == "esccol") %>% count(patientid)
  radarData %>% filter(!is.na(Pos), MO == "pseaer") %>% count(patientid)
  radarData %>% filter(!is.na(Pos), MO == "klepne") %>% count(patientid)
  radarData %>% filter(!is.na(Pos), MO == "canalb") %>% count(patientid)
  radarData %>% filter(!is.na(Pos), MO == "enbcco") %>% count(patientid)

  radarData %>%
    filter(isolaattest == "ESBL", Waarde.TS_IsolaatTest == "+", MO == "enbcco") %>%
    count(patientid)
  radarData %>%
    filter(isolaattest == "ESBL", Waarde.TS_IsolaatTest == "+", MO == "esccol") %>%
    count(patientid)
  radarData %>%
    filter(isolaattest == "ESBL", Waarde.TS_IsolaatTest == "+", MO == "klepne") %>%
    count(patientid)

  radarData %>% filter(MO == "klepne", GEN == "R") %>% count(patientid)
  radarData %>% filter(MO == "klepne", CIP == "R") %>% count(patientid)
  radarData %>% filter(MO == "klepne", MEM == "R") %>% count(patientid)
  radarData %>% filter(MO == "staaur", CLI == "R") %>% count(patientid)
  radarData %>% filter(is.na(MO), is.na(rap.st))

  # rows for patient 8000005 in the MS file: 36
  tMS %>% filter(patientid == "8000005") %>% count(Monsternummer)
  # rows for patient 8000005 in the ST file: 23
  tST %>%
    filter(
      Monsternummer %in% (
        tMS %>%
          filter(patientid == "8000005") %>%
          pull(Monsternummer) %>%
          unique()
      )
    ) %>%
    group_by(Monsternummer) %>%
    mutate(n = n()) %>%
    print(n = 25)
  # 22 * 36 = 792

  x %>%
    filter(sampleid == "22070019631") %>%
    select(
      -year, -weeknr, -date, -patientid, -age, -ObjType, -Lot, -gender, -PC,
      -department, -Type, -Kamer, -Isolnr, MO, -Waarde.MSxSTxAB, -Pos,
      -rap.st, -MIC, -Diff, -isolaattest, -Waarde.TS_IsolaatTest,
      -Rapport.TS_IsolaatTest, -Afname, -is_icu, -is_clinical, -is_outward
    ) %>%
    print(n = 38)

  x %>%
    filter(sampleid == "22070019631", Isolnr == 3) %>%
    # these values cause duplicate rows in radarData, so they are filtered
    # out for now; they will be needed again later
    # select(-MIC, -Etst, -Diff, -rap.ab) %>%
    pivot_wider(names_from = AB_WHO, values_from = RIS) %>%
    view()
  # pivot_wider(names_from = Rapport.TS_IsolaatTest,
  #             values_from = Waarde.TS_IsolaatTest) %>% view()

  # List the columns of `x` that do not hold exactly one distinct value.
  #
  # x: a data frame containing at least sampleid, patientid and RIS.
  # Returns `x` reduced to sampleid, patientid, every column whose number of
  # distinct values (counted over all rows of `x`) differs from 1, and RIS.
  #
  # The count is taken over the whole input, as before: the earlier
  # group_by(sampleid, Isolnr) had no effect on pull(), so pass in the rows
  # of a single sample/isolate to inspect that isolate.
  listMultiValCols <- function(x) {
    # one pass over the columns; no index loop, no growing vector
    nDistinct <- vapply(x, function(col) length(unique(col)), integer(1))
    multValCols <- names(nDistinct)[nDistinct != 1]
    # all_of() makes explicit that multValCols is a vector of column names
    x %>% select(sampleid, patientid, all_of(multValCols), RIS)
  }

  # this antibiotic still yields a list column
  x %>%
    filter(AB_WHO == "CAZ") %>%
    count(sampleid, Isolnr, AB_WHO) %>%
    filter(n > 1)

  x %>%
    filter(
      sampleid == str_trim("22070547731"),
      AB_WHO == "CAZ",
      RIS != "V"
    ) %>%
    select(sampleid, Isolnr, AB_WHO, RIS)

  key <- c("sampleid", "Isolnr")
  # all_of() is required to select by an external character vector
  y %>% filter(!is.na(FOX)) %>% select(all_of(key), FOX)

  x %>%
    select(sampleid, Isolnr, AB_WHO, RIS) %>%
    pivot_wider(
      id_cols = c(sampleid, Isolnr),
      names_from = AB_WHO,
      values_from = RIS
    ) %>%
    select(1:5)

  # Select the key result columns of `x` plus any extra columns named in `s`.
  #
  # x: a data frame containing sampleid, Isolnr, MO, AB_WHO and RIS.
  # s: optional character vector of additional column names to keep.
  selectRelCols <- function(x, s = c()) {
    x %>% select(sampleid, Isolnr, MO, AB_WHO, RIS, all_of(s))
  }
}
# vroom_write(x = x,
# file = file.path(getwd(), dataPath, 'ST x AB_2022.csv'),
# delim = ';')