# R script (file-viewer listing: 308 lines, 9.8 KiB)
# Attach the packages used by this script.
# library() is used instead of require(): require() merely returns FALSE when a
# package is missing, so the script would continue and fail later at the first
# call into that package; library() stops right here with a clear error.
suppressMessages({
  library(tidyverse)
  library(lubridate)
  library(vroom)
  library(writexl)
  library(AMR)
})

# When TRUE, intermediate tables are removed with rm() further down in this
# script as soon as the next stage has been built from them.
minMemoryAsap <- FALSE

# Rapport ab:
#   always want to have these,
#   '#' = not reported
#
# Rap st:
#   '#' = not reported; do not include when not reported
#   "not reported" can be an error

# Directory holding the export files, followed by the individual file names.
dataPath <- "interfaces/4D_final_results/data"
fnAB <- "AB-2021-22.csv"              # antibiotics data
fnST <- "ST-Alles.csv"                # microorganism data
fnMS <- "MSv2-2022-tm10.csv"          # sample data
fnTS <- "TS-Alles-221130-092256.csv"  # isolate tests data
fnTM <- "TM-2022-tm10.csv"            # tests data

# Project scripts; evaluated after the settings above have been defined.
source("globalData.R")
source("interfaces.R")

# Antibiotic mapping sheet, reduced to the local mnemonic/name and the
# EARS-Net mnemonic/name columns.
# Temporarily placed here for development.
tAntibiotics <- select(
  readxl::read_xlsx("data/ab_mapping.xlsx"),
  Mnemonic, Naam, `EARS-Net.Mnemonic`, `EARS-Net.Name`
)

# Read the five export files from dataPath; column-type messages are silenced.
tAB <- vroom(file.path(dataPath, fnAB), show_col_types = FALSE)
tST <- vroom(file.path(dataPath, fnST), show_col_types = FALSE)
# the sample file is read with its first 5 lines skipped
tMS <- vroom(file.path(dataPath, fnMS), skip = 5, show_col_types = FALSE)
tTS <- vroom(file.path(dataPath, fnTS), show_col_types = FALSE)
tTM <- vroom(file.path(dataPath, fnTM), show_col_types = FALSE)

# Antibiotic results: keep sample numbers starting with "22" and store the
# sample number as character so it can serve as a join key.
tAB <- tAB %>%
  filter(str_sub(Monsternummer, 1, 2) == "22") %>%
  mutate(Monsternummer = as.character(Monsternummer))

# Reported isolates.
tST <- tST %>%
  filter(
    str_sub(Monsternummer, 1, 2) == "22",
    # '#' = not reported, these must not be included;
    # in ST this means: include only the rows where Rap is NA (= reported)
    is.na(Rap)
  ) %>%
  select(-`UMCG 4D Uniekmaker Specimen - IsolNr`) %>%
  mutate(Monsternummer = as.character(Monsternummer))

# Samples: keep sample numbers starting with "22" whose ObjType is not 'L',
# align the key/patient column names, and build a single `date` timestamp
# from the OntvOLabDt and Tijd columns.
tMS <- tMS %>%
  filter(
    str_sub(Monsternr, 1, 2) == "22",
    ObjType != "L"
    # Materiaal == 'MP_BLOED' # 118922, 2023-02-07
  ) %>%
  rename(Monsternummer = Monsternr, patientid = `PIN-lot`) %>%
  # NOTE(review): only `Materiaal Variabelen` carries a minus sign; ObjType,
  # Lot and Kamer are listed without one and therefore stay in the table --
  # confirm that this is what was intended.
  select(-`Materiaal Variabelen`, ObjType, Lot, Kamer) %>%
  mutate(
    date = dmy_hms(paste(OntvOLabDt, as.character(Tijd))),
    Monsternummer = as.character(Monsternummer)
  ) %>%
  select(-OntvOLabDt, -Tijd)

# Isolate tests: store the sample number as character, like the other tables.
tTS <- mutate(tTS, Monsternummer = as.character(Monsternummer))

# MS samples that have no isolate are negative. That is why tMS is the
# left-hand table of the joins below: every sample row is kept, also when no
# isolate matches (the original note still called this a right_join).
# Next, only the culture determinations from the MS file should be included.
x <- tMS %>%
  mutate(Monsternummer = str_trim(Monsternummer)) %>%
  left_join(tST, by = "Monsternummer") %>%
  left_join(tAB, by = c("Monsternummer", "Isolnr" = "IsolNr")) %>%
  left_join(
    tTS,
    by = c("Monsternummer", "Isolnr" = "Isolaatnr"),
    # same-named columns get a suffix that tells which side they came from
    suffix = c(".MSxSTxAB", ".TS_IsolaatTest")
  )

# Optionally drop the source tables now that they have been joined into `x`.
if (minMemoryAsap) {
  # save memory
  rm(list = c("tAB", "tST", "tMS"))
}

# Derive `year` (characters 1-2) and `weeknr` (characters 3-4) from the
# sample number.
x <- x %>%
  # select(-`UMCG 4D Uniekmaker Specimen - IsolNr`) %>%
  mutate(
    year = as.numeric(str_sub(Monsternummer, 1, 2)),
    weeknr = as.numeric(str_sub(Monsternummer, 3, 4))
  ) %>%
  # smaller subset in order to keep the first explorations workable
  filter(year == 22)
#        weeknr >= 41)

# Build `s`: the joined data with the column names RadaR expects, the still
# missing columns added, and the EARS-Net antibiotic mnemonic (AB_WHO) in
# place of the local AB code.
s <- x %>%
  # filter(Materiaal == 'MP_BLOED') %>% # 118922, 2023-02-07
  # fields expected by RadaR:
  #
  #   sampleid <chr>, date <date>, specimen_group <chr>, specimen_type <chr>,
  #   department <chr>, type_dept <fct>, specialism <chr>,
  #   is_icu <lgl>, is_clinical <lgl>, is_outward <lgl>,
  #   age <int>, gender <fct>, patientid <chr>, mo <mo>,
  #
  # align column names
  rename(
    sampleid = Monsternummer,
    # mo = MO, # later by as.mo(MO)
    rap.st = Rap,
    rap.ab = Rapport.MSxSTxAB,
    department = LigAfd,
    specialism = Spec,
    age = Leeftijd,
    gender = Geslacht
  ) %>%
  # add the fields that are still missing
  add_column(
    specimen_group = "bk",
    specimen_type = "",
    type_dept = "" # needed?
  ) %>%
  # is_icu(), is_clinical() and is_outward() are not defined in this file;
  # presumably they come from one of the sourced scripts -- verify
  mutate(
    is_icu = is_icu(department),
    is_clinical = is_clinical(department),
    is_outward = is_outward(department)
  ) %>%
  relocate(rap.ab, .after = rap.st) %>%
  relocate(c(year, weeknr, date), .after = sampleid) %>%
  # get AMR package (WHONET, EARS-Net std) mnemonics and names
  left_join(tAntibiotics, by = c("AB" = "Mnemonic")) %>%
  rename(AB_WHO = `EARS-Net.Mnemonic`) %>%
  select(-AB)

# Optionally drop `x` now that `s` has been derived from it.
if (minMemoryAsap) {
  # save memory
  rm(list = "x")
}

# TODO: find diff in multiple rows per sampleid that yield <list> io <rsi>
# (disabled exploration code; never runs because of the constant FALSE)
if (FALSE) {
  s %>%
    filter(sampleid == "22010009431") %>%
    select(sampleid, AB_WHO, RIS, MIC, MO) %>%
    mutate(
      RIS = as.rsi(RIS),
      MIC = as.mic(MIC),
      mo = as.mo(MO)
    ) %>%
    pivot_wider(names_from = AB_WHO, values_from = RIS)
  # diff: rap.ab, Etst, Naam, AB_WHO, `EARS-Net.Name`
}

# Reduce `s` to at most one RIS value per sample / isolate / organism /
# antibiotic, so that the later pivot to wide format does not produce
# list columns.
x <- s %>%
  select(
    -`EARS-Net.Name`, -Naam,
    # the following values cause duplicate rows in radarData, so they are
    # filtered out for the time being; they will be needed later on though
    -Rapport.TS_IsolaatTest, -Waarde.TS_IsolaatTest, -isolaattest,
    -MIC, -Etst, -Diff, -rap.ab
  ) %>%
  # RIS values of 'V' are a pain in the neck: turn them into NA
  mutate(RIS = replace(RIS, RIS == "V", NA)) %>%
  group_by(sampleid, Isolnr, MO, AB_WHO) %>%
  # within a group holding more than one row, drop the rows that have an
  # organism but no RIS value
  filter(!(n() > 1 & is.na(RIS) & !is.na(MO))) %>%
  ungroup() %>%
  # all multiple values should be absent now, so get distinct rows
  distinct(sampleid, Isolnr, AB_WHO, RIS, .keep_all = TRUE)

# Wide table with one row per sample / isolate / organism and one column per
# antibiotic mnemonic, holding the RIS value.
y <- pivot_wider(
  x,
  id_cols = c(sampleid, Isolnr, MO),
  names_from = AB_WHO,
  values_from = RIS
)

# Build radarData: RIS converted with as.rsi(), an `mo` column added with
# as.mo(MO), one column per antibiotic mnemonic, columns sorted
# alphabetically, and then the descriptive columns moved to the front.
radarData <- x %>%
  # filter(!is.na(AB_WHO)) %>%
  mutate(
    RIS = as.rsi(RIS),
    # MIC = as.mic(MIC),
    mo = as.mo(MO)
  ) %>%
  pivot_wider(names_from = AB_WHO, values_from = RIS) %>%
  # pivot_wider(names_from = Rapport.TS_IsolaatTest,
  #             values_from = Waarde.TS_IsolaatTest)
  select(order(colnames(.))) %>%
  relocate(
    all_of(
      c(
        "sampleid", "Isolnr", "patientid", "year", "weeknr", "date", "MMBProc",
        "department", "type_dept", "specialism",
        "is_clinical", "is_icu", "is_outward",
        "specimen_type", "specimen_group",
        "rap.st",
        # 'rap.ab',
        "age", "gender",
        # 'Diff', 'Etst',
        "Waarde.MSxSTxAB",
        # 'Waarde.TS_IsolaatTest',
        # 'MIC',
        "MO", "mo", "Pos"
      )
    ),
    .before = 1
  ) %>%
  # Rows without an antibiotic (AB_WHO is NA) end up in a column named "NA"
  # after the pivot. any_of() drops that column when it exists and is a no-op
  # otherwise, whereas select(-`NA`) aborts when there is no such column.
  select(-any_of("NA"))

# Optionally drop `s` now that radarData has been built.
if (minMemoryAsap) {
  # save memory
  rm(list = "s")
}

# Restrict radarData to samples dated 2022-01-01 up to and including
# 2022-09-30.
# The timestamps are compared as calendar dates. Comparing the date-time column
# with the string '2022-09-30' directly converts the string to midnight at the
# start of that day (in the session's time zone), which silently drops every
# sample received later on 30 September and makes the bounds time-zone
# dependent.
radarData <- radarData %>%
  filter(
    as_date(date) >= as_date("2022-01-01"),
    as_date(date) <= as_date("2022-09-30")
  )

# Disabled development / exploration code. The constant FALSE means nothing in
# here is evaluated when the script runs; the snippets are meant to be run by
# hand. Several of them refer to objects (tMS, tST) that have been removed
# when minMemoryAsap is TRUE, and to columns that have already been dropped
# from `x` / radarData further up.
if (FALSE) {
  # TODO: one would expect more Neg than Pos
  # DONE: for this, tMS left_join tST instead of the other way round
  radarData %>% filter(!is.na(Pos), MO == "mrsa") %>% count(patientid)
  radarData %>% filter(!is.na(Pos), MO == "esccol") %>% count(patientid)
  radarData %>% filter(!is.na(Pos), MO == "pseaer") %>% count(patientid)
  radarData %>% filter(!is.na(Pos), MO == "klepne") %>% count(patientid)
  radarData %>% filter(!is.na(Pos), MO == "canalb") %>% count(patientid)
  radarData %>% filter(!is.na(Pos), MO == "enbcco") %>% count(patientid)

  radarData %>%
    filter(isolaattest == "ESBL", Waarde.TS_IsolaatTest == "+", MO == "enbcco") %>%
    count(patientid)
  radarData %>%
    filter(isolaattest == "ESBL", Waarde.TS_IsolaatTest == "+", MO == "esccol") %>%
    count(patientid)
  radarData %>%
    filter(isolaattest == "ESBL", Waarde.TS_IsolaatTest == "+", MO == "klepne") %>%
    count(patientid)
  radarData %>% filter(MO == "klepne", GEN == "R") %>% count(patientid)
  radarData %>% filter(MO == "klepne", CIP == "R") %>% count(patientid)
  radarData %>% filter(MO == "klepne", MEM == "R") %>% count(patientid)
  radarData %>% filter(MO == "staaur", CLI == "R") %>% count(patientid)

  radarData %>% filter(is.na(MO), is.na(rap.st))

  # rows for patient 8000005 in the MS file: 36
  tMS %>% filter(patientid == "8000005") %>% count(Monsternummer)
  # rows for patient 8000005 in the ST file: 23
  tST %>%
    filter(
      Monsternummer %in% (
        tMS %>%
          filter(patientid == "8000005") %>%
          pull(Monsternummer) %>%
          unique()
      )
    ) %>%
    group_by(Monsternummer) %>%
    mutate(n = n()) %>%
    print(n = 25)
  # 22 * 36 = 792

  x %>%
    filter(sampleid == "22070019631") %>%
    select(
      -year, -weeknr, -date, -patientid, -age, -ObjType, -Lot, -gender, -PC,
      -department, -Type, -Kamer, -Isolnr, MO, -Waarde.MSxSTxAB, -Pos,
      -rap.st, -MIC, -Diff, -isolaattest, -Waarde.TS_IsolaatTest,
      -Rapport.TS_IsolaatTest, -Afname, -is_icu, -is_clinical, -is_outward
    ) %>%
    print(n = 38)

  x %>%
    filter(sampleid == "22070019631", Isolnr == 3) %>%
    # these values cause duplicate rows in radarData, so they are filtered
    # out for the time being; they will be needed later on though
    # select(-MIC, -Etst, -Diff, -rap.ab) %>%
    pivot_wider(names_from = AB_WHO, values_from = RIS) %>%
    view()
  # pivot_wider(names_from = Rapport.TS_IsolaatTest,
  #             values_from = Waarde.TS_IsolaatTest) %>% view()

  # Return sampleid, patientid, RIS and every column of `x` whose number of
  # distinct values differs from 1.
  #
  # x: a data frame containing at least sampleid, Isolnr, patientid and RIS.
  #
  # NOTE(review): the original grouped by (sampleid, Isolnr) before pulling
  # each column, but pull() returns the full column regardless of grouping,
  # so the counts are over the whole table rather than per isolate. That
  # behaviour is kept here; confirm whether a per-group count was intended.
  listMultiValCols <- function(x) {
    colns <- colnames(x)
    grouped <- group_by(x, sampleid, Isolnr)
    # vapply over the column names replaces the 1:length() index loop, which
    # grew its result with c() and used a loop variable named `c`
    collens <- vapply(
      colns,
      function(coln) length(unique(grouped[[coln]])),
      integer(1)
    )
    multValCols <- colns[which(collens != 1)]
    # all_of(): select by an external character vector without the
    # tidyselect ambiguity warning
    x %>% select(sampleid, patientid, all_of(multValCols), RIS)
  }

  # this antibiotic still yields a list column
  x %>%
    filter(AB_WHO == "CAZ") %>%
    count(sampleid, Isolnr, AB_WHO) %>%
    filter(n > 1)

  x %>%
    filter(
      sampleid == str_trim("22070547731"),
      AB_WHO == "CAZ",
      RIS != "V"
    ) %>%
    select(sampleid, Isolnr, AB_WHO, RIS)

  key <- c("sampleid", "Isolnr")
  y %>% filter(!is.na(FOX)) %>% select(all_of(key), FOX)

  x %>%
    select(sampleid, Isolnr, AB_WHO, RIS) %>%
    pivot_wider(
      id_cols = c(sampleid, Isolnr),
      names_from = AB_WHO,
      values_from = RIS
    ) %>%
    select(1:5)

  # Select the key columns plus any extra columns named in `s`.
  #
  # x: a data frame with sampleid, Isolnr, MO, AB_WHO and RIS.
  # s: character vector of additional column names (default: none).
  selectRelCols <- function(x, s = c()) {
    x %>% select(sampleid, Isolnr, MO, AB_WHO, RIS, all_of(s))
  }
}

# vroom_write(x = x,
#             file = file.path(getwd(), dataPath, 'ST x AB_2022.csv'),
#             delim = ';')