# Builds `radarData`: one row per sample / isolate with one column per
# antibiotic (RIS result), assembled from the lab export files found in
# `dataPath` (AB, ST, MS, TS and TM files).
#
# Steps:
#   1. read the export files and restrict them to sample numbers starting
#      with '22'
#   2. join samples (MS) with isolates (ST), antibiotic results (AB) and
#      isolate tests (TS)
#   3. rename / add the fields expected by RadaR
#   4. de-duplicate and pivot the RIS results to wide format

# library() stops with an error when a package is missing; require() would
# only return FALSE and let the script fail later with a confusing message.
suppressMessages({
  library(tidyverse)
  library(lubridate)
  library(vroom)
  library(writexl)
  library(AMR)
})

# When TRUE, intermediate tables are removed as soon as they are no longer
# needed by the main pipeline.
minMemoryAsap <- FALSE

# Rapport (AB file):
#   always keep these rows
#   '#' = not reported
#
# Rap (ST file):
#   '#' = not reported; do not include rows that were not reported
#   "not reported" may be an error

dataPath <- 'interfaces/4D_final_results/data'
# Antibiotic data
fnAB <- 'AB-2021-22.csv'
# Microorganism data
fnST <- 'ST-Alles.csv'
# Sample data
fnMS <- 'MSv2-2022-tm10.csv'
# Isolate test data
fnTS <- 'TS-Alles-221130-092256.csv'
# Tests data
fnTM <- 'TM-2022-tm10.csv'

source('globalData.R')
source('interfaces.R')

# temporarily put here for dev
# Mapping from the local antibiotic mnemonic to the EARS-Net mnemonic / name.
tAntibiotics <- readxl::read_xlsx('data/ab_mapping.xlsx') %>%
  select(Mnemonic, Naam, `EARS-Net.Mnemonic`, `EARS-Net.Name`)

tAB <- vroom(file = file.path(dataPath, fnAB), show_col_types = FALSE)
tST <- vroom(file = file.path(dataPath, fnST), show_col_types = FALSE)
# The MS file is read with its first 5 lines skipped.
tMS <- vroom(file = file.path(dataPath, fnMS), skip = 5,
             show_col_types = FALSE)
tTS <- vroom(file = file.path(dataPath, fnTS), show_col_types = FALSE)
tTM <- vroom(file = file.path(dataPath, fnTM), show_col_types = FALSE)

tAB <- tAB %>%
  filter(Monsternummer %>% str_sub(1, 2) == '22') %>%
  mutate(Monsternummer = as.character(Monsternummer))

# reported isolates
tST <- tST %>%
  filter(
    Monsternummer %>% str_sub(1, 2) == '22',
    # '#' = not reported, do not include these
    # in ST this means: include only the rows where Rap is NA
    is.na(Rap) # = reported
  ) %>%
  select(-`UMCG 4D Uniekmaker Specimen - IsolNr`) %>%
  mutate(Monsternummer = as.character(Monsternummer))

tMS <- tMS %>%
  filter(
    Monsternr %>% str_sub(1, 2) == '22',
    ObjType != 'L'
    # Materiaal == 'MP_BLOED' # 118922, 2023-02-07
  ) %>%
  rename(Monsternummer = Monsternr, patientid = `PIN-lot`) %>%
  # Drops only `Materiaal Variabelen`; ObjType, Lot and Kamer stay in the
  # table (listing them without a minus sign inside a negative selection
  # never removed them).
  select(-`Materiaal Variabelen`) %>%
  # Combine the date and time columns into a single date-time.
  mutate(date = dmy_hms(paste(OntvOLabDt, as.character(Tijd)))) %>%
  select(-OntvOLabDt, -Tijd) %>%
  mutate(Monsternummer = as.character(Monsternummer))

tTS <- tTS %>%
  mutate(Monsternummer = as.character(Monsternummer))

# MS samples that have no isolate are negative. tMS is therefore the
# left-hand table of the joins below, so that those samples are kept.
# Next, only the culture tests from the MS file should be included.
x <- tMS %>%
  mutate(Monsternummer = str_trim(Monsternummer)) %>%
  left_join(tST, by = c('Monsternummer')) %>%
  left_join(tAB, by = c('Monsternummer', 'Isolnr' = 'IsolNr')) %>%
  left_join(
    tTS,
    by = c('Monsternummer', 'Isolnr' = 'Isolaatnr'),
    # Columns present on both sides get these suffixes.
    suffix = c('.MSxSTxAB', '.TS_IsolaatTest')
  )

if (minMemoryAsap) {
  # save memory
  rm(tAB, tST, tMS)
}

x <- x %>%
  # select(-`UMCG 4D Uniekmaker Specimen - IsolNr`) %>%
  # Characters 1-2 and 3-4 of the sample number are used as year and week.
  mutate(
    year = str_sub(Monsternummer, 1, 2) %>% as.numeric(),
    weeknr = str_sub(Monsternummer, 3, 4) %>% as.numeric()
  ) %>%
  # smaller subset in order to keep the first explorations workable
  filter(year == 22)
         # weeknr >= 41)

s <- x %>%
  # filter(Materiaal == 'MP_BLOED') %>% # 118922, 2023-02-07
  # fields expected by RadaR:
  #
  # sampleid   , date      , specimen_group , specimen_type ,
  # department , type_dept , specialism     ,
  # is_icu     , is_clinical , is_outward   ,
  # age        , gender    , patientid      , mo ,
  #
  # align column names
  rename(
    sampleid = Monsternummer,
    # mo = MO, # later by as.mo(MO)
    rap.st = Rap,
    rap.ab = Rapport.MSxSTxAB,
    department = LigAfd,
    specialism = Spec,
    age = Leeftijd,
    gender = Geslacht
  ) %>%
  # add yet missing fields
  add_column(
    specimen_group = 'bk',
    specimen_type = '',
    type_dept = '' # needed?
  ) %>%
  # is_icu() / is_clinical() / is_outward() are not defined in this script;
  # presumably they come from the files sourced above.
  mutate(
    is_icu = is_icu(department),
    is_clinical = is_clinical(department),
    is_outward = is_outward(department)
  ) %>%
  relocate(rap.ab, .after = rap.st) %>%
  relocate(c(year, weeknr, date), .after = sampleid) %>%
  # get AMR package (WHONET, EARS-Net std) mnemonics and names
  left_join(tAntibiotics, by = c('AB' = 'Mnemonic')) %>%
  rename(AB_WHO = `EARS-Net.Mnemonic`) %>%
  select(-AB)

if (minMemoryAsap) {
  # save memory
  rm(x)
}

# TODO: find diff in multiple rows per sampleid that yield io
if (FALSE) {
  s %>%
    filter(sampleid == '22010009431') %>%
    select(sampleid, AB_WHO, RIS, MIC, MO) %>%
    mutate(
      RIS = as.rsi(RIS),
      MIC = as.mic(MIC),
      mo = as.mo(MO)
    ) %>%
    pivot_wider(names_from = AB_WHO, values_from = RIS)
  # diff: rap.ab, Etst, Naam, AB_WHO, `EARS-Net.Name`
}

x <- s %>%
  select(
    -`EARS-Net.Name`, -Naam,
    # the following values cause duplicates in radarData, so they are
    # dropped for now; they will be needed later on
    -Rapport.TS_IsolaatTest, -Waarde.TS_IsolaatTest, -isolaattest,
    -MIC, -Etst, -Diff, -rap.ab
  ) %>%
  # RIS values of 'V' are a pain in the neck
  mutate(RIS = replace(RIS, RIS == 'V', NA)) %>%
  group_by(sampleid, Isolnr, MO, AB_WHO) %>%
  # mark rows to be deleted: within a sample / isolate / organism /
  # antibiotic group that has several rows, drop the rows without a RIS
  # value (only when an organism is present)
  mutate(deleteRow = n() > 1 & is.na(RIS) & !is.na(MO)) %>%
  ungroup() %>%
  # delete the rows that are marked with deleteRow
  filter(!deleteRow) %>%
  # remove column deleteRow
  select(-deleteRow) %>%
  # all multiple values should be absent now, so get distinct rows
  distinct(sampleid, Isolnr, AB_WHO, RIS, .keep_all = TRUE)

# Wide RIS table keyed on sample / isolate / organism only.
y <- x %>%
  pivot_wider(
    id_cols = c(sampleid, Isolnr, MO),
    names_from = AB_WHO,
    values_from = RIS
  )

radarData <- x %>%
  # filter(!is.na(AB_WHO)) %>%
  mutate(
    # NOTE(review): newer AMR releases (>= 2.0) rename as.rsi() to as.sir();
    # confirm against the installed AMR version before upgrading.
    RIS = as.rsi(RIS),
    # MIC = as.mic(MIC),
    mo = as.mo(MO)
  ) %>%
  pivot_wider(names_from = AB_WHO, values_from = RIS) %>%
  # pivot_wider(names_from = Rapport.TS_IsolaatTest,
  #             values_from = Waarde.TS_IsolaatTest)
  # Sort all columns alphabetically, then move the descriptive columns to
  # the front so that the antibiotic columns follow in alphabetical order.
  select(order(colnames(.))) %>%
  relocate(
    all_of(
      c(
        'sampleid', 'Isolnr', 'patientid', 'year', 'weeknr', 'date',
        'MMBProc', 'department', 'type_dept', 'specialism',
        'is_clinical', 'is_icu', 'is_outward',
        'specimen_type', 'specimen_group',
        'rap.st',
        # 'rap.ab',
        'age', 'gender',
        # 'Diff', 'Etst',
        'Waarde.MSxSTxAB',
        # 'Waarde.TS_IsolaatTest',
        # 'MIC',
        'MO', 'mo', 'Pos'
      )
    ),
    .before = 1
  ) %>%
  # Rows without an antibiotic pivot into a column literally named `NA`.
  # any_of() drops it when present and does nothing (instead of aborting)
  # when every row has an antibiotic.
  select(-any_of('NA'))

if (minMemoryAsap) {
  # save memory
  rm(s)
}

# Keep samples received from 2022-01-01 up to and including 2022-09-30.
# `date` is a date-time, so the comparison is done on its calendar day:
# comparing against the bare string '2022-09-30' is a comparison against
# midnight and silently drops every sample received during September 30th.
radarData <- radarData %>%
  filter(
    as_date(date) >= ymd('2022-01-01'),
    as_date(date) <= ymd('2022-09-30')
  )

# Interactive exploration / sanity checks; never executed when the script is
# sourced. Several statements refer to columns or tables that the pipeline
# above has already dropped, so run them selectively.
if (FALSE) {
  # TODO you would expect more Neg than Pos
  # DONE for this: tMS left_join tST instead of the other way round
  radarData %>% filter(!is.na(Pos), MO == 'mrsa') %>% count(patientid)
  radarData %>% filter(!is.na(Pos), MO == 'esccol') %>% count(patientid)
  radarData %>% filter(!is.na(Pos), MO == 'pseaer') %>% count(patientid)
  radarData %>% filter(!is.na(Pos), MO == 'klepne') %>% count(patientid)
  radarData %>% filter(!is.na(Pos), MO == 'canalb') %>% count(patientid)
  radarData %>% filter(!is.na(Pos), MO == 'enbcco') %>% count(patientid)

  radarData %>%
    filter(isolaattest == 'ESBL', Waarde.TS_IsolaatTest == '+',
           MO == 'enbcco') %>%
    count(patientid)
  radarData %>%
    filter(isolaattest == 'ESBL', Waarde.TS_IsolaatTest == '+',
           MO == 'esccol') %>%
    count(patientid)
  radarData %>%
    filter(isolaattest == 'ESBL', Waarde.TS_IsolaatTest == '+',
           MO == 'klepne') %>%
    count(patientid)

  radarData %>% filter(MO == 'klepne', GEN == 'R') %>% count(patientid)
  radarData %>% filter(MO == 'klepne', CIP == 'R') %>% count(patientid)
  radarData %>% filter(MO == 'klepne', MEM == 'R') %>% count(patientid)
  radarData %>% filter(MO == 'staaur', CLI == 'R') %>% count(patientid)

  radarData %>% filter(is.na(MO), is.na(rap.st))

  # rows for patient 8000005 in MS file: 36
  tMS %>% filter(patientid == '8000005') %>% count(Monsternummer)
  # rows for patient 8000005 in ST file: 23
  tST %>%
    filter(
      Monsternummer %in% (
        tMS %>%
          filter(patientid == '8000005') %>%
          pull(Monsternummer) %>%
          unique()
      )
    ) %>%
    group_by(Monsternummer) %>%
    mutate(n = n()) %>%
    print(n = 25)
  # 22 * 36 = 792

  x %>%
    filter(sampleid == '22070019631') %>%
    select(
      -year, -weeknr, -date, -patientid, -age, -ObjType, -Lot, -gender,
      -PC, -department, -Type, -Kamer, -Isolnr, MO, -Waarde.MSxSTxAB,
      -Pos, -rap.st, -MIC, -Diff, -isolaattest, -Waarde.TS_IsolaatTest,
      -Rapport.TS_IsolaatTest, -Afname, -is_icu, -is_clinical, -is_outward
    ) %>%
    print(n = 38)

  x %>%
    filter(sampleid == '22070019631', Isolnr == 3) %>%
    # these values cause duplicates in radarData, so they are dropped for
    # now; they will be needed later on
    # select(-MIC, -Etst, -Diff, -rap.ab) %>%
    pivot_wider(names_from = AB_WHO, values_from = RIS) %>%
    view()
  # pivot_wider(names_from = Rapport.TS_IsolaatTest,
  #             values_from = Waarde.TS_IsolaatTest) %>% view()

  # Returns sampleid, patientid, RIS and every column of `x` that does not
  # hold exactly one distinct value.
  #
  # NOTE(review): distinct values are counted over the whole table, not per
  # sampleid / Isolnr. The group_by() that used to precede the pull() had no
  # effect on its result; confirm whether a per-group count was intended.
  listMultiValCols <- function(x) {
    col_names <- colnames(x)
    n_distinct_vals <- vapply(
      col_names,
      function(col_name) length(unique(x[[col_name]])),
      integer(1)
    )
    multi_val_cols <- col_names[n_distinct_vals != 1]
    x %>% select(sampleid, patientid, all_of(multi_val_cols), RIS)
  }

  # this AB still yields a list column
  x %>%
    filter(AB_WHO == 'CAZ') %>%
    count(sampleid, Isolnr, AB_WHO) %>%
    filter(n > 1)
  x %>%
    filter(
      sampleid == '22070547731' %>% str_trim(),
      AB_WHO == 'CAZ',
      RIS != 'V'
    ) %>%
    select(sampleid, Isolnr, AB_WHO, RIS)

  key <- c('sampleid', 'Isolnr')
  y %>%
    filter(!is.na(FOX)) %>%
    select(all_of(key), FOX)

  x %>%
    select(sampleid, Isolnr, AB_WHO, RIS) %>%
    pivot_wider(
      id_cols = c(sampleid, Isolnr),
      names_from = AB_WHO,
      values_from = RIS
    ) %>%
    select(1:5)

  # Selects the key result columns plus any extra columns named in `s`.
  selectRelCols <- function(x, s = c()) {
    x %>% select(sampleid, Isolnr, MO, AB_WHO, RIS, all_of(s))
  }
}

# vroom_write(x = x,
#             file = file.path(getwd(), dataPath, 'ST x AB_2022.csv'),
#             delim = ';')