mirror of
https://github.com/msberends/AMR.git
synced 2026-06-24 09:36:34 +02:00
623 lines
32 KiB
R
623 lines
32 KiB
R
# Copyright (c) [2022] [Larisse Bolton]
|
|
|
|
#' Derive WISCA parameters: pathogen incidence and regimen susceptibility
#'
#' Cleans an isolate-level data set, keeps the requested antimicrobial
#' regimens, optionally reduces it to first isolates, and then computes, for
#' the most frequent pathogens, how often each pathogen occurs and what
#' proportion of tested isolates is susceptible to each regimen.
#'
#' Relies on functions and data from dplyr, tidyr, stringr, lubridate and AMR
#' (`antimicrobials`, `ab_name()`, `as.ab()`, `as.mo()`, `first_isolate()`,
#' `left_join_microorganisms()`), which must already be attached.
#'
#' @param x Data frame with one row per isolate. Needs a `date` column, either
#'   a `mo` or a `fullname` column, and one column per tested antimicrobial
#'   holding values such as "S", "R", "I", "SENSITIVE", "RESISTANT" or
#'   "INTERMEDIATE". A `patient` column is created when absent. Some upper-case
#'   export style names (ORG...NAME, DATE...CULTURE, WARD..., HOSP...) are
#'   mapped onto the expected names.
#' @param antibiotic_in Character vector of regimens. A "+" inside an element
#'   marks a combination regimen (e.g. "amoxicillin+gentamicin").
#' @param pathogen_in Number of most frequent pathogens to keep (passed as `n`
#'   to `slice_max()`).
#' @param analysis Name of a column to stratify by (e.g. ward or hospital). If
#'   it is not a column of the prepared data, one overall analysis is run.
#' @param exclude Organism full names to drop.
#' @param isolate_first "yes" when `x` is already restricted to first
#'   isolates; any other value triggers `first_isolate()` with 14-day episodes.
#' @param susceptible_I Value that "I"/"INTERMEDIATE" results are recoded to.
#' @param infection_in Value of `infection_type` to keep; "" keeps all rows.
#' @param infection_full Currently unused; only referenced by the disabled
#'   post-processing kept as comments at the end of the function.
#'
#' @return A data frame with one row per pathogen and regimen (and stratum, if
#'   any) containing `mo`, `fullname`, `n.x` (isolate count), `prop.n`,
#'   `keyantimicrobials`, `tested_n`, `S_n`, `prop.S` and `perc.S`. Empty when
#'   no isolate has a susceptibility result.
#'
#' @section Side effects:
#' For backward compatibility the intermediate objects
#' `test_isolates_first_upd`, `test_isolates_final`,
#' `susceptible_regimen_tested`, `susceptible_regimen_ntested`,
#' `organism_count_df_tested` and `organism_count_df_ntested` are still
#' written with `<<-`. Progress is reported with `print()` and `warning()`.
wisca_params <- function(x, antibiotic_in, pathogen_in, analysis, exclude, isolate_first, susceptible_I, infection_in, infection_full) {

  # ---- Local helpers -------------------------------------------------------

  # TRUE when a column holds at least one susceptibility result.
  has_sir_values <- function(column) {
    isTRUE(any(str_detect(column, "^(S|R|SENSITIVE|RESISTANT)$"), na.rm = TRUE))
  }

  # Look one antimicrobial up in the AMR `antimicrobials` table. Returns
  # zero-length vectors when the term cannot be matched.
  lookup_antibiotic <- function(term) {
    full_name <- ab_name(term, only_first = TRUE)
    code <- as.ab(full_name)
    hit <- subset(antimicrobials, antimicrobials$ab %in% code)[, c("ab", "name")]
    list(code = as.character(hit$ab), name = as.character(hit$name))
  }

  # Resolve every component of a combination regimen to its code and name.
  resolve_regimen <- function(parts) {
    hits <- lapply(parts, lookup_antibiotic)
    unmatched <- parts[vapply(hits, function(h) length(h$code) == 0, logical(1))]
    if (length(unmatched) > 0) {
      stop(
        "Could not match antimicrobial(s): ",
        paste(unmatched, collapse = ", "),
        call. = FALSE
      )
    }
    list(
      codes = vapply(hits, function(h) h$code[1], character(1)),
      names = vapply(hits, function(h) h$name[1], character(1))
    )
  }

  # Build the regimen table: columns ab1..abk (codes) and namerx (full names).
  # Single agents come first, combination regimens after them.
  antibiotic_df <- function(abo_in) {
    is_combo <- str_detect(abo_in, fixed("+"))
    mono_terms <- str_to_title(abo_in[!is_combo])
    combo_terms <- str_to_title(abo_in[is_combo])

    # Unmatched single agents contribute no row.
    mono_hits <- lapply(mono_terms, lookup_antibiotic)
    mono_set <- data.frame(
      ab1 = as.character(unlist(lapply(mono_hits, `[[`, "code"))),
      namerx = as.character(unlist(lapply(mono_hits, `[[`, "name"))),
      stringsAsFactors = FALSE
    )

    combo_rows <- lapply(combo_terms, function(regimen) {
      resolved <- resolve_regimen(unlist(str_split(regimen, fixed("+"))))
      row <- as.data.frame(
        as.list(setNames(resolved$codes, paste0("ab", seq_along(resolved$codes)))),
        stringsAsFactors = FALSE
      )
      row$namerx <- paste(resolved$names, collapse = "+")
      row
    })

    # bind_rows() pads with NA, so regimens of different sizes can be mixed.
    regimen_set <- bind_rows(c(list(mono_set), combo_rows))
    select(regimen_set, starts_with("ab"), starts_with("namerx"))
  }

  # Rename the susceptibility columns of `df` to AMR codes. Columns listed in
  # `keep_cols` are left alone; "A+B" style columns become "CODE1+CODE2".
  standardise_regimen_names <- function(df, keep_cols) {
    nms <- names(df)
    combo_idx <- which(!(nms %in% keep_cols) & str_detect(nms, fixed("+")))
    for (idx in combo_idx) {
      parts <- str_remove_all(
        unlist(str_split(nms[idx], fixed("+"))),
        "[[:punct:]]"
      )
      nms[idx] <- paste(resolve_regimen(parts)$codes, collapse = "+")
    }
    single_idx <- which(!str_detect(nms, fixed("+")) & !(nms %in% keep_cols))
    if (length(single_idx) > 0) {
      nms[single_idx] <- as.character(as.ab(nms[single_idx]))
    }
    dup_idx <- which(duplicated(nms))
    nms[dup_idx] <- str_c(nms[dup_idx], ".x")
    names(df) <- nms
    df
  }

  # Report whether the number of infection episodes is adequate for WISCA.
  report_sample_size <- function(infection_episodes) {
    if (infection_episodes < 100) { # must have at least 100 infection episodes
      warning(paste0("Number of infection episodes = ",infection_episodes,". Minimum sample size for WISCA is 100 infection episodes. Below minimum sample size will produce inaccurate coverage estimates."))
    } else {
      print(paste0("Number of infection episodes = ",infection_episodes," > 100 minimum required. Sample size adequate for WISCA analysis."))
    }
  }

  # Count distinct (date, patient) isolates per pathogen among `top_mo`.
  count_top_pathogens <- function(isolates, top_mo) {
    isolates %>%
      filter(mo %in% top_mo) %>%
      group_by(mo) %>%
      distinct(date, patient, .keep_all = TRUE) %>%
      count(mo) %>%
      ungroup() %>%
      arrange(desc(n))
  }

  # Count results per pathogen and regimen, either for isolates with a result
  # (`tested = TRUE`) or for isolates whose result is NA (`tested = FALSE`).
  count_results <- function(isolates, top_mo, tested) {
    isolates %>%
      filter(mo %in% top_mo) %>%
      filter(is.na(antibiogram) != tested) %>%
      group_by(mo, keyantimicrobials) %>%
      distinct(date, patient, .keep_all = TRUE) %>%
      count(antibiogram) %>%
      ungroup() %>%
      arrange(desc(n))
  }

  # Susceptibility per pathogen and regimen for one stratum. Returns the
  # intermediate tables too, so the caller can export them. `sensitivity` is
  # NULL when no isolate in the stratum has a result.
  summarise_regimens <- function(isolates, organism_count_df, top_mo, low_n_message) {
    regimen_tested <- count_results(isolates, top_mo, tested = TRUE)
    regimen_ntested <- count_results(isolates, top_mo, tested = FALSE)

    count_tested <- regimen_tested %>%
      group_by(mo, keyantimicrobials) %>%
      summarise(tested_n = sum(n), .groups = "keep") %>%
      ungroup()
    count_ntested <- regimen_ntested %>%
      group_by(mo, keyantimicrobials) %>%
      summarise(ntested_n = sum(n), .groups = "keep") %>%
      ungroup()

    # combine tested and non-tested counts
    count_combined <- full_join(
      x = count_tested, y = count_ntested,
      by = c("mo", "keyantimicrobials")
    ) %>%
      mutate(
        tested_n = ifelse(is.na(tested_n), 0, tested_n),
        ntested_n = ifelse(is.na(ntested_n), 0, ntested_n)
      )

    count_regimen_tested <- left_join(
      x = organism_count_df,
      y = count_combined[, c("mo", "keyantimicrobials", "tested_n")],
      by = "mo"
    )

    # at least 30 tested isolates per regimen are needed for reliable estimates
    condition_tested <- count_regimen_tested %>%
      filter(tested_n < 30)
    if (nrow(condition_tested) > 0) {
      warning(low_n_message)
      low_n_table <- condition_tested[, c("mo", "n", "keyantimicrobials", "tested_n")]
      # View() needs an interactive session; print the table otherwise.
      if (interactive()) {
        View(low_n_table)
      } else {
        print(low_n_table)
      }
    } else {
      print("Number of tested isolates per regimen exceed minimum threshold of 30.")
    }

    sensitivity <- NULL
    if (nrow(regimen_tested) > 0) {
      tested_sensitive <- regimen_tested %>%
        filter(antibiogram == "S")
      sensitivity <- left_join(
        x = count_regimen_tested, y = tested_sensitive,
        by = c("mo", "keyantimicrobials")
      ) %>%
        rename("S_n" = "n.y") %>%
        mutate(S_n = ifelse(is.na(S_n), 0, S_n)) %>%
        select(-antibiogram) %>%
        # proportion susceptible among isolates that were tested
        mutate(prop.S = ifelse(tested_n != 0, S_n / tested_n, 0))
      sensitivity$perc.S <- sensitivity$prop.S * 100
    }

    list(
      sensitivity = sensitivity,
      regimen_tested = regimen_tested,
      regimen_ntested = regimen_ntested,
      count_tested = count_tested,
      count_ntested = count_ntested
    )
  }

  # ---- Select the syndrome under investigation -----------------------------

  if (infection_in != "") {
    x <- x %>%
      filter(infection_type == infection_in)
  }

  # ---- Regimens under investigation ----------------------------------------

  antibiotic_rx <- antibiotic_df(abo_in = antibiotic_in)

  # ---- Harmonise column names ----------------------------------------------

  # Triggered when a column name ends in a capital letter and is neither an
  # AMR antimicrobial code nor an antimicrobial name.
  nonstandard <- str_detect(names(x), "[A-Z]$") &
    !(names(x) %in% antimicrobials$ab) &
    !(names(x) %in% antimicrobials$name)
  if (any(nonstandard)) {
    to_upper <- which(
      str_detect(names(x), "[a-z]+") &
        !str_detect(names(x), "fullname") &
        !str_detect(names(x), "mo")
    )
    names(x)[to_upper] <- str_to_upper(names(x)[to_upper])
    names(x)[which(str_detect(names(x), "^ORG") & str_detect(names(x), "NAME$"))] <- "fullname"
    names(x)[which(str_detect(names(x), "^ORG") & str_detect(names(x), "CODE$"))] <- "organism"
    names(x)[which(
      (str_detect(names(x), "^DATE") & str_detect(names(x), "CULTURE$")) |
        (str_detect(names(x), "^DATE") & str_detect(names(x), "SPECIMEN"))
    )] <- "date"
    names(x)[which(str_detect(names(x), "^EPISODE") | str_detect(names(x), "^PAT"))] <- "patient"
    names(x)[which(str_detect(names(x), "^WARD"))] <- "ward"
    names(x)[which(str_detect(names(x), "^HOSP"))] <- "hospital"
    names(x)[which(str_detect(names(x), "DATE") & str_detect(names(x), "REGISTRATION"))] <- "date"
  }

  # Keep the first of any columns that ended up with the same name.
  if (any(duplicated(names(x)))) {
    x <- x[, !duplicated(names(x)), drop = FALSE]
  }

  # Sort by the date column (a quoted "date" would sort by a constant).
  x <- arrange(x, date)

  # Without an episode identifier every row is its own episode.
  if (!any(str_detect(names(x), "patient"))) {
    x$patient <- seq_len(nrow(x))
  }

  # ---- Split columns into susceptibility and demographic data --------------

  micro_df <- names(x)[vapply(x, has_sir_values, logical(1))]
  demograph_df <- names(x)[!(names(x) %in% micro_df)]

  # ---- Attach organism code / name and standardise regimen columns ---------

  if ("mo" %in% names(x)) {
    # keep_synonyms belongs to as.mo(); as a mutate() argument it would only
    # add a constant column that is later mistaken for an antimicrobial.
    x <- x %>%
      mutate(mo = as.mo(mo, keep_synonyms = TRUE))

    test_isolates_pre <- x %>%
      mutate(across(.cols = all_of(demograph_df), ~ str_to_title(.x))) %>%
      mutate(mo = toupper(mo))

    if ("fullname" %in% names(x)) {
      test_isolates <- test_isolates_pre[, c(demograph_df)]
    } else {
      # add the organism name from the AMR microorganisms table
      test_isolates <- left_join_microorganisms(x = test_isolates_pre, by = "mo")[, c(demograph_df, "fullname")]
    }

    # drop excluded organisms, then apply the final name style
    test_isolates <- test_isolates %>%
      filter(!(fullname %in% exclude)) %>%
      mutate(fullname = str_to_title(fullname))

    rx_abo <- x %>%
      filter(mo %in% test_isolates$mo) %>%
      distinct(date, patient, mo, .keep_all = TRUE)

    rx_abo <- standardise_regimen_names(rx_abo, demograph_df)
    rx_abo_upd <- left_join_microorganisms(x = rx_abo, by = "mo")[, c(names(rx_abo), "fullname")]
  } else {
    # Only the organism name is available.
    # NOTE(review): names are title-cased before being joined on `fullname`;
    # rows that fail to match get their `mo` filled in further below.
    test_isolates_pre <- x %>%
      mutate(across(.cols = all_of(demograph_df), ~ str_to_title(.x))) %>%
      filter(!(fullname %in% unique(exclude)))

    test_isolates <- left_join_microorganisms(x = test_isolates_pre, by = "fullname")[, c(demograph_df, "mo")]
    test_isolates$fullname <- str_to_title(test_isolates$fullname)
    test_isolates$mo <- toupper(test_isolates$mo)

    rx_abo <- x %>%
      mutate(fullname = str_to_title(fullname)) %>%
      filter(!(fullname %in% unique(exclude))) %>%
      distinct(date, patient, fullname, .keep_all = TRUE)

    rx_abo <- standardise_regimen_names(rx_abo, demograph_df)
    rx_abo_upd <- left_join_microorganisms(x = rx_abo, by = "fullname")[, c(names(rx_abo), "mo")]
  }

  rx_abo_upd$fullname <- str_to_title(rx_abo_upd$fullname)

  # ---- Demographics plus only the regimens under investigation -------------

  rx_1 <- select(antibiotic_rx, starts_with("ab")) # one row per regimen
  rx_df <- test_isolates
  var_date <- names(rx_df)[str_detect(names(rx_df), "date")]
  rx_df[[var_date]] <- ymd(rx_df[[var_date]])
  var_patient <- names(rx_df)[str_detect(names(rx_df), "patient")]
  rx_df[[var_patient]] <- toupper(rx_df[[var_patient]])

  key_cols <- c("date", "patient", "mo")

  # merge() warns about the repeated column names this loop creates on purpose
  suppressWarnings({
    for (k in seq_len(nrow(rx_1))) {
      rx_2 <- unlist(rx_1[k, , drop = FALSE], use.names = FALSE)
      rx_2 <- rx_2[!is.na(str_extract(rx_2, "[A-Z]+"))]
      if (length(rx_2) == 0) {
        next
      }
      rx_key <- paste(rx_2, collapse = "+")

      if (length(rx_2) > 1 && !(rx_key %in% names(rx_abo_upd))) {
        # Combination whose joint susceptibility is not in the data: attach
        # the individual agents and label each of their columns with the
        # combination name.
        rx_df <- merge(x = rx_df, y = rx_abo_upd[, c(key_cols, rx_2)], by = key_cols)
        for (jj in seq_along(rx_2)) {
          # an agent merged earlier comes back with a ".y" suffix
          suffixed <- paste0(rx_2[jj], ".y")
          if (any(str_detect(names(rx_df), suffixed))) {
            rx_df[[suffixed]] <- str_trim(rx_df[[suffixed]])
          } else {
            rx_df[[rx_2[jj]]] <- str_trim(rx_df[[rx_2[jj]]])
          }
        }
        names(rx_df)[names(rx_df) %in% rx_2] <- rx_key
        names(rx_df)[names(rx_df) %in% paste0(rx_2, ".y")] <- rx_key
        names(rx_df)[names(rx_df) %in% paste0(rx_2, ".x")] <- rx_2
      } else if (length(rx_2) > 1) {
        # Combination already reported as a single column in the data.
        rx_df <- merge(x = rx_df, y = rx_abo_upd[, c(key_cols, rx_key)], by = key_cols)
        rx_df[[rx_key]] <- str_trim(rx_df[[rx_key]])
      } else {
        # Single agent.
        rx_df <- merge(x = rx_df, y = rx_abo_upd[, c(key_cols, rx_2)], by = key_cols)
        rx_df[[rx_2]] <- str_trim(rx_df[[rx_2]])
        names(rx_df)[names(rx_df) %in% rx_2] <- rx_key
        names(rx_df)[names(rx_df) %in% paste0(rx_2, ".y")] <- rx_key
        names(rx_df)[names(rx_df) %in% paste0(rx_2, ".x")] <- rx_2
      }
    }
  })

  # Rows without an organism code: derive it from the name where there is
  # one, otherwise drop the row. Handled per row so that several missing
  # codes do not break the condition.
  missing_mo <- which(is.na(rx_df$mo))
  if (length(missing_mo) > 0) {
    missing_names <- rx_df$fullname[missing_mo]
    has_name <- !is.na(missing_names) & missing_names != "NA"
    fill_rows <- missing_mo[has_name]
    drop_rows <- missing_mo[!has_name]
    if (length(fill_rows) > 0) {
      rx_df$mo[fill_rows] <- as.character(
        as.mo(rx_df$fullname[fill_rows], keep_synonyms = TRUE)
      )
    }
    if (length(drop_rows) > 0) {
      rx_df <- rx_df[-drop_rows, , drop = FALSE]
    }
  }

  # ---- First isolates ------------------------------------------------------

  if (isolate_first == "yes") { # already deduplicated by the caller
    test_isolates_first <- rx_df
  } else {
    first_flags <- first_isolate(
      x = rx_df,
      method = "e",
      episode_days = 14,
      col_date = var_date,
      col_mo = "mo"
    )
    test_isolates_first <- rx_df[first_flags == TRUE, ]
  }

  # ---- Long format: one row per isolate and regimen ------------------------

  sir_cols <- names(test_isolates_first)[vapply(test_isolates_first, has_sir_values, logical(1))]
  long_isolates <- test_isolates_first %>%
    pivot_longer(
      cols = sir_cols,
      names_to = "keyantimicrobials", values_to = "antibiogram"
    ) %>%
    # intermediate results are recoded to the caller's choice
    mutate(antibiogram = ifelse(antibiogram == "I" | antibiogram == "INTERMEDIATE", susceptible_I, antibiogram)) %>%
    mutate(antibiogram = ifelse(antibiogram == "SENSITIVE", "S", antibiogram)) %>%
    mutate(antibiogram = ifelse(antibiogram == "RESISTANT", "R", antibiogram)) %>%
    mutate(
      antibiogram = factor(antibiogram),
      keyantimicrobials = as.factor(keyantimicrobials)
    )
  test_isolates_first_upd <<- long_isolates # exported for backward compatibility

  # Recode combination regimens assembled from separate columns: any S -> S;
  # otherwise any missing result -> NA; otherwise R.
  # NOTE(review): this condition inspects the column names of the long table,
  # which look unique after pivot_longer(), so the branch may never run.
  # Confirm whether the wide table's names were meant.
  if (any(duplicated(names(long_isolates)))) {
    test_isolates_temp3 <- long_isolates %>%
      mutate(antibiogram = as.character(antibiogram)) %>%
      mutate(antibiogram = replace_na(antibiogram, "U")) %>%
      mutate(combiantibiogram = "A")
    test_isolates_temp4 <- test_isolates_temp3 %>%
      group_by(date, patient, mo, keyantimicrobials) %>%
      mutate(combiantibiogram = ifelse(any(str_detect(antibiogram, "S")), "S", combiantibiogram)) %>%
      mutate(combiantibiogram = ifelse(any(str_detect(antibiogram, "U")) & !any(str_detect(antibiogram, "S")), "U", combiantibiogram)) %>%
      mutate(combiantibiogram = ifelse(!any(str_detect(antibiogram, "U")) & !any(str_detect(antibiogram, "S")), "R", combiantibiogram)) %>%
      distinct(date, patient, mo, .keep_all = TRUE) %>%
      ungroup() %>%
      select(-antibiogram) %>%
      rename("antibiogram" = "combiantibiogram") %>%
      mutate(antibiogram = ifelse(antibiogram == "U", NA_character_, antibiogram)) %>%
      mutate(antibiogram = as.factor(antibiogram))

    test_isolates_temp5 <- long_isolates %>% # single-agent regimens
      group_by(date, patient, mo) %>%
      filter(!str_detect(keyantimicrobials, fixed("+"))) %>%
      ungroup()

    test_isolates_final <- bind_rows(test_isolates_temp4, test_isolates_temp5) %>%
      arrange(patient)
  } else {
    test_isolates_final <- long_isolates
  }

  test_isolates_final <<- test_isolates_final # exported for backward compatibility

  test_isolates_final$fullname <- as.factor(test_isolates_final$fullname)
  test_isolates_final$date <- ymd(test_isolates_final$date)

  # ---- Pathogen incidence --------------------------------------------------

  organism_count_total <- test_isolates_final %>%
    group_by(mo) %>%
    distinct(date, patient, .keep_all = TRUE) %>%
    count(mo) %>%
    ungroup() %>%
    arrange(desc(n))

  # all Klebsiella species are pooled under the genus code
  organism_count_total_upd <- organism_count_total %>%
    mutate(mo = as.character(mo)) %>%
    mutate(mo = ifelse(str_detect(mo, "^B_KLBSL"), "B_KLBSL", mo)) %>%
    mutate(mo = as.factor(mo))
  klebsiella_combine <- organism_count_total_upd %>%
    filter(mo == "B_KLBSL") %>%
    summarise(n_kleb = sum(n))
  organism_count_total_upd <- organism_count_total_upd %>%
    mutate(n = ifelse(mo == "B_KLBSL", klebsiella_combine$n_kleb, n)) %>%
    distinct(mo, .keep_all = TRUE) %>%
    rename("n_inc" = "n")

  # only the `pathogen_in` most frequent pathogens enter the WISCA
  regimen_organism <- slice_max(organism_count_total_upd, order_by = n_inc, n = pathogen_in)
  total_pathogens <- sum(regimen_organism$n_inc)
  print(paste0("Total pathogens = ",sum(organism_count_total_upd$n_inc)))
  print(paste0("Number of pathogens contributing to WISCA = ", total_pathogens))
  print(paste0("Percentage of pathogens contributing to WISCA = ",round((total_pathogens/sum(organism_count_total_upd$n_inc))*100,0),"%"))

  test_isolates_final <- test_isolates_final %>%
    mutate(mo = as.character(mo)) %>%
    mutate(mo = ifelse(str_detect(mo, "^B_KLBSL"), "B_KLBSL", mo)) %>%
    mutate(mo = as.factor(mo)) %>%
    mutate(fullname = as.character(fullname)) %>%
    mutate(fullname = ifelse(str_detect(fullname, "Klebsiella"), "Klebsiella sp.", fullname)) %>%
    mutate(fullname = as.factor(fullname))

  # ---- Incidence and susceptibility per regimen ----------------------------

  params_set_out <- data.frame()

  if (analysis %in% names(test_isolates_final)) {
    # one analysis per facility / ward / category
    site_values <- unique(test_isolates_final[[analysis]])
    for (j in seq_along(site_values)) {
      site <- site_values[j]

      infection_episodes <- length(unique(test_isolates_final[test_isolates_final[[analysis]] == site, ]$patient))
      report_sample_size(infection_episodes)

      y_site <- subset(test_isolates_final, test_isolates_final[[analysis]] == site)

      mo_name_sites <- y_site %>%
        filter(!duplicated(mo)) %>%
        select(mo, fullname, all_of(analysis))

      organism_count_regimen <- count_top_pathogens(y_site, regimen_organism$mo)
      organism_count_df <- left_join(x = organism_count_regimen, y = mo_name_sites, by = "mo")[, c("mo", analysis, "fullname", "n")]

      total_sites_pathogens <- sum(organism_count_df$n)
      print(paste0("Number of pathogens contributing to WISCA in ",site," = ", total_sites_pathogens))

      # incidence = isolates of the pathogen / all included isolates at site
      organism_count_df <- organism_count_df %>%
        mutate(prop.n = n / total_sites_pathogens)

      site_result <- summarise_regimens(
        isolates = y_site,
        organism_count_df = organism_count_df,
        top_mo = regimen_organism$mo,
        low_n_message = "Number of tested isolates per regimen should exceed 30. Coverage estimates will be inaccurate for following regimens"
      )

      # exported for backward compatibility (overwritten for every site)
      susceptible_regimen_tested <<- site_result$regimen_tested
      susceptible_regimen_ntested <<- site_result$regimen_ntested
      organism_count_df_tested <<- site_result$count_tested
      organism_count_df_ntested <<- site_result$count_ntested

      if (!is.null(site_result$sensitivity)) {
        params_set_out <- rbind.data.frame(params_set_out, site_result$sensitivity, row.names = NULL)
      }
    }

    # nothing to tidy when no site produced any result
    if (nrow(params_set_out) > 0) {
      params_set_out$fullname <- droplevels(params_set_out$fullname)
      params_set_out[[analysis]] <- as.factor(params_set_out[[analysis]])
    }
  } else {
    # one analysis over the whole data set
    infection_episodes <- length(unique(test_isolates_final$patient))
    report_sample_size(infection_episodes)

    mo_name_sites <- test_isolates_final %>%
      filter(!duplicated(mo)) %>%
      select(mo, fullname)

    organism_count_path <- count_top_pathogens(test_isolates_final, regimen_organism$mo)
    organism_count_df <- left_join(x = organism_count_path, y = mo_name_sites, by = "mo")[, c("mo", "fullname", "n")]

    # incidence = isolates of the pathogen / all isolates included in WISCA
    organism_count_df <- organism_count_df %>%
      mutate(prop.n = n / total_pathogens)

    overall_result <- summarise_regimens(
      isolates = test_isolates_final,
      organism_count_df = organism_count_df,
      top_mo = regimen_organism$mo,
      low_n_message = "Number of tested isolates per regimen should exceed 30. Coverage estimates will be inaccurate for these regimens."
    )

    # exported for backward compatibility
    susceptible_regimen_tested <<- overall_result$regimen_tested
    susceptible_regimen_ntested <<- overall_result$regimen_ntested
    organism_count_df_tested <<- overall_result$count_tested
    organism_count_df_ntested <<- overall_result$count_ntested

    # There is no loop to skip out of here: with no tested isolates the
    # empty result is simply returned.
    if (!is.null(overall_result$sensitivity)) {
      params_set_out <- rbind.data.frame(params_set_out, overall_result$sensitivity, row.names = NULL)
      params_set_out$fullname <- droplevels(params_set_out$fullname)
    }
  }

  # Disabled post-processing, kept for reference:
  # if (infection_in != ""){
  #   params_set_out <- params_set_out %>%
  #     mutate(infection_type = infection_in) %>%
  #     mutate(infection_type_full = infection_full) %>%
  #     rowwise() %>%
  #     mutate(regimens = ifelse(str_detect(keyantimicrobials, fixed("+")),
  #                              str_c(sapply(unlist(unique(str_split(keyantimicrobials, fixed("+")))),ab_name,USE.NAMES = FALSE), collapse = " + "),
  #                              ab_name(keyantimicrobials))) %>%
  #     select(mo:prop.n, regimens,keyantimicrobials,tested_n:infection_type, infection_type_full)
  # } else {
  #   print("here??")
  #   params_set_out <- params_set_out %>%
  #     mutate(infection_type_full = infection_full) %>%
  #     rowwise() %>%
  #     mutate(regimens = ifelse(str_detect(keyantimicrobials, fixed("+")),
  #                              str_c(sapply(unlist(unique(str_split(keyantimicrobials, fixed("+")))),ab_name,USE.NAMES = FALSE), collapse = " + "),
  #                              ab_name(keyantimicrobials))) %>%
  #     select(mo:prop.n, regimens,keyantimicrobials,tested_n:infection_type, infection_type_full)
  # }

  return(params_set_out)
}
|