1
0
mirror of https://github.com/msberends/AMR.git synced 2026-06-24 09:36:34 +02:00
Files
AMR/data-raw/wisca_test/parameters_amr.R
2026-06-23 01:38:13 +02:00

623 lines
32 KiB
R

# Copyright (c) [2022] [Larisse Bolton]
# Build the per-organism, per-regimen parameter table for a WISCA analysis.
#
# Arguments:
#   x              Data frame of isolates. Must provide `date`, and `mo` and/or
#                  `fullname`; a `patient` column is created (row number) when
#                  no column name contains "patient". Susceptibility columns
#                  are recognised by holding S / R / SENSITIVE / RESISTANT.
#                  `infection_type` is required when `infection_in` is not "".
#   antibiotic_in  Character vector of regimens; the components of a
#                  combination regimen are separated by "+".
#   pathogen_in    Number of most frequent organisms to keep (passed to
#                  `slice_max(n = )`).
#   analysis       Name of a stratification column. When it is not a column of
#                  the prepared data, a single pooled analysis is run.
#   exclude        Organism full names to drop (matched against `fullname`).
#   isolate_first  "yes" when `x` is already reduced to first isolates;
#                  otherwise `first_isolate()` is applied (14-day episodes).
#   susceptible_I  Value that replaces "I" / "INTERMEDIATE" results.
#   infection_in   Value of `infection_type` to keep; "" disables the filter.
#   infection_full Accepted for backward compatibility; currently unused (the
#                  code that attached infection labels is disabled).
#
# Returns: a data frame with one row per organism and regimen, holding the
#   organism count, `prop.n` (pathogen incidence), `tested_n`, `S_n`, `prop.S`
#   and `perc.S`. It is an empty data frame when nothing was tested.
#
# Side effects: prints progress, raises warnings on small samples, shows the
#   under-tested regimens (viewer when interactive, printed otherwise), and
#   assigns `test_isolates_first_upd`, `test_isolates_final`,
#   `susceptible_regimen_tested`, `susceptible_regimen_ntested`,
#   `organism_count_df_tested` and `organism_count_df_ntested` with `<<-`.
#   These exports are kept because callers may rely on them.
wisca_params <- function(x, antibiotic_in, pathogen_in, analysis, exclude,
                         isolate_first, susceptible_I, infection_in,
                         infection_full) {
  # ---- Local helpers --------------------------------------------------------

  # Does a column hold at least one susceptibility result?
  has_sir_values <- function(column) {
    any(str_detect(column, "^S$")) | any(str_detect(column, "^R$")) |
      any(str_detect(column, "^SENSITIVE$")) | any(str_detect(column, "^RESISTANT$"))
  }

  # Look up every component of one combination regimen in `antimicrobials` and
  # return a one-row table: ab1, ab2, ... (codes) plus namerx (names joined
  # with "+").
  regimen_row <- function(components) {
    lookup <- data.frame(x = rep(0, times = 2), y = rep(0, times = 2))
    for (i in seq_along(components)) {
      drug_name <- ab_name(components[i], only_first = TRUE)
      drug_code <- as.ab(drug_name)
      drug <- subset(antimicrobials, antimicrobials$ab %in% drug_code)[, c("ab", "name")]
      lookup[i, ] <- drug
      names(lookup) <- names(drug)
    }
    wide <- pivot_wider(lookup, names_from = name, values_from = ab)
    regimen_name <- str_flatten(str_c(names(wide), collapse = "+"))
    wide <- mutate(wide, namerx = regimen_name)
    names(wide)[seq_len(ncol(wide) - 1)] <- str_c("ab", seq_len(ncol(wide) - 1))
    names(wide)[ncol(wide)] <- "namerx"
    wide
  }

  # Translate the requested regimens into a table of antibiotic codes
  # (ab1, ab2, ...) and display name (namerx), one row per regimen.
  antibiotic_df <- function(abo_in) {
    if (length(abo_in) == 0) {
      stop("antibiotic_in must contain at least one regimen")
    }
    is_combination <- str_detect(abo_in, fixed("+"))

    # Single antimicrobials
    mono_ab <- str_to_title(abo_in[!is_combination])
    mono_rows <- lapply(mono_ab, function(regimen) {
      drug_code <- as.ab(ab_name(regimen, only_first = TRUE))
      drug <- subset(antimicrobials, antimicrobials$ab %in% drug_code)[, c("ab", "name")]
      drug$ab <- as.character(drug$ab)
      drug
    })
    mono_set <- bind_rows(mono_rows)
    has_mono <- ncol(mono_set) > 0
    if (has_mono) {
      names(mono_set) <- c("ab1", "namerx")
    }

    # Combination regimens
    comb_ab <- str_to_title(abo_in[is_combination])
    if (length(comb_ab) == 0) {
      return(mono_set)
    }
    comb_rows <- lapply(comb_ab, function(regimen) {
      regimen_row(unlist(str_split(regimen, fixed("+"))))
    })
    # bind_rows() pads with NA, so regimens of different sizes can be mixed.
    comb_set <- bind_rows(comb_rows)

    full_set <- if (has_mono) {
      full_join(x = mono_set, y = comb_set, by = c("ab1", "namerx"))
    } else {
      comb_set
    }
    select(full_set, starts_with("ab"), starts_with("namerx"))
  }

  # Rename the susceptibility columns of `rx_abo` to antibiotic codes:
  # combination columns ("a+b") become "AB1+AB2", all other non-demographic
  # columns go through as.ab(); names that collide afterwards get ".x".
  standardise_regimen_names <- function(rx_abo, demographic_cols) {
    rx_regs <- names(rx_abo)[!(names(rx_abo) %in% demographic_cols)]
    rx_regs_comb <- rx_regs[str_detect(rx_regs, fixed("+"))]
    for (k in seq_along(rx_regs_comb)) {
      parts <- str_remove_all(unlist(str_split(rx_regs_comb[k], fixed("+"))), "[[:punct:]]")
      codes <- unlist(select(regimen_row(parts), starts_with("ab")))
      names(rx_abo)[names(rx_abo) == rx_regs_comb[k]] <- str_flatten(str_c(codes, collapse = "+"))
    }
    single <- !str_detect(names(rx_abo), fixed("+")) & !(names(rx_abo) %in% demographic_cols)
    names(rx_abo)[single] <- as.ab(names(rx_abo)[single])
    dup <- which(duplicated(names(rx_abo)))
    names(rx_abo)[dup] <- str_c(names(rx_abo)[dup], ".x")
    rx_abo
  }

  # WISCA needs at least 100 infection episodes; report where we stand.
  report_sample_size <- function(infection_episodes) {
    if (infection_episodes < 100) {
      warning(paste0("Number of infection episodes = ", infection_episodes, ". Minimum sample size for WISCA is 100 infection episodes. Below minimum sample size will produce inaccurate coverage estimates."))
    } else {
      print(paste0("Number of infection episodes = ", infection_episodes, " > 100 minimum required. Sample size adequate for WISCA analysis."))
    }
  }

  # ---- 1. Restrict to the requested syndrome --------------------------------
  if (infection_in != "") {
    x <- x %>%
      filter(infection_type == infection_in)
  }

  antibiotic_rx <- antibiotic_df(abo_in = antibiotic_in) # regimens under study

  # ---- 2. Harmonise column names --------------------------------------------
  # Triggered when a column name ends in a capital letter and is neither an
  # antibiotic code nor an antibiotic name from `antimicrobials`.
  nonstandard <- str_detect(names(x), "[A-Z]$") &
    !(names(x) %in% antimicrobials$ab) &
    !(names(x) %in% antimicrobials$name)
  if (any(nonstandard)) {
    to_upper <- which(str_detect(names(x), "[a-z]+") & !str_detect(names(x), "fullname") & !str_detect(names(x), "mo"))
    names(x)[to_upper] <- str_to_upper(names(x)[to_upper])
    names(x)[which(str_detect(names(x), "^ORG") & str_detect(names(x), "NAME$"))] <- "fullname"
    names(x)[which(str_detect(names(x), "^ORG") & str_detect(names(x), "CODE$"))] <- "organism"
    names(x)[which(str_detect(names(x), "^DATE") & str_detect(names(x), "CULTURE$") | str_detect(names(x), "^DATE") & str_detect(names(x), "SPECIMEN"))] <- "date"
    names(x)[which(str_detect(names(x), "^EPISODE") | str_detect(names(x), "^PAT"))] <- "patient"
    names(x)[which(str_detect(names(x), "^WARD"))] <- "ward"
    names(x)[which(str_detect(names(x), "^HOSP"))] <- "hospital"
    names(x)[which(str_detect(names(x), "DATE") & (str_detect(names(x), "REGISTRATION")))] <- "date"
  }

  # Keep only the first column of every duplicated name.
  if (any(duplicated(names(x)))) {
    x <- x[, !duplicated(names(x)), drop = FALSE]
  }

  # Sort by the date column (a quoted "date" would sort by a constant).
  x <- arrange(x, date)

  # Without an episode identifier every row is its own episode.
  if (!any(str_detect(names(x), "patient"))) {
    x$patient <- seq_len(nrow(x))
  }

  # Split columns into susceptibility results and everything else.
  micro_df <- names(x)[which(vapply(x, has_sir_values, logical(1)) == TRUE)]
  demograph_df <- names(x)[which(!names(x) %in% micro_df)]

  # ---- 3. Resolve organisms and standardise antibiotic columns --------------
  if ("mo" %in% names(x)) {
    # Organism codes supplied; add fullname when it is missing.
    x <- x %>%
      mutate(mo = as.mo(mo, keep_synonyms = TRUE))
    test_isolates_pre <- x %>%
      mutate(across(.cols = all_of(demograph_df), ~ str_to_title(.x))) %>%
      mutate(mo = toupper(mo))
    if ("fullname" %in% names(x)) {
      test_isolates <- test_isolates_pre[, c(demograph_df)]
    } else {
      test_isolates <- left_join_microorganisms(x = test_isolates_pre, by = "mo")[, c(demograph_df, "fullname")]
    }
    test_isolates <- test_isolates %>%
      filter(!(fullname %in% exclude)) %>%
      mutate(fullname = str_to_title(fullname))
    rx_abo <- x %>%
      filter(mo %in% test_isolates$mo) %>% # only organisms that were not excluded
      distinct(date, patient, mo, .keep_all = TRUE)
    rx_abo <- standardise_regimen_names(rx_abo, demograph_df)
    rx_abo_upd <- left_join_microorganisms(x = rx_abo, by = "mo")[, c(names(rx_abo), "fullname")]
  } else {
    # Only full organism names supplied; derive mo from them.
    test_isolates_pre <- x %>%
      mutate(across(.cols = all_of(demograph_df), ~ str_to_title(.x))) %>%
      filter(!(fullname %in% unique(exclude)))
    test_isolates <- left_join_microorganisms(x = test_isolates_pre, by = "fullname")[, c(demograph_df, "mo")]
    test_isolates$fullname <- str_to_title(test_isolates$fullname)
    test_isolates$mo <- toupper(test_isolates$mo)
    rx_abo <- x %>%
      mutate(fullname = str_to_title(fullname)) %>%
      filter(!(fullname %in% unique(exclude))) %>%
      distinct(date, patient, fullname, .keep_all = TRUE)
    rx_abo <- standardise_regimen_names(rx_abo, demograph_df)
    rx_abo_upd <- left_join_microorganisms(x = rx_abo, by = "fullname")[, c(names(rx_abo), "mo")]
  }
  rx_abo_upd$fullname <- str_to_title(rx_abo_upd$fullname)

  # ---- 4. Attach the requested regimens to the demographic data -------------
  rx_1 <- select(antibiotic_rx, starts_with("ab")) # antibiotic codes per regimen
  rx_df <- test_isolates
  var_date <- names(rx_df)[str_detect(names(rx_df), "date")]
  rx_df[[var_date]] <- ymd(rx_df[[var_date]])
  var_patient <- names(rx_df)[str_detect(names(rx_df), "patient")]
  rx_df[[var_patient]] <- toupper(rx_df[[var_patient]])
  join_keys <- c("date", "patient", "mo")
  # merge() warns about the duplicated column names created on purpose below,
  # and the ".x" renames may assign more values than there are targets.
  suppressWarnings({
    for (k in seq_len(nrow(rx_1))) {
      rx_2 <- unlist(rx_1[k, ][!is.na(str_extract(rx_1[k, ], "[A-Z]+"))])
      regimen <- str_flatten(str_c(rx_2, collapse = "+"))
      if (length(rx_2) > 1 && !(regimen %in% names(rx_abo_upd))) {
        # Combination not tested as such: attach each component and give all
        # of them the combination name (deliberately duplicated columns that
        # are collapsed in step 6).
        rx_df <- merge(x = rx_df, y = rx_abo_upd[, c(join_keys, rx_2)], by = join_keys)
        # A component merged earlier on its own comes back with a ".y" suffix.
        for (jj in seq_along(rx_2)) {
          if (any(str_detect(names(rx_df), paste0(rx_2[jj], ".y")))) {
            rx_df[[paste0(rx_2[jj], ".y")]] <- str_trim(rx_df[[paste0(rx_2[jj], ".y")]])
          } else {
            rx_df[[rx_2[jj]]] <- str_trim(rx_df[[rx_2[jj]]])
          }
        }
        names(rx_df)[names(rx_df) %in% rx_2] <- regimen
        names(rx_df)[names(rx_df) %in% paste0(rx_2, ".y")] <- regimen
        names(rx_df)[names(rx_df) %in% paste0(rx_2, ".x")] <- rx_2
      } else if (length(rx_2) > 1 && (regimen %in% names(rx_abo_upd))) {
        # Combination already present as its own susceptibility column.
        rx_df <- merge(x = rx_df, y = rx_abo_upd[, c(join_keys, regimen)], by = join_keys)
        rx_df[[regimen]] <- str_trim(rx_df[[regimen]])
      } else {
        # Single antimicrobial.
        rx_df <- merge(x = rx_df, y = rx_abo_upd[, c(join_keys, rx_2)], by = join_keys)
        rx_df[[rx_2]] <- str_trim(rx_df[[rx_2]])
        names(rx_df)[names(rx_df) %in% rx_2] <- regimen
        names(rx_df)[names(rx_df) %in% paste0(rx_2, ".y")] <- regimen
        names(rx_df)[names(rx_df) %in% paste0(rx_2, ".x")] <- rx_2
      }
    }
  })

  # Rows without an organism code: derive it from fullname when one is
  # available, otherwise drop the row. Handles any number of such rows.
  missing_mo <- which(is.na(rx_df$mo))
  if (length(missing_mo) > 0) {
    known_name <- !is.na(rx_df$fullname[missing_mo]) & rx_df$fullname[missing_mo] != "NA"
    fill_rows <- missing_mo[known_name]
    drop_rows <- missing_mo[!known_name]
    if (length(fill_rows) > 0) {
      rx_df$mo[fill_rows] <- as.character(as.mo(rx_df$fullname[fill_rows], keep_synonyms = TRUE))
    }
    if (length(drop_rows) > 0) {
      rx_df <- rx_df[-drop_rows, ]
    }
  }

  # ---- 5. First isolates ----------------------------------------------------
  if (isolate_first == "yes") { # already deduplicated by the caller
    test_isolates_first <- rx_df
  } else {
    test_isolates_first <- rx_df[first_isolate(x = rx_df,
                                               method = "e",
                                               episode_days = 14,
                                               col_date = var_date,
                                               col_mo = "mo") == TRUE, ]
  }

  # ---- 6. Long format and recoding of results -------------------------------
  long_isolates <- test_isolates_first %>%
    pivot_longer(cols = names(test_isolates_first)[which(vapply(test_isolates_first, has_sir_values, logical(1)))],
                 names_to = "keyantimicrobials", values_to = "antibiogram") %>%
    mutate(antibiogram = ifelse(antibiogram == "I" | antibiogram == "INTERMEDIATE", susceptible_I, antibiogram)) %>%
    mutate(antibiogram = ifelse(antibiogram == "SENSITIVE", "S", antibiogram)) %>%
    mutate(antibiogram = ifelse(antibiogram == "RESISTANT", "R", antibiogram)) %>%
    mutate(antibiogram = factor(antibiogram),
           keyantimicrobials = as.factor(keyantimicrobials))
  test_isolates_first_upd <<- long_isolates # exported for callers

  # Combinations assembled from separate components appear as duplicated
  # column names in the wide table and hence as several rows per isolate and
  # regimen here. Collapse them: any S -> S; otherwise any untested -> NA;
  # otherwise R. The check must look at the wide table: names of the long
  # table are always unique, so testing them would never trigger the recode.
  if (any(duplicated(names(test_isolates_first)))) {
    test_isolates_temp3 <- long_isolates %>%
      mutate(antibiogram = as.character(antibiogram)) %>%
      mutate(antibiogram = replace_na(antibiogram, "U")) %>%
      mutate(combiantibiogram = "A")
    test_isolates_temp4 <- test_isolates_temp3 %>%
      group_by(date, patient, mo, keyantimicrobials) %>%
      mutate(combiantibiogram = ifelse(any(str_detect(antibiogram, "S")), "S", combiantibiogram)) %>%
      mutate(combiantibiogram = ifelse(any(str_detect(antibiogram, "U")) & !any(str_detect(antibiogram, "S")), "U", combiantibiogram)) %>%
      mutate(combiantibiogram = ifelse(!any(str_detect(antibiogram, "U")) & !any(str_detect(antibiogram, "S")), "R", combiantibiogram)) %>%
      distinct(date, patient, mo, .keep_all = TRUE) %>%
      ungroup() %>%
      select(-antibiogram) %>%
      rename("antibiogram" = "combiantibiogram") %>%
      mutate(antibiogram = ifelse(antibiogram == "U", NA_character_, antibiogram)) %>%
      mutate(antibiogram = as.factor(antibiogram))
    test_isolates_temp5 <- long_isolates %>% # the single regimens
      group_by(date, patient, mo) %>%
      filter(!str_detect(keyantimicrobials, fixed("+"))) %>%
      ungroup()
    test_isolates_final <- bind_rows(test_isolates_temp4, test_isolates_temp5)
    test_isolates_final <- test_isolates_final %>%
      arrange(patient)
  } else {
    test_isolates_final <- long_isolates
  }
  test_isolates_final <<- test_isolates_final # exported before the Klebsiella merge
  test_isolates_final$fullname <- as.factor(test_isolates_final$fullname)
  test_isolates_final$date <- ymd(test_isolates_final$date)

  # ---- 7. Pathogen incidence ------------------------------------------------
  organism_count_total <- test_isolates_final %>%
    group_by(mo) %>%
    distinct(date, patient, .keep_all = TRUE) %>%
    count(mo) %>%
    ungroup() %>%
    arrange(desc(n))
  # All Klebsiella codes are pooled under "B_KLBSL".
  organism_count_total_upd <- organism_count_total %>%
    mutate(mo = as.character(mo)) %>%
    mutate(mo = ifelse(str_detect(mo, "^B_KLBSL"), "B_KLBSL", mo)) %>%
    mutate(mo = as.factor(mo))
  klebsiella_combine <- organism_count_total_upd %>%
    filter(mo == "B_KLBSL") %>%
    summarise(n_kleb = sum(n))
  organism_count_total_upd <- organism_count_total_upd %>%
    mutate(n = ifelse(mo == "B_KLBSL", klebsiella_combine$n_kleb, n)) %>%
    distinct(mo, .keep_all = TRUE)
  organism_count_total_upd <- rename(organism_count_total_upd,
                                     "n_inc" = "n")
  # Only the `pathogen_in` most frequent organisms contribute to WISCA.
  regimen_organism <- slice_max(organism_count_total_upd, order_by = n_inc, n = pathogen_in)
  total_pathogens <- sum(regimen_organism$n_inc)
  print(paste0("Total pathogens = ", sum(organism_count_total_upd$n_inc)))
  print(paste0("Number of pathogens contributing to WISCA = ", total_pathogens))
  print(paste0("Percentage of pathogens contributing to WISCA = ", round((total_pathogens / sum(organism_count_total_upd$n_inc)) * 100, 0), "%"))
  test_isolates_final <- test_isolates_final %>%
    mutate(mo = as.character(mo)) %>%
    mutate(mo = ifelse(str_detect(mo, "^B_KLBSL"), "B_KLBSL", mo)) %>%
    mutate(mo = as.factor(mo)) %>%
    mutate(fullname = as.character(fullname)) %>%
    mutate(fullname = ifelse(str_detect(fullname, "Klebsiella"), "Klebsiella sp.", fullname)) %>%
    mutate(fullname = as.factor(fullname))

  # ---- 8. Incidence and sensitivity per regimen -----------------------------
  # Computes the parameter rows for one set of isolates. With `site_label` the
  # stratification column is carried along and incidence is relative to that
  # stratum; without it, incidence is relative to `total_pathogens`.
  # Returns NULL when no isolate of the selected organisms was tested.
  regimen_parameters <- function(isolates, site_label = NULL) {
    by_site <- !is.null(site_label)
    extra_cols <- if (by_site) analysis else character(0)

    organism_names <- isolates %>%
      filter(!duplicated(mo)) %>%
      select(mo, fullname, all_of(extra_cols))
    organism_counts <- isolates %>%
      filter(mo %in% regimen_organism$mo) %>%
      group_by(mo) %>%
      distinct(date, patient, .keep_all = TRUE) %>%
      count(mo) %>%
      ungroup() %>%
      arrange(desc(n))
    organism_counts <- left_join(x = organism_counts, y = organism_names, by = "mo")[, c("mo", extra_cols, "fullname", "n")]
    stratum_total <- sum(organism_counts$n)
    if (by_site) {
      print(paste0("Number of pathogens contributing to WISCA in ", site_label, " = ", stratum_total))
      incidence_denominator <- stratum_total
    } else {
      # NOTE(review): the pooled analysis divides by total_pathogens (counted
      # before Klebsiella species were merged), not by the sum computed here.
      # Kept as in the original; confirm this is intended.
      incidence_denominator <- total_pathogens
    }
    organism_counts <- organism_counts %>%
      mutate(prop.n = n / incidence_denominator)

    # Results with a value count as tested, NA as not tested.
    tested <- isolates %>%
      filter(mo %in% regimen_organism$mo) %>%
      filter(!is.na(antibiogram)) %>%
      group_by(mo, keyantimicrobials) %>%
      distinct(date, patient, .keep_all = TRUE) %>%
      count(antibiogram) %>%
      ungroup() %>%
      arrange(desc(n))
    susceptible_regimen_tested <<- tested
    untested <- isolates %>%
      filter(mo %in% regimen_organism$mo) %>%
      filter(is.na(antibiogram)) %>%
      group_by(mo, keyantimicrobials) %>%
      distinct(date, patient, .keep_all = TRUE) %>%
      count(antibiogram) %>%
      ungroup() %>%
      arrange(desc(n))
    susceptible_regimen_ntested <<- untested
    tested_totals <- tested %>%
      group_by(mo, keyantimicrobials) %>%
      summarise(tested_n = sum(n), .groups = "keep") %>%
      ungroup()
    organism_count_df_tested <<- tested_totals
    untested_totals <- untested %>%
      group_by(mo, keyantimicrobials) %>%
      summarise(ntested_n = sum(n), .groups = "keep") %>%
      ungroup()
    organism_count_df_ntested <<- untested_totals

    combined <- full_join(x = tested_totals, y = untested_totals, by = c("mo", "keyantimicrobials"))
    combined <- combined %>%
      mutate(tested_n = ifelse(is.na(tested_n), 0, tested_n),
             ntested_n = ifelse(is.na(ntested_n), 0, ntested_n))
    per_regimen <- left_join(x = organism_counts, y = combined[, c("mo", "keyantimicrobials", "tested_n")], by = "mo")

    # At least 30 tested isolates per regimen are needed for usable estimates.
    condition_tested <- per_regimen %>%
      filter(tested_n < 30)
    if (nrow(condition_tested) > 0) {
      if (by_site) {
        warning("Number of tested isolates per regimen should exceed 30. Coverage estimates will be inaccurate for following regimens")
      } else {
        warning("Number of tested isolates per regimen should exceed 30. Coverage estimates will be inaccurate for these regimens.")
      }
      under_tested <- condition_tested[, c("mo", "n", "keyantimicrobials", "tested_n")]
      if (interactive()) {
        View(under_tested)
      } else {
        print(under_tested)
      }
    } else {
      print("Number of tested isolates per regimen exceed minimum threshold of 30.")
    }

    if (nrow(tested) == 0) {
      return(NULL)
    }
    sensitive <- tested %>%
      filter(antibiogram == "S")
    sensitivity <- left_join(x = per_regimen, y = sensitive, by = c("mo", "keyantimicrobials"))
    sensitivity <- sensitivity %>%
      rename("S_n" = "n.y") %>%
      mutate(S_n = ifelse(is.na(S_n), 0, S_n)) %>%
      select(-antibiogram)
    # Proportion sensitive among isolates that were tested.
    sensitivity <- sensitivity %>%
      mutate(prop.S = ifelse(tested_n != 0, S_n / tested_n, 0))
    sensitivity$perc.S <- sensitivity$prop.S * 100
    sensitivity
  }

  params_set_out <- data.frame()
  if (analysis %in% names(test_isolates_final)) {
    # One pass per facility / ward / category.
    sites <- unique(test_isolates_final[[analysis]])
    for (j in seq_along(sites)) {
      infection_episodes <- length(unique(test_isolates_final[test_isolates_final[[analysis]] == sites[j], ]$patient))
      report_sample_size(infection_episodes)
      y_site <- subset(test_isolates_final, test_isolates_final[[analysis]] == sites[j])
      site_params <- regimen_parameters(y_site, site_label = sites[j])
      if (!is.null(site_params)) {
        params_set_out <- rbind.data.frame(params_set_out, site_params, row.names = NULL)
      }
    }
    if (nrow(params_set_out) > 0) {
      params_set_out$fullname <- droplevels(params_set_out$fullname)
      params_set_out[[analysis]] <- as.factor(params_set_out[[analysis]])
    }
  } else {
    report_sample_size(length(unique(test_isolates_final$patient)))
    pooled_params <- regimen_parameters(test_isolates_final)
    if (!is.null(pooled_params)) {
      params_set_out <- rbind.data.frame(params_set_out, pooled_params, row.names = NULL)
      params_set_out$fullname <- droplevels(params_set_out$fullname)
    }
  }

  params_set_out
}