# ==================================================================== #
# TITLE                                                                #
# Antimicrobial Resistance (AMR) Analysis                              #
#                                                                      #
# SOURCE                                                               #
# https://gitlab.com/msberends/AMR                                     #
#                                                                      #
# LICENCE                                                              #
# (c) 2019 Berends MS (m.s.berends@umcg.nl), Luz CF (c.f.luz@umcg.nl)  #
#                                                                      #
# This R package is free software; you can freely use and distribute   #
# it for both personal and commercial purposes under the terms of the  #
# GNU General Public License version 2.0 (GNU GPL-2), as published by  #
# the Free Software Foundation.                                        #
#                                                                      #
# This R package was created for academic research and was publicly    #
# released in the hope that it will be useful, but it comes WITHOUT    #
# ANY WARRANTY OR LIABILITY.                                           #
# Visit our website for more info: https://msberends.gitlab.io/AMR.    #
# ==================================================================== #

#' Guess antibiotic column
#'
#' This tries to find a column name in a data set based on information from
#' the \code{\link{antibiotics}} data set. Also supports WHONET abbreviations.
#' You can look for an antibiotic (trade) name or abbreviation and it will
#' search the \code{data.frame} for any column containing a name or ATC code
#' of that antibiotic.
#' @param tbl a \code{data.frame}
#' @param col a character to look for
#' @param verbose a logical to indicate whether additional info should be printed
#' @return The name of the matching column of \code{tbl} (a character of
#'   length 1), or \code{NULL} when no column or more than one column matches.
#'   When both \code{tbl} and \code{col} are \code{NULL}, the symbol
#'   \code{guess_ab_col} is returned.
#' @importFrom dplyr %>% select filter_all any_vars
#' @importFrom crayon blue bold
#' @export
#' @inheritSection AMR Read more on our website!
#' @examples
#' df <- data.frame(amox = "S",
#'                  tetr = "R")
#'
#' guess_ab_col(df, "amoxicillin")
#' # [1] "amox"
#' guess_ab_col(df, "J01AA07") # ATC code of Tetracycline
#' # [1] "tetr"
#'
#' guess_ab_col(df, "J01AA07", verbose = TRUE)
#' # using column `tetr` for col "J01AA07"
#' # [1] "tetr"
#'
#' # WHONET codes
#' df <- data.frame(AMP_ND10 = "R",
#'                  AMC_ED20 = "S")
#' guess_ab_col(df, "ampicillin")
#' # [1] "AMP_ND10"
#' guess_ab_col(df, "J01CR02")
#' # [1] "AMC_ED20"
#' guess_ab_col(df, as.atc("augmentin"))
#' # [1] "AMC_ED20"
guess_ab_col <- function(tbl = NULL, col = NULL, verbose = FALSE) {
  # without any input, hand back the function name as a symbol
  if (is.null(tbl) && is.null(col)) {
    return(as.name("guess_ab_col"))
  }

  if (length(col) > 1) {
    warning("argument 'col' has length > 1 and only the first element will be used")
    col <- col[1]
  }
  if (!is.data.frame(tbl)) {
    stop("`tbl` must be a data.frame")
  }
  if (length(col) == 0) {
    # previously this surfaced as "argument is of length zero" further down
    stop("`col` must be a character of length 1")
  }

  tbl_names <- colnames(tbl)
  # part of every column name before the first underscore,
  # e.g. "AMP_ND10" -> "AMP"
  tbl_names_stripped <- vapply(strsplit(tbl_names, "_"),
                               function(parts) parts[1],
                               character(1),
                               USE.NAMES = FALSE)

  # 1. `col` is literally one of the column names
  if (col %in% tbl_names) {
    if (isTRUE(verbose)) {
      message(blue(paste0("NOTE: Using column `", bold(col), "` as input for `", col, "`.")))
    }
    return(col)
  }

  ab_lookup <- antibiotics %>% select(atc:trade_name)
  col_lower <- tolower(col)

  # 2. exact (case-insensitive) hit of `col` in any lookup field, for rows
  #    that also have a field equal to one of the column names
  ab_result <- ab_lookup %>%
    filter_all(any_vars(tolower(.) == col_lower)) %>%
    filter_all(any_vars(. %in% tbl_names))

  # 3. pattern match with %like%, only for input of at least 5 characters
  if (nrow(ab_result) == 0 && isTRUE(nchar(col) >= 5)) {
    ab_result <- ab_lookup %>%
      filter_all(any_vars(tolower(.) %like% col_lower)) %>%
      filter_all(any_vars(. %in% tbl_names))
  }

  # 4. WHONET: exact hit again, but compared against the column names
  #    stripped of everything from the first underscore onwards
  if (nrow(ab_result) == 0) {
    ab_result <- ab_lookup %>%
      filter_all(any_vars(tolower(.) == col_lower)) %>%
      filter_all(any_vars(. %in% tbl_names_stripped))
  }

  # 5. last resort: columns starting with the first 5 characters of the
  #    official name that belongs to `col`
  found_based_on_official_name <- FALSE
  if (nrow(ab_result) == 0) {
    name <- suppressWarnings(atc_name(col))
    name <- name[!is.na(name) & nzchar(name)]
    if (length(name) > 0) {
      official_names <- as.character(antibiotics[["official"]])
      official_hit <- official_names[!is.na(official_names) & official_names %in% name]
      prefix <- unique(substr(official_hit, 1, 5))
      prefix <- prefix[nzchar(prefix)]
      # Only search with a real prefix: paste0("^", character(0)) is "^",
      # which as a regular expression would select every column.
      if (length(prefix) > 0) {
        ab_result <- tbl_names[tbl_names %like% paste0("^", prefix[1])]
        found_based_on_official_name <- TRUE
      }
    }
  }

  # several lookup rows left: prefer the row(s) where `col` equals, in this
  # order, the ATC code, the certe code, the umcg code or the official name;
  # otherwise keep the first row
  if (NROW(ab_result) > 1 && !found_based_on_official_name) {
    best <- ab_result[0, , drop = FALSE]
    for (field in c("atc", "certe", "umcg", "official")) {
      best <- ab_result[tolower(ab_result[[field]]) %in% col_lower, , drop = FALSE]
      if (nrow(best) > 0) {
        break
      }
    }
    if (nrow(best) == 0) {
      best <- ab_result[1, , drop = FALSE]
    }
    ab_result <- best
  }

  # Flatten the remaining lookup rows into a plain character vector, so that
  # the comparison with the column names does not depend on how `%in%`
  # coerces a data.frame and an empty result really has length 0.
  if (is.data.frame(ab_result)) {
    ab_values <- unlist(lapply(ab_result, as.character), use.names = FALSE)
  } else {
    ab_values <- ab_result
  }
  ab_values <- ab_values[!is.na(ab_values)]

  result <- tbl_names[tbl_names %in% ab_values]
  if (length(result) == 0) {
    result <- tbl_names[tbl_names_stripped %in% ab_values]
  }

  # no hit at all, or an ambiguous one
  if (length(result) != 1) {
    if (isTRUE(verbose)) {
      message("No column found as input for `", col, "`.")
    }
    return(NULL)
  }

  if (isTRUE(verbose)) {
    message(blue(paste0("NOTE: Using column `", bold(result), "` as input for `", col, "`.")))
  }
  result
}