AMR/R/guess_ab_col.R

92 lines
3.7 KiB
R
Raw Normal View History

2019-01-03 23:56:19 +01:00
# ==================================================================== #
# TITLE #
# Antimicrobial Resistance (AMR) Analysis #
# #
# SOURCE #
# https://gitlab.com/msberends/AMR #
# #
# LICENCE #
# (c) 2019 Berends MS (m.s.berends@umcg.nl), Luz CF (c.f.luz@umcg.nl) #
# #
# This R package is free software; you can freely use and distribute #
# it for both personal and commercial purposes under the terms of the #
# GNU General Public License version 2.0 (GNU GPL-2), as published by #
# the Free Software Foundation. #
# #
# This R package was created for academic research and was publicly #
# released in the hope that it will be useful, but it comes WITHOUT #
# ANY WARRANTY OR LIABILITY. #
# Visit our website for more info: https://msberends.gitlab.io/AMR. #
# ==================================================================== #
#' Guess antibiotic column
#'
#' This tries to find a column name in a data set based on information from the \code{\link{antibiotics}} data set. Also supports WHONET abbreviations. You can look for an antibiotic (trade) name or abbreviation and it will search the \code{data.frame} for any column containing a name or ATC code of that antibiotic.
#' @param tbl a \code{data.frame}
#' @param col a character to look for
#' @param verbose a logical to indicate whether additional info should be printed
#' @return The name of the matching column of \code{tbl} as a character of length 1, or \code{NULL} when no column could be matched. When both \code{tbl} and \code{col} are \code{NULL}, the symbol \code{guess_ab_col} is returned instead.
#' @importFrom dplyr %>% select filter_all any_vars
#' @importFrom crayon blue bold
#' @export
#' @inheritSection AMR Read more on our website!
#' @examples
#' df <- data.frame(amox = "S",
#'                  tetr = "R")
#'
#' guess_ab_col(df, "amoxicillin")
#' # [1] "amox"
#' guess_ab_col(df, "J01AA07") # ATC code of tetracycline
#' # [1] "tetr"
#'
#' guess_ab_col(df, "J01AA07", verbose = TRUE)
#' # NOTE: Using column `tetr` as input for `J01AA07`.
#' # [1] "tetr"
#'
#' # WHONET codes
#' df <- data.frame(AMP_ND10 = "R",
#'                  AMC_ED20 = "S")
#' guess_ab_col(df, "ampicillin")
#' # [1] "AMP_ND10"
#' guess_ab_col(df, "J01CR02")
#' # [1] "AMC_ED20"
#' guess_ab_col(df, as.ab("augmentin"))
#' # [1] "AMC_ED20"
guess_ab_col <- function(tbl = NULL, col = NULL, verbose = FALSE) {
  # Called with neither a table nor a search term: return the function's own
  # name as a symbol.
  # NOTE(review): presumably callers use this as a placeholder default that is
  # resolved later - confirm against the callers.
  if (is.null(tbl) && is.null(col)) {
    return(as.name("guess_ab_col"))
  }

  if (length(col) > 1) {
    warning("argument 'col' has length > 1 and only the first element will be used")
    col <- col[1]
  }
  col <- as.character(col)
  if (!is.data.frame(tbl)) {
    stop("`tbl` must be a data.frame")
  }
  # a zero-length `col` would otherwise fail below with the opaque
  # "argument is of length zero"
  if (length(col) == 0) {
    stop("`col` must be a character of length 1")
  }

  if (col %in% colnames(tbl)) {
    # exact column name match, no translation needed
    ab_result <- col
  } else {
    ab_result <- character(0)
    # sort colnames on length - longest first; done on the names themselves
    # because `tbl[, i]` drops a one-column data.frame to a bare vector
    cols <- colnames(tbl)
    cols <- cols[rev(order(nchar(cols)))]
    if (length(cols) > 0) {
      # translate every column name and the search term with as.ab() and keep
      # the columns whose translation equals that of the search term
      col_ab_codes <- suppressWarnings(as.ab(cols))
      ab_result <- cols[which(col_ab_codes == as.ab(col))]
      ab_result <- ab_result[!is.na(ab_result)]
    }
    # keep only the first hit; do not index an empty vector with [1L], as
    # that yields NA and hides the "nothing found" case
    if (length(ab_result) > 1) {
      ab_result <- ab_result[1L]
    }
  }

  if (length(ab_result) == 0) {
    if (isTRUE(verbose)) {
      message("No column found as input for `", col, "`.")
    }
    return(NULL)
  }

  if (isTRUE(verbose)) {
    message(blue(paste0("NOTE: Using column `", bold(ab_result), "` as input for `", col, "`.")))
  }
  ab_result
}