AMR/R/guess_ab_col.R

# ==================================================================== #
# TITLE                                                                #
# Antimicrobial Resistance (AMR) Analysis for R                        #
#                                                                      #
# SOURCE                                                               #
# https://github.com/msberends/AMR                                     #
#                                                                      #
# LICENCE                                                              #
# (c) 2018-2021 Berends MS, Luz CF et al.                              #
# Developed at the University of Groningen, the Netherlands, in        #
# collaboration with non-profit organisations Certe Medical            #
# Diagnostics & Advice, and University Medical Center Groningen.       # 
#                                                                      #
# This R package is free software; you can freely use and distribute   #
# it for both personal and commercial purposes under the terms of the  #
# GNU General Public License version 2.0 (GNU GPL-2), as published by  #
# the Free Software Foundation.                                        #
# We created this package for both routine data analysis and academic  #
# research and it was publicly released in the hope that it will be    #
# useful, but it comes WITHOUT ANY WARRANTY OR LIABILITY.              #
#                                                                      #
# Visit our website for the full manual and a complete tutorial about  #
# how to conduct AMR analysis: https://msberends.github.io/AMR/        #
# ==================================================================== #

#' Guess antibiotic column
#'
#' This tries to find a column name in a data set based on information from the [antibiotics] data set. Also supports WHONET abbreviations.
#' @inheritSection lifecycle Stable lifecycle
#' @param x a [data.frame]
#' @param search_string a text to search `x` for, will be checked with [as.ab()] if this value is not a column in `x`
#' @param verbose a logical to indicate whether additional info should be printed
#' @details You can look for an antibiotic (trade) name or abbreviation and it will search `x` and the [antibiotics] data set for any column containing a name or code of that antibiotic. **Longer column names take precedence over shorter column names.**
#' @return A column name of `x`, or `NULL` when no result is found.
#' @export
#' @inheritSection AMR Read more on our website!
#' @examples
#' df <- data.frame(amox = "S",
#'                  tetr = "R")
#'
#' guess_ab_col(df, "amoxicillin")
#' # [1] "amox"
#' guess_ab_col(df, "J01AA07") # ATC code of tetracycline
#' # [1] "tetr"
#'
#' guess_ab_col(df, "J01AA07", verbose = TRUE)
#' # NOTE: Using column 'tetr' as input for J01AA07 (tetracycline).
#' # [1] "tetr"
#'
#' # WHONET codes
#' df <- data.frame(AMP_ND10 = "R",
#'                  AMC_ED20 = "S")
#' guess_ab_col(df, "ampicillin")
#' # [1] "AMP_ND10"
#' guess_ab_col(df, "J01CR02")
#' # [1] "AMC_ED20"
#' guess_ab_col(df, as.ab("augmentin"))
#' # [1] "AMC_ED20"
#'
#' # Longer names take precedence:
#' df <- data.frame(AMP_ED2 = "S",
#'                  AMP_ED20 = "S")
#' guess_ab_col(df, "ampicillin")
#' # [1] "AMP_ED20"
guess_ab_col <- function(x = NULL, search_string = NULL, verbose = FALSE) {
  meet_criteria(x, allow_class = "data.frame", allow_NULL = TRUE)
  meet_criteria(search_string, allow_class = "character", has_length = 1, allow_NULL = TRUE)
  meet_criteria(verbose, allow_class = "logical", has_length = 1)
  
  if (is.null(x) && is.null(search_string)) {
    # called without any input: hand back the function name as a symbol
    return(as.name("guess_ab_col"))
  }
  
  col_names <- colnames(x)
  
  if (search_string %in% col_names) {
    # 1. the search string is literally a column name
    ab_result <- search_string
  } else {
    search_string.ab <- suppressWarnings(as.ab(search_string))
    if (search_string.ab %in% col_names) {
      # 2. the antibiotic code of the search string is a column name
      ab_result <- col_names[col_names == search_string.ab][1L]
      
    } else {
      # 3. a column name equals (case-insensitively) one of the known abbreviations
      abbreviations <- tolower(unlist(ab_property(search_string.ab, "abbreviations", language = NULL)))
      abbreviation_hit <- tolower(col_names) %in% abbreviations
      if (any(abbreviation_hit)) {
        ab_result <- col_names[abbreviation_hit][1L]
        
      } else {
        # 4. translate every column name with as.ab() and compare the codes;
        # sort colnames on length - longest first.
        # The names are ordered directly (not via `x[, idx]`), since subsetting a
        # one-column data.frame would drop it to a vector and lose the names.
        cols <- col_names[rev(order(nchar(col_names)))]
        col_codes <- suppressWarnings(as.ab(cols))
        ab_result <- cols[which(col_codes == search_string.ab)]
        ab_result <- ab_result[!is.na(ab_result)]
        # only take the first hit when there is one: `character(0)[1L]` would be NA,
        # which would wrongly be returned as if it were a column name
        if (length(ab_result) > 1) {
          ab_result <- ab_result[1L]
        }
      }
    }
  }
  
  if (length(ab_result) == 0) {
    if (isTRUE(verbose)) {
      message_("No column found as input for ", search_string,
               " (", ab_name(search_string, language = NULL, tolower = TRUE), ").",
               add_fn = font_black,
               as_note = FALSE)
    }
    return(NULL)
  }
  
  if (isTRUE(verbose)) {
    message_("Using column '", font_bold(ab_result), "' as input for ", search_string,
             " (", ab_name(search_string, language = NULL, tolower = TRUE), ").")
  }
  ab_result
}

# Auto-detect which columns of `x` contain antimicrobial test results.
#
# Arguments:
#   x                  a data.frame
#   soft_dependencies  antibiotic codes that are nice to have; a message lists the
#                      missing ones when `info` is TRUE
#   hard_dependencies  antibiotic codes that must be present; when any is missing a
#                      warning is generated and NA is returned
#   verbose            print a message for every column that will be used
#   info               print progress and result messages
#   ...                manual overrides as `<antibiotic> = "<column name>"`;
#                      `<antibiotic> = NULL` prevents that antibiotic from being used
#
# Returns a named character vector (values: column names of `x`, names: the
# antibiotic codes that as.ab() returned for them), an empty vector when nothing
# was found, or NA when a hard dependency is missing.
get_column_abx <- function(x,
                           soft_dependencies = NULL,
                           hard_dependencies = NULL,
                           verbose = FALSE,
                           info = TRUE,
                           ...) {
  meet_criteria(x, allow_class = "data.frame")
  meet_criteria(soft_dependencies, allow_class = "character", allow_NULL = TRUE)
  meet_criteria(hard_dependencies, allow_class = "character", allow_NULL = TRUE)
  meet_criteria(verbose, allow_class = "logical", has_length = 1)
  meet_criteria(info, allow_class = "logical", has_length = 1)
  
  show_info <- isTRUE(info)
  be_verbose <- isTRUE(verbose)
  
  if (show_info) {
    message_("Auto-guessing columns suitable for analysis", appendLF = FALSE)
  }
  
  x <- as.data.frame(x, stringsAsFactors = FALSE)
  if (NROW(x) > 10000) {
    # only test maximum of 10,000 values per column
    if (show_info) {
      message_(" (using only ", font_bold("the first 10,000 rows"), ")...",
               appendLF = FALSE, 
               as_note = FALSE)
    }
    x <- x[1:10000, , drop = FALSE]
  } else if (show_info) {
    message_("...", appendLF = FALSE, as_note = FALSE)
  }
  
  # only check columns that are a valid AB code, ATC code, name, abbreviation or synonym,
  # or already have the <rsi> class (as.rsi) 
  # and that they have no more than 50% invalid values
  vectr_antibiotics <- unique(toupper(unlist(antibiotics[, c("ab", "atc", "name", "abbreviations", "synonyms")])))
  vectr_antibiotics <- vectr_antibiotics[!is.na(vectr_antibiotics) & nchar(vectr_antibiotics) >= 3]
  # `x` is already a plain data.frame here, so columns can be extracted directly
  x_columns <- vapply(FUN.VALUE = character(1), colnames(x), function(col) {
    values <- x[, col, drop = TRUE]
    if (toupper(col) %in% vectr_antibiotics || 
        is.rsi(values) ||
        is.rsi.eligible(values, threshold = 0.5)) {
      col
    } else {
      NA_character_
    }
  })
  x_columns <- x_columns[!is.na(x_columns)]
  x <- x[, x_columns, drop = FALSE] # without drop = FALSE, x will become a vector when x_columns is length 1
  
  # translate the remaining column names to antibiotic codes; drop what as.ab() cannot translate
  df_trans <- data.frame(colnames = colnames(x),
                         abcode = suppressWarnings(as.ab(colnames(x), info = FALSE)),
                         stringsAsFactors = FALSE)
  df_trans <- df_trans[!is.na(df_trans$abcode), , drop = FALSE]
  # from here on, `x` is a named character vector: column names, named by antibiotic code
  x <- as.character(df_trans$colnames)
  names(x) <- df_trans$abcode
  
  # add from self-defined dots (...):
  # such as get_column_abx(example_isolates %pm>% rename(thisone = AMX), amox = "thisone")
  dots <- list(...)
  if (length(dots) > 0) {
    newnames <- suppressWarnings(as.ab(names(dots), info = FALSE))
    if (any(is.na(newnames))) {
      warning_("Invalid antibiotic reference(s): ", toString(names(dots)[is.na(newnames)]),
               call = FALSE,
               immediate = TRUE)
    }
    # turn all NULLs to NAs
    dots <- unlist(lapply(dots, function(x) if (is.null(x)) NA else x))
    names(dots) <- newnames
    dots <- dots[!is.na(names(dots))]
    # merge, but overwrite automatically determined ones by 'dots'
    x <- c(x[!x %in% dots & !names(x) %in% names(dots)], dots)
    # delete NAs, this will make e.g. eucast_rules(... TMP = NULL) work to prevent TMP from being used
    x <- x[!is.na(x)]
  }
  
  if (length(x) == 0) {
    if (show_info) {
      message_("No columns found.")
    }
    return(x)
  }
  
  # sort on name
  x <- x[order(names(x), x)]
  # entries whose column name or antibiotic code occurs more than once;
  # indexing by unique name keeps a single entry per flagged code
  duplicates <- c(x[duplicated(x)], x[duplicated(names(x))]) 
  duplicates <- duplicates[unique(names(duplicates))]
  x <- c(x[!names(x) %in% names(duplicates)], duplicates)
  x <- x[order(names(x), x)]
  
  # succeeded with auto-guessing
  if (show_info) {
    message_(" OK.", add_fn = list(font_green, font_bold), as_note = FALSE)
  }
  
  for (i in seq_along(x)) {
    is_duplicate <- names(x)[i] %in% names(duplicates)
    if (show_info && be_verbose && !is_duplicate) {
      message_("Using column '", font_bold(x[i]), "' as input for ", names(x)[i],
               " (", ab_name(names(x)[i], tolower = TRUE, language = NULL), ").")
    }
    if (show_info && is_duplicate) {
      warning_(paste0("Using column '", font_bold(x[i]), "' as input for ", names(x)[i],
                      " (", ab_name(names(x)[i], tolower = TRUE, language = NULL),
                      "), although it was matched for multiple antibiotics or columns."),
               add_fn = font_red,
               call = FALSE, 
               immediate = verbose)
    }
  }
  
  if (!is.null(hard_dependencies)) {
    hard_dependencies <- unique(hard_dependencies)
    if (!all(hard_dependencies %in% names(x))) {
      # missing a hard dependency will return NA and consequently the data will not be analysed
      missing <- hard_dependencies[!hard_dependencies %in% names(x)]
      generate_warning_abs_missing(missing, any = FALSE)
      return(NA)
    }
  }
  if (!is.null(soft_dependencies)) {
    soft_dependencies <- unique(soft_dependencies)
    if (show_info && !all(soft_dependencies %in% names(x))) {
      # missing a soft dependency may lower the reliability
      missing <- soft_dependencies[!soft_dependencies %in% names(x)]
      missing_msg <- paste(paste0(ab_name(missing, tolower = TRUE, language = NULL), 
                                  " (", font_bold(missing, collapse = NULL), ")"), 
                           collapse = ", ")
      message_("Reliability would be improved if these antimicrobial results would be available too: ",
               missing_msg)
    }
  }
  x
}

# Warn that NAs are introduced because required antimicrobials are missing.
#   missing  antibiotic codes that could not be found; each is shown as "<code> (<name>)"
#   any      TRUE words the warning as "any of these ... is required",
#            otherwise as "these ... are required"
generate_warning_abs_missing <- function(missing, any = FALSE) {
  labelled <- paste0(missing, " (", ab_name(missing, tolower = TRUE, language = NULL), ")")
  any_txt <- if (any == TRUE) c(" any of", "is") else c("", "are")
  warning_text <- paste0("Introducing NAs since", any_txt[1], " these antimicrobials ", any_txt[2], " required: ",
                         paste(labelled, collapse = ", "))
  warning_(warning_text,
           immediate = TRUE,
           call = FALSE)
}
guess_ab 2019-01-03 23:56:19 +01:00			`# ==================================================================== #`
			`# TITLE #`
(v1.4.0) matching score update 2020-10-08 11:16:03 +02:00			`# Antimicrobial Resistance (AMR) Analysis for R #`
guess_ab 2019-01-03 23:56:19 +01:00			`# #`
			`# SOURCE #`
(v1.2.0.9026) move to github 2020-07-08 14:48:06 +02:00			`# https://github.com/msberends/AMR #`
guess_ab 2019-01-03 23:56:19 +01:00			`# #`
			`# LICENCE #`
(v1.4.0.9047) unit tests 2020-12-27 00:30:28 +01:00			`# (c) 2018-2021 Berends MS, Luz CF et al. #`
(v1.4.0) matching score update 2020-10-08 11:16:03 +02:00			`# Developed at the University of Groningen, the Netherlands, in #`
			`# collaboration with non-profit organisations Certe Medical #`
			`# Diagnostics & Advice, and University Medical Center Groningen. #`
guess_ab 2019-01-03 23:56:19 +01:00			`# #`
			`# This R package is free software; you can freely use and distribute #`
			`# it for both personal and commercial purposes under the terms of the #`
			`# GNU General Public License version 2.0 (GNU GPL-2), as published by #`
			`# the Free Software Foundation. #`
(v0.9.0.9008) Happy new year! Add lifecycles 2020-01-05 17:22:09 +01:00			`# We created this package for both routine data analysis and academic #`
			`# research and it was publicly released in the hope that it will be #`
			`# useful, but it comes WITHOUT ANY WARRANTY OR LIABILITY. #`
(v1.4.0) matching score update 2020-10-08 11:16:03 +02:00			`# #`
			`# Visit our website for the full manual and a complete tutorial about #`
			`# how to conduct AMR analysis: https://msberends.github.io/AMR/ #`
guess_ab 2019-01-03 23:56:19 +01:00			`# ==================================================================== #`

			`#' Guess antibiotic column`
			`#'`
(v0.8.0.9036) complete documentation rewrite 2019-11-28 22:32:17 +01:00			`#' This tries to find a column name in a data set based on information from the [antibiotics] data set. Also supports WHONET abbreviations.`
(v1.3.0.9039) lifecycle updates, added excess kurtosis 2020-10-04 21:02:16 +02:00			`#' @inheritSection lifecycle Stable lifecycle`
(v1.3.0.9022) mo_matching_score(), poorman update, as.rsi() fix 2020-09-18 16:05:53 +02:00			`#' @param x a [data.frame]`
(v0.8.0.9036) complete documentation rewrite 2019-11-28 22:32:17 +01:00			#' @param search_string a text to search `x` for, will be checked with [as.ab()] if this value is not a column in `x`
guess_ab 2019-01-03 23:56:19 +01:00			`#' @param verbose a logical to indicate whether additional info should be printed`
(v1.4.0.9001) is_gram_positive(), is_gram_negative(), parameter hardening 2020-10-19 17:09:19 +02:00			#' @details You can look for an antibiotic (trade) name or abbreviation and it will search `x` and the [antibiotics] data set for any column containing a name or code of that antibiotic. Longer columns names take precedence over shorter column names.
(v0.8.0.9036) complete documentation rewrite 2019-11-28 22:32:17 +01:00			#' @return A column name of `x`, or `NULL` when no result is found.
guess_ab 2019-01-03 23:56:19 +01:00			`#' @export`
			`#' @inheritSection AMR Read more on our website!`
guess_ab_col, benchmarks 2019-01-11 20:37:23 +01:00			`#' @examples`
			`#' df <- data.frame(amox = "S",`
			`#' tetr = "R")`
			`#'`
			`#' guess_ab_col(df, "amoxicillin")`
			`#' # [1] "amox"`
new antibiotics 2019-05-10 16:44:59 +02:00			`#' guess_ab_col(df, "J01AA07") # ATC code of tetracycline`
guess_ab_col, benchmarks 2019-01-11 20:37:23 +01:00			`#' # [1] "tetr"`
			`#'`
			`#' guess_ab_col(df, "J01AA07", verbose = TRUE)`
(v1.4.0.9030) as.mo() fix for known lab codes 2020-12-03 16:59:04 +01:00			`#' # NOTE: Using column 'tetr' as input for J01AA07 (tetracycline).`
guess_ab_col, benchmarks 2019-01-11 20:37:23 +01:00			`#' # [1] "tetr"`
WHONET/EARS-Net support 2019-01-29 00:06:50 +01:00			`#'`
			`#' # WHONET codes`
			`#' df <- data.frame(AMP_ND10 = "R",`
			`#' AMC_ED20 = "S")`
			`#' guess_ab_col(df, "ampicillin")`
			`#' # [1] "AMP_ND10"`
			`#' guess_ab_col(df, "J01CR02")`
			`#' # [1] "AMC_ED20"`
new antibiotics 2019-05-10 16:44:59 +02:00			`#' guess_ab_col(df, as.ab("augmentin"))`
WHONET/EARS-Net support 2019-01-29 00:06:50 +01:00			`#' # [1] "AMC_ED20"`
(v0.6.1.9045) age test fix 2019-05-31 14:40:15 +02:00			`#'`
			`#' # Longer names take precendence:`
			`#' df <- data.frame(AMP_ED2 = "S",`
			`#' AMP_ED20 = "S")`
			`#' guess_ab_col(df, "ampicillin")`
			`#' # [1] "AMP_ED20"`
CI tests 2019-05-13 10:10:16 +02:00			`guess_ab_col <- function(x = NULL, search_string = NULL, verbose = FALSE) {`
(v1.4.0.9001) is_gram_positive(), is_gram_negative(), parameter hardening 2020-10-19 17:09:19 +02:00			`meet_criteria(x, allow_class = "data.frame", allow_NULL = TRUE)`
			`meet_criteria(search_string, allow_class = "character", has_length = 1, allow_NULL = TRUE)`
			`meet_criteria(verbose, allow_class = "logical", has_length = 1)`

CI tests 2019-05-13 10:10:16 +02:00			`if (is.null(x) & is.null(search_string)) {`
guess_ab_col, benchmarks 2019-01-11 20:37:23 +01:00			`return(as.name("guess_ab_col"))`
guess_ab 2019-01-03 23:56:19 +01:00			`}`
(v1.2.0.9034) code cleaning 2020-07-13 09:17:24 +02:00
CI tests 2019-05-13 10:10:16 +02:00			`if (search_string %in% colnames(x)) {`
			`ab_result <- search_string`
new antibiotics 2019-05-10 16:44:59 +02:00			`} else {`
documentation fix 2019-05-13 20:16:51 +02:00			`search_string.ab <- suppressWarnings(as.ab(search_string))`
			`if (search_string.ab %in% colnames(x)) {`
			`ab_result <- colnames(x)[colnames(x) == search_string.ab][1L]`
(v1.2.0.9034) code cleaning 2020-07-13 09:17:24 +02:00
(v1.1.0.9010) lose dependencies 2020-05-18 11:09:02 +02:00			`} else if (any(tolower(colnames(x)) %in% tolower(unlist(ab_property(search_string.ab, "abbreviations", language = NULL))))) {`
			`ab_result <- colnames(x)[tolower(colnames(x)) %in% tolower(unlist(ab_property(search_string.ab, "abbreviations", language = NULL)))][1L]`
(v1.2.0.9034) code cleaning 2020-07-13 09:17:24 +02:00
documentation fix 2019-05-13 20:16:51 +02:00			`} else {`
			`# sort colnames on length - longest first`
(v1.3.0.9022) mo_matching_score(), poorman update, as.rsi() fix 2020-09-18 16:05:53 +02:00			`cols <- colnames(x[, x %pm>% colnames() %pm>% nchar() %pm>% order() %pm>% rev()])`
documentation fix 2019-05-13 20:16:51 +02:00			`df_trans <- data.frame(cols = cols,`
			`abs = suppressWarnings(as.ab(cols)),`
			`stringsAsFactors = FALSE)`
			`ab_result <- df_trans[which(df_trans$abs == search_string.ab), "cols"]`
			`ab_result <- ab_result[!is.na(ab_result)][1L]`
			`}`
guess_ab 2019-01-03 23:56:19 +01:00			`}`
(v1.2.0.9034) code cleaning 2020-07-13 09:17:24 +02:00
guess_ab_col, benchmarks 2019-01-11 20:37:23 +01:00			`if (length(ab_result) == 0) {`
guess_ab 2019-01-03 23:56:19 +01:00			`if (verbose == TRUE) {`
(v1.4.0.9030) as.mo() fix for known lab codes 2020-12-03 16:59:04 +01:00			`message_("No column found as input for ", search_string,`
			`" (", ab_name(search_string, language = NULL, tolower = TRUE), ").",`
(v1.4.0.9011) message formatting 2020-10-27 15:56:51 +01:00			`add_fn = font_black,`
			`as_note = FALSE)`
guess_ab 2019-01-03 23:56:19 +01:00			`}`
			`return(NULL)`
guess_ab_col, benchmarks 2019-01-11 20:37:23 +01:00			`} else {`
			`if (verbose == TRUE) {`
(v1.4.0.9030) as.mo() fix for known lab codes 2020-12-03 16:59:04 +01:00			`message_("Using column '", font_bold(ab_result), "' as input for ", search_string,`
			`" (", ab_name(search_string, language = NULL, tolower = TRUE), ").")`
guess_ab_col, benchmarks 2019-01-11 20:37:23 +01:00			`}`
new antibiotics 2019-05-10 16:44:59 +02:00			`return(ab_result)`
guess_ab 2019-01-03 23:56:19 +01:00			`}`
			`}`
(v0.7.1.9004) atc class removal 2019-06-27 11:57:45 +02:00
			`get_column_abx <- function(x,`
			`soft_dependencies = NULL,`
			`hard_dependencies = NULL,`
			`verbose = FALSE,`
(v1.3.0.9026) eucast expert rules 3.2 2020-09-24 00:30:11 +02:00			`info = TRUE,`
(v0.7.1.9004) atc class removal 2019-06-27 11:57:45 +02:00			`...) {`
(v1.4.0.9001) is_gram_positive(), is_gram_negative(), parameter hardening 2020-10-19 17:09:19 +02:00			`meet_criteria(x, allow_class = "data.frame")`
			`meet_criteria(soft_dependencies, allow_class = "character", allow_NULL = TRUE)`
			`meet_criteria(hard_dependencies, allow_class = "character", allow_NULL = TRUE)`
			`meet_criteria(verbose, allow_class = "logical", has_length = 1)`
			`meet_criteria(info, allow_class = "logical", has_length = 1)`
(v1.2.0.9034) code cleaning 2020-07-13 09:17:24 +02:00
(v1.3.0.9026) eucast expert rules 3.2 2020-09-24 00:30:11 +02:00			`if (info == TRUE) {`
(v1.4.0.9011) message formatting 2020-10-27 15:56:51 +01:00			`message_("Auto-guessing columns suitable for analysis", appendLF = FALSE)`
(v1.3.0.9026) eucast expert rules 3.2 2020-09-24 00:30:11 +02:00			`}`
Update guess_ab_col.R 2019-10-08 10:02:19 +02:00
			`x <- as.data.frame(x, stringsAsFactors = FALSE)`
(v1.2.0.9004) rsi_df() fix for groups 2020-06-09 16:18:03 +02:00			`if (NROW(x) > 10000) {`
			`# only test maximum of 10,000 values per column`
(v1.3.0.9026) eucast expert rules 3.2 2020-09-24 00:30:11 +02:00			`if (info == TRUE) {`
(v1.4.0.9011) message formatting 2020-10-27 15:56:51 +01:00			`message_(" (using only ", font_bold("the first 10,000 rows"), ")...",`
			`appendLF = FALSE,`
			`as_note = FALSE)`
(v1.3.0.9026) eucast expert rules 3.2 2020-09-24 00:30:11 +02:00			`}`
(v1.2.0.9004) rsi_df() fix for groups 2020-06-09 16:18:03 +02:00			`x <- x[1:10000, , drop = FALSE]`
(v1.3.0.9026) eucast expert rules 3.2 2020-09-24 00:30:11 +02:00			`} else if (info == TRUE) {`
(v1.4.0.9011) message formatting 2020-10-27 15:56:51 +01:00			`message_("...", appendLF = FALSE, as_note = FALSE)`
(v1.2.0.9004) rsi_df() fix for groups 2020-06-09 16:18:03 +02:00			`}`
(v0.7.1.9094) get_column_abx() improvement 2019-10-06 21:44:08 +02:00			`x_bak <- x`
Update guess_ab_col.R 2019-10-08 10:02:19 +02:00			`# only check columns that are a valid AB code, ATC code, name, abbreviation or synonym,`
(v1.4.0.9052) replaced all sapply's with type-safe vapply's 2020-12-28 22:24:33 +01:00			`# or already have the <rsi> class (as.rsi)`
			`# and that they have no more than 50% invalid values`
(v0.9.0.9023) EUCAST 2020 guidelines 2020-02-14 19:54:13 +01:00			`vectr_antibiotics <- unique(toupper(unlist(antibiotics[, c("ab", "atc", "name", "abbreviations", "synonyms")])))`
Update guess_ab_col.R 2019-10-08 10:02:19 +02:00			`vectr_antibiotics <- vectr_antibiotics[!is.na(vectr_antibiotics) & nchar(vectr_antibiotics) >= 3]`
(v1.4.0.9052) replaced all sapply's with type-safe vapply's 2020-12-28 22:24:33 +01:00			`x_columns <- vapply(FUN.VALUE = character(1), colnames(x), function(col, df = x_bak) {`
			`if (toupper(col) %in% vectr_antibiotics \|\|`
			`is.rsi(as.data.frame(df, stringsAsFactors = FALSE)[, col, drop = TRUE]) \|\|`
(v1.4.0.9017) stringsAsFactors definitions 2020-11-11 16:49:27 +01:00			`is.rsi.eligible(as.data.frame(df, stringsAsFactors = FALSE)[, col, drop = TRUE],`
			`threshold = 0.5)) {`
Update guess_ab_col.R 2019-10-08 10:02:19 +02:00			`return(col)`
			`} else {`
			`return(NA_character_)`
			`}`
			`})`
(v0.7.1.9100) bug_drug speed, MIC levels 2019-10-08 22:21:33 +02:00			`x_columns <- x_columns[!is.na(x_columns)]`
			`x <- x[, x_columns, drop = FALSE] # without drop = TRUE, x will become a vector when x_columns is length 1`
(v1.2.0.9034) code cleaning 2020-07-13 09:17:24 +02:00
(v0.7.1.9004) atc class removal 2019-06-27 11:57:45 +02:00			`df_trans <- data.frame(colnames = colnames(x),`
(v1.4.0.9017) stringsAsFactors definitions 2020-11-11 16:49:27 +01:00			`abcode = suppressWarnings(as.ab(colnames(x), info = FALSE)),`
			`stringsAsFactors = FALSE)`
(v1.3.0.9026) eucast expert rules 3.2 2020-09-24 00:30:11 +02:00			`df_trans <- df_trans[!is.na(df_trans$abcode), , drop = FALSE]`
(v0.7.1.9004) atc class removal 2019-06-27 11:57:45 +02:00			`x <- as.character(df_trans$colnames)`
			`names(x) <- df_trans$abcode`
(v1.2.0.9034) code cleaning 2020-07-13 09:17:24 +02:00
(v0.7.1.9004) atc class removal 2019-06-27 11:57:45 +02:00			`# add from self-defined dots (...):`
(v1.3.0.9022) mo_matching_score(), poorman update, as.rsi() fix 2020-09-18 16:05:53 +02:00			`# such as get_column_abx(example_isolates %pm>% rename(thisone = AMX), amox = "thisone")`
(v0.7.1.9004) atc class removal 2019-06-27 11:57:45 +02:00			`dots <- list(...)`
			`if (length(dots) > 0) {`
(v1.3.0.9002) intrinsic_resistant data set 2020-08-14 13:36:10 +02:00			`newnames <- suppressWarnings(as.ab(names(dots), info = FALSE))`
(v0.7.1.9004) atc class removal 2019-06-27 11:57:45 +02:00			`if (any(is.na(newnames))) {`
(v1.4.0.9015) bugfix 2020-11-10 16:35:56 +01:00			`warning_("Invalid antibiotic reference(s): ", toString(names(dots)[is.na(newnames)]),`
			`call = FALSE,`
			`immediate = TRUE)`
(v0.7.1.9004) atc class removal 2019-06-27 11:57:45 +02:00			`}`
			`# turn all NULLs to NAs`
			`dots <- unlist(lapply(dots, function(x) if (is.null(x)) NA else x))`
			`names(dots) <- newnames`
			`dots <- dots[!is.na(names(dots))]`
			`# merge, but overwrite automatically determined ones by 'dots'`
			`x <- c(x[!x %in% dots & !names(x) %in% names(dots)], dots)`
			`# delete NAs, this will make e.g. eucast_rules(... TMP = NULL) work to prevent TMP from being used`
			`x <- x[!is.na(x)]`
			`}`
(v1.2.0.9034) code cleaning 2020-07-13 09:17:24 +02:00
(v1.2.0.9001) filter_ab_class() update 2020-06-03 11:48:00 +02:00			`if (length(x) == 0) {`
(v1.3.0.9026) eucast expert rules 3.2 2020-09-24 00:30:11 +02:00			`if (info == TRUE) {`
(v1.4.0.9011) message formatting 2020-10-27 15:56:51 +01:00			`message_("No columns found.")`
(v1.3.0.9026) eucast expert rules 3.2 2020-09-24 00:30:11 +02:00			`}`
(v1.2.0.9001) filter_ab_class() update 2020-06-03 11:48:00 +02:00			`return(x)`
			`}`

(v0.7.1.9004) atc class removal 2019-06-27 11:57:45 +02:00			`# sort on name`
(v0.7.1.9057) get_column_abx() improvement 2019-08-15 17:09:27 +02:00			`x <- x[order(names(x), x)]`
(v1.3.0.9014) as.mo() speed improvement 2020-09-03 12:31:48 +02:00			`duplicates <- c(x[duplicated(x)], x[duplicated(names(x))])`
(v0.8.0.9004) added MDR guideline by Magiorakos et al. 2019-10-26 21:56:41 +02:00			`duplicates <- duplicates[unique(names(duplicates))]`
			`x <- c(x[!names(x) %in% names(duplicates)], duplicates)`
			`x <- x[order(names(x), x)]`
(v0.7.1.9057) get_column_abx() improvement 2019-08-15 17:09:27 +02:00
(v1.2.0.9001) filter_ab_class() update 2020-06-03 11:48:00 +02:00			`# succeeded with auto-guessing`
(v1.3.0.9026) eucast expert rules 3.2 2020-09-24 00:30:11 +02:00			`if (info == TRUE) {`
(v1.4.0.9017) stringsAsFactors definitions 2020-11-11 16:49:27 +01:00			`message_(" OK.", add_fn = list(font_green, font_bold), as_note = FALSE)`
(v1.3.0.9026) eucast expert rules 3.2 2020-09-24 00:30:11 +02:00			`}`
(v1.2.0.9034) code cleaning 2020-07-13 09:17:24 +02:00
(v0.8.0.9004) added MDR guideline by Magiorakos et al. 2019-10-26 21:56:41 +02:00			`for (i in seq_len(length(x))) {`
(v1.3.0.9026) eucast expert rules 3.2 2020-09-24 00:30:11 +02:00			`if (info == TRUE & verbose == TRUE & !names(x[i]) %in% names(duplicates)) {`
(v1.4.0.9030) as.mo() fix for known lab codes 2020-12-03 16:59:04 +01:00			`message_("Using column '", font_bold(x[i]), "' as input for ", names(x)[i],`
			`" (", ab_name(names(x)[i], tolower = TRUE, language = NULL), ").")`
(v0.7.1.9004) atc class removal 2019-06-27 11:57:45 +02:00			`}`
(v1.3.0.9026) eucast expert rules 3.2 2020-09-24 00:30:11 +02:00			`if (info == TRUE & names(x[i]) %in% names(duplicates)) {`
(v1.4.0.9030) as.mo() fix for known lab codes 2020-12-03 16:59:04 +01:00			`warning_(paste0("Using column '", font_bold(x[i]), "' as input for ", names(x)[i],`
			`" (", ab_name(names(x)[i], tolower = TRUE, language = NULL),`
(v1.4.0.9015) bugfix 2020-11-10 16:35:56 +01:00			`"), although it was matched for multiple antibiotics or columns."),`
			`add_fn = font_red,`
			`call = FALSE,`
			`immediate = verbose)`
(v0.7.1.9004) atc class removal 2019-06-27 11:57:45 +02:00			`}`
			`}`
(v0.8.0.9004) added MDR guideline by Magiorakos et al. 2019-10-26 21:56:41 +02:00

(v0.7.1.9004) atc class removal 2019-06-27 11:57:45 +02:00			`if (!is.null(hard_dependencies)) {`
(v0.8.0.9004) added MDR guideline by Magiorakos et al. 2019-10-26 21:56:41 +02:00			`hard_dependencies <- unique(hard_dependencies)`
(v0.7.1.9004) atc class removal 2019-06-27 11:57:45 +02:00			`if (!all(hard_dependencies %in% names(x))) {`
			`# missing a hard dependency will return NA and consequently the data will not be analysed`
			`missing <- hard_dependencies[!hard_dependencies %in% names(x)]`
			`generate_warning_abs_missing(missing, any = FALSE)`
			`return(NA)`
			`}`
			`}`
			`if (!is.null(soft_dependencies)) {`
(v0.8.0.9004) added MDR guideline by Magiorakos et al. 2019-10-26 21:56:41 +02:00			`soft_dependencies <- unique(soft_dependencies)`
(v1.3.0.9026) eucast expert rules 3.2 2020-09-24 00:30:11 +02:00			`if (info == TRUE & !all(soft_dependencies %in% names(x))) {`
(v0.7.1.9004) atc class removal 2019-06-27 11:57:45 +02:00			`# missing a soft dependency may lower the reliability`
			`missing <- soft_dependencies[!soft_dependencies %in% names(x)]`
(v1.3.0.9026) eucast expert rules 3.2 2020-09-24 00:30:11 +02:00			`missing_msg <- paste(paste0(ab_name(missing, tolower = TRUE, language = NULL),`
(v1.4.0.9011) message formatting 2020-10-27 15:56:51 +01:00			`" (", font_bold(missing, collapse = NULL), ")"),`
(v1.2.0.9034) code cleaning 2020-07-13 09:17:24 +02:00			`collapse = ", ")`
(v1.4.0.9011) message formatting 2020-10-27 15:56:51 +01:00			`message_("Reliability would be improved if these antimicrobial results would be available too: ",`
			`missing_msg)`
(v0.7.1.9004) atc class removal 2019-06-27 11:57:45 +02:00			`}`
			`}`
			`x`
			`}`

			`generate_warning_abs_missing <- function(missing, any = FALSE) {`
(v1.1.0.9010) lose dependencies 2020-05-18 11:09:02 +02:00			`missing <- paste0(missing, " (", ab_name(missing, tolower = TRUE, language = NULL), ")")`
(v0.7.1.9004) atc class removal 2019-06-27 11:57:45 +02:00			`if (any == TRUE) {`
			`any_txt <- c(" any of", "is")`
			`} else {`
			`any_txt <- c("", "are")`
			`}`
(v1.4.0.9015) bugfix 2020-11-10 16:35:56 +01:00			`warning_(paste0("Introducing NAs since", any_txt[1], " these antimicrobials ", any_txt[2], " required: ",`
			`paste(missing, collapse = ", ")),`
			`immediate = TRUE,`
			`call = FALSE)`
(v0.7.1.9004) atc class removal 2019-06-27 11:57:45 +02:00			`}`