# AMR/R/random.R

# ==================================================================== #
# TITLE                                                                #
# AMR: An R Package for Working with Antimicrobial Resistance Data     #
#                                                                      #
# SOURCE                                                               #
# https://github.com/msberends/AMR                                     #
#                                                                      #
# CITE AS                                                              #
# Berends MS, Luz CF, Friedrich AW, Sinha BNM, Albers CJ, Glasner C    #
# (2022). AMR: An R Package for Working with Antimicrobial Resistance  #
# Data. Journal of Statistical Software, 104(3), 1-31.                 #
# doi:10.18637/jss.v104.i03                                            #
#                                                                      #
# Developed at the University of Groningen and the University Medical  #
# Center Groningen in The Netherlands, in collaboration with many      #
# colleagues from around the world, see our website.                   #
#                                                                      #
# This R package is free software; you can freely use and distribute   #
# it for both personal and commercial purposes under the terms of the  #
# GNU General Public License version 2.0 (GNU GPL-2), as published by  #
# the Free Software Foundation.                                        #
# We created this package for both routine data analysis and academic  #
# research and it was publicly released in the hope that it will be    #
# useful, but it comes WITHOUT ANY WARRANTY OR LIABILITY.              #
#                                                                      #
# Visit our website for the full manual and a complete tutorial about  #
# how to conduct AMR data analysis: https://msberends.github.io/AMR/   #
# ==================================================================== #

#' Random MIC Values/Disk Zones/SIR Generation
#'
#' These functions can be used for generating random MIC values, disk diffusion diameters and SIR interpretations, for AMR data analysis practice. By providing a microorganism and antimicrobial drug to [random_mic()] or [random_disk()], the generated results will reflect reality as much as possible.
#' @param size desired size of the returned vector. If used in a [data.frame] call or `dplyr` verb, will get the current (group) size if left blank.
#' @param mo any [character] that can be coerced to a valid microorganism code with [as.mo()]
#' @param ab any [character] that can be coerced to a valid antimicrobial drug code with [as.ab()]
#' @param prob_SIR a vector of length 3: the probabilities for "S" (1st value), "I" (2nd value) and "R" (3rd value)
#' @param ... ignored, only in place to allow future extensions
#' @details The base \R function [sample()] is used for generating values.
#'
#' Generated values are based on the EUCAST `r max(as.integer(gsub("[^0-9]", "", subset(clinical_breakpoints, guideline %like% "EUCAST")$guideline)))` guideline as implemented in the [clinical_breakpoints] data set. To create specific generated values per bug or drug, set the `mo` and/or `ab` argument.
#' @return class `mic` for [random_mic()] (see [as.mic()]) and class `disk` for [random_disk()] (see [as.disk()]); for [random_sir()], values sampled from `as.sir(c("S", "I", "R"))` (see [as.sir()])
#' @name random
#' @rdname random
#' @export
#' @examples
#' random_mic(25)
#' random_disk(25)
#' random_sir(25)
#'
#' \donttest{
#' # make the random generation more realistic by setting a bug and/or drug:
#' random_mic(25, "Klebsiella pneumoniae") # range 0.0625-64
#' random_mic(25, "Klebsiella pneumoniae", "meropenem") # range 0.0625-16
#' random_mic(25, "Streptococcus pneumoniae", "meropenem") # range 0.0625-4
#'
#' random_disk(25, "Klebsiella pneumoniae") # range 8-50
#' random_disk(25, "Klebsiella pneumoniae", "ampicillin") # range 11-17
#' random_disk(25, "Streptococcus pneumoniae", "ampicillin") # range 12-27
#' }
random_mic <- function(size = NULL, mo = NULL, ab = NULL, ...) {
  # Input checks: `size` must be a single positive, finite number; `mo` and
  # `ab` must each be a single character value. All three may be NULL.
  meet_criteria(
    size,
    allow_class = c("numeric", "integer"),
    has_length = 1,
    is_positive = TRUE,
    is_finite = TRUE,
    allow_NULL = TRUE
  )
  meet_criteria(
    mo,
    allow_class = "character",
    has_length = 1,
    allow_NULL = TRUE
  )
  meet_criteria(
    ab,
    allow_class = "character",
    has_length = 1,
    allow_NULL = TRUE
  )

  # No explicit size: use the number of rows of the data returned by
  # get_current_data(). The expression is kept exactly as-is because `call = -3`
  # is relative to the current call stack.
  if (is.null(size)) size <- NROW(get_current_data(arg_name = "size", call = -3))

  # The actual generation is delegated to the shared internal helper.
  random_exec(type = "MIC", size = size, mo = mo, ab = ab)
}

#' @rdname random
#' @export
random_disk <- function(size = NULL, mo = NULL, ab = NULL, ...) {
  # Input checks: `size` must be a single positive, finite number; `mo` and
  # `ab` must each be a single character value. All three may be NULL.
  meet_criteria(
    size,
    allow_class = c("numeric", "integer"),
    has_length = 1,
    is_positive = TRUE,
    is_finite = TRUE,
    allow_NULL = TRUE
  )
  meet_criteria(
    mo,
    allow_class = "character",
    has_length = 1,
    allow_NULL = TRUE
  )
  meet_criteria(
    ab,
    allow_class = "character",
    has_length = 1,
    allow_NULL = TRUE
  )

  # No explicit size: use the number of rows of the data returned by
  # get_current_data(). The expression is kept exactly as-is because `call = -3`
  # is relative to the current call stack.
  if (is.null(size)) size <- NROW(get_current_data(arg_name = "size", call = -3))

  # The actual generation is delegated to the shared internal helper.
  random_exec(type = "DISK", size = size, mo = mo, ab = ab)
}

#' @rdname random
#' @export
random_sir <- function(size = NULL, prob_SIR = c(0.33, 0.33, 0.33), ...) {
  # Input checks: `size` must be a single positive, finite number (or NULL);
  # `prob_SIR` must be a numeric vector of length 3.
  meet_criteria(
    size,
    allow_class = c("numeric", "integer"),
    has_length = 1,
    is_positive = TRUE,
    is_finite = TRUE,
    allow_NULL = TRUE
  )
  meet_criteria(
    prob_SIR,
    allow_class = c("numeric", "integer"),
    has_length = 3
  )

  # No explicit size: use the number of rows of the data returned by
  # get_current_data(). The expression is kept exactly as-is because `call = -3`
  # is relative to the current call stack.
  if (is.null(size)) size <- NROW(get_current_data(arg_name = "size", call = -3))

  # Draw with replacement from the three categories; `prob_SIR` supplies the
  # weights for "S", "I" and "R", in that order.
  sir_values <- as.sir(c("S", "I", "R"))
  sample(sir_values, size = size, replace = TRUE, prob = prob_SIR)
}

# Internal engine behind random_mic() and random_disk().
#
# Starts from the rows of `clinical_breakpoints` whose guideline matches
# "EUCAST", keeps only the guideline that sorts highest (presumably the most
# recent year -- verify against the guideline naming) and the requested method,
# optionally narrows these down to a microorganism and/or antimicrobial drug,
# and then draws `size` random values from a range derived from the remaining
# S and R breakpoints.
#
# Arguments:
#   type  "MIC" or "DISK"; matched against the `method` column and used to
#         pick the generation branch
#   size  number of values to draw
#   mo    optional microorganism; coerced with as.mo(), after which rows for
#         the organism itself or its genus, family or order are kept
#   ab    optional antimicrobial drug; coerced with as.ab()
#
# A `mo` or `ab` filter that matches no rows is ignored with a warning.
#
# Returns as.mic() output for type "MIC" and as.disk() output for type "DISK".
random_exec <- function(type, size, mo = NULL, ab = NULL) {
  df <- clinical_breakpoints %pm>%
    pm_filter(guideline %like% "EUCAST") %pm>%
    pm_arrange(pm_desc(guideline)) %pm>%
    subset(guideline == max(guideline) &
      method == type)

  if (!is.null(mo)) {
    mo_coerced <- as.mo(mo)
    # also accept breakpoints defined on a higher taxonomic level
    mo_include <- c(
      mo_coerced,
      as.mo(mo_genus(mo_coerced)),
      as.mo(mo_family(mo_coerced)),
      as.mo(mo_order(mo_coerced))
    )
    # inside subset(), `mo` refers to the column of `df`
    df_new <- df %pm>%
      subset(mo %in% mo_include)
    if (nrow(df_new) > 0) {
      df <- df_new
    } else {
      warning_("in `random_", tolower(type), "()`: no rows found that match mo '", mo, "', ignoring argument `mo`")
    }
  }

  if (!is.null(ab)) {
    ab_coerced <- as.ab(ab)
    # inside subset(), `ab` refers to the column of `df`
    df_new <- df %pm>%
      subset(ab %in% ab_coerced)
    if (nrow(df_new) > 0) {
      df <- df_new
    } else {
      warning_("in `random_", tolower(type), "()`: no rows found that match ab '", ab, "', ignoring argument `ab`")
    }
  }

  # Only use breakpoints that are actually available: a single NA would turn
  # min()/max() into NA and make the range derived below unusable.
  bp_S <- df$breakpoint_S[is.finite(df$breakpoint_S)]
  bp_R <- df$breakpoint_R[is.finite(df$breakpoint_R)]

  if (type == "MIC") {
    # set range
    mic_range <- c(0.001, 0.002, 0.005, 0.010, 0.025, 0.0625, 0.125, 0.250, 0.5, 1, 2, 4, 8, 16, 32, 64, 128, 256)

    # Position of the value in `mic_range` closest to `value`. Exact equality
    # is not used: a breakpoint such as 0.06 has no identical entry in
    # `mic_range` and would otherwise not be located at all.
    nearest_index <- function(value) {
      which.min(abs(mic_range - value))
    }

    # get highest/lowest +/- random 1 to 3 steps in `mic_range`; the offset for
    # the upper limit is drawn first, then the one for the lower limit
    offset_up <- sample(c(1:3), 1)
    offset_down <- sample(c(1:3), 1)
    if (length(bp_R) > 0) {
      max_range <- mic_range[min(length(mic_range), nearest_index(max(bp_R)) + offset_up)]
    } else {
      # no usable R breakpoint: do not restrict the upper end
      max_range <- mic_range[length(mic_range)]
    }
    if (length(bp_S) > 0) {
      min_range <- mic_range[max(1, nearest_index(min(bp_S)) - offset_down)]
    } else {
      # no usable S breakpoint: do not restrict the lower end
      min_range <- mic_range[1]
    }

    mic_range_new <- mic_range[mic_range <= max_range & mic_range >= min_range]
    if (length(mic_range_new) == 0) {
      mic_range_new <- mic_range
    }
    # index-based sampling: sample() on a numeric vector of length 1 would
    # sample from 1:x instead of returning x
    out <- as.mic(mic_range_new[sample.int(length(mic_range_new), size = size, replace = TRUE)])
    # 50% chance that lowest will get <= and highest will get >=
    if (stats::runif(1) > 0.5) {
      out[out == min(out)] <- paste0("<=", out[out == min(out)])
    }
    if (stats::runif(1) > 0.5) {
      out[out == max(out)] <- paste0(">=", out[out == max(out)])
    }
    return(out)
  } else if (type == "DISK") {
    if (length(bp_R) > 0 && length(bp_S) > 0) {
      # widen the breakpoint span by 25% on both sides
      set_range <- seq(
        from = as.integer(min(bp_R) / 1.25),
        to = as.integer(max(bp_S) * 1.25),
        by = 1
      )
    } else {
      # no usable breakpoints: fall back to the 6-50 span enforced below
      set_range <- c(6:50)
    }
    # index-based sampling: sample() on a numeric vector of length 1 would
    # sample from 1:x instead of returning x
    out <- set_range[sample.int(length(set_range), size = size, replace = TRUE)]
    # keep results within 6-50 by redrawing values outside it
    too_small <- out < 6
    out[too_small] <- sample(c(6:10), sum(too_small), replace = TRUE)
    too_large <- out > 50
    out[too_large] <- sample(c(40:50), sum(too_large), replace = TRUE)
    return(as.disk(out))
  }
}
(v1.4.0.9037) random_* functions 2020-12-12 23:17:29 +01:00			`# ==================================================================== #`
			`# TITLE #`
New mo algorithm, prepare for 2.0 2022-10-05 09:12:22 +02:00			`# AMR: An R Package for Working with Antimicrobial Resistance Data #`
(v1.4.0.9037) random_* functions 2020-12-12 23:17:29 +01:00			`# #`
			`# SOURCE #`
			`# https://github.com/msberends/AMR #`
			`# #`
New mo algorithm, prepare for 2.0 2022-10-05 09:12:22 +02:00			`# CITE AS #`
			`# Berends MS, Luz CF, Friedrich AW, Sinha BNM, Albers CJ, Glasner C #`
			`# (2022). AMR: An R Package for Working with Antimicrobial Resistance #`
			`# Data. Journal of Statistical Software, 104(3), 1-31. #`
			`# doi:10.18637/jss.v104.i03 #`
			`# #`
support new mo codes 2022-12-27 15:16:15 +01:00			`# Developed at the University of Groningen and the University Medical #`
			`# Center Groningen in The Netherlands, in collaboration with many #`
			`# colleagues from around the world, see our website. #`
(v1.4.0.9037) random_* functions 2020-12-12 23:17:29 +01:00			`# #`
			`# This R package is free software; you can freely use and distribute #`
			`# it for both personal and commercial purposes under the terms of the #`
			`# GNU General Public License version 2.0 (GNU GPL-2), as published by #`
			`# the Free Software Foundation. #`
			`# We created this package for both routine data analysis and academic #`
			`# research and it was publicly released in the hope that it will be #`
			`# useful, but it comes WITHOUT ANY WARRANTY OR LIABILITY. #`
			`# #`
			`# Visit our website for the full manual and a complete tutorial about #`
(v1.5.0.9014) only_rsi_columns, is.rsi.eligible improvement 2021-02-02 23:57:35 +01:00			`# how to conduct AMR data analysis: https://msberends.github.io/AMR/ #`
(v1.4.0.9037) random_* functions 2020-12-12 23:17:29 +01:00			`# ==================================================================== #`

Replace RSI with SIR 2023-01-21 23:47:20 +01:00			`#' Random MIC Values/Disk Zones/SIR Generation`
(v1.4.0.9037) random_* functions 2020-12-12 23:17:29 +01:00			`#'`
cleanup and new intro logo 2022-11-13 13:44:25 +01:00			`#' These functions can be used for generating random MIC values and disk diffusion diameters, for AMR data analysis practice. By providing a microorganism and antimicrobial drug, the generated results will reflect reality as much as possible.`
(v1.7.1.9022) rely on vctrs for ab selectors 2021-07-23 21:42:11 +02:00			#' @param size desired size of the returned vector. If used in a [data.frame] call or `dplyr` verb, will get the current (group) size if left blank.
(v1.6.0.9021) join functions update 2021-05-12 18:15:03 +02:00			`#' @param mo any [character] that can be coerced to a valid microorganism code with [as.mo()]`
cleanup and new intro logo 2022-11-13 13:44:25 +01:00			`#' @param ab any [character] that can be coerced to a valid antimicrobial drug code with [as.ab()]`
Replace RSI with SIR 2023-01-21 23:47:20 +01:00			`#' @param prob_SIR a vector of length 3: the probabilities for "S" (1st value), "I" (2nd value) and "R" (3rd value)`
(v1.6.0.9013) website update 2021-04-29 17:16:30 +02:00			`#' @param ... ignored, only in place to allow future extensions`
(v1.7.1.9005) ab class selectors for R-3.0 and R-3.1 2021-06-22 12:16:42 +02:00			`#' @details The base \R function [sample()] is used for generating values.`
styled, unit test fix 2022-08-28 10:31:50 +02:00			`#'`
Replace RSI with SIR 2023-01-21 23:47:20 +01:00			#' Generated values are based on the EUCAST `r max(as.integer(gsub("[^0-9]", "", subset(clinical_breakpoints, guideline %like% "EUCAST")$guideline)))` guideline as implemented in the [clinical_breakpoints] data set. To create specific generated values per bug or drug, set the `mo` and/or `ab` argument.
remove warnings from unit tests 2022-10-19 11:47:57 +02:00			#' @return class `mic` for [random_mic()] (see [as.mic()]) and class `disk` for [random_disk()] (see [as.disk()])
(v1.4.0.9037) random_* functions 2020-12-12 23:17:29 +01:00			`#' @name random`
			`#' @rdname random`
			`#' @export`
			`#' @examples`
new, automated website 2022-08-21 16:37:20 +02:00			`#' random_mic(25)`
			`#' random_disk(25)`
Replace RSI with SIR 2023-01-21 23:47:20 +01:00			`#' random_sir(25)`
styled, unit test fix 2022-08-28 10:31:50 +02:00			`#'`
(v1.4.0.9037) random_* functions 2020-12-12 23:17:29 +01:00			`#' \donttest{`
			`#' # make the random generation more realistic by setting a bug and/or drug:`
styled, unit test fix 2022-08-28 10:31:50 +02:00			`#' random_mic(25, "Klebsiella pneumoniae") # range 0.0625-64`
			`#' random_mic(25, "Klebsiella pneumoniae", "meropenem") # range 0.0625-16`
new, automated website 2022-08-21 16:37:20 +02:00			`#' random_mic(25, "Streptococcus pneumoniae", "meropenem") # range 0.0625-4`
styled, unit test fix 2022-08-28 10:31:50 +02:00			`#'`
			`#' random_disk(25, "Klebsiella pneumoniae") # range 8-50`
			`#' random_disk(25, "Klebsiella pneumoniae", "ampicillin") # range 11-17`
new, automated website 2022-08-21 16:37:20 +02:00			`#' random_disk(25, "Streptococcus pneumoniae", "ampicillin") # range 12-27`
(v1.4.0.9037) random_* functions 2020-12-12 23:17:29 +01:00			`#' }`
(v1.7.1.9022) rely on vctrs for ab selectors 2021-07-23 21:42:11 +02:00			`random_mic <- function(size = NULL, mo = NULL, ab = NULL, ...) {`
			`meet_criteria(size, allow_class = c("numeric", "integer"), has_length = 1, is_positive = TRUE, is_finite = TRUE, allow_NULL = TRUE)`
(v1.7.1.9005) ab class selectors for R-3.0 and R-3.1 2021-06-22 12:16:42 +02:00			`meet_criteria(mo, allow_class = "character", has_length = 1, allow_NULL = TRUE)`
			`meet_criteria(ab, allow_class = "character", has_length = 1, allow_NULL = TRUE)`
(v1.7.1.9022) rely on vctrs for ab selectors 2021-07-23 21:42:11 +02:00			`if (is.null(size)) {`
			`size <- NROW(get_current_data(arg_name = "size", call = -3))`
			`}`
(v1.4.0.9037) random_* functions 2020-12-12 23:17:29 +01:00			`random_exec("MIC", size = size, mo = mo, ab = ab)`
			`}`

			`#' @rdname random`
			`#' @export`
(v1.7.1.9022) rely on vctrs for ab selectors 2021-07-23 21:42:11 +02:00			`random_disk <- function(size = NULL, mo = NULL, ab = NULL, ...) {`
			`meet_criteria(size, allow_class = c("numeric", "integer"), has_length = 1, is_positive = TRUE, is_finite = TRUE, allow_NULL = TRUE)`
(v1.7.1.9005) ab class selectors for R-3.0 and R-3.1 2021-06-22 12:16:42 +02:00			`meet_criteria(mo, allow_class = "character", has_length = 1, allow_NULL = TRUE)`
			`meet_criteria(ab, allow_class = "character", has_length = 1, allow_NULL = TRUE)`
(v1.7.1.9022) rely on vctrs for ab selectors 2021-07-23 21:42:11 +02:00			`if (is.null(size)) {`
			`size <- NROW(get_current_data(arg_name = "size", call = -3))`
			`}`
(v1.4.0.9037) random_* functions 2020-12-12 23:17:29 +01:00			`random_exec("DISK", size = size, mo = mo, ab = ab)`
			`}`

			`#' @rdname random`
			`#' @export`
Replace RSI with SIR 2023-01-21 23:47:20 +01:00			`random_sir <- function(size = NULL, prob_SIR = c(0.33, 0.33, 0.33), ...) {`
(v1.7.1.9022) rely on vctrs for ab selectors 2021-07-23 21:42:11 +02:00			`meet_criteria(size, allow_class = c("numeric", "integer"), has_length = 1, is_positive = TRUE, is_finite = TRUE, allow_NULL = TRUE)`
Replace RSI with SIR 2023-01-21 23:47:20 +01:00			`meet_criteria(prob_SIR, allow_class = c("numeric", "integer"), has_length = 3)`
(v1.7.1.9022) rely on vctrs for ab selectors 2021-07-23 21:42:11 +02:00			`if (is.null(size)) {`
			`size <- NROW(get_current_data(arg_name = "size", call = -3))`
			`}`
Replace RSI with SIR 2023-01-21 23:47:20 +01:00			`sample(as.sir(c("S", "I", "R")), size = size, replace = TRUE, prob = prob_SIR)`
(v1.4.0.9037) random_* functions 2020-12-12 23:17:29 +01:00			`}`

			`random_exec <- function(type, size, mo = NULL, ab = NULL) {`
Replace RSI with SIR 2023-01-21 23:47:20 +01:00			`df <- clinical_breakpoints %pm>%`
(v1.4.0.9037) random_* functions 2020-12-12 23:17:29 +01:00			`pm_filter(guideline %like% "EUCAST") %pm>%`
			`pm_arrange(pm_desc(guideline)) %pm>%`
			`subset(guideline == max(guideline) &`
styled, unit test fix 2022-08-28 10:31:50 +02:00			`method == type)`

(v1.4.0.9037) random_* functions 2020-12-12 23:17:29 +01:00			`if (!is.null(mo)) {`
			`mo_coerced <- as.mo(mo)`
styled, unit test fix 2022-08-28 10:31:50 +02:00			`mo_include <- c(`
			`mo_coerced,`
			`as.mo(mo_genus(mo_coerced)),`
			`as.mo(mo_family(mo_coerced)),`
			`as.mo(mo_order(mo_coerced))`
			`)`
(v1.4.0.9037) random_* functions 2020-12-12 23:17:29 +01:00			`df_new <- df %pm>%`
			`subset(mo %in% mo_include)`
			`if (nrow(df_new) > 0) {`
			`df <- df_new`
			`} else {`
(v1.8.0.9002) as.rsi() cleanup, more informative warnings 2022-03-02 15:38:55 +01:00			warning_("in `random_", tolower(type), "()`: no rows found that match mo '", mo, "', ignoring argument `mo`")
(v1.4.0.9037) random_* functions 2020-12-12 23:17:29 +01:00			`}`
			`}`
styled, unit test fix 2022-08-28 10:31:50 +02:00
(v1.4.0.9037) random_* functions 2020-12-12 23:17:29 +01:00			`if (!is.null(ab)) {`
			`ab_coerced <- as.ab(ab)`
			`df_new <- df %pm>%`
			`subset(ab %in% ab_coerced)`
			`if (nrow(df_new) > 0) {`
			`df <- df_new`
			`} else {`
(v1.8.0.9002) as.rsi() cleanup, more informative warnings 2022-03-02 15:38:55 +01:00			warning_("in `random_", tolower(type), "()`: no rows found that match ab '", ab, "', ignoring argument `ab`")
(v1.4.0.9037) random_* functions 2020-12-12 23:17:29 +01:00			`}`
			`}`
styled, unit test fix 2022-08-28 10:31:50 +02:00
(v1.4.0.9037) random_* functions 2020-12-12 23:17:29 +01:00			`if (type == "MIC") {`
(v1.7.1.9022) rely on vctrs for ab selectors 2021-07-23 21:42:11 +02:00			`# set range`
			`mic_range <- c(0.001, 0.002, 0.005, 0.010, 0.025, 0.0625, 0.125, 0.250, 0.5, 1, 2, 4, 8, 16, 32, 64, 128, 256)`

			`# get highest/lowest +/- random 1 to 3 higher factors of two`
styled, unit test fix 2022-08-28 10:31:50 +02:00			`max_range <- mic_range[min(`
			`length(mic_range),`
			`which(mic_range == max(df$breakpoint_R)) + sample(c(1:3), 1)`
			`)]`
			`min_range <- mic_range[max(`
			`1,`
			`which(mic_range == min(df$breakpoint_S)) - sample(c(1:3), 1)`
			`)]`
(v1.7.1.9022) rely on vctrs for ab selectors 2021-07-23 21:42:11 +02:00
			`mic_range_new <- mic_range[mic_range <= max_range & mic_range >= min_range]`
			`if (length(mic_range_new) == 0) {`
			`mic_range_new <- mic_range`
(v1.4.0.9037) random_* functions 2020-12-12 23:17:29 +01:00			`}`
(v1.7.1.9022) rely on vctrs for ab selectors 2021-07-23 21:42:11 +02:00			`out <- as.mic(sample(mic_range_new, size = size, replace = TRUE))`
(v1.5.0.9032) All group generics for MICs 2021-03-07 13:52:39 +01:00			`# 50% chance that lowest will get <= and highest will get >=`
(v1.5.0.9034) unit test fix 2021-03-07 21:16:45 +01:00			`if (stats::runif(1) > 0.5) {`
(v1.5.0.9032) All group generics for MICs 2021-03-07 13:52:39 +01:00			`out[out == min(out)] <- paste0("<=", out[out == min(out)])`
			`}`
(v1.5.0.9034) unit test fix 2021-03-07 21:16:45 +01:00			`if (stats::runif(1) > 0.5) {`
(v1.5.0.9032) All group generics for MICs 2021-03-07 13:52:39 +01:00			`out[out == max(out)] <- paste0(">=", out[out == max(out)])`
			`}`
			`return(out)`
(v1.4.0.9037) random_* functions 2020-12-12 23:17:29 +01:00			`} else if (type == "DISK") {`
styled, unit test fix 2022-08-28 10:31:50 +02:00			`set_range <- seq(`
			`from = as.integer(min(df$breakpoint_R) / 1.25),`
			`to = as.integer(max(df$breakpoint_S) * 1.25),`
			`by = 1`
			`)`
(v1.4.0.9037) random_* functions 2020-12-12 23:17:29 +01:00			`out <- sample(set_range, size = size, replace = TRUE)`
			`out[out < 6] <- sample(c(6:10), length(out[out < 6]), replace = TRUE)`
			`out[out > 50] <- sample(c(40:50), length(out[out > 50]), replace = TRUE)`
			`return(as.disk(out))`
			`}`
			`}`