2020-12-12 23:17:29 +01:00
# ==================================================================== #
2023-07-08 17:30:05 +02:00
# TITLE: #
2022-10-05 09:12:22 +02:00
# AMR: An R Package for Working with Antimicrobial Resistance Data #
2020-12-12 23:17:29 +01:00
# #
2023-07-08 17:30:05 +02:00
# SOURCE CODE: #
2020-12-12 23:17:29 +01:00
# https://github.com/msberends/AMR #
# #
2023-07-08 17:30:05 +02:00
# PLEASE CITE THIS SOFTWARE AS: #
2022-10-05 09:12:22 +02:00
# Berends MS, Luz CF, Friedrich AW, Sinha BNM, Albers CJ, Glasner C #
# (2022). AMR: An R Package for Working with Antimicrobial Resistance #
# Data. Journal of Statistical Software, 104(3), 1-31. #
2023-05-27 10:39:22 +02:00
# https://doi.org/10.18637/jss.v104.i03 #
2022-10-05 09:12:22 +02:00
# #
2022-12-27 15:16:15 +01:00
# Developed at the University of Groningen and the University Medical #
# Center Groningen in The Netherlands, in collaboration with many #
# colleagues from around the world, see our website. #
2020-12-12 23:17:29 +01:00
# #
# This R package is free software; you can freely use and distribute #
# it for both personal and commercial purposes under the terms of the #
# GNU General Public License version 2.0 (GNU GPL-2), as published by #
# the Free Software Foundation. #
# We created this package for both routine data analysis and academic #
# research and it was publicly released in the hope that it will be #
# useful, but it comes WITHOUT ANY WARRANTY OR LIABILITY. #
# #
# Visit our website for the full manual and a complete tutorial about #
2021-02-02 23:57:35 +01:00
# how to conduct AMR data analysis: https://msberends.github.io/AMR/ #
2020-12-12 23:17:29 +01:00
# ==================================================================== #
2023-01-21 23:47:20 +01:00
#' Random MIC Values/Disk Zones/SIR Generation
2020-12-12 23:17:29 +01:00
#'
2022-11-13 13:44:25 +01:00
#' These functions can be used for generating random MIC values, disk diffusion diameters and SIR interpretations, for AMR data analysis practice. By providing a microorganism and antimicrobial drug, the generated MIC values and disk diffusion diameters will reflect reality as much as possible.
2021-07-23 21:42:11 +02:00
#' @param size desired size of the returned vector. If used in a [data.frame] call or `dplyr` verb, will get the current (group) size if left blank.
2021-05-12 18:15:03 +02:00
#' @param mo any [character] that can be coerced to a valid microorganism code with [as.mo()]
2022-11-13 13:44:25 +01:00
#' @param ab any [character] that can be coerced to a valid antimicrobial drug code with [as.ab()]
2023-01-21 23:47:20 +01:00
#' @param prob_SIR a vector of length 3: the probabilities for "S" (1st value), "I" (2nd value) and "R" (3rd value)
2021-04-29 17:16:30 +02:00
#' @param ... ignored, only in place to allow future extensions
2021-06-22 12:16:42 +02:00
#' @details The base \R function [sample()] is used for generating values.
2022-08-28 10:31:50 +02:00
#'
2023-01-21 23:47:20 +01:00
#' Generated values are based on the EUCAST `r max(as.integer(gsub("[^0-9]", "", subset(clinical_breakpoints, guideline %like% "EUCAST")$guideline)))` guideline as implemented in the [clinical_breakpoints] data set. To create specific generated values per bug or drug, set the `mo` and/or `ab` argument.
2022-10-19 11:47:57 +02:00
#' @return class `mic` for [random_mic()] (see [as.mic()]), class `disk` for [random_disk()] (see [as.disk()]) and class `sir` for [random_sir()] (see [as.sir()])
2020-12-12 23:17:29 +01:00
#' @name random
#' @rdname random
#' @export
#' @examples
2022-08-21 16:37:20 +02:00
#' random_mic(25)
#' random_disk(25)
2023-01-21 23:47:20 +01:00
#' random_sir(25)
2022-08-28 10:31:50 +02:00
#'
2020-12-12 23:17:29 +01:00
#' \donttest{
#' # make the random generation more realistic by setting a bug and/or drug:
2022-08-28 10:31:50 +02:00
#' random_mic(25, "Klebsiella pneumoniae") # range 0.0625-64
#' random_mic(25, "Klebsiella pneumoniae", "meropenem") # range 0.0625-16
2022-08-21 16:37:20 +02:00
#' random_mic(25, "Streptococcus pneumoniae", "meropenem") # range 0.0625-4
2022-08-28 10:31:50 +02:00
#'
#' random_disk(25, "Klebsiella pneumoniae") # range 8-50
#' random_disk(25, "Klebsiella pneumoniae", "ampicillin") # range 11-17
2022-08-21 16:37:20 +02:00
#' random_disk(25, "Streptococcus pneumoniae", "ampicillin") # range 12-27
2020-12-12 23:17:29 +01:00
#' }
2021-07-23 21:42:11 +02:00
random_mic <- function(size = NULL, mo = NULL, ab = NULL, ...) {
  # Validate the user input first: `size` must be a single positive finite
  # number (or NULL), `mo` and `ab` a single character value (or NULL).
  meet_criteria(size, allow_class = c("numeric", "integer"), has_length = 1, is_positive = TRUE, is_finite = TRUE, allow_NULL = TRUE)
  meet_criteria(mo, allow_class = "character", has_length = 1, allow_NULL = TRUE)
  meet_criteria(ab, allow_class = "character", has_length = 1, allow_NULL = TRUE)
  if (is.null(size)) {
    # No size given: take the number of rows of the data that is currently
    # being evaluated. NOTE: `call = -3` is a frame offset relative to this
    # function, so this lookup must stay inline and not move into a helper.
    size <- NROW(get_current_data(arg_name = "size", call = -3))
  }
  # `...` is accepted but intentionally not forwarded.
  random_exec("MIC", size = size, mo = mo, ab = ab)
}
#' @rdname random
#' @export
2021-07-23 21:42:11 +02:00
random_disk <- function(size = NULL, mo = NULL, ab = NULL, ...) {
  # Validate the user input first: `size` must be a single positive finite
  # number (or NULL), `mo` and `ab` a single character value (or NULL).
  meet_criteria(size, allow_class = c("numeric", "integer"), has_length = 1, is_positive = TRUE, is_finite = TRUE, allow_NULL = TRUE)
  meet_criteria(mo, allow_class = "character", has_length = 1, allow_NULL = TRUE)
  meet_criteria(ab, allow_class = "character", has_length = 1, allow_NULL = TRUE)
  if (is.null(size)) {
    # No size given: take the number of rows of the data that is currently
    # being evaluated. NOTE: `call = -3` is a frame offset relative to this
    # function, so this lookup must stay inline and not move into a helper.
    size <- NROW(get_current_data(arg_name = "size", call = -3))
  }
  # `...` is accepted but intentionally not forwarded.
  random_exec("DISK", size = size, mo = mo, ab = ab)
}
#' @rdname random
#' @export
2023-01-21 23:47:20 +01:00
random_sir <- function(size = NULL, prob_SIR = c(0.33, 0.33, 0.33), ...) {
  meet_criteria(size, allow_class = c("numeric", "integer"), has_length = 1, is_positive = TRUE, is_finite = TRUE, allow_NULL = TRUE)
  # Backward compatibility: the argument used to be called `prob_RSI`. If a
  # caller still passes it through `...`, warn and use its value instead.
  dots <- list(...)
  if ("prob_RSI" %in% names(dots)) {
    deprecation_warning("prob_RSI", "prob_SIR", is_function = FALSE)
    prob_SIR <- dots$prob_RSI
  }
  # Validate only after the deprecated argument may have replaced the value.
  meet_criteria(prob_SIR, allow_class = c("numeric", "integer"), has_length = 3)
  if (is.null(size)) {
    # No size given: take the number of rows of the data that is currently
    # being evaluated. NOTE: `call = -3` is a frame offset relative to this
    # function, so this lookup must stay inline and not move into a helper.
    size <- NROW(get_current_data(arg_name = "size", call = -3))
  }
  # Draw with replacement; `prob_SIR` gives the weights for S, I and R in
  # that order (sample() normalises the weights itself).
  sample(as.sir(c("S", "I", "R")), size = size, replace = TRUE, prob = prob_SIR)
}
2023-07-08 21:00:49 +02:00
# Internal worker behind random_mic() and random_disk().
#
# method_type: "MIC" or "DISK"; selects both the breakpoint rows that are
#   used and the kind of values that are generated.
# size: number of values to generate.
# mo, ab: optional microorganism / antimicrobial input, used to narrow down
#   the breakpoints. When a filter matches no rows, a warning is given and
#   that filter is ignored.
#
# Returns the result of as.mic() for "MIC" and of as.disk() for "DISK"; for
# any other `method_type` no branch is taken and NULL is returned invisibly.
random_exec <- function(method_type, size, mo = NULL, ab = NULL) {
  # Human breakpoints of the requested method, restricted to the EUCAST
  # guideline that sorts highest (`max(guideline)`).
  df <- AMR::clinical_breakpoints %pm>%
    pm_filter(guideline %like% "EUCAST") %pm>%
    pm_arrange(pm_desc(guideline)) %pm>%
    subset(guideline == max(guideline) &
      method == method_type &
      type == "human")

  if (!is.null(mo)) {
    mo_coerced <- as.mo(mo)
    # Also accept breakpoints defined for the genus, family or order of the
    # requested microorganism.
    mo_include <- c(
      mo_coerced,
      as.mo(mo_genus(mo_coerced)),
      as.mo(mo_family(mo_coerced)),
      as.mo(mo_order(mo_coerced))
    )
    # Inside subset(), `mo` is the column of `df`, not the function argument.
    df_new <- df %pm>%
      subset(mo %in% mo_include)
    if (nrow(df_new) > 0) {
      df <- df_new
    } else {
      warning_("in `random_", tolower(method_type), "()`: no rows found that match mo '", mo, "', ignoring argument `mo`")
    }
  }

  if (!is.null(ab)) {
    ab_coerced <- as.ab(ab)
    # Inside subset(), `ab` is the column of `df`, not the function argument.
    df_new <- df %pm>%
      subset(ab %in% ab_coerced)
    if (nrow(df_new) > 0) {
      df <- df_new
    } else {
      warning_("in `random_", tolower(method_type), "()`: no rows found that match ab '", ab, "' (", ab_name(ab_coerced, tolower = TRUE, language = NULL), "), ignoring argument `ab`")
    }
  }

  # Drop missing breakpoints up front, so that min()/max() are never called
  # on an effectively empty vector (which yields +/-Inf plus a warning).
  bp_R <- df$breakpoint_R[!is.na(df$breakpoint_R)]
  bp_S <- df$breakpoint_S[!is.na(df$breakpoint_S)]

  if (method_type == "MIC") {
    # set range
    mic_range <- c(0.001, 0.002, 0.005, 0.010, 0.025, 0.0625, 0.125, 0.250, 0.5, 1, 2, 4, 8, 16, 32, 64, 128, 256)
    # Position of the highest R / lowest S breakpoint on the grid. This is an
    # exact match: a breakpoint that is not on the grid (or is missing) gives
    # integer(0), which makes the min()/max() below fall back to the last /
    # first element of the grid.
    idx_R <- if (length(bp_R) > 0) which(mic_range == max(bp_R)) else integer(0)
    idx_S <- if (length(bp_S) > 0) which(mic_range == min(bp_S)) else integer(0)
    # get highest/lowest +/- random 1 to 3 steps on the grid
    max_range <- mic_range[min(
      length(mic_range),
      idx_R + sample(c(1:3), 1)
    )]
    min_range <- mic_range[max(
      1,
      idx_S - sample(c(1:3), 1)
    )]
    mic_range_new <- mic_range[mic_range <= max_range & mic_range >= min_range]
    if (length(mic_range_new) == 0) {
      mic_range_new <- mic_range
    }
    # Sample positions instead of values: sample(x, ...) treats a single
    # number x >= 1 as 1:x, which would return values outside the range.
    out <- as.mic(mic_range_new[sample.int(length(mic_range_new), size = size, replace = TRUE)])
    # 50% chance that lowest will get <= and highest will get >=
    if (stats::runif(1) > 0.5) {
      out[out == min(out)] <- paste0("<=", out[out == min(out)])
    }
    if (stats::runif(1) > 0.5) {
      out[out == max(out)] <- paste0(">=", out[out == max(out)])
    }
    return(out)
  } else if (method_type == "DISK") {
    if (length(bp_R) == 0 || length(bp_S) == 0) {
      # No usable breakpoints: seq() would fail on non-finite limits, so use
      # the full span that the clamping below allows (6-50 mm) instead.
      set_range <- c(6:50)
    } else {
      # From 1.25 times below the lowest R breakpoint to 1.25 times above the
      # highest S breakpoint, in steps of 1 mm.
      set_range <- seq(
        from = as.integer(min(bp_R) / 1.25),
        to = as.integer(max(bp_S) * 1.25),
        by = 1
      )
    }
    # Sample positions instead of values (see the MIC branch for the reason).
    out <- set_range[sample.int(length(set_range), size = size, replace = TRUE)]
    # Redraw implausible diameters: below 6 mm becomes 6-10, above 50 mm
    # becomes 40-50.
    out[out < 6] <- sample(c(6:10), sum(out < 6), replace = TRUE)
    out[out > 50] <- sample(c(40:50), sum(out > 50), replace = TRUE)
    return(as.disk(out))
  }
}