2020-12-12 23:17:29 +01:00
# ==================================================================== #
# TITLE #
2021-02-02 23:57:35 +01:00
# Antimicrobial Resistance (AMR) Data Analysis for R #
2020-12-12 23:17:29 +01:00
# #
# SOURCE #
# https://github.com/msberends/AMR #
# #
# LICENCE #
2020-12-27 00:30:28 +01:00
# (c) 2018-2021 Berends MS, Luz CF et al. #
2020-12-12 23:17:29 +01:00
# Developed at the University of Groningen, the Netherlands, in #
# collaboration with non-profit organisations Certe Medical #
# Diagnostics & Advice, and University Medical Center Groningen. #
# #
# This R package is free software; you can freely use and distribute #
# it for both personal and commercial purposes under the terms of the #
# GNU General Public License version 2.0 (GNU GPL-2), as published by #
# the Free Software Foundation. #
# We created this package for both routine data analysis and academic #
# research and it was publicly released in the hope that it will be #
# useful, but it comes WITHOUT ANY WARRANTY OR LIABILITY. #
# #
# Visit our website for the full manual and a complete tutorial about #
2021-02-02 23:57:35 +01:00
# how to conduct AMR data analysis: https://msberends.github.io/AMR/ #
2020-12-12 23:17:29 +01:00
# ==================================================================== #
2021-01-18 16:57:56 +01:00
#' Random MIC Values/Disk Zones/RSI Generation
2020-12-12 23:17:29 +01:00
#'
2021-02-25 10:33:08 +01:00
#' These functions can be used for generating random MIC values and disk diffusion diameters, for AMR data analysis practice. By providing a microorganism and antimicrobial agent, the generated results will reflect reality as much as possible.
2021-03-07 13:52:39 +01:00
#' @inheritSection lifecycle Stable Lifecycle
2020-12-12 23:17:29 +01:00
#' @param size desired size of the returned vector
#' @param mo any character that can be coerced to a valid microorganism code with [as.mo()]
#' @param ab any character that can be coerced to a valid antimicrobial agent code with [as.ab()]
#' @param prob_RSI a vector of length 3: the probabilities for R (1st value), S (2nd value) and I (3rd value)
2021-04-29 17:16:30 +02:00
#' @param ... ignored, only in place to allow future extensions
2020-12-12 23:17:29 +01:00
#' @details The base R function [sample()] is used for generating values.
#'
2020-12-22 00:51:17 +01:00
#' Generated values are based on the latest EUCAST guideline implemented in the [rsi_translation] data set. To create specific generated values per bug or drug, set the `mo` and/or `ab` argument.
2020-12-12 23:17:29 +01:00
#' @return class `<mic>` for [random_mic()] (see [as.mic()]) and class `<disk>` for [random_disk()] (see [as.disk()])
#' @name random
#' @rdname random
#' @export
2021-01-18 16:57:56 +01:00
#' @inheritSection AMR Read more on Our Website!
2020-12-12 23:17:29 +01:00
#' @examples
#' random_mic(100)
#' random_disk(100)
#' random_rsi(100)
#'
#' \donttest{
#' # make the random generation more realistic by setting a bug and/or drug:
#' random_mic(100, "Klebsiella pneumoniae") # range 0.0625-64
#' random_mic(100, "Klebsiella pneumoniae", "meropenem") # range 0.0625-16
#' random_mic(100, "Streptococcus pneumoniae", "meropenem") # range 0.0625-4
#'
2021-03-05 15:36:39 +01:00
#' random_disk(100, "Klebsiella pneumoniae") # range 8-50
#' random_disk(100, "Klebsiella pneumoniae", "ampicillin") # range 11-17
#' random_disk(100, "Streptococcus pneumoniae", "ampicillin") # range 12-27
2020-12-12 23:17:29 +01:00
#' }
random_mic <- function ( size , mo = NULL , ab = NULL , ... ) {
random_exec ( " MIC" , size = size , mo = mo , ab = ab )
}
#' @rdname random
#' @export
random_disk <- function ( size , mo = NULL , ab = NULL , ... ) {
random_exec ( " DISK" , size = size , mo = mo , ab = ab )
}
#' @rdname random
#' @export
random_rsi <- function ( size , prob_RSI = c ( 0.33 , 0.33 , 0.33 ) , ... ) {
sample ( as.rsi ( c ( " R" , " S" , " I" ) ) , size = size , replace = TRUE , prob = prob_RSI )
}
random_exec <- function ( type , size , mo = NULL , ab = NULL ) {
df <- rsi_translation %pm>%
pm_filter ( guideline %like% " EUCAST" ) %pm>%
pm_arrange ( pm_desc ( guideline ) ) %pm>%
subset ( guideline == max ( guideline ) &
method == type )
if ( ! is.null ( mo ) ) {
mo_coerced <- as.mo ( mo )
mo_include <- c ( mo_coerced ,
as.mo ( mo_genus ( mo_coerced ) ) ,
as.mo ( mo_family ( mo_coerced ) ) ,
as.mo ( mo_order ( mo_coerced ) ) )
df_new <- df %pm>%
subset ( mo %in% mo_include )
if ( nrow ( df_new ) > 0 ) {
df <- df_new
} else {
2020-12-22 00:51:17 +01:00
warning_ ( " No rows found that match mo '" , mo , " ', ignoring argument `mo`" , call = FALSE )
2020-12-12 23:17:29 +01:00
}
}
if ( ! is.null ( ab ) ) {
ab_coerced <- as.ab ( ab )
df_new <- df %pm>%
subset ( ab %in% ab_coerced )
if ( nrow ( df_new ) > 0 ) {
df <- df_new
} else {
2020-12-22 00:51:17 +01:00
warning_ ( " No rows found that match ab '" , ab , " ', ignoring argument `ab`" , call = FALSE )
2020-12-12 23:17:29 +01:00
}
}
if ( type == " MIC" ) {
# all valid MIC levels
valid_range <- as.mic ( levels ( as.mic ( 1 ) ) )
set_range_max <- max ( df $ breakpoint_R )
if ( log ( set_range_max , 2 ) %% 1 == 0 ) {
# return powers of 2
valid_range <- unique ( as.double ( valid_range ) )
2021-02-25 10:33:08 +01:00
# add 1-3 higher MIC levels to set_range_max
set_range_max <- 2 ^ ( log ( set_range_max , 2 ) + sample ( c ( 1 : 3 ) , 1 ) )
2020-12-12 23:17:29 +01:00
set_range <- as.mic ( valid_range [log ( valid_range , 2 ) %% 1 == 0 & valid_range <= set_range_max ] )
} else {
# no power of 2, return factors of 2 to left and right side
valid_mics <- suppressWarnings ( as.mic ( set_range_max / ( 2 ^ c ( -3 : 3 ) ) ) )
set_range <- valid_mics [ ! is.na ( valid_mics ) ]
}
2021-03-07 13:52:39 +01:00
out <- as.mic ( sample ( set_range , size = size , replace = TRUE ) )
# 50% chance that lowest will get <= and highest will get >=
2021-03-07 21:16:45 +01:00
if ( stats :: runif ( 1 ) > 0.5 ) {
2021-03-07 13:52:39 +01:00
out [out == min ( out ) ] <- paste0 ( " <=" , out [out == min ( out ) ] )
}
2021-03-07 21:16:45 +01:00
if ( stats :: runif ( 1 ) > 0.5 ) {
2021-03-07 13:52:39 +01:00
out [out == max ( out ) ] <- paste0 ( " >=" , out [out == max ( out ) ] )
}
return ( out )
2020-12-12 23:17:29 +01:00
} else if ( type == " DISK" ) {
2021-02-25 10:33:08 +01:00
set_range <- seq ( from = as.integer ( min ( df $ breakpoint_R ) / 1.25 ) ,
to = as.integer ( max ( df $ breakpoint_S ) * 1.25 ) ,
2020-12-12 23:17:29 +01:00
by = 1 )
out <- sample ( set_range , size = size , replace = TRUE )
out [out < 6 ] <- sample ( c ( 6 : 10 ) , length ( out [out < 6 ] ) , replace = TRUE )
out [out > 50 ] <- sample ( c ( 40 : 50 ) , length ( out [out > 50 ] ) , replace = TRUE )
return ( as.disk ( out ) )
}
}