(v1.4.0.9037) random_* functions

2025-08-25 07:42:09 +02:00 · 2020-12-12 23:17:29 +01:00
parent 2edd3339db
commit c8bcecf232
27 changed files with 731 additions and 80 deletions
--- a/R/ab_property.R
+++ b/R/ab_property.R
@@ -260,7 +260,7 @@ ab_validate <- function(x, property, ...) {
  if (!all(x %in% antibiotics[, property])) {
    x <- data.frame(ab = as.ab(x, ...), stringsAsFactors = FALSE) %pm>%
      pm_left_join(antibiotics, by = "ab") %pm>%
-     pm_pull(property)
+      pm_pull(property)
  }
  if (property == "ab") {
    return(set_clean_class(x, new_class = c("ab", "character")))
--- a/R/disk.R
+++ b/R/disk.R
@@ -144,6 +144,30 @@ print.disk <- function(x, ...) {
  print(as.integer(x), quote = FALSE)
 }

+#' @method plot disk
+#' @export
+#' @importFrom graphics barplot axis
+#' @rdname plot
+plot.disk <- function(x,
+                      main = paste("Disk zones values of", deparse(substitute(x))),
+                      ylab = "Frequency",
+                      xlab = "Disk diffusion (mm)",
+                      axes = FALSE,
+                      ...) {
+  # Bar chart of how often each value of `x` occurs; `...` is handed to barplot().
+  # meet_criteria() is asked to check class and length (1) of each plot argument.
+  meet_criteria(main, allow_class = "character", has_length = 1)
+  meet_criteria(ylab, allow_class = "character", has_length = 1)
+  meet_criteria(xlab, allow_class = "character", has_length = 1)
+  meet_criteria(axes, allow_class = "logical", has_length = 1)
+
+  # tabulate once and reuse the counts for both the bars and the axis
+  zone_counts <- table(x)
+  barplot(zone_counts, main = main, xlab = xlab, ylab = ylab, axes = axes, ...)
+  # y axis is drawn separately: one tick per integer count up to the tallest bar
+  axis(2, seq(0, max(zone_counts)))
+}
+
 #' @method [ disk
 #' @export
 #' @noRd
--- a/R/is_new_episode.R
+++ b/R/is_new_episode.R
@@ -26,13 +26,15 @@
 #' Determine (new) episodes for patients
 #' 
 #' This function determines which items in a vector can be considered (the start of) a new episode, based on the parameter `episode_days`. This can be used to determine clinical episodes for any epidemiological analysis.
-#' @inheritSection lifecycle Experimental lifecycle
+#' @inheritSection lifecycle Stable lifecycle
 #' @param x vector of dates (class `Date` or `POSIXt`)
 #' @param episode_days length of the required episode in days, defaults to 365. Every element in the input will return `TRUE` after this number of days has passed since the last included date, independent of calendar years. Please see *Details*.
 #' @param ... arguments passed on to [as.Date()]
 #' @details 
 #' Dates are first sorted from old to new. The oldest date will mark the start of the first episode. After this date, the next date will be marked that is at least `episode_days` days later than the start of the first episode. From that second marked date on, the next date will be marked that is at least `episode_days` days later than the start of the second episode which will be the start of the third episode, and so on. Before the vector is being returned, the original order will be restored.
 #' 
+#' The [first_isolate()] function is a wrapper around the [is_new_episode()] function, but more efficient for data sets containing microorganism codes or names.
+#' 
 #' The `dplyr` package is not required for this function to work, but this function works conveniently inside `dplyr` verbs such as [`filter()`][dplyr::filter()], [`mutate()`][dplyr::mutate()] and [`summarise()`][dplyr::summarise()].
 #' @return a [logical] vector
 #' @export
--- a/R/mic.R
+++ b/R/mic.R
@@ -223,7 +223,7 @@ summary.mic <- function(object, ...) {

 #' @method plot mic
 #' @export
-#' @importFrom graphics barplot axis par
+#' @importFrom graphics barplot axis
 #' @rdname plot
 plot.mic <- function(x,
                     main = paste("MIC values of", deparse(substitute(x))),
@@ -236,13 +236,13 @@ plot.mic <- function(x,
  meet_criteria(xlab, allow_class = "character", has_length = 1)
  meet_criteria(axes, allow_class = "logical", has_length = 1)
  
-  barplot(table(droplevels.factor(x)),
+  barplot(table(as.double(x)),
          ylab = ylab,
          xlab = xlab,
          axes = axes,
          main = main,
          ...)
-  axis(2, seq(0, max(table(droplevels.factor(x)))))
+  axis(2, seq(0, max(table(as.double(x)))))
 }

 #' @method barplot mic
@@ -260,13 +260,13 @@ barplot.mic <- function(height,
  meet_criteria(xlab, allow_class = "character", has_length = 1)
  meet_criteria(axes, allow_class = "logical", has_length = 1)
  
-  barplot(table(droplevels.factor(height)),
+  barplot(table(as.double(height)),
          ylab = ylab,
          xlab = xlab,
          axes = axes,
          main = main,
          ...)
-  axis(2, seq(0, max(table(droplevels.factor(height)))))
+  axis(2, seq(0, max(table(as.double(height)))))
 }

 #' @method [ mic
--- a/R/random.R
+++ b/R/random.R
@@ -0,0 +1,133 @@
+# ==================================================================== #
+# TITLE                                                                #
+# Antimicrobial Resistance (AMR) Analysis for R                        #
+#                                                                      #
+# SOURCE                                                               #
+# https://github.com/msberends/AMR                                     #
+#                                                                      #
+# LICENCE                                                              #
+# (c) 2018-2020 Berends MS, Luz CF et al.                              #
+# Developed at the University of Groningen, the Netherlands, in        #
+# collaboration with non-profit organisations Certe Medical            #
+# Diagnostics & Advice, and University Medical Center Groningen.       # 
+#                                                                      #
+# This R package is free software; you can freely use and distribute   #
+# it for both personal and commercial purposes under the terms of the  #
+# GNU General Public License version 2.0 (GNU GPL-2), as published by  #
+# the Free Software Foundation.                                        #
+# We created this package for both routine data analysis and academic  #
+# research and it was publicly released in the hope that it will be    #
+# useful, but it comes WITHOUT ANY WARRANTY OR LIABILITY.              #
+#                                                                      #
+# Visit our website for the full manual and a complete tutorial about  #
+# how to conduct AMR analysis: https://msberends.github.io/AMR/        #
+# ==================================================================== #
+
+#' Random MIC values/disk zones/RSI generation
+#'
+#' These functions can be used for generating random MIC values, disk diffusion diameters and R/S/I interpretations, for AMR analysis practice.
+#' @inheritSection lifecycle Maturing lifecycle 
+#' @param size desired size of the returned vector
+#' @param mo any character that can be coerced to a valid microorganism code with [as.mo()]
+#' @param ab any character that can be coerced to a valid antimicrobial agent code with [as.ab()]
+#' @param prob_RSI a vector of length 3: the probabilities for R (1st value), S (2nd value) and I (3rd value)
+#' @param ... extension for future versions, not used at the moment
+#' @details The base R function [sample()] is used for generating values.
+#' 
+#' Generated values are based on the latest EUCAST guideline implemented in the [rsi_translation] data set. To create specific generated values per bug or drug, set the `mo` and/or `ab` parameter.
+#' @return class `<mic>` for [random_mic()] (see [as.mic()]), class `<disk>` for [random_disk()] (see [as.disk()]) and class `<rsi>` for [random_rsi()] (see [as.rsi()])
+#' @name random
+#' @rdname random
+#' @export
+#' @inheritSection AMR Read more on our website!
+#' @examples
+#' random_mic(100)
+#' random_disk(100)
+#' random_rsi(100)
+#' 
+#' \donttest{
+#' # make the random generation more realistic by setting a bug and/or drug:
+#' random_mic(100, "Klebsiella pneumoniae")                 # range 0.0625-64
+#' random_mic(100, "Klebsiella pneumoniae", "meropenem")    # range 0.0625-16
+#' random_mic(100, "Streptococcus pneumoniae", "meropenem") # range 0.0625-4
+#' 
+#' random_disk(100, "Klebsiella pneumoniae")                  # range 11-50
+#' random_disk(100, "Klebsiella pneumoniae", "ampicillin")    # range 6-14
+#' random_disk(100, "Streptococcus pneumoniae", "ampicillin") # range 16-22
+#' }
+random_mic <- function(size, mo = NULL, ab = NULL, ...) {
+  # `...` is accepted but not forwarded; the work is delegated to random_exec()
+  random_exec(type = "MIC", size = size, mo = mo, ab = ab)
+}
+
+#' @rdname random
+#' @export
+random_disk <- function(size, mo = NULL, ab = NULL, ...) {
+  # `...` is accepted but not forwarded; the work is delegated to random_exec()
+  random_exec(type = "DISK", size = size, mo = mo, ab = ab)
+}
+
+#' @rdname random
+#' @export
+random_rsi <- function(size, prob_RSI = c(0.33, 0.33, 0.33), ...) {
+  # the order of the pool (R, S, I) matches the order of the weights in `prob_RSI`
+  rsi_pool <- as.rsi(c("R", "S", "I"))
+  sample(rsi_pool, size = size, replace = TRUE, prob = prob_RSI)
+}
+
+# Internal workhorse of random_mic() and random_disk().
+#
+# type:   "MIC" or "DISK", compared with the `method` column of rsi_translation
+# size:   number of values to draw (always with replacement)
+# mo, ab: optional microorganism / antimicrobial input, coerced with as.mo() /
+#         as.ab(); a filter that would leave no rows is ignored with a warning
+#
+# Returns the result of as.mic() for "MIC" and of as.disk() for "DISK". Any
+# other `type` matches neither branch and yields NULL.
+random_exec <- function(type, size, mo = NULL, ab = NULL) {
+  # Draw `size` elements from `pool` by index. Unlike sample(pool, ...), this
+  # never treats a single number n as the range 1:n; for pools longer than one
+  # element it consumes the random stream exactly like sample() does.
+  draw <- function(pool) {
+    pool[sample.int(length(pool), size = size, replace = TRUE)]
+  }
+
+  # breakpoints of the requested method, from the EUCAST guideline that sorts last
+  df <- rsi_translation %pm>%
+    pm_filter(guideline %like% "EUCAST") %pm>%
+    pm_arrange(pm_desc(guideline)) %pm>%
+    subset(guideline == max(guideline) &
+              method == type)
+
+  if (!is.null(mo)) {
+    mo_coerced <- as.mo(mo)
+    # also accept rows defined for the genus, family or order of the input
+    mo_include <- c(mo_coerced,
+                    as.mo(mo_genus(mo_coerced)),
+                    as.mo(mo_family(mo_coerced)),
+                    as.mo(mo_order(mo_coerced)))
+    # NOTE(review): inside subset(), `mo` presumably resolves to the `mo` column
+    # of rsi_translation rather than to the function argument - confirm
+    df_new <- df %pm>%
+      subset(mo %in% mo_include)
+    if (nrow(df_new) > 0) {
+      df <- df_new
+    } else {
+      warning_("No rows found that match mo '", mo, "', ignoring parameter `mo`", call = FALSE)
+    }
+  }
+
+  if (!is.null(ab)) {
+    ab_coerced <- as.ab(ab)
+    # NOTE(review): same scoping assumption as for `mo` above - confirm
+    df_new <- df %pm>%
+      subset(ab %in% ab_coerced)
+    if (nrow(df_new) > 0) {
+      df <- df_new
+    } else {
+      warning_("No rows found that match ab '", ab, "', ignoring parameter `ab`", call = FALSE)
+    }
+  }
+
+  if (type == "MIC") {
+    # all valid MIC levels
+    valid_range <- as.mic(levels(as.mic(1)))
+    set_range_max <- max(df$breakpoint_R)
+    if (log(set_range_max, 2) %% 1 == 0) {
+      # return powers of 2
+      valid_range <- unique(as.double(valid_range))
+      # add one higher MIC level to set_range_max
+      set_range_max <- 2 ^ (log(set_range_max, 2) + 1)
+      set_range <- as.mic(valid_range[log(valid_range, 2) %% 1 == 0 & valid_range <= set_range_max])
+    } else {
+      # no power of 2, return factors of 2 to left and right side
+      valid_mics <- suppressWarnings(as.mic(set_range_max / (2 ^ c(-3:3))))
+      set_range <- valid_mics[!is.na(valid_mics)]
+    }
+    return(as.mic(draw(set_range)))
+  } else if (type == "DISK") {
+    set_range <- seq(from = as.integer(min(df$breakpoint_R)),
+                     to = as.integer(max(df$breakpoint_S)),
+                     by = 1)
+    # when the lowest R and highest S breakpoint coincide, `set_range` is one
+    # number; sample(set_range, ...) would then draw from 1:set_range
+    out <- draw(set_range)
+    # redraw values below 6 from 6-10 and values above 50 from 40-50
+    too_small <- out < 6
+    out[too_small] <- sample(c(6:10), sum(too_small), replace = TRUE)
+    too_large <- out > 50
+    out[too_large] <- sample(c(40:50), sum(too_large), replace = TRUE)
+    return(as.disk(out))
+  }
+}
+