styled, unit test fix

2026-02-09 13:12:58 +01:00 · 2022-08-28 10:31:50 +02:00
parent 4cb1db4554
commit 4d050aef7c
147 changed files with 10897 additions and 8169 deletions
--- a/R/resistance_predict.R
+++ b/R/resistance_predict.R
@@ -9,7 +9,7 @@
 # (c) 2018-2022 Berends MS, Luz CF et al.                              #
 # Developed at the University of Groningen, the Netherlands, in        #
 # collaboration with non-profit organisations Certe Medical            #
-# Diagnostics & Advice, and University Medical Center Groningen.       # 
+# Diagnostics & Advice, and University Medical Center Groningen.       #
 #                                                                      #
 # This R package is free software; you can freely use and distribute   #
 # it for both personal and commercial purposes under the terms of the  #
@@ -34,7 +34,7 @@
 #' @param year_every unit of sequence between lowest year found in the data and `year_max`
 #' @param minimum minimal number of available isolates per year to include. Years containing fewer observations will be estimated by the model.
 #' @param model the statistical model of choice. This could be a generalised linear regression model with binomial distribution (i.e. using `glm(..., family = binomial)`), assuming that a period of zero resistance was followed by a period of increasing resistance leading slowly to more and more resistance. See *Details* for all valid options.
-#' @param I_as_S a [logical] to indicate whether values `"I"` should be treated as `"S"` (will otherwise be treated as `"R"`). The default, `TRUE`, follows the redefinition by EUCAST about the interpretation of I (increased exposure) in 2019, see section *Interpretation of S, I and R* below. 
+#' @param I_as_S a [logical] to indicate whether values `"I"` should be treated as `"S"` (will otherwise be treated as `"R"`). The default, `TRUE`, follows the redefinition by EUCAST about the interpretation of I (increased exposure) in 2019, see section *Interpretation of S, I and R* below.
 #' @param preserve_measurements a [logical] to indicate whether predictions of years that are actually available in the data should be overwritten by the original data. The standard errors of those years will be `NA`.
 #' @param info a [logical] to indicate whether textual analysis should be printed with the name and [summary()] of the statistical model.
 #' @param main title of the plot
@@ -55,19 +55,20 @@
 #' - `observations`, the total number of available observations in that year, i.e. \eqn{S + I + R}
 #' - `observed`, the original observed resistant percentages
 #' - `estimated`, the estimated resistant percentages, calculated by the model
-#'   
+#'
 #' Furthermore, the model itself is available as an attribute: `attributes(x)$model`, see *Examples*.
 #' @seealso The [proportion()] functions to calculate resistance
-#' 
+#'
 #' Models: [lm()] [glm()]
 #' @rdname resistance_predict
 #' @export
 #' @importFrom stats predict glm lm
 #' @examples
-#' x <- resistance_predict(example_isolates, 
-#'                         col_ab = "AMX",
-#'                         year_min = 2010,
-#'                         model = "binomial")
+#' x <- resistance_predict(example_isolates,
+#'   col_ab = "AMX",
+#'   year_min = 2010,
+#'   model = "binomial"
+#' )
 #' plot(x)
 #' \donttest{
 #' if (require("ggplot2")) {
@@ -89,14 +90,15 @@
 #'
 #' # create nice plots with ggplot2 yourself
 #' if (require("dplyr") && require("ggplot2")) {
-#' 
 #'   data <- example_isolates %>%
 #'     filter(mo == as.mo("E. coli")) %>%
-#'     resistance_predict(col_ab = "AMX",
-#'                        col_date = "date",
-#'                        model = "binomial",
-#'                        info = FALSE,
-#'                        minimum = 15)
+#'     resistance_predict(
+#'       col_ab = "AMX",
+#'       col_date = "date",
+#'       model = "binomial",
+#'       info = FALSE,
+#'       minimum = 15
+#'     )
 #'   head(data)
 #'   autoplot(data)
 #' }
@@ -124,12 +126,12 @@ resistance_predict <- function(x,
  meet_criteria(I_as_S, allow_class = "logical", has_length = 1)
  meet_criteria(preserve_measurements, allow_class = "logical", has_length = 1)
  meet_criteria(info, allow_class = "logical", has_length = 1)
-  
+
  stop_if(is.null(model), 'choose a regression model with the `model` argument, e.g. resistance_predict(..., model = "binomial")')
-  
+
  x.bak <- x
  x <- as.data.frame(x, stringsAsFactors = FALSE)
-  
+
  dots <- unlist(list(...))
  if (length(dots) != 0) {
    # backwards compatibility with old arguments
@@ -141,15 +143,17 @@ resistance_predict <- function(x,
      warning_("in `resistance_predict()`: I_as_R is deprecated - use I_as_S instead.")
    }
  }
-  
+
  # -- date
  if (is.null(col_date)) {
    col_date <- search_type_in_df(x = x, type = "date")
    stop_if(is.null(col_date), "`col_date` must be set")
  }
-  stop_ifnot(col_date %in% colnames(x),
-             "column '", col_date, "' not found")
-  
+  stop_ifnot(
+    col_date %in% colnames(x),
+    "column '", col_date, "' not found"
+  )
+
  year <- function(x) {
    # don't depend on lubridate or so, would be overkill for only this function
    if (all(grepl("^[0-9]{4}$", x))) {
@@ -158,7 +162,7 @@ resistance_predict <- function(x,
      as.integer(format(as.Date(x), "%Y"))
    }
  }
-  
+
  df <- x
  df[, col_ab] <- droplevels(as.rsi(df[, col_ab, drop = TRUE]))
  if (I_as_S == TRUE) {
@@ -169,22 +173,23 @@ resistance_predict <- function(x,
    df[, col_ab] <- gsub("I", "R", df[, col_ab, drop = TRUE])
  }
  df[, col_ab] <- ifelse(is.na(df[, col_ab, drop = TRUE]), 0, df[, col_ab, drop = TRUE])
-  
+
  # remove rows with NAs
  df <- subset(df, !is.na(df[, col_ab, drop = TRUE]))
  df$year <- year(df[, col_date, drop = TRUE])
  df <- as.data.frame(rbind(table(df[, c("year", col_ab), drop = FALSE])),
-                      stringsAsFactors = FALSE)
+    stringsAsFactors = FALSE
+  )
  df$year <- as.integer(rownames(df))
  rownames(df) <- NULL
-  
+
  df <- subset(df, sum(df$R + df$S, na.rm = TRUE) >= minimum)
  # nolint start
  df_matrix <- as.matrix(df[, c("R", "S"), drop = FALSE])
  # nolint end
-  
+
  stop_if(NROW(df) == 0, "there are no observations")
-  
+
  year_lowest <- min(df$year)
  if (is.null(year_min)) {
    year_min <- year_lowest
@@ -194,9 +199,9 @@ resistance_predict <- function(x,
  if (is.null(year_max)) {
    year_max <- year(Sys.Date()) + 10
  }
-  
+
  years <- list(year = seq(from = year_min, to = year_max, by = year_every))
-  
+
  if (model %in% c("binomial", "binom", "logit")) {
    model <- "binomial"
    model_lm <- with(df, glm(df_matrix ~ year, family = binomial))
@@ -205,11 +210,10 @@ resistance_predict <- function(x,
      cat("\n------------------------------------------------------------\n")
      print(summary(model_lm))
    }
-    
+
    predictmodel <- predict(model_lm, newdata = years, type = "response", se.fit = TRUE)
    prediction <- predictmodel$fit
    se <- predictmodel$se.fit
-    
  } else if (model %in% c("loglin", "poisson")) {
    model <- "poisson"
    model_lm <- with(df, glm(R ~ year, family = poisson))
@@ -218,11 +222,10 @@ resistance_predict <- function(x,
      cat("\n--------------------------------------------------------------\n")
      print(summary(model_lm))
    }
-    
+
    predictmodel <- predict(model_lm, newdata = years, type = "response", se.fit = TRUE)
    prediction <- predictmodel$fit
    se <- predictmodel$se.fit
-    
  } else if (model %in% c("lin", "linear")) {
    model <- "linear"
    model_lm <- with(df, lm((R / (R + S)) ~ year))
@@ -231,59 +234,61 @@ resistance_predict <- function(x,
      cat("\n-----------------------\n")
      print(summary(model_lm))
    }
-    
+
    predictmodel <- predict(model_lm, newdata = years, se.fit = TRUE)
    prediction <- predictmodel$fit
    se <- predictmodel$se.fit
-    
  } else {
    stop("no valid model selected. See ?resistance_predict.")
  }
-  
+
  # prepare the output dataframe
-  df_prediction <- data.frame(year = unlist(years),
-                              value = prediction,
-                              se_min = prediction - se,
-                              se_max = prediction + se,
-                              stringsAsFactors = FALSE)
-  
+  df_prediction <- data.frame(
+    year = unlist(years),
+    value = prediction,
+    se_min = prediction - se,
+    se_max = prediction + se,
+    stringsAsFactors = FALSE
+  )
+
  if (model == "poisson") {
    df_prediction$value <- as.integer(format(df_prediction$value, scientific = FALSE))
    df_prediction$se_min <- as.integer(df_prediction$se_min)
    df_prediction$se_max <- as.integer(df_prediction$se_max)
-    
  } else {
    # se_max not above 1
    df_prediction$se_max <- ifelse(df_prediction$se_max > 1, 1, df_prediction$se_max)
  }
  # se_min not below 0
  df_prediction$se_min <- ifelse(df_prediction$se_min < 0, 0, df_prediction$se_min)
-  
-  df_observations <- data.frame(year = df$year,
-                                observations = df$R + df$S,
-                                observed = df$R / (df$R + df$S),
-                                stringsAsFactors = FALSE)
+
+  df_observations <- data.frame(
+    year = df$year,
+    observations = df$R + df$S,
+    observed = df$R / (df$R + df$S),
+    stringsAsFactors = FALSE
+  )
  df_prediction <- df_prediction %pm>%
    pm_left_join(df_observations, by = "year")
  df_prediction$estimated <- df_prediction$value
-  
+
  if (preserve_measurements == TRUE) {
    # replace estimated data by observed data
    df_prediction$value <- ifelse(!is.na(df_prediction$observed), df_prediction$observed, df_prediction$value)
    df_prediction$se_min <- ifelse(!is.na(df_prediction$observed), NA, df_prediction$se_min)
    df_prediction$se_max <- ifelse(!is.na(df_prediction$observed), NA, df_prediction$se_max)
  }
-  
+
  df_prediction$value <- ifelse(df_prediction$value > 1, 1, ifelse(df_prediction$value < 0, 0, df_prediction$value))
  df_prediction <- df_prediction[order(df_prediction$year), , drop = FALSE]
-  
+
  out <- as_original_data_class(df_prediction, class(x.bak))
  structure(out,
-            class = c("resistance_predict", class(out)),
-            I_as_S = I_as_S,
-            model_title = model,
-            model = model_lm,
-            ab = col_ab
+    class = c("resistance_predict", class(out)),
+    I_as_S = I_as_S,
+    model_title = model,
+    model = model_lm,
+    ab = col_ab
  )
 }

@@ -298,40 +303,48 @@ rsi_predict <- resistance_predict
 plot.resistance_predict <- function(x, main = paste("Resistance Prediction of", x_name), ...) {
  x_name <- paste0(ab_name(attributes(x)$ab), " (", attributes(x)$ab, ")")
  meet_criteria(main, allow_class = "character", has_length = 1)
-  
+
  if (attributes(x)$I_as_S == TRUE) {
    ylab <- "%R"
  } else {
    ylab <- "%IR"
  }
-  
-  plot(x = x$year,
-       y = x$value,
-       ylim = c(0, 1),
-       yaxt = "n", # no y labels
-       pch = 19, # closed dots
-       ylab = paste0("Percentage (", ylab, ")"),
-       xlab = "Year",
-       main = main,
-       sub = paste0("(n = ", sum(x$observations, na.rm = TRUE),
-                    ", model: ", attributes(x)$model_title, ")"),
-       cex.sub = 0.75)
-  
-  
+
+  plot(
+    x = x$year,
+    y = x$value,
+    ylim = c(0, 1),
+    yaxt = "n", # no y labels
+    pch = 19, # closed dots
+    ylab = paste0("Percentage (", ylab, ")"),
+    xlab = "Year",
+    main = main,
+    sub = paste0(
+      "(n = ", sum(x$observations, na.rm = TRUE),
+      ", model: ", attributes(x)$model_title, ")"
+    ),
+    cex.sub = 0.75
+  )
+
+
  axis(side = 2, at = seq(0, 1, 0.1), labels = paste0(0:10 * 10, "%"))
-  
+
  # hack for error bars: https://stackoverflow.com/a/22037078/4575331
-  arrows(x0 = x$year,
-         y0 = x$se_min,
-         x1 = x$year,
-         y1 = x$se_max,
-         length = 0.05, angle = 90, code = 3, lwd = 1.5)
-  
+  arrows(
+    x0 = x$year,
+    y0 = x$se_min,
+    x1 = x$year,
+    y1 = x$se_max,
+    length = 0.05, angle = 90, code = 3, lwd = 1.5
+  )
+
  # overlay grey points for prediction
-  points(x = subset(x, is.na(observations))$year,
-         y = subset(x, is.na(observations))$value,
-         pch = 19,
-         col = "grey40")
+  points(
+    x = subset(x, is.na(observations))$year,
+    y = subset(x, is.na(observations))$value,
+    pch = 19,
+    col = "grey40"
+  )
 }

 #' @rdname resistance_predict
@@ -343,27 +356,35 @@ ggplot_rsi_predict <- function(x,
  x_name <- paste0(ab_name(attributes(x)$ab), " (", attributes(x)$ab, ")")
  meet_criteria(main, allow_class = "character", has_length = 1)
  meet_criteria(ribbon, allow_class = "logical", has_length = 1)
-  
+
  stop_ifnot_installed("ggplot2")
  stop_ifnot(inherits(x, "resistance_predict"), "`x` must be a resistance prediction model created with resistance_predict()")
-  
+
  if (attributes(x)$I_as_S == TRUE) {
    ylab <- "%R"
  } else {
    ylab <- "%IR"
  }
-  
-  p <- ggplot2::ggplot(as.data.frame(x, stringsAsFactors = FALSE),
-                       ggplot2::aes(x = year, y = value)) +
-    ggplot2::geom_point(data = subset(x, !is.na(observations)),
-                        size = 2) +
+
+  p <- ggplot2::ggplot(
+    as.data.frame(x, stringsAsFactors = FALSE),
+    ggplot2::aes(x = year, y = value)
+  ) +
+    ggplot2::geom_point(
+      data = subset(x, !is.na(observations)),
+      size = 2
+    ) +
    scale_y_percent(limits = c(0, 1)) +
-    ggplot2::labs(title = main,
-                  y = paste0("Percentage (", ylab, ")"),
-                  x = "Year",
-                  caption = paste0("(n = ", sum(x$observations, na.rm = TRUE),
-                                   ", model: ", attributes(x)$model_title, ")"))
-  
+    ggplot2::labs(
+      title = main,
+      y = paste0("Percentage (", ylab, ")"),
+      x = "Year",
+      caption = paste0(
+        "(n = ", sum(x$observations, na.rm = TRUE),
+        ", model: ", attributes(x)$model_title, ")"
+      )
+    )
+
  if (ribbon == TRUE) {
    p <- p + ggplot2::geom_ribbon(ggplot2::aes(ymin = se_min, ymax = se_max), alpha = 0.25)
  } else {
@@ -371,9 +392,11 @@ ggplot_rsi_predict <- function(x,
  }
  p <- p +
    # overlay grey points for prediction
-    ggplot2::geom_point(data = subset(x, is.na(observations)),
-                        size = 2,
-                        colour = "grey40")
+    ggplot2::geom_point(
+      data = subset(x, is.na(observations)),
+      size = 2,
+      colour = "grey40"
+    )
  p
 }