new SDD and N for as.sir()

2025-07-12 01:02:41 +02:00 · 2024-05-20 15:27:04 +02:00
parent b68f47d985
commit 08a27922a8
28 changed files with 225 additions and 172 deletions
--- a/4
+++ b/4
@ -1,6 +1,6 @@
 Package: AMR
-Version: 2.1.1.9029
-Date: 2024-05-12
+Version: 2.1.1.9030
+Date: 2024-05-20
 Title: Antimicrobial Resistance Data Analysis
 Description: Functions to simplify and standardise antimicrobial resistance (AMR)
  data analysis and to work with microbial and antimicrobial properties by
--- a/NEWS.md
+++ b/NEWS.md
@ -1,4 +1,4 @@
-# AMR 2.1.1.9029
+# AMR 2.1.1.9030

 *(this beta version will eventually become v3.0. We're happy to reach a new major milestone soon, which will be all about the new One Health support!)*

@ -15,6 +15,7 @@ This package now supports not only tools for AMR data analysis in clinical setti
  * The `antibiotics` data set contains all veterinary antibiotics, such as pradofloxacin and enrofloxacin. All WHOCC codes for veterinary use have been added as well.
  * `ab_atc()` now supports ATC codes of veterinary antibiotics (that all start with "Q")
  * `ab_url()` now supports retrieving the WHOCC url of their ATCvet pages
+* `as.sir()` now returns additional factor levels "N" for non-interpretable and "SDD" for susceptible dose-dependent. Users can now set their own criteria (using regular expressions) as to what should be considered S, I, R, SDD, and N.
 * The function group `scale_*_mic()`, namely: `scale_x_mic()`, `scale_y_mic()`, `scale_colour_mic()` and `scale_fill_mic()`. They are advanced ggplot2 extensions to allow easy plotting of MIC values. They allow for manual range definition and plotting missing intermediate log2 levels.
 * Function `limit_mic_range()`, which allows to limit MIC values to a manually set range. This is the powerhouse behind the `scale_*_mic()` functions, but it can be used by users directly to e.g. compare equality in MIC distributions by rescaling them to the same range first.
 * Function `mo_group_members()` to retrieve the member microorganisms. For example, `mo_group_members("Strep group C")` returns a vector of all microorganisms that are in that group.
--- a/R/aa_helper_functions.R
+++ b/R/aa_helper_functions.R
@ -743,6 +743,10 @@ vector_or <- function(v, quotes = TRUE, reverse = FALSE, sort = TRUE, initial_ca
    # class 'sir' should be sorted like this
    v <- c("S", "I", "R")
  }
+  if (identical(v, c("I", "N", "R", "S", "SDD"))) {
+    # class 'sir' should be sorted like this
+    v <- c("S", "SDD", "I", "R", "N")
+  }
  # oxford comma
  if (last_sep %in% c(" or ", " and ") && length(v) > 2) {
    last_sep <- paste0(",", last_sep)
--- a/R/ab.R
+++ b/R/ab.R
@ -140,10 +140,15 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) {
        abnames <- abnames[!abnames %in% c("clavulanic acid", "avibactam", "tazobactam", "vaborbactam", "monobactam")]
      }
      if (length(abnames) > 1) {
-        message_(
-          "More than one result was found for item ", index, ": ",
-          vector_and(abnames, quotes = FALSE)
-        )
+        if (toupper(paste(abnames, collapse = " ")) %in% AMR_env$AB_lookup$generalised_name) {
+          # if the found values combined is a valid AB, return that
+          found <- AMR_env$AB_lookup$ab[match(toupper(paste(abnames, collapse = " ")), AMR_env$AB_lookup$generalised_name)][1]
+        } else {
+          message_(
+            "More than one result was found for item ", index, ": ",
+            vector_and(abnames, quotes = FALSE)
+          )
+        }
      }
    }
    found[1L]
--- a/R/ab_selectors.R
+++ b/R/ab_selectors.R
@ -676,10 +676,10 @@ c.ab_selector <- function(...) {

 all_any_ab_selector <- function(type, ..., na.rm = TRUE) {
  cols_ab <- c(...)
-  result <- cols_ab[toupper(cols_ab) %in% c("S", "I", "R")]
+  result <- cols_ab[toupper(cols_ab) %in% c("S", "SDD", "I", "R", "N")]
  if (length(result) == 0) {
    message_("Filtering ", type, " of columns ", vector_and(font_bold(cols_ab, collapse = NULL), quotes = "'"), ' to contain value "S", "I" or "R"')
-    result <- c("S", "I", "R")
+    result <- c("S", "SDD", "I", "R", "N")
  }
  cols_ab <- cols_ab[!cols_ab %in% result]
  df <- get_current_data(arg_name = NA, call = -3)
@ -788,7 +788,7 @@ any.ab_selector_any_all <- function(..., na.rm = FALSE) {
    }
  }
  # this is `!=`, so turn around the values
-  sir <- c("S", "I", "R")
+  sir <- c("S", "SDD", "I", "R", "N")
  e2 <- sir[sir != e2]
  structure(all_any_ab_selector(type = type, e1, e2),
    class = c("ab_selector_any_all", "logical")
--- a/R/antibiogram.R
+++ b/R/antibiogram.R
@ -348,11 +348,11 @@ antibiogram <- function(x,
      } else {
        # determine whether this new column should contain S, I, R, or NA
        if (isTRUE(combine_SI)) {
-          S_values <- c("S", "I")
+          S_values <- c("S", "SDD", "I")
        } else {
          S_values <- "S"
        }
-        other_values <- setdiff(c("S", "I", "R"), S_values)
+        other_values <- setdiff(c("S", "SDD", "I", "R", "N"), S_values)
        x_transposed <- as.list(as.data.frame(t(x[, abx, drop = FALSE]), stringsAsFactors = FALSE))
        if (isTRUE(only_all_tested)) {
          x[new_colname] <- as.sir(vapply(FUN.VALUE = character(1), x_transposed, function(x) ifelse(anyNA(x), NA_character_, ifelse(any(x %in% S_values), "S", "R")), USE.NAMES = FALSE))
--- a/R/bug_drug_combinations.R
+++ b/R/bug_drug_combinations.R
@ -42,7 +42,7 @@
 #' @details The function [format()] calculates the resistance per bug-drug combination and returns a table ready for reporting/publishing. Use `combine_SI = TRUE` (default) to test R vs. S+I and `combine_SI = FALSE` to test R+I vs. S. This table can also directly be used in R Markdown / Quarto without the need for e.g. [knitr::kable()].
 #' @export
 #' @rdname bug_drug_combinations
-#' @return The function [bug_drug_combinations()] returns a [data.frame] with columns "mo", "ab", "S", "I", "R" and "total".
+#' @return The function [bug_drug_combinations()] returns a [data.frame] with columns "mo", "ab", "S", "SDD", "I", "R", and "total".
 #' @examples
 #' # example_isolates is a data set available in the AMR package.
 #' # run ?example_isolates for more info.
@ -105,6 +105,7 @@ bug_drug_combinations <- function(x,
      mo = character(0),
      ab = character(0),
      S = integer(0),
+      SDD = integer(0),
      I = integer(0),
      R = integer(0),
      total = integer(0),
@ -122,13 +123,14 @ bug_drug_combinations <- function(x,
      # turn and merge everything
      pivot <- lapply(x_mo_filter, function(x) {
        m <- as.matrix(table(x))
-        data.frame(S = m["S", ], I = m["I", ], R = m["R", ], stringsAsFactors = FALSE)
+        data.frame(S = m["S", ], SDD = m["SDD", ], I = m["I", ], R = m["R", ], stringsAsFactors = FALSE)
      })
      merged <- do.call(rbind_AMR, pivot)
      out_group <- data.frame(
        mo = rep(unique_mo[i], NROW(merged)),
        ab = rownames(merged),
        S = merged$S,
+        SDD = merged$SDD,
        I = merged$I,
        R = merged$R,
-        total = merged$S + merged$I + merged$R,
+        total = merged$S + merged$SDD + merged$I + merged$R, # SDD counts towards the tested total, as in format()
@ -203,10 +205,12 @@ format.bug_drug_combinations <- function(x,
      mo = gsub("(.*)%%(.*)", "\\1", names(idx)),
      ab = gsub("(.*)%%(.*)", "\\2", names(idx)),
      S = vapply(FUN.VALUE = double(1), idx, function(i) sum(x$S[i], na.rm = TRUE)),
+      SDD = vapply(FUN.VALUE = double(1), idx, function(i) sum(x$SDD[i], na.rm = TRUE)),
      I = vapply(FUN.VALUE = double(1), idx, function(i) sum(x$I[i], na.rm = TRUE)),
      R = vapply(FUN.VALUE = double(1), idx, function(i) sum(x$R[i], na.rm = TRUE)),
      total = vapply(FUN.VALUE = double(1), idx, function(i) {
        sum(x$S[i], na.rm = TRUE) +
+          sum(x$SDD[i], na.rm = TRUE) +
          sum(x$I[i], na.rm = TRUE) +
          sum(x$R[i], na.rm = TRUE)
      }),
@ -223,7 +227,7 @@ format.bug_drug_combinations <- function(x,
  if (combine_SI == TRUE) {
    x$isolates <- x$R
  } else {
-    x$isolates <- x$R + x$I
+    x$isolates <- x$R + x$I + x$SDD
  }

  give_ab_name <- function(ab, format, language) {
--- a/R/count.R
+++ b/R/count.R
@ -143,66 +143,11 @@ count_susceptible <- function(..., only_all_tested = FALSE) {
  )
 }

-#' @rdname count
-#' @export
-count_R <- function(..., only_all_tested = FALSE) {
-  tryCatch(
-    sir_calc(...,
-      ab_result = "R",
-      only_all_tested = only_all_tested,
-      only_count = TRUE
-    ),
-    error = function(e) stop_(gsub("in sir_calc(): ", "", e$message, fixed = TRUE), call = -5)
-  )
-}
-
-#' @rdname count
-#' @export
-count_IR <- function(..., only_all_tested = FALSE) {
-  if (message_not_thrown_before("count_IR", entire_session = TRUE)) {
-    message_("Using `count_IR()` is discouraged; use `count_resistant()` instead to not consider \"I\" being resistant. This note will be shown once for this session.", as_note = FALSE)
-  }
-  tryCatch(
-    sir_calc(...,
-      ab_result = c("I", "R"),
-      only_all_tested = only_all_tested,
-      only_count = TRUE
-    ),
-    error = function(e) stop_(gsub("in sir_calc(): ", "", e$message, fixed = TRUE), call = -5)
-  )
-}
-
-#' @rdname count
-#' @export
-count_I <- function(..., only_all_tested = FALSE) {
-  tryCatch(
-    sir_calc(...,
-      ab_result = "I",
-      only_all_tested = only_all_tested,
-      only_count = TRUE
-    ),
-    error = function(e) stop_(gsub("in sir_calc(): ", "", e$message, fixed = TRUE), call = -5)
-  )
-}
-
-#' @rdname count
-#' @export
-count_SI <- function(..., only_all_tested = FALSE) {
-  tryCatch(
-    sir_calc(...,
-      ab_result = c("S", "I"),
-      only_all_tested = only_all_tested,
-      only_count = TRUE
-    ),
-    error = function(e) stop_(gsub("in sir_calc(): ", "", e$message, fixed = TRUE), call = -5)
-  )
-}
-
 #' @rdname count
 #' @export
 count_S <- function(..., only_all_tested = FALSE) {
  if (message_not_thrown_before("count_S", entire_session = TRUE)) {
-    message_("Using `count_S()` is discouraged; use `count_susceptible()` instead to also consider \"I\" being susceptible. This note will be shown once for this session.", as_note = FALSE)
+    message_("Using `count_S()` is discouraged; use `count_susceptible()` instead to also consider \"I\" and \"SDD\" being susceptible. This note will be shown once for this session.", as_note = FALSE)
  }
  tryCatch(
    sir_calc(...,
@ -214,12 +159,73 @@ count_S <- function(..., only_all_tested = FALSE) {
  )
 }

+#' @rdname count
+#' @export
+count_SI <- function(..., only_all_tested = FALSE) {
+  if (message_not_thrown_before("count_SI", entire_session = TRUE)) {
+    message_("Note that `count_SI()` will also count dose-dependent susceptibility, 'SDD'. This note will be shown once for this session.", as_note = FALSE)
+  }
+  tryCatch(
+    sir_calc(...,
+             ab_result = c("S", "SDD", "I"),
+             only_all_tested = only_all_tested,
+             only_count = TRUE
+    ),
+    error = function(e) stop_(gsub("in sir_calc(): ", "", e$message, fixed = TRUE), call = -5)
+  )
+}
+
+#' @rdname count
+#' @export
+count_I <- function(..., only_all_tested = FALSE) {
+  if (message_not_thrown_before("count_I", entire_session = TRUE)) {
+    message_("Note that `count_I()` will also count dose-dependent susceptibility, 'SDD'. This note will be shown once for this session.", as_note = FALSE)
+  }
+  tryCatch(
+    sir_calc(...,
+             ab_result = c("I", "SDD"),
+             only_all_tested = only_all_tested,
+             only_count = TRUE
+    ),
+    error = function(e) stop_(gsub("in sir_calc(): ", "", e$message, fixed = TRUE), call = -5)
+  )
+}
+
+#' @rdname count
+#' @export
+count_IR <- function(..., only_all_tested = FALSE) {
+  if (message_not_thrown_before("count_IR", entire_session = TRUE)) {
+    message_("Using `count_IR()` is discouraged; use `count_resistant()` instead to not consider \"I\" and \"SDD\" being resistant. This note will be shown once for this session.", as_note = FALSE)
+  }
+  tryCatch(
+    sir_calc(...,
+             ab_result = c("I", "SDD", "R"),
+             only_all_tested = only_all_tested,
+             only_count = TRUE
+    ),
+    error = function(e) stop_(gsub("in sir_calc(): ", "", e$message, fixed = TRUE), call = -5)
+  )
+}
+
+#' @rdname count
+#' @export
+count_R <- function(..., only_all_tested = FALSE) {
+  tryCatch(
+    sir_calc(...,
+             ab_result = "R",
+             only_all_tested = only_all_tested,
+             only_count = TRUE
+    ),
+    error = function(e) stop_(gsub("in sir_calc(): ", "", e$message, fixed = TRUE), call = -5)
+  )
+}
+
 #' @rdname count
 #' @export
 count_all <- function(..., only_all_tested = FALSE) {
  tryCatch(
    sir_calc(...,
-      ab_result = c("S", "I", "R"),
+      ab_result = c("S", "SDD", "I", "R", "N"),
      only_all_tested = only_all_tested,
      only_count = TRUE
    ),
--- a/R/custom_eucast_rules.R
+++ b/R/custom_eucast_rules.R
@ -181,8 +181,8 @@ custom_eucast_rules <- function(...) {
    result_value <- as.character(result)[[3]]
    result_value[result_value == "NA"] <- NA
    stop_ifnot(
-      result_value %in% c("S", "I", "R", NA),
-      "the resulting value of rule ", i, " must be either \"S\", \"I\", \"R\" or NA"
+      result_value %in% c("S", "SDD", "I", "R", "N", NA),
+      "the resulting value of rule ", i, " must be either \"S\", \"SDD\", \"I\", \"R\", \"N\" or NA"
    )
    result_value <- as.sir(result_value)

--- a/R/data.R
+++ b/R/data.R
@ -298,7 +298,7 @@
 #' ### Download
 #' Like all data sets in this package, this data set is publicly available for download in the following formats: R, MS Excel, Apache Feather, Apache Parquet, SPSS, SAS, and Stata. Please visit [our website for the download links](https://msberends.github.io/AMR/articles/datasets.html). The actual files are of course available on [our GitHub repository](https://github.com/msberends/AMR/tree/main/data-raw). They allow for machine reading EUCAST and CLSI guidelines, which is almost impossible with the MS Excel and PDF files distributed by EUCAST and CLSI, though initiatives have started to overcome these burdens.
 #' 
-#' **NOTE:** this `AMR` package (and the WHONET software as well) contains internal methods to apply the guidelines, which is rather complex. For example, some breakpoints must be applied on certain species groups (which are in case of this package available through the [microorganisms.groups] data set). It is important that this is considered when using the breakpoints for own use.
+#' **NOTE:** this `AMR` package (and the WHONET software as well) contains rather complex internal methods to apply the guidelines. For example, some breakpoints must be applied on certain species groups (which are in case of this package available through the [microorganisms.groups] data set). It is important that this is considered when using the breakpoints for own use.
 #' @seealso [intrinsic_resistant]
 #' @examples
 #' clinical_breakpoints
--- a/R/first_isolate.R
+++ b/R/first_isolate.R
@ -236,7 +236,7 @@ first_isolate <- function(x = NULL,
    FUN.VALUE = logical(1),
    X = x,
    # check only first 10,000 rows
-    FUN = function(x) any(as.character(x[1:10000]) %in% c("S", "I", "R"), na.rm = TRUE),
+    FUN = function(x) any(as.character(x[1:10000]) %in% c("S", "SDD", "I", "R", "N"), na.rm = TRUE),
    USE.NAMES = FALSE
  ))
  if (method == "phenotype-based" && !any_col_contains_sir) {
--- a/R/key_antimicrobials.R
+++ b/R/key_antimicrobials.R
@ -282,6 +282,8 @@ generate_antimcrobials_string <- function(df) {
          as.list(df),
          function(x) {
            x <- toupper(as.character(x))
+            x[x == "SDD"] <- "I"
+            # ignore "N" here, no use for determining first isolates
            x[!x %in% c("S", "I", "R")] <- "."
            paste(x)
          }
@ -312,7 +314,7 @@ antimicrobials_equal <- function(y,
    val <- strsplit(val, "", fixed = TRUE)[[1L]]
    val.int <- rep(NA_real_, length(val))
    val.int[val == "S"] <- 1
-    val.int[val == "I"] <- 2
+    val.int[val %in% c("I", "SDD")] <- 2
    val.int[val == "R"] <- 3
    val.int
  }
--- a/R/mdro.R
+++ b/R/mdro.R
@ -732,7 +732,7 @@ mdro <- function(x = NULL,
          sum(vapply(
            FUN.VALUE = logical(1),
            group_tbl,
-            function(group) any(unlist(x[row, group[!is.na(group)], drop = TRUE]) %in% c("S", "I", "R"))
+            function(group) any(unlist(x[row, group[!is.na(group)], drop = TRUE]) %in% c("S", "SDD", "I", "R"))
          ))
        }
      )
--- a/R/plot.R
+++ b/R/plot.R
@ -363,6 +363,7 @@ autoplot.mic <- function(object,
  if (any(colours_SIR %in% cols_sub$cols)) {
    vals <- c(
      "(S) Susceptible" = colours_SIR[1],
+      "(SDD) Susceptible dose-dependent" = colours_SIR[2],
      "(I) Susceptible, incr. exp." = colours_SIR[2],
      "(I) Intermediate" = colours_SIR[2],
      "(R) Resistant" = colours_SIR[3]
@ -595,6 +596,7 @@ autoplot.disk <- function(object,
  if (any(colours_SIR %in% cols_sub$cols)) {
    vals <- c(
      "(S) Susceptible" = colours_SIR[1],
+      "(SDD) Susceptible dose-dependent" = colours_SIR[2],
      "(I) Susceptible, incr. exp." = colours_SIR[2],
      "(I) Intermediate" = colours_SIR[2],
      "(R) Resistant" = colours_SIR[3]
@ -648,14 +650,21 @ plot.sir <- function(x,
  if (!"S" %in% data$x) {
    data <- rbind_AMR(data, data.frame(x = "S", n = 0, s = 0, stringsAsFactors = FALSE))
  }
+  if (!"SDD" %in% data$x) {
+    data <- rbind_AMR(data, data.frame(x = "SDD", n = 0, s = 0, stringsAsFactors = FALSE))
+  }
  if (!"I" %in% data$x) {
    data <- rbind_AMR(data, data.frame(x = "I", n = 0, s = 0, stringsAsFactors = FALSE))
  }
  if (!"R" %in% data$x) {
    data <- rbind_AMR(data, data.frame(x = "R", n = 0, s = 0, stringsAsFactors = FALSE))
  }
-
-  data$x <- factor(data$x, levels = c("S", "I", "R"), ordered = TRUE)
+  if (!"N" %in% data$x) {
+    data <- rbind_AMR(data, data.frame(x = "N", n = 0, s = 0, stringsAsFactors = FALSE))
+  }
+  # drop empty SDD/I/N rows; intersect() keeps the order of its first argument, so the canonical order goes first
+  data <- data[!(data$n == 0 & data$x %in% c("SDD", "I", "N")), , drop = FALSE]
+  data$x <- factor(data$x, levels = intersect(c("S", "SDD", "I", "R", "N"), unique(data$x)), ordered = TRUE)

  ymax <- pm_if_else(max(data$s) > 95, 105, 100)

@ -704,10 +713,15 @@ barplot.sir <- function(height,
  if (length(colours_SIR) == 1) {
    colours_SIR <- rep(colours_SIR, 3)
  }
+  # add SDD and N to colours
+  colours_SIR <- c(colours_SIR[1:2], colours_SIR[2], colours_SIR[3], "#888888")
  main <- gsub(" +", " ", paste0(main, collapse = " "))

  x <- table(height)
-  x <- x[c(1, 2, 3)]
+  # remove missing I, SDD, and N
+  colours_SIR <- colours_SIR[!(names(x) %in% c("SDD", "I", "N") & x == 0)]
+  x <- x[!(names(x) %in% c("SDD", "I", "N") & x == 0)]
+  # plot it
  barplot(x,
    col = colours_SIR,
    xlab = xlab,
@ -753,8 +767,10 @@ autoplot.sir <- function(object,
    ggplot2::scale_fill_manual(
      values = c(
        "S" = colours_SIR[1],
+        "SDD" = colours_SIR[2],
        "I" = colours_SIR[2],
-        "R" = colours_SIR[3]
+        "R" = colours_SIR[3],
+        "N" = "#888888"
      ),
      limits = force
    ) +
@ -882,8 +898,10 @@ plot_colours_subtitle_guideline <- function(x, mo, ab, guideline, colours_SIR, f
    cols <- character(length = length(sir))
    cols[is.na(sir)] <- "#BEBEBE"
    cols[sir == "S"] <- colours_SIR[1]
+    cols[sir == "SDD"] <- colours_SIR[2]
    cols[sir == "I"] <- colours_SIR[2]
    cols[sir == "R"] <- colours_SIR[3]
+    cols[sir == "N"] <- "#888888"
    sub <- bquote(.(abname) ~ "-" ~ italic(.(moname)) ~ .(guideline_txt))
  } else {
    cols <- "#BEBEBE"
--- a/R/proportion.R
+++ b/R/proportion.R
@ -48,7 +48,7 @@
 #' @details
 #' **Remember that you should filter your data to let it contain only first isolates!** This is needed to exclude duplicates and to reduce selection bias. Use [first_isolate()] to determine them in your data set with one of the four available algorithms.
 #' 
-#' The function [resistance()] is equal to the function [proportion_R()]. The function [susceptibility()] is equal to the function [proportion_SI()].
+#' The function [resistance()] is equal to the function [proportion_R()]. The function [susceptibility()] is equal to the function [proportion_SI()]. Since AMR v3.0, [proportion_SI()] and [proportion_I()] include dose-dependent susceptibility ('SDD').
 #'
 #' Use [sir_confidence_interval()] to calculate the confidence interval, which relies on [binom.test()], i.e., the Clopper-Pearson method. This function returns a vector of length 2 at default for antimicrobial *resistance*. Change the `side` argument to "left"/"min" or "right"/"max" to return a single value, and change the `ab_result` argument to e.g. `c("S", "I")` to test for antimicrobial *susceptibility*, see Examples.
 #'
@ -247,7 +247,7 @@ susceptibility <- function(...,
                           only_all_tested = FALSE) {
  tryCatch(
    sir_calc(...,
-      ab_result = c("S", "I"),
+      ab_result = c("S", "SDD", "I"),
      minimum = minimum,
      as_percent = as_percent,
      only_all_tested = only_all_tested,
@ -267,7 +267,7 @@ sir_confidence_interval <- function(...,
                                    confidence_level = 0.95,
                                    side = "both",
                                    collapse = FALSE) {
-  meet_criteria(ab_result, allow_class = c("character", "sir"), has_length = c(1, 2, 3), is_in = c("S", "I", "R"))
+  meet_criteria(ab_result, allow_class = c("character", "sir"), has_length = c(1:5), is_in = c("S", "SDD", "I", "R", "N"))
  meet_criteria(minimum, allow_class = c("numeric", "integer"), has_length = 1, is_positive_or_zero = TRUE, is_finite = TRUE)
  meet_criteria(as_percent, allow_class = "logical", has_length = 1)
  meet_criteria(only_all_tested, allow_class = "logical", has_length = 1)
@ -285,7 +285,7 @@ sir_confidence_interval <- function(...,
  )
  n <- tryCatch(
    sir_calc(...,
-      ab_result = c("S", "I", "R"),
+      ab_result = c("S", "SDD", "I", "R", "N"),
      only_all_tested = only_all_tested,
      only_count = TRUE
    ),
@ -351,9 +351,12 @@ proportion_IR <- function(...,
                          minimum = 30,
                          as_percent = FALSE,
                          only_all_tested = FALSE) {
+  if (message_not_thrown_before("proportion_IR", entire_session = TRUE)) {
+    message_("Note that `proportion_IR()` will also include dose-dependent susceptibility, 'SDD'. This note will be shown once for this session.", as_note = FALSE)
+  }
  tryCatch(
    sir_calc(...,
-      ab_result = c("I", "R"),
+      ab_result = c("I", "SDD", "R"),
      minimum = minimum,
      as_percent = as_percent,
      only_all_tested = only_all_tested,
@ -369,9 +372,12 @@ proportion_I <- function(...,
                         minimum = 30,
                         as_percent = FALSE,
                         only_all_tested = FALSE) {
+  if (message_not_thrown_before("proportion_I", entire_session = TRUE)) {
+    message_("Note that `proportion_I()` will also include dose-dependent susceptibility, 'SDD'. This note will be shown once for this session.", as_note = FALSE)
+  }
  tryCatch(
    sir_calc(...,
-      ab_result = "I",
+      ab_result = c("I", "SDD"),
      minimum = minimum,
      as_percent = as_percent,
      only_all_tested = only_all_tested,
@ -387,9 +393,12 @@ proportion_SI <- function(...,
                          minimum = 30,
                          as_percent = FALSE,
                          only_all_tested = FALSE) {
+  if (message_not_thrown_before("proportion_SI", entire_session = TRUE)) {
+    message_("Note that `proportion_SI()` will also include dose-dependent susceptibility, 'SDD'. This note will be shown once for this session.", as_note = FALSE)
+  }
  tryCatch(
    sir_calc(...,
-      ab_result = c("S", "I"),
+      ab_result = c("S", "I", "SDD"),
      minimum = minimum,
      as_percent = as_percent,
      only_all_tested = only_all_tested,
--- a/R/sir.R
+++ b/R/sir.R
@ -31,12 +31,12 @@
 #'
 #' @description Clean up existing SIR values, or interpret minimum inhibitory concentration (MIC) values and disk diffusion diameters according to EUCAST or CLSI. [as.sir()] transforms the input to a new class [`sir`], which is an ordered [factor].
 #' 
-#' Currently breakpoints are available:
+#' These breakpoints are currently available:
 #' - For **clinical microbiology** from EUCAST `r min(as.integer(gsub("[^0-9]", "", subset(AMR::clinical_breakpoints, guideline %like% "EUCAST" & type == "human")$guideline)))`-`r max(as.integer(gsub("[^0-9]", "", subset(AMR::clinical_breakpoints, guideline %like% "EUCAST" & type == "human")$guideline)))` and CLSI `r min(as.integer(gsub("[^0-9]", "", subset(AMR::clinical_breakpoints, guideline %like% "CLSI" & type == "human")$guideline)))`-`r max(as.integer(gsub("[^0-9]", "", subset(AMR::clinical_breakpoints, guideline %like% "CLSI" & type == "human")$guideline)))`;
 #' - For **veterinary microbiology** from EUCAST `r min(as.integer(gsub("[^0-9]", "", subset(AMR::clinical_breakpoints, guideline %like% "EUCAST" & type == "animal")$guideline)))`-`r max(as.integer(gsub("[^0-9]", "", subset(AMR::clinical_breakpoints, guideline %like% "EUCAST" & type == "animal")$guideline)))` and CLSI `r min(as.integer(gsub("[^0-9]", "", subset(AMR::clinical_breakpoints, guideline %like% "CLSI" & type == "animal")$guideline)))`-`r max(as.integer(gsub("[^0-9]", "", subset(AMR::clinical_breakpoints, guideline %like% "CLSI" & type == "animal")$guideline)))`;
 #' - ECOFFs (Epidemiological cut-off values) from EUCAST `r min(as.integer(gsub("[^0-9]", "", subset(AMR::clinical_breakpoints, guideline %like% "EUCAST" & type == "ECOFF")$guideline)))`-`r max(as.integer(gsub("[^0-9]", "", subset(AMR::clinical_breakpoints, guideline %like% "EUCAST" & type == "ECOFF")$guideline)))` and CLSI `r min(as.integer(gsub("[^0-9]", "", subset(AMR::clinical_breakpoints, guideline %like% "CLSI" & type == "ECOFF")$guideline)))`-`r max(as.integer(gsub("[^0-9]", "", subset(AMR::clinical_breakpoints, guideline %like% "CLSI" & type == "ECOFF")$guideline)))`.
 #' 
-#' All breakpoints used for interpretation are publicly available in the [clinical_breakpoints] data set.
+#' All breakpoints used for interpretation are available in our [clinical_breakpoints] data set.
 #' @rdname as.sir
 #' @param x vector of values (for class [`mic`]: MIC values in mg/L, for class [`disk`]: a disk diffusion radius in millimetres)
 #' @param mo any (vector of) text that can be coerced to valid microorganism codes with [as.mo()], can be left empty to determine it automatically
@ -60,7 +60,7 @@
 #'
 #' The [as.sir()] function can work in four ways:
 #'
-#' 1. For **cleaning raw / untransformed data**. The data will be cleaned to only contain values S, I and R and will try its best to determine this with some intelligence. For example, mixed values with SIR interpretations and MIC values such as `"<0.25; S"` will be coerced to `"S"`. Combined interpretations for multiple test methods (as seen in laboratory records) such as `"S; S"` will be coerced to `"S"`, but a value like `"S; I"` will return `NA` with a warning that the input is unclear.
+#' 1. For **cleaning raw / untransformed data**. The data will be cleaned to only contain valid values, namely: **S** for susceptible, **I** for intermediate or 'susceptible, increased exposure', **R** for resistant, **N** for non-interpretable, and **SDD** for susceptible dose-dependent. Each of these can be set using a [regular expression][base::regex]. Furthermore, [as.sir()] will try its best to clean with some intelligence. For example, mixed values with SIR interpretations and MIC values such as `"<0.25; S"` will be coerced to `"S"`. Combined interpretations for multiple test methods (as seen in laboratory records) such as `"S; S"` will be coerced to `"S"`, but a value like `"S; I"` will return `NA` with a warning that the input is invalid.
 #'
 #' 2. For **interpreting minimum inhibitory concentration (MIC) values** according to EUCAST or CLSI. You must clean your MIC values first using [as.mic()], that also gives your columns the new data class [`mic`]. Also, be sure to have a column with microorganism names or codes. It will be found automatically, but can be set manually using the `mo` argument.
 #'    * Using `dplyr`, SIR interpretation can be done very easily with either:
@ -120,7 +120,7 @@
 #'
 #' The function [is.sir()] detects if the input contains class `sir`. If the input is a [data.frame], it iterates over all columns and returns a [logical] vector.
 #'
-#' The function [is_sir_eligible()] returns `TRUE` when a columns contains at most 5% invalid antimicrobial interpretations (not S and/or I and/or R), and `FALSE` otherwise. The threshold of 5% can be set with the `threshold` argument. If the input is a [data.frame], it iterates over all columns and returns a [logical] vector.
+#' The function [is_sir_eligible()] returns `TRUE` when a column contains at most 5% invalid antimicrobial interpretations (not S and/or I and/or R and/or N and/or SDD), and `FALSE` otherwise. The threshold of 5% can be set with the `threshold` argument. If the input is a [data.frame], it iterates over all columns and returns a [logical] vector.
 #' @section Interpretation of SIR:
 #' In 2019, the European Committee on Antimicrobial Susceptibility Testing (EUCAST) has decided to change the definitions of susceptibility testing categories S, I, and R as shown below (<https://www.eucast.org/newsiandr>):
 #'
@ -214,7 +214,7 @@
 #'
 #' # For CLEANING existing SIR values ------------------------------------
 #'
-#' as.sir(c("S", "I", "R", "A", "B", "C"))
+#' as.sir(c("S", "SDD", "I", "R", "N", "A", "B", "C"))
 #' as.sir("<= 0.002; S") # will return "S"
 #' sir_data <- as.sir(c(rep("S", 474), rep("I", 36), rep("R", 370)))
 #' is.sir(sir_data)
@ -242,13 +242,18 @@ as.sir <- function(x, ...) {
  UseMethod("as.sir")
 }

+as_sir_structure <- function(x) {
+  structure(factor(as.character(unlist(unname(x))),
+                   levels = c("S", "SDD", "I", "R", "N"),
+                   ordered = TRUE),
+            class = c("sir", "ordered", "factor"))
+}
+
 #' @rdname as.sir
 #' @details `NA_sir_` is a missing value of the new `sir` class, analogous to e.g. base \R's [`NA_character_`][base::NA].
 #' @format NULL
 #' @export
-NA_sir_ <- set_clean_class(factor(NA_character_, levels = c("S", "I", "R"), ordered = TRUE),
-  new_class = c("sir", "ordered", "factor")
-)
+NA_sir_ <- as_sir_structure(NA_character_)

 #' @rdname as.sir
 #' @export
@ -286,9 +291,9 @@ is_sir_eligible <- function(x, threshold = 0.05) {
  %in% class(x))) {
    # no transformation needed
    return(FALSE)
-  } else if (all(x %in% c("S", "I", "R", NA)) & !all(is.na(x))) {
+  } else if (all(x %in% c("S", "SDD", "I", "R", "N", NA)) & !all(is.na(x))) {
    return(TRUE)
-  } else if (!any(c("S", "I", "R") %in% x, na.rm = TRUE) & !all(is.na(x))) {
+  } else if (!any(c("S", "SDD", "I", "R", "N") %in% x, na.rm = TRUE) & !all(is.na(x))) {
    return(FALSE)
  } else {
    x <- x[!is.na(x) & !is.null(x) & !x %in% c("", "-", "NULL")]
@ -316,9 +321,11 @@ is_sir_eligible <- function(x, threshold = 0.05) {
  }
 }

+#' @rdname as.sir
 #' @export
+#' @param S,I,R,N,SDD a case-independent [regular expression][base::regex] to translate input to this result. This regular expression will be run *after* all non-letters are removed from the input.
 # extra param: warn (logical, to never throw a warning)
-as.sir.default <- function(x, ...) {
+as.sir.default <- function(x, S = "^(S|U)+$", I = "^(I|H)+$", R = "^(R)+$", N = "^(N|V)+$", SDD = "^(SDD|D)+$", ...) {
  if (inherits(x, "sir")) {
    return(x)
  }
@ -338,11 +345,11 @@ as.sir.default <- function(x, ...) {
      x[x.bak == 2] <- "I"
      x[x.bak == 3] <- "R"
    }
-  } else if (inherits(x.bak, "character") && all(x %in% c("1", "2", "3", "S", "I", "R", NA_character_))) {
+  } else if (inherits(x.bak, "character") && all(x %in% c("1", "2", "3", c("S", "SDD", "I", "R", "N"), NA_character_))) {
    x[x.bak == "1"] <- "S"
    x[x.bak == "2"] <- "I"
    x[x.bak == "3"] <- "R"
-  } else if (!all(is.na(x)) && !identical(levels(x), c("S", "I", "R")) && !all(x %in% c("S", "I", "R", NA))) {
+  } else if (!all(is.na(x)) && !identical(levels(x), c("S", "SDD", "I", "R", "N")) && !all(x %in% c("S", "SDD", "I", "R", "N", NA))) {
    if (all(x %unlike% "(S|I|R)", na.rm = TRUE)) {
      # check if they are actually MICs or disks
      if (all_valid_mics(x)) {
@ -379,23 +386,14 @@ as.sir.default <- function(x, ...) {
    x[x %like% "([^a-z]|^)res(is(tant)?)?"] <- "R"
    x[x %like% "([^a-z]|^)sus(cep(tible)?)?"] <- "S"
    x[x %like% "([^a-z]|^)int(er(mediate)?)?|incr.*exp"] <- "I"
-    # remove other invalid characters
-    # set to capitals
-    x <- toupper(x)
    x <- gsub("[^A-Z]+", "", x, perl = TRUE)
-    # CLSI uses SDD for "susceptible dose-dependent"
-    x <- gsub("SDD", "I", x, fixed = TRUE)
-    # some labs now report "H" instead of "I" to not interfere with EUCAST prior to 2019
-    x <- gsub("H", "I", x, fixed = TRUE)
-    # MIPS uses D for Dose-dependent (which is I, but it will throw a note)
-    x <- gsub("D", "I", x, fixed = TRUE)
-    # MIPS uses U for "susceptible urine"
-    x <- gsub("U", "S", x, fixed = TRUE)
-    # in cases of "S;S" keep S, but in case of "S;I" make it NA
-    x <- gsub("^S+$", "S", x)
-    x <- gsub("^I+$", "I", x)
-    x <- gsub("^R+$", "R", x)
-    x[!x %in% c("S", "I", "R")] <- NA_character_
+    # apply regexes set by user
+    x[x %like% S] <- "S"
+    x[x %like% I] <- "I"
+    x[x %like% R] <- "R"
+    x[x %like% N] <- "N"
+    x[x %like% SDD] <- "SDD"
+    x[!x %in% c("S", "SDD", "I", "R", "N")] <- NA_character_
    na_after <- length(x[is.na(x) | x == ""])

    if (!isFALSE(list(...)$warn)) { # so as.sir(..., warn = FALSE) will never throw a warning
@ -415,24 +413,10 @@ as.sir.default <- function(x, ...) {
          call = FALSE
        )
      }
-      if (any(toupper(x.bak[!is.na(x.bak)]) == "U") && message_not_thrown_before("as.sir", "U")) {
-        warning_("in `as.sir()`: 'U' was interpreted as 'S', following some laboratory systems")
-      }
-      if (any(toupper(x.bak[!is.na(x.bak)]) == "D") && message_not_thrown_before("as.sir", "D")) {
-        warning_("in `as.sir()`: 'D' (dose-dependent) was interpreted as 'I', following some laboratory systems")
-      }
-      if (any(toupper(x.bak[!is.na(x.bak)]) == "SDD") && message_not_thrown_before("as.sir", "SDD")) {
-        warning_("in `as.sir()`: 'SDD' (susceptible dose-dependent, coined by CLSI) was interpreted as 'I' to comply with EUCAST's 'I'")
-      }
-      if (any(toupper(x.bak[!is.na(x.bak)]) == "H") && message_not_thrown_before("as.sir", "H")) {
-        warning_("in `as.sir()`: 'H' was interpreted as 'I', following some laboratory systems")
-      }
    }
  }

-  set_clean_class(factor(x, levels = c("S", "I", "R"), ordered = TRUE),
-    new_class = c("sir", "ordered", "factor")
-  )
+  as_sir_structure(x)
 }

 #' @rdname as.sir
@ -693,7 +677,7 @@ as.sir.data.frame <- function(x,
      show_message <- FALSE
      ab <- ab_cols[i]
      ab_coerced <- suppressWarnings(as.ab(ab))
-      if (!all(x[, ab_cols[i], drop = TRUE] %in% c("S", "I", "R", NA), na.rm = TRUE)) {
+      if (!all(x[, ab_cols[i], drop = TRUE] %in% c("S", "SDD", "I", "R", "N", NA), na.rm = TRUE)) {
        show_message <- TRUE
        # only print message if values are not already clean
        message_("Cleaning values in column '", font_bold(ab), "' (",
@ -1245,8 +1229,10 @@ pillar_shaft.sir <- function(x, ...) {
    # colours will anyway not work when has_colour() == FALSE,
    # but then the indentation should also not be applied
    out[is.na(x)] <- font_grey("  NA")
+    out[x == "N"] <- font_grey_bg("  N  ")
    out[x == "S"] <- font_green_bg("  S  ")
    out[x == "I"] <- font_orange_bg("  I  ")
+    out[x == "SDD"] <- font_orange_bg(" SDD ")
    if (is_dark()) {
      out[x == "R"] <- font_red_bg("  R  ")
    } else {
--- a/R/sir_calc.R
+++ b/R/sir_calc.R
@ -41,7 +41,7 @@ sir_calc <- function(...,
                     as_percent = FALSE,
                     only_all_tested = FALSE,
                     only_count = FALSE) {
-  meet_criteria(ab_result, allow_class = c("character", "numeric", "integer"), has_length = c(1, 2, 3))
+  meet_criteria(ab_result, allow_class = c("character", "numeric", "integer"), has_length = c(1:5))
  meet_criteria(minimum, allow_class = c("numeric", "integer"), has_length = 1, is_positive_or_zero = TRUE, is_finite = TRUE)
  meet_criteria(as_percent, allow_class = "logical", has_length = 1)
  meet_criteria(only_all_tested, allow_class = "logical", has_length = 1)
@ -249,7 +249,13 @@ sir_calc_df <- function(type, # "proportion", "count" or "both"
    for (i in seq_len(ncol(data))) {
      if (is.sir(data[, i, drop = TRUE])) {
        data[, i] <- as.character(data[, i, drop = TRUE])
-        data[, i] <- gsub("(I|S)", "SI", data[, i, drop = TRUE])
+        if ("SDD" %in% data[, i, drop = TRUE]) {
+          if (message_not_thrown_before("sir_calc_df", combine_SI, entire_session = TRUE)) {
+            message_("Note that `sir_calc_df()` will also count dose-dependent susceptibility, 'SDD', as 'SI' when `combine_SI = TRUE`. This note will be shown once for this session.", as_note = FALSE)
+          }
+          
+        }
+        data[, i] <- gsub("(I|S|SDD)", "SI", data[, i, drop = TRUE])
      }
    }
  }
@ -272,9 +278,9 @@ sir_calc_df <- function(type, # "proportion", "count" or "both"
    for (i in seq_len(ncol(.data))) {
      values <- .data[, i, drop = TRUE]
      if (isTRUE(combine_SI)) {
-        values <- factor(values, levels = c("SI", "R"), ordered = TRUE)
+        values <- factor(values, levels = c("SI", "R", "N"), ordered = TRUE)
      } else {
-        values <- factor(values, levels = c("S", "I", "R"), ordered = TRUE)
+        values <- factor(values, levels = c("S", "SDD", "I", "R", "N"), ordered = TRUE)
      }
      col_results <- as.data.frame(as.matrix(table(values)), stringsAsFactors = FALSE)
      col_results$interpretation <- rownames(col_results)
@ -351,7 +357,7 @@ sir_calc_df <- function(type, # "proportion", "count" or "both"
  } else {
    # don't use as.sir() here, as it would add the class 'sir' and we would like
    # the same data structure as output, regardless of input
-    out$interpretation <- factor(out$interpretation, levels = c("S", "I", "R"), ordered = TRUE)
+    out$interpretation <- factor(out$interpretation, levels = c("S", "SDD", "I", "R", "N"), ordered = TRUE)
  }

  if (data_has_groups) {
--- a/R/sysdata.rda
+++ b/R/sysdata.rda
--- a/README.md
+++ b/README.md
@ -4,9 +4,7 @@

 ----

-This work was published in the Journal of Statistical Software (Volume 104(3); [DOI 10.18637/jss.v104.i03](https://doi.org/10.18637/jss.v104.i03)) and formed the basis of two PhD theses ([DOI 10.33612/diss.177417131](https://doi.org/10.33612/diss.177417131) and [DOI 10.33612/diss.192486375](https://doi.org/10.33612/diss.192486375)).
-
-`AMR` is a free, open-source and independent R package to simplify the analysis and prediction of Antimicrobial Resistance (AMR) and to work with microbial and antimicrobial data and properties, by using evidence-based methods. Our aim is to provide a standard for clean and reproducible antimicrobial resistance data analysis, that can therefore empower epidemiological analyses to continuously enable surveillance and treatment evaluation in any setting. It is currently being used in over 175 countries.
+`AMR` is a free, open-source and independent R package to simplify the analysis and prediction of Antimicrobial Resistance (AMR) and to work with microbial and antimicrobial data and properties, by using evidence-based methods. Our aim is to provide a standard for clean and reproducible antimicrobial resistance data analysis, that can therefore empower epidemiological analyses to continuously enable surveillance and treatment evaluation in any setting. It is currently being used in over 175 countries. This work was published in the Journal of Statistical Software (2022, Volume 104(3); [DOI 10.18637/jss.v104.i03](https://doi.org/10.18637/jss.v104.i03)) and formed the basis of two PhD theses ([DOI 10.33612/diss.177417131](https://doi.org/10.33612/diss.177417131) and [DOI 10.33612/diss.192486375](https://doi.org/10.33612/diss.192486375)).
 
 After installing this package, R knows ~52,000 distinct microbial species and all ~600 antibiotic, antimycotic, and antiviral drugs by name and code (including ATC, WHONET/EARS-Net, PubChem, LOINC and SNOMED CT), and knows all about valid SIR and MIC values. It supports any data format, including WHONET/EARS-Net data. Antimicrobial names and group names are available in English, Chinese, Danish, Dutch, French, German, Greek, Italian, Japanese, Polish, Portuguese, Russian, Spanish, Swedish, Turkish, and Ukrainian.

--- a/index.md
+++ b/index.md
@ -5,7 +5,7 @@
 * Provides the **full microbiological taxonomy** and data on **all antimicrobial drugs**
 * Applies all recent **CLSI and EUCAST clinical and veterinary breakpoints** for  MICs and disk zones
 * Corrects for duplicate isolates, **calculates and predicts AMR** per antibiotic class
-* Integrates with **WHONET**, ATC, **EARS-Net**, PubChem, **LOINC** and **SNOMED CT**
+* Integrates with **WHONET**, ATC, **EARS-Net**, PubChem, **LOINC**, and **SNOMED CT**
 * Works on Windows, macOS and Linux with **all versions of R** since R-3.0 and is completely **dependency-free**, highly suitable for places with **limited resources**

 <div style="display: flex; font-size: 0.8em;">
--- a/inst/tinytest/test-sir.R
+++ b/inst/tinytest/test-sir.R
@ -45,14 +45,14 @@ expect_inherits(x[[1]], "sir")
 expect_inherits(c(x[1], x[9]), "sir")
 expect_inherits(unique(x[1], x[9]), "sir")
 pdf(NULL) # prevent Rplots.pdf being created
-expect_silent(barplot(as.sir(c("S", "I", "R"))))
-expect_silent(plot(as.sir(c("S", "I", "R"))))
+expect_silent(barplot(as.sir(c("S", "SDD", "I", "R", "N"))))
+expect_silent(plot(as.sir(c("S", "SDD", "I", "R", "N"))))
 if (AMR:::pkg_is_available("ggplot2")) {
-  expect_inherits(ggplot2::autoplot(as.sir(c("S", "I", "R"))), "gg")
+  expect_inherits(ggplot2::autoplot(as.sir(c("S", "SDD", "I", "R", "N"))), "gg")
 }
-expect_stdout(print(as.sir(c("S", "I", "R"))))
-expect_equal(as.character(as.sir(c(1:3))), c("S", "I", "R"))
-expect_equal(as.character(as.sir(c(1:3))), c("S", "I", "R"))
+expect_stdout(print(as.sir(c("S", "SDD", "I", "R", "N"))))
+# as.sir() maps the codes 1, 2 and 3 to "S", "I" and "R", so three inputs
+# must yield exactly these three values
+expect_equal(as.character(as.sir(c(1:3))), c("S", "I", "R"))
+expect_equal(as.character(as.sir(c(1:3))), c("S", "I", "R"))
 expect_equal(suppressWarnings(as.logical(as.sir("INVALID VALUE"))), NA)
 expect_equal(
  summary(as.sir(c("S", "R"))),
@ -148,7 +148,7 @@ expect_equal(suppressMessages(
      guideline = "CLSI 2019"
    )
  )),
-  c("S", "S", "I", "R", "R")
+  c("S", c("S", "SDD", "I", "R", "N"), "R")
 )

 expect_true(is.data.frame(sir_interpretation_history(clean = FALSE)))
@ -250,7 +250,7 @@ expect_inherits(
 expect_inherits(
  suppressWarnings(as.sir(data.frame(
    mo = "Escherichia coli",
-    amoxi = c("S", "I", "R", "invalid")
+    amoxi = c("S", "SDD", "I", "R", "N", "invalid")
  ))$amoxi),
  "sir"
 )
--- a/man/as.sir.Rd
+++ b/man/as.sir.Rd
@ -7,6 +7,7 @@
 \alias{NA_sir_}
 \alias{is.sir}
 \alias{is_sir_eligible}
+\alias{as.sir.default}
 \alias{as.sir.mic}
 \alias{as.sir.disk}
 \alias{as.sir.data.frame}
@ -30,6 +31,16 @@ is.sir(x)

 is_sir_eligible(x, threshold = 0.05)

+\method{as.sir}{default}(
+  x,
+  S = "^(S|U)+$",
+  I = "^(I|H)+$",
+  R = "^(R)+$",
+  N = "^(N|V)+$",
+  SDD = "^(SDD|D)+$",
+  ...
+)
+
 \method{as.sir}{mic}(
  x,
  mo = NULL,
@ -85,6 +96,8 @@ sir_interpretation_history(clean = FALSE)

 \item{threshold}{maximum fraction of invalid antimicrobial interpretations of \code{x}, see \emph{Examples}}

+\item{S, I, R, N, SDD}{a case-independent \link[base:regex]{regular expression} to translate input to this result. This regular expression will be run \emph{after} all non-letters are removed from the input.}
+
 \item{mo}{any (vector of) text that can be coerced to valid microorganism codes with \code{\link[=as.mo]{as.mo()}}, can be left empty to determine it automatically}

 \item{ab}{any (vector of) text that can be coerced to a valid antimicrobial drug code with \code{\link[=as.ab]{as.ab()}}}
@ -117,14 +130,14 @@ Ordered \link{factor} with new class \code{sir}
 \description{
 Clean up existing SIR values, or interpret minimum inhibitory concentration (MIC) values and disk diffusion diameters according to EUCAST or CLSI. \code{\link[=as.sir]{as.sir()}} transforms the input to a new class \code{\link{sir}}, which is an ordered \link{factor}.

-Currently breakpoints are available:
+These breakpoints are currently available:
 \itemize{
 \item For \strong{clinical microbiology} from EUCAST 2011-2023 and CLSI 2011-2023;
 \item For \strong{veterinary microbiology} from EUCAST 2021-2023 and CLSI 2019-2023;
 \item ECOFFs (Epidemiological cut-off values) from EUCAST 2020-2023 and CLSI 2022-2023.
 }

-All breakpoints used for interpretation are publicly available in the \link{clinical_breakpoints} data set.
+All breakpoints used for interpretation are available in our \link{clinical_breakpoints} data set.
 }
 \details{
 \emph{Note: The clinical breakpoints in this package were validated through, and imported from, \href{https://whonet.org}{WHONET}. The public use of this \code{AMR} package has been endorsed by both CLSI and EUCAST. See \link{clinical_breakpoints} for more information.}
@ -132,7 +145,7 @@ All breakpoints used for interpretation are publicly available in the \link{clin

 The \code{\link[=as.sir]{as.sir()}} function can work in four ways:
 \enumerate{
-\item For \strong{cleaning raw / untransformed data}. The data will be cleaned to only contain values S, I and R and will try its best to determine this with some intelligence. For example, mixed values with SIR interpretations and MIC values such as \code{"<0.25; S"} will be coerced to \code{"S"}. Combined interpretations for multiple test methods (as seen in laboratory records) such as \code{"S; S"} will be coerced to \code{"S"}, but a value like \code{"S; I"} will return \code{NA} with a warning that the input is unclear.
+\item For \strong{cleaning raw / untransformed data}. The data will be cleaned to only contain valid values, namely: \strong{S} for susceptible, \strong{I} for intermediate or 'susceptible, increased exposure', \strong{R} for resistant, \strong{N} for non-interpretable, and \strong{SDD} for susceptible dose-dependent. Each of these can be set using a \link[base:regex]{regular expression}. Furthermore, \code{\link[=as.sir]{as.sir()}} will try its best to clean with some intelligence. For example, mixed values with SIR interpretations and MIC values such as \code{"<0.25; S"} will be coerced to \code{"S"}. Combined interpretations for multiple test methods (as seen in laboratory records) such as \code{"S; S"} will be coerced to \code{"S"}, but a value like \code{"S; I"} will return \code{NA} with a warning that the input is invalid.
 \item For \strong{interpreting minimum inhibitory concentration (MIC) values} according to EUCAST or CLSI. You must clean your MIC values first using \code{\link[=as.mic]{as.mic()}}, that also gives your columns the new data class \code{\link{mic}}. Also, be sure to have a column with microorganism names or codes. It will be found automatically, but can be set manually using the \code{mo} argument.
 \itemize{
 \item Using \code{dplyr}, SIR interpretation can be done very easily with either:
@ -198,7 +211,7 @@ The repository of this package \href{https://github.com/msberends/AMR/blob/main/

 The function \code{\link[=is.sir]{is.sir()}} detects if the input contains class \code{sir}. If the input is a \link{data.frame}, it iterates over all columns and returns a \link{logical} vector.

-The function \code{\link[=is_sir_eligible]{is_sir_eligible()}} returns \code{TRUE} when a columns contains at most 5\% invalid antimicrobial interpretations (not S and/or I and/or R), and \code{FALSE} otherwise. The threshold of 5\% can be set with the \code{threshold} argument. If the input is a \link{data.frame}, it iterates over all columns and returns a \link{logical} vector.
+The function \code{\link[=is_sir_eligible]{is_sir_eligible()}} returns \code{TRUE} when a column contains at most 5\% invalid antimicrobial interpretations (not S and/or I and/or R and/or N and/or SDD), and \code{FALSE} otherwise. The threshold of 5\% can be set with the \code{threshold} argument. If the input is a \link{data.frame}, it iterates over all columns and returns a \link{logical} vector.
 }

 \code{NA_sir_} is a missing value of the new \code{sir} class, analogous to e.g. base \R's \code{\link[base:NA]{NA_character_}}.
@ -294,7 +307,7 @@ if (require("dplyr")) {

 # For CLEANING existing SIR values ------------------------------------

-as.sir(c("S", "I", "R", "A", "B", "C"))
+as.sir(c("S", "SDD", "I", "R", "N", "A", "B", "C"))
 as.sir("<= 0.002; S") # will return "S"
 sir_data <- as.sir(c(rep("S", 474), rep("I", 36), rep("R", 370)))
 is.sir(sir_data)
--- a/man/bug_drug_combinations.Rd
+++ b/man/bug_drug_combinations.Rd
@ -49,7 +49,7 @@ bug_drug_combinations(x, col_mo = NULL, FUN = mo_shortname, ...)
    decimal point.}
 }
 \value{
-The function \code{\link[=bug_drug_combinations]{bug_drug_combinations()}} returns a \link{data.frame} with columns "mo", "ab", "S", "I", "R" and "total".
+The function \code{\link[=bug_drug_combinations]{bug_drug_combinations()}} returns a \link{data.frame} with columns "mo", "ab", "S", "SDD", "I", "R", and "total".
 }
 \description{
 Determine antimicrobial resistance (AMR) of all bug-drug combinations in your data set where at least 30 (default) isolates are available per species. Use \code{\link[=format]{format()}} on the result to prettify it to a publishable/printable format, see \emph{Examples}.
--- a/man/clinical_breakpoints.Rd
+++ b/man/clinical_breakpoints.Rd
@ -56,7 +56,7 @@ The CEO of CLSI and the chairman of EUCAST have endorsed the work and public use

 Like all data sets in this package, this data set is publicly available for download in the following formats: R, MS Excel, Apache Feather, Apache Parquet, SPSS, SAS, and Stata. Please visit \href{https://msberends.github.io/AMR/articles/datasets.html}{our website for the download links}. The actual files are of course available on \href{https://github.com/msberends/AMR/tree/main/data-raw}{our GitHub repository}. They allow for machine reading EUCAST and CLSI guidelines, which is almost impossible with the MS Excel and PDF files distributed by EUCAST and CLSI, though initiatives have started to overcome these burdens.

-\strong{NOTE:} this \code{AMR} package (and the WHONET software as well) contains internal methods to apply the guidelines, which is rather complex. For example, some breakpoints must be applied on certain species groups (which are in case of this package available through the \link{microorganisms.groups} data set). It is important that this is considered when using the breakpoints for own use.
+\strong{NOTE:} this \code{AMR} package (and the WHONET software as well) contains rather complex internal methods to apply the guidelines. For example, some breakpoints must be applied on certain species groups (which are in case of this package available through the \link{microorganisms.groups} data set). It is important that this is considered when using the breakpoints for own use.
 }
 }
 \examples{
--- a/man/count.Rd
+++ b/man/count.Rd
@ -4,11 +4,11 @@
 \alias{count}
 \alias{count_resistant}
 \alias{count_susceptible}
-\alias{count_R}
-\alias{count_IR}
-\alias{count_I}
-\alias{count_SI}
 \alias{count_S}
+\alias{count_SI}
+\alias{count_I}
+\alias{count_IR}
+\alias{count_R}
 \alias{count_all}
 \alias{n_sir}
 \alias{count_df}
@ -18,15 +18,15 @@ count_resistant(..., only_all_tested = FALSE)

 count_susceptible(..., only_all_tested = FALSE)

-count_R(..., only_all_tested = FALSE)
-
-count_IR(..., only_all_tested = FALSE)
-
-count_I(..., only_all_tested = FALSE)
+count_S(..., only_all_tested = FALSE)

 count_SI(..., only_all_tested = FALSE)

-count_S(..., only_all_tested = FALSE)
+count_I(..., only_all_tested = FALSE)
+
+count_IR(..., only_all_tested = FALSE)
+
+count_R(..., only_all_tested = FALSE)

 count_all(..., only_all_tested = FALSE)

--- a/man/g.test.Rd
+++ b/man/g.test.Rd
@ -45,8 +45,9 @@ A list with class \code{"htest"} containing the following
  \item{residuals}{the Pearson residuals,
    \code{(observed - expected) / sqrt(expected)}.}
  \item{stdres}{standardized residuals,
-    \code{(observed - expected) / sqrt(V)}, where \code{V} is the residual cell variance (Agresti, 2007,
-    section 2.4.5 for the case where \code{x} is a matrix, \code{n * p * (1 - p)} otherwise).}
+    \code{(observed - expected) / sqrt(V)}, where \code{V} is the
+    residual cell variance (Agresti, 2007, section 2.4.5
+    for the case where \code{x} is a matrix, \code{n * p * (1 - p)} otherwise).}
 }
 \description{
 \code{\link[=g.test]{g.test()}} performs chi-squared contingency table tests and goodness-of-fit tests, just like \code{\link[=chisq.test]{chisq.test()}} but is more reliable (1). A \emph{G}-test can be used to see whether the number of observations in each category fits a theoretical expectation (called a \strong{\emph{G}-test of goodness-of-fit}), or to see whether the proportions of one variable are different for different values of the other variable (called a \strong{\emph{G}-test of independence}).
--- a/man/pca.Rd
+++ b/man/pca.Rd
@ -39,7 +39,7 @@ pca(
    standard deviations are less than or equal to \code{tol} times the
    standard deviation of the first component.)  With the default null
    setting, no components are omitted (unless \code{rank.} is specified
-    less than \code{min(dim(x))}.).  Other settings for tol could be
+    less than \code{min(dim(x))}.).  Other settings for \code{tol} could be
    \code{tol = 0} or \code{tol = sqrt(.Machine$double.eps)}, which
    would omit essentially constant components.}

--- a/man/proportion.Rd
+++ b/man/proportion.Rd
@ -99,7 +99,7 @@ These functions can be used to calculate the (co-)resistance or susceptibility o
 \details{
 \strong{Remember that you should filter your data to let it contain only first isolates!} This is needed to exclude duplicates and to reduce selection bias. Use \code{\link[=first_isolate]{first_isolate()}} to determine them in your data set with one of the four available algorithms.

-The function \code{\link[=resistance]{resistance()}} is equal to the function \code{\link[=proportion_R]{proportion_R()}}. The function \code{\link[=susceptibility]{susceptibility()}} is equal to the function \code{\link[=proportion_SI]{proportion_SI()}}.
+The function \code{\link[=resistance]{resistance()}} is equal to the function \code{\link[=proportion_R]{proportion_R()}}. The function \code{\link[=susceptibility]{susceptibility()}} is equal to the function \code{\link[=proportion_SI]{proportion_SI()}}. Since AMR v3.0, \code{\link[=proportion_SI]{proportion_SI()}} and \code{\link[=proportion_I]{proportion_I()}} include dose-dependent susceptibility ('SDD').

 Use \code{\link[=sir_confidence_interval]{sir_confidence_interval()}} to calculate the confidence interval, which relies on \code{\link[=binom.test]{binom.test()}}, i.e., the Clopper-Pearson method. This function returns a vector of length 2 at default for antimicrobial \emph{resistance}. Change the \code{side} argument to "left"/"min" or "right"/"max" to return a single value, and change the \code{ab_result} argument to e.g. \code{c("S", "I")} to test for antimicrobial \emph{susceptibility}, see Examples.