(v0.7.1.9005) new rsi calculations, atc class removal

2025-08-24 23:42:18 +02:00 · 2019-07-01 14:03:15 +02:00
parent 65c6702b21
commit 156d550895
78 changed files with 1169 additions and 911 deletions
--- a/R/count.R
+++ b/R/count.R
@@ -34,6 +34,7 @@
 #' The function \code{count_df} takes any variable from \code{data} that has an \code{"rsi"} class (created with \code{\link{as.rsi}}) and counts the amounts of S, I and R. The resulting \emph{tidy data} (see Source) \code{data.frame} will have three rows (S/I/R) and a column for each variable with class \code{"rsi"}.
 #'
 #' The function \code{rsi_df} works exactly like \code{count_df}, but adds the percentage of S, I and R.
+#' @inheritSection portion Combination therapy
 #' @source Wickham H. \strong{Tidy Data.} The Journal of Statistical Software, vol. 59, 2014. \url{http://vita.had.co.nz/papers/tidy-data.html}
 #' @seealso \code{\link{portion}_*} to calculate microbial resistance and susceptibility.
 #' @keywords resistance susceptibility rsi antibiotics isolate isolates
@@ -61,8 +62,8 @@
 #' # Since n_rsi counts available isolates, you can
 #' # calculate back to count e.g. non-susceptible isolates.
 #' # This results in the same:
-#' count_IR(septic_patients$AMX)
-#' portion_IR(septic_patients$AMX) * n_rsi(septic_patients$AMX)
+#' count_SI(septic_patients$AMX)
+#' portion_SI(septic_patients$AMX) * n_rsi(septic_patients$AMX)
 #'
 #' library(dplyr)
 #' septic_patients %>%
@@ -76,17 +77,17 @@
 #'
 #' # Count co-resistance between amoxicillin/clav acid and gentamicin,
 #' # so we can see that combination therapy does a lot more than mono therapy.
-#' # Please mind that `portion_S` calculates percentages right away instead.
-#' count_S(septic_patients$AMC)   # S = 1342 (71.4%)
-#' count_all(septic_patients$AMC) # n = 1879
+#' # Please mind that `portion_SI` calculates percentages right away instead.
+#' count_SI(septic_patients$AMC)  # 1433
+#' count_all(septic_patients$AMC) # 1879
 #'
-#' count_S(septic_patients$GEN)   # S = 1372 (74.0%)
-#' count_all(septic_patients$GEN) # n = 1855
+#' count_SI(septic_patients$GEN)  # 1399
+#' count_all(septic_patients$GEN) # 1855
 #'
 #' with(septic_patients,
-#'      count_S(AMC, GEN))         # S = 1660 (92.3%)
-#' with(septic_patients,           # n = 1798
-#'      n_rsi(AMC, GEN))
+#'      count_SI(AMC, GEN))        # 1764
+#' with(septic_patients,
+#'      n_rsi(AMC, GEN))           # 1936
 #'
 #' # Get portions S/I/R immediately of all rsi columns
 #' septic_patients %>%
@@ -99,71 +100,56 @@
 #'   group_by(hospital_id) %>%
 #'   count_df(translate = FALSE)
 #'
-count_R <- function(..., also_single_tested = FALSE) {
+count_R <- function(..., only_all_tested = FALSE) {
  rsi_calc(...,
-           type = "R",
-           include_I = FALSE,
-           minimum = 0,
-           as_percent = FALSE,
-           also_single_tested = also_single_tested,
+           ab_result = "R",
+           only_all_tested = only_all_tested,
           only_count = TRUE)
 }

 #' @rdname count
 #' @export
-count_IR <- function(..., also_single_tested = FALSE) {
+count_IR <- function(..., only_all_tested = FALSE) {
  rsi_calc(...,
-           type = "R",
-           include_I = TRUE,
-           minimum = 0,
-           as_percent = FALSE,
-           also_single_tested = also_single_tested,
+           ab_result = c("I", "R"),
+           only_all_tested = only_all_tested,
           only_count = TRUE)
 }

 #' @rdname count
 #' @export
-count_I <- function(..., also_single_tested = FALSE) {
+count_I <- function(..., only_all_tested = FALSE) {
  rsi_calc(...,
-           type = "I",
-           include_I = FALSE,
-           minimum = 0,
-           as_percent = FALSE,
-           also_single_tested = also_single_tested,
+           ab_result = "I",
+           only_all_tested = only_all_tested,
           only_count = TRUE)
 }

 #' @rdname count
 #' @export
-count_SI <- function(..., also_single_tested = FALSE) {
+count_SI <- function(..., only_all_tested = FALSE) {
  rsi_calc(...,
-           type = "S",
-           include_I = TRUE,
-           minimum = 0,
-           as_percent = FALSE,
-           also_single_tested = also_single_tested,
+           ab_result = c("S", "I"),
+           only_all_tested = only_all_tested,
           only_count = TRUE)
 }

 #' @rdname count
 #' @export
-count_S <- function(..., also_single_tested = FALSE) {
+count_S <- function(..., only_all_tested = FALSE) {
  rsi_calc(...,
-           type = "S",
-           include_I = FALSE,
-           minimum = 0,
-           as_percent = FALSE,
-           also_single_tested = also_single_tested,
+           ab_result = "S",
+           only_all_tested = only_all_tested,
           only_count = TRUE)
 }

 #' @rdname count
 #' @export
-count_all <- function(..., also_single_tested = FALSE) {
-  res_SI <- count_SI(..., also_single_tested = also_single_tested)
-  # only print warnings once, if needed
-  res_R <- suppressWarnings(count_R(..., also_single_tested = also_single_tested))
-  res_SI + res_R
+count_all <- function(..., only_all_tested = FALSE) {
+  rsi_calc(...,
+           ab_result = c("S", "I", "R"),
+           only_all_tested = only_all_tested,
+           only_count = TRUE)
 }

 #' @rdname count
--- a/R/mo.R
+++ b/R/mo.R
@@ -87,12 +87,9 @@
 #' \strong{Uncertain results} \cr
 #' The algorithm can additionally use three different levels of uncertainty to guess valid results. The default is \code{allow_uncertain = TRUE}, which is equal to uncertainty level 2. Using \code{allow_uncertain = FALSE} will skip all of these additional rules:
 #' \itemize{
-#'   \item{(uncertainty level 1): It tries to look for only matching genera}
-#'   \item{(uncertainty level 1): It tries to look for previously accepted (but now invalid) taxonomic names}
-#'   \item{(uncertainty level 2): It strips off values between brackets and the brackets itself, and re-evaluates the input with all previous rules}
-#'   \item{(uncertainty level 2): It strips off words from the end one by one and re-evaluates the input with all previous rules}
-#'   \item{(uncertainty level 3): It strips off words from the start one by one and re-evaluates the input with all previous rules}
-#'   \item{(uncertainty level 3): It tries any part of the name}
+#'   \item{(uncertainty level 1): It tries to look for only matching genera, previously accepted (but now invalid) taxonomic names and misspelled input}
+#'   \item{(uncertainty level 2): It removes parts between brackets, strips off words from the end one by one and re-evaluates the input with all previous rules}
+#'   \item{(uncertainty level 3): It strips off words from the start one by one and tries any part of the name}
 #' }
 #'
 #' You can also use e.g. \code{as.mo(..., allow_uncertain = 1)} to only allow up to level 1 uncertainty.
@@ -281,7 +278,7 @@ is.mo <- function(x) {

 #' @importFrom dplyr %>% pull left_join n_distinct progress_estimated filter distinct
 #' @importFrom data.table data.table as.data.table setkey
-#' @importFrom crayon magenta red blue silver italic has_color
+#' @importFrom crayon magenta red blue silver italic
 # param property a column name of AMR::microorganisms
 # param initial_search logical - is FALSE when coming from uncertain tries, which uses exec_as.mo internally too
 # param force_mo_history logical - whether found result must be saved with set_mo_history (default FALSE on non-interactive sessions)
@@ -486,7 +483,7 @@ exec_as.mo <- function(x,
    # remove genus as first word
    x <- gsub("^Genus ", "", x)
    # allow characters that resemble others
-    if (uncertainty_level >= 2) {
+    if (initial_search == FALSE) {
      x <- tolower(x)
      x <- gsub("[iy]+", "[iy]+", x)
      x <- gsub("(c|k|q|qu|s|z|x|ks)+", "(c|k|q|qu|s|z|x|ks)+", x)
@@ -768,31 +765,24 @@ exec_as.mo <- function(x,
          }
          next
        }
-        if (grepl("[sS]almonella [A-Z][a-z]+ ?.*", x_backup_without_spp[i], ignore.case = FALSE)) {
+        if (x_backup_without_spp[i] %like% "salmonella [a-z]+ ?.*") {
          if (x_backup_without_spp[i] %like% "Salmonella group") {
            # Salmonella Group A to Z, just return S. species for now
            x[i] <- microorganismsDT[mo == 'B_SLMNL', ..property][[1]][1L]
            if (initial_search == TRUE) {
              set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history)
            }
-            options(mo_renamed = c(getOption("mo_renamed"),
-                                   magenta(paste0("NOTE: ",
-                                                  italic("Salmonella"), " ", trimws(gsub("Salmonella", "", x_backup_without_spp[i])),
-                                                  " was considered ",
-                                                  italic("Salmonella species"),
-                                                  " (B_SLMNL)"))))
-          } else {
+          } else if (grepl("[sS]almonella [A-Z][a-z]+ ?.*", x_backup_without_spp[i], ignore.case = FALSE)) {
            # Salmonella with capital letter species like "Salmonella Goettingen" - they're all S. enterica
            x[i] <- microorganismsDT[mo == 'B_SLMNL_ENT', ..property][[1]][1L]
            if (initial_search == TRUE) {
              set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history)
            }
-            options(mo_renamed = c(getOption("mo_renamed"),
-                                   magenta(paste0("NOTE: ",
-                                                  italic("Salmonella"), " ", trimws(gsub("Salmonella", "", x_backup_without_spp[i])),
-                                                  " was considered a subspecies of ",
-                                                  italic("Salmonella enterica"),
-                                                  " (B_SLMNL_ENT)"))))
+            uncertainties <- rbind(uncertainties,
+                                   data.frame(uncertainty = 1,
+                                              input = x_backup_without_spp[i],
+                                              fullname = microorganismsDT[mo == "B_SLMNL_ENT", fullname][[1]],
+                                              mo = "B_SLMNL_ENT"))
          }
          next
        }
@@ -1041,9 +1031,27 @@ exec_as.mo <- function(x,
            }
            return(x)
          }
+
+          # (2) Try with misspelled input ----
+          # just rerunning with initial_search = FALSE will use the extensive regex part above
+          found <- suppressMessages(suppressWarnings(exec_as.mo(a.x_backup, initial_search = FALSE, allow_uncertain = FALSE)))
+          if (!empty_result(found)) {
+            found_result <- found
+            found <- microorganismsDT[mo == found, ..property][[1]]
+            uncertainties <<- rbind(uncertainties,
+                                    data.frame(uncertainty = 1,
+                                               input = a.x_backup,
+                                               fullname = microorganismsDT[mo == found_result[1L], fullname][[1]],
+                                               mo = found_result[1L]))
+            if (initial_search == TRUE) {
+              set_mo_history(a.x_backup, get_mo_code(found[1L], property), 1, force = force_mo_history)
+            }
+            return(found[1L])
+          }
        }

        if (uncertainty_level >= 2) {
+
          # (3) look for genus only, part of name ----
          if (nchar(g.x_backup_without_spp) > 4 & !b.x_trimmed %like% " ") {
            if (!grepl("^[A-Z][a-z]+", b.x_trimmed, ignore.case = FALSE)) {
@@ -1286,10 +1294,11 @@ exec_as.mo <- function(x,
    post_Becker <- c("argensis", "caeli", "cornubiensis", "edaphicus")
    if (any(x %in% MOs_staph[species %in% post_Becker, ..property][[1]])) {

-      warning("Becker ", italic("et al."), " (2014, 2019) does not contain species named after their publication: ",
+      warning("Becker ", italic("et al."), " (2014, 2019) does not contain these species named after their publication: ",
              italic(paste("S.",
                           sort(mo_species(unique(x[x %in% MOs_staph[species %in% post_Becker, ..property][[1]]]))),
                           collapse = ", ")),
+              ".",
              call. = FALSE,
              immediate. = TRUE)
    }
@@ -1352,15 +1361,7 @@ exec_as.mo <- function(x,
  }

  if (length(mo_renamed()) > 0) {
-    if (has_color()) {
-      notes <- getOption("mo_renamed")
-    } else {
-      notes <- mo_renamed()
-    }
-    notes <- sort(notes)
-    for (i in 1:length(notes)) {
-      base::message(blue(paste("NOTE:", notes[i])))
-    }
+    print(mo_renamed())
  }

  x
@@ -1387,9 +1388,14 @@ was_renamed <- function(name_old, name_new, ref_old = "", ref_new = "", mo = "")
  } else {
    mo <- ""
  }
-  msg <- paste0(italic(name_old), ref_old, " was renamed ", italic(name_new), ref_new, mo)
-  msg <- gsub("et al.", italic("et al."), msg)
-  options(mo_renamed = c(getOption("mo_renamed"), sort(msg)))
+  old_values <- paste0(italic(name_old), ref_old)
+  old_values <- gsub("et al.", italic("et al."), old_values)
+  new_values <- paste0(italic(name_new), ref_new, mo)
+  new_values <- gsub("et al.", italic("et al."), new_values)
+
+  names(new_values) <- old_values
+  total <- c(getOption("mo_renamed"), new_values)
+  options(mo_renamed = total[order(names(total))])
 }

 #' @exportMethod print.mo
@@ -1451,6 +1457,9 @@ mo_failures <- function() {
 #' @importFrom crayon italic
 #' @export
 mo_uncertainties <- function() {
+  if (is.null(getOption("mo_uncertainties"))) {
+    return(NULL)
+  }
  structure(.Data = as.data.frame(getOption("mo_uncertainties"), stringsAsFactors = FALSE),
            class = c("mo_uncertainties", "data.frame"))
 }
@@ -1463,8 +1472,8 @@ print.mo_uncertainties <- function(x, ...) {
  if (NROW(x) == 0) {
    return(NULL)
  }
-  cat(paste0(bold(nrow(x), "unique result(s) guessed with uncertainty:"),
-             "\n(1 = ", green("renamed"),
+  cat(paste0(bold(nr2char(nrow(x)), paste0("unique result", ifelse(nrow(x) > 1, "s", ""), " guessed with uncertainty:")),
+             "\n(1 = ", green("renamed/misspelled"),
             ", 2 = ", yellow("uncertain"),
             ", 3 = ", red("very uncertain"), ")\n"))

@@ -1489,10 +1498,18 @@ print.mo_uncertainties <- function(x, ...) {
 }

 #' @rdname as.mo
+#' @importFrom crayon strip_style
 #' @export
 mo_renamed <- function() {
-  structure(.Data = strip_style(gsub("was renamed", "->", getOption("mo_renamed"), fixed = TRUE)),
-            class = c("mo_renamed", "character"))
+  items <- getOption("mo_renamed")
+  if (is.null(items)) {
+    return(NULL)
+  }
+
+  items <- strip_style(items)
+  names(items) <- strip_style(names(items))
+  structure(.Data = items,
+             class = c("mo_renamed", "character"))
 }

 #' @exportMethod print.mo_renamed
@@ -1500,7 +1517,8 @@ mo_renamed <- function() {
 #' @export
 #' @noRd
 print.mo_renamed <- function(x, ...) {
-  cat(blue(paste(getOption("mo_renamed"), collapse = "\n")))
+  items <- getOption("mo_renamed")
+  base::message(blue(paste("NOTE:", names(items), "was renamed", items, collapse = "\n"), collapse = "\n"))
 }

 nr2char <- function(x) {
@@ -1540,3 +1558,15 @@ translate_allow_uncertain <- function(allow_uncertain) {
  }
  allow_uncertain
 }
+
+get_mo_failures_uncertainties_renamed <- function() {
+  list(failures = getOption("mo_failures"),
+       uncertainties = getOption("mo_uncertainties"),
+       renamed = getOption("mo_renamed"))
+}
+
+load_mo_failures_uncertainties_renamed <- function(metadata) {
+  options("mo_failures" = metadata$failures)
+  options("mo_uncertainties" = metadata$uncertainties)
+  options("mo_renamed" = metadata$renamed)
+}
--- a/R/mo_property.R
+++ b/R/mo_property.R
@@ -148,7 +148,9 @@ mo_fullname <- mo_name
 #' @importFrom dplyr %>% mutate pull
 #' @export
 mo_shortname <- function(x, language = get_locale(), ...) {
-  x.mo <- as.mo(x, ...)
+  x.mo <- AMR::as.mo(x, ...)
+  metadata <- get_mo_failures_uncertainties_renamed()
+
  # get first char of genus and complete species in English
  shortnames <- paste0(substr(mo_genus(x.mo, language = NULL), 1, 1), ". ", mo_species(x.mo, language = NULL))

@@ -158,6 +160,7 @@ mo_shortname <- function(x, language = get_locale(), ...) {
  # exceptions for Streptococci
  shortnames[shortnames %like% "S. group [ABCDFGHK]"] <- paste0("G", gsub("S. group ([ABCDFGHK])", "\\1", shortnames[shortnames %like% "S. group [ABCDFGHK]"]), "S")

+  load_mo_failures_uncertainties_renamed(metadata)
  translate_AMR(shortnames, language = language, only_unknown = FALSE)
 }

@@ -218,8 +221,10 @@ mo_type <- function(x, language = get_locale(), ...) {
 #' @rdname mo_property
 #' @export
 mo_gramstain <- function(x, language = get_locale(), ...) {
-  x.mo <- as.mo(x, ...)
-  x.phylum <- mo_phylum(x.mo, language = NULL)
+  x.mo <- AMR::as.mo(x, ...)
+  metadata <- get_mo_failures_uncertainties_renamed()
+
+  x.phylum <- mo_phylum(x.mo)
  # DETERMINE GRAM STAIN FOR BACTERIA
  # Source: https://itis.gov/servlet/SingleRpt/SingleRpt?search_topic=TSN&search_value=956097
  # It says this:
@@ -232,13 +237,15 @@ mo_gramstain <- function(x, language = get_locale(), ...) {
  #       Phylum  Tenericutes (Murray, 1984)
  x <- NA_character_
  # make all bacteria Gram negative
-  x[mo_kingdom(x.mo, language = NULL) == "Bacteria"] <- "Gram-negative"
+  x[mo_kingdom(x.mo) == "Bacteria"] <- "Gram-negative"
  # overwrite these phyla with Gram positive
  x[x.phylum %in% c("Actinobacteria",
                    "Chloroflexi",
                    "Firmicutes",
                    "Tenericutes")
    | x.mo == "B_GRAMP"] <- "Gram-positive"
+
+  load_mo_failures_uncertainties_renamed(metadata)
  translate_AMR(x, language = language, only_unknown = FALSE)
 }

@@ -276,7 +283,9 @@ mo_rank <- function(x, ...) {
 #' @export
 mo_taxonomy <- function(x, language = get_locale(),  ...) {
  x <- AMR::as.mo(x, ...)
-  base::list(kingdom = AMR::mo_kingdom(x, language = language),
+  metadata <- get_mo_failures_uncertainties_renamed()
+
+  result <- base::list(kingdom = AMR::mo_kingdom(x, language = language),
             phylum = AMR::mo_phylum(x, language = language),
             class = AMR::mo_class(x, language = language),
             order = AMR::mo_order(x, language = language),
@@ -284,12 +293,17 @@ mo_taxonomy <- function(x, language = get_locale(),  ...) {
             genus = AMR::mo_genus(x, language = language),
             species = AMR::mo_species(x, language = language),
             subspecies = AMR::mo_subspecies(x, language = language))
+
+  load_mo_failures_uncertainties_renamed(metadata)
+  result
 }

 #' @rdname mo_property
 #' @export
 mo_synonyms <- function(x, ...) {
-  x <- as.mo(x, ...)
+  x <- AMR::as.mo(x, ...)
+  metadata <- get_mo_failures_uncertainties_renamed()
+
  IDs <- AMR::mo_property(x = x, property = "col_id", language = NULL)
  syns <- lapply(IDs, function(col_id) {
    res <- sort(AMR::microorganisms.old[which(AMR::microorganisms.old$col_id_new == col_id), "fullname"])
@@ -301,16 +315,21 @@ mo_synonyms <- function(x, ...) {
  })
  if (length(syns) > 1) {
    names(syns) <- mo_fullname(x)
-    syns
+    result <- syns
  } else {
-    unlist(syns)
+    result <- unlist(syns)
  }
+
+  load_mo_failures_uncertainties_renamed(metadata)
+  result
 }

 #' @rdname mo_property
 #' @export
 mo_info <- function(x, language = get_locale(),  ...) {
  x <- AMR::as.mo(x, ...)
+  metadata <- get_mo_failures_uncertainties_renamed()
+
  info <- lapply(x, function(y)
    c(mo_taxonomy(y, language = language),
      list(synonyms = mo_synonyms(y),
@@ -318,10 +337,13 @@ mo_info <- function(x, language = get_locale(),  ...) {
           ref = mo_ref(y))))
  if (length(info) > 1) {
    names(info) <- mo_fullname(x)
-    info
+    result <- info
  } else {
-    info[[1L]]
+    result <- info[[1L]]
  }
+
+  load_mo_failures_uncertainties_renamed(metadata)
+  result
 }

 #' @rdname mo_property
@@ -330,6 +352,8 @@ mo_info <- function(x, language = get_locale(),  ...) {
 #' @export
 mo_url <- function(x, open = FALSE, ...) {
  mo <- AMR::as.mo(x = x, ... = ...)
+  metadata <- get_mo_failures_uncertainties_renamed()
+
  df <- data.frame(mo, stringsAsFactors = FALSE) %>%
    left_join(select(AMR::microorganisms, mo, source, species_id), by = "mo") %>%
    mutate(url = case_when(source == "CoL" ~
@@ -347,6 +371,8 @@ mo_url <- function(x, open = FALSE, ...) {
    }
    browseURL(u[1L])
  }
+
+  load_mo_failures_uncertainties_renamed(metadata)
  u
 }

--- a/R/portion.R
+++ b/R/portion.R
@@ -27,36 +27,61 @@
 #' @param ... one or more vectors (or columns) with antibiotic interpretations. They will be transformed internally with \code{\link{as.rsi}} if needed. Use multiple columns to calculate (the lack of) co-resistance: the probability where one of two drugs have a resistant or susceptible result. See Examples.
 #' @param minimum the minimum allowed number of available (tested) isolates. Any isolate count lower than \code{minimum} will return \code{NA} with a warning. The default number of \code{30} isolates is advised by the Clinical and Laboratory Standards Institute (CLSI) as best practice, see Source.
 #' @param as_percent a logical to indicate whether the output must be returned as a hundred fold with \% sign (a character). A value of \code{0.123456} will then be returned as \code{"12.3\%"}.
-#' @param also_single_tested a logical to indicate whether for combination therapies also observations should be included where not all antibiotics were tested, but at least one of the tested antibiotics contains a target interpretation (e.g. S in case of \code{portion_S} and R in case of \code{portion_R}). \strong{This could lead to selection bias.}
+#' @param only_all_tested (for combination therapies, i.e. using more than one variable for \code{...}) a logical to indicate that isolates must be tested for all antibiotics, see section \emph{Combination therapy} below
 #' @param data a \code{data.frame} containing columns with class \code{rsi} (see \code{\link{as.rsi}})
 #' @param translate_ab a column name of the \code{\link{antibiotics}} data set to translate the antibiotic abbreviations to, using \code{\link{ab_property}}
 #' @inheritParams ab_property
 #' @param combine_SI a logical to indicate whether all values of S and I must be merged into one, so the output only consists of S+I vs. R (susceptible vs. resistant). This used to be the parameter \code{combine_IR}, but this now follows the redefinition by EUCAST about the interpretation of I (increased exposure) in 2019, see section 'Interpretation of S, I and R' below. Default is \code{TRUE}.
 #' @param combine_IR a logical to indicate whether all values of I and R must be merged into one, so the output only consists of S vs. I+R (susceptible vs. non-susceptible). This is outdated, see parameter \code{combine_SI}.
 #' @inheritSection as.rsi Interpretation of S, I and R
-#' @details \strong{Remember that you should filter your table to let it contain only first isolates!} Use \code{\link{first_isolate}} to determine them in your data set.
+#' @details \strong{Remember that you should filter your table to let it contain only first isolates!} This is needed to exclude duplicates and to reduce selection bias. Use \code{\link{first_isolate}} to determine them in your data set.
 #'
 #' These functions are not meant to count isolates, but to calculate the portion of resistance/susceptibility. Use the \code{\link[AMR]{count}} functions to count isolates. \emph{Low counts can influence the outcome - these \code{portion} functions may camouflage this, since they only return the portion albeit being dependent on the \code{minimum} parameter.}
 #'
 #' The function \code{portion_df} takes any variable from \code{data} that has an \code{"rsi"} class (created with \code{\link{as.rsi}}) and calculates the portions R, I and S. The resulting \emph{tidy data} (see Source) \code{data.frame} will have three rows (S/I/R) and a column for each group and each variable with class \code{"rsi"}.
 #'
 #' The function \code{rsi_df} works exactly like \code{portion_df}, but adds the number of isolates.
-#' \if{html}{
-#    (created with https://www.latex4technics.com/)
-#'   \cr\cr
-#'   To calculate the probability (\emph{p}) of susceptibility of one antibiotic, we use this formula:
-#'   \out{<div style="text-align: center;">}\figure{combi_therapy_2.png}\out{</div>}
-#'   To calculate the probability (\emph{p}) of susceptibility of more antibiotics (i.e. combination therapy), we need to check whether one of them has a susceptible result (as numerator) and count all cases where all antibiotics were tested (as denominator). \cr
-#'   \cr
-#'   For two antibiotics:
-#'   \out{<div style="text-align: center;">}\figure{combi_therapy_2.png}\out{</div>}
-#'   \cr
-#'   For three antibiotics:
-#'   \out{<div style="text-align: center;">}\figure{combi_therapy_2.png}\out{</div>}
-#'   \cr
-#'   And so on.
+#' @section Combination therapy:
+#' When using more than one variable for \code{...} (= combination therapy), use \code{only_all_tested} to only count isolates that are tested for all antibiotics/variables that you test them for. See this example for two antibiotics, Antibiotic A and Antibiotic B, about how \code{portion_SI} works to calculate the \%SI:
+#'
+#' \preformatted{
+#' -------------------------------------------------------------------------
+#'                         only_all_tested = FALSE   only_all_tested = TRUE
+#' Antibiotic  Antibiotic  -----------------------   -----------------------
+#'     A           B       include as  include as    include as  include as
+#'                         numerator   denominator   numerator   denominator
+#' ----------  ----------  ----------  -----------   ----------  -----------
+#'     S           S           X            X             X            X
+#'     I           S           X            X             X            X
+#'     R           S           X            X             X            X
+#' not tested      S           X            X             -            -
+#'     S           I           X            X             X            X
+#'     I           I           X            X             X            X
+#'     R           I           X            X             X            X
+#' not tested      I           X            X             -            -
+#'     S           R           X            X             X            X
+#'     I           R           X            X             X            X
+#'     R           R           -            X             -            X
+#' not tested      R           -            -             -            -
+#'     S       not tested      X            X             -            -
+#'     I       not tested      X            X             -            -
+#'     R       not tested      -            -             -            -
+#' not tested  not tested      -            -             -            -
+#' -------------------------------------------------------------------------
 #' }
 #'
+#' Please note that for \code{only_all_tested = TRUE} applies that:
+#' \preformatted{
+#'    count_S()  +  count_I()  +  count_R()  == count_all()
+#'   portion_S() + portion_I() + portion_R() == 1
+#' }
+#' and that for \code{only_all_tested = FALSE} applies that:
+#' \preformatted{
+#'    count_S()  +  count_I()  +  count_R()  >= count_all()
+#'   portion_S() + portion_I() + portion_R() >= 1
+#' }
+#'
+#' Using \code{only_all_tested} has no impact when only using one antibiotic as input.
 #' @source \strong{M39 Analysis and Presentation of Cumulative Antimicrobial Susceptibility Test Data, 4th Edition}, 2014, \emph{Clinical and Laboratory Standards Institute (CLSI)}. \url{https://clsi.org/standards/products/microbiology/documents/m39/}.
 #'
 #' Wickham H. \strong{Tidy Data.} The Journal of Statistical Software, vol. 59, 2014. \url{http://vita.had.co.nz/papers/tidy-data.html}
@@ -89,7 +114,7 @@
 #'
 #' septic_patients %>%
 #'   group_by(hospital_id) %>%
-#'   summarise(p = portion_S(CIP),
+#'   summarise(p = portion_SI(CIP),
 #'             n = n_rsi(CIP)) # n_rsi works like n_distinct in dplyr
 #'
 #' septic_patients %>%
@@ -103,32 +128,38 @@
 #'
 #' # Calculate co-resistance between amoxicillin/clav acid and gentamicin,
 #' # so we can see that combination therapy does a lot more than mono therapy:
-#' septic_patients %>% portion_S(AMC)       # S = 71.4%
-#' septic_patients %>% count_all(AMC)       # n = 1879
+#' septic_patients %>% portion_SI(AMC)      # %SI = 76.3%
+#' septic_patients %>% count_all(AMC)       #   n = 1879
 #'
-#' septic_patients %>% portion_S(GEN)       # S = 74.0%
-#' septic_patients %>% count_all(GEN)       # n = 1855
+#' septic_patients %>% portion_SI(GEN)      # %SI = 75.4%
+#' septic_patients %>% count_all(GEN)       #   n = 1855
 #'
-#' septic_patients %>% portion_S(AMC, GEN)  # S = 92.3%
-#' septic_patients %>% count_all(AMC, GEN)  # n = 1798
+#' septic_patients %>% portion_SI(AMC, GEN) # %SI = 94.1%
+#' septic_patients %>% count_all(AMC, GEN)  #   n = 1939
 #'
-#' # Using `also_single_tested` can be useful ...
+#'
+#' # See section 'Combination therapy' on how `only_all_tested` works. Example:
 #' septic_patients %>%
-#'   portion_S(AMC, GEN,
-#'             also_single_tested = TRUE)   # S = 92.6%
-#' # ... but can also lead to selection bias - the data only has 2,000 rows:
+#'   summarise(numerator = count_SI(AMC, GEN),
+#'             denominator = count_all(AMC, GEN),
+#'             portion = portion_SI(AMC, GEN))
+#' #   numerator denominator portion
+#' #        1764        1936  0.9408
 #' septic_patients %>%
-#'   count_all(AMC, GEN,
-#'             also_single_tested = TRUE)   # n = 2555
+#'   summarise(numerator = count_SI(AMC, GEN, only_all_tested = TRUE),
+#'             denominator = count_all(AMC, GEN, only_all_tested = TRUE),
+#'             portion = portion_SI(AMC, GEN, only_all_tested = TRUE))
+#' #   numerator denominator portion
+#' #       1687        1798   0.9383
 #'
 #'
 #' septic_patients %>%
 #'   group_by(hospital_id) %>%
-#'   summarise(cipro_p = portion_S(CIP, as_percent = TRUE),
+#'   summarise(cipro_p = portion_SI(CIP, as_percent = TRUE),
 #'             cipro_n = count_all(CIP),
-#'             genta_p = portion_S(GEN, as_percent = TRUE),
+#'             genta_p = portion_SI(GEN, as_percent = TRUE),
 #'             genta_n = count_all(GEN),
-#'             combination_p = portion_S(CIP, GEN, as_percent = TRUE),
+#'             combination_p = portion_SI(CIP, GEN, as_percent = TRUE),
 #'             combination_n = count_all(CIP, GEN))
 #'
 #' # Get portions S/I/R immediately of all rsi columns
@@ -155,13 +186,12 @@
 portion_R <- function(...,
                      minimum = 30,
                      as_percent = FALSE,
-                      also_single_tested = FALSE) {
+                      only_all_tested = FALSE) {
  rsi_calc(...,
-           type = "R",
-           include_I = FALSE,
+           ab_result = "R",
           minimum = minimum,
           as_percent = as_percent,
-           also_single_tested = also_single_tested,
+           only_all_tested = only_all_tested,
           only_count = FALSE)
 }

@@ -170,13 +200,12 @@ portion_R <- function(...,
 portion_IR <- function(...,
                       minimum = 30,
                       as_percent = FALSE,
-                       also_single_tested = FALSE) {
+                       only_all_tested = FALSE) {
  rsi_calc(...,
-           type = "R",
-           include_I = TRUE,
+           ab_result = c("I", "R"),
           minimum = minimum,
           as_percent = as_percent,
-           also_single_tested = also_single_tested,
+           only_all_tested = only_all_tested,
           only_count = FALSE)
 }

@@ -185,13 +214,12 @@ portion_IR <- function(...,
 portion_I <- function(...,
                      minimum = 30,
                      as_percent = FALSE,
-                      also_single_tested = FALSE) {
+                      only_all_tested = FALSE) {
  rsi_calc(...,
-           type = "I",
-           include_I = FALSE,
+           ab_result = "I",
           minimum = minimum,
           as_percent = as_percent,
-           also_single_tested = also_single_tested,
+           only_all_tested = only_all_tested,
           only_count = FALSE)
 }

@@ -200,13 +228,12 @@ portion_I <- function(...,
 portion_SI <- function(...,
                       minimum = 30,
                       as_percent = FALSE,
-                       also_single_tested = FALSE) {
+                       only_all_tested = FALSE) {
  rsi_calc(...,
-           type = "S",
-           include_I = TRUE,
+           ab_result = c("S", "I"),
           minimum = minimum,
           as_percent = as_percent,
-           also_single_tested = also_single_tested,
+           only_all_tested = only_all_tested,
           only_count = FALSE)
 }

@@ -215,13 +242,12 @@ portion_SI <- function(...,
 portion_S <- function(...,
                      minimum = 30,
                      as_percent = FALSE,
-                      also_single_tested = FALSE) {
+                      only_all_tested = FALSE) {
  rsi_calc(...,
-           type = "S",
-           include_I = FALSE,
+           ab_result = "S",
           minimum = minimum,
           as_percent = as_percent,
-           also_single_tested = also_single_tested,
+           only_all_tested = only_all_tested,
           only_count = FALSE)
 }

--- a/R/rsi.R
+++ b/R/rsi.R
@@ -36,7 +36,7 @@
 #'
 #' The function \code{is.rsi.eligible} returns \code{TRUE} when a columns contains at most 5\% invalid antimicrobial interpretations (not S and/or I and/or R), and \code{FALSE} otherwise. The threshold of 5\% can be set with the \code{threshold} parameter.
 #' @section Interpretation of S, I and R:
-#' In 2019, EUCAST has decided to change the definitions of susceptibility testing categories S, I and R as shown below. Results of several consultations on the new definitions are available on the EUCAST website under "Consultations".
+#' In 2019, EUCAST decided to change the definitions of susceptibility testing categories S, I and R as shown below (\url{http://www.eucast.org/newsiandr/}). Results of several consultations on the new definitions are available on the EUCAST website under "Consultations".
 #'
 #' \itemize{
 #'   \item{\strong{S} - }{Susceptible, standard dosing regimen: A microorganism is categorised as "Susceptible, standard dosing regimen", when there is a high likelihood of therapeutic success using a standard dosing regimen of the agent.}
@@ -46,9 +46,7 @@
 #'
 #' Exposure is a function of how the mode of administration, dose, dosing interval, infusion time, as well as distribution and excretion of the antimicrobial agent will influence the infecting organism at the site of infection.
 #'
-#' Source: \url{http://www.eucast.org/newsiandr/}.
-#'
-#' \strong{This AMR package honours this new insight.}
+#' This AMR package honours this new insight. Use \code{\link{portion_SI}} to determine antimicrobial susceptibility and \code{\link{count_SI}} to count susceptible isolates.
 #' @return Ordered factor with new class \code{rsi}
 #' @keywords rsi
 #' @export
--- a/R/rsi_calc.R
+++ b/R/rsi_calc.R
@@ -38,30 +38,29 @@ dots2vars <- function(...) {

 #' @importFrom dplyr %>% pull all_vars any_vars filter_all funs mutate_all
 rsi_calc <- function(...,
-                     type,
-                     include_I,
-                     minimum,
-                     as_percent,
-                     also_single_tested,
-                     only_count) {
+                     ab_result,
+                     minimum = 0,
+                     as_percent = FALSE,
+                     only_all_tested = FALSE,
+                     only_count = FALSE) {

  data_vars <- dots2vars(...)

-  if (!is.logical(include_I)) {
-    stop('`include_I` must be logical', call. = FALSE)
-  }
  if (!is.numeric(minimum)) {
    stop('`minimum` must be numeric', call. = FALSE)
  }
  if (!is.logical(as_percent)) {
    stop('`as_percent` must be logical', call. = FALSE)
  }
-  if (!is.logical(also_single_tested)) {
-    stop('`also_single_tested` must be logical', call. = FALSE)
+  if (!is.logical(only_all_tested)) {
+    stop('`only_all_tested` must be logical', call. = FALSE)
  }

  dots_df <- ...elt(1) # it needs this evaluation
  dots <- base::eval(base::substitute(base::alist(...)))
+  if ("also_single_tested" %in% names(dots)) {
+    stop("`also_single_tested` was replaced by `only_all_tested`. Please read Details in the help page (`?portion`) as this may have a considerable impact on your analysis.", call. = FALSE)
+  }
  ndots <- length(dots)

 if ("data.frame" %in% class(dots_df)) {
@@ -99,8 +98,7 @@ rsi_calc <- function(...,

  print_warning <- FALSE

-  type_trans <- as.integer(as.rsi(type))
-  type_others <- base::setdiff(1:3, type_trans)
+  ab_result <- as.rsi(ab_result)

  if (is.data.frame(x)) {
    rsi_integrity_check <- character(0)
@@ -108,43 +106,38 @@ rsi_calc <- function(...,
      # check integrity of columns: force rsi class
      if (!is.rsi(x %>% pull(i))) {
        rsi_integrity_check <- c(rsi_integrity_check, x %>% pull(i) %>% as.character())
-        x[, i] <- suppressWarnings(as.rsi(x[, i])) # warning will be given later
+        x[, i] <- suppressWarnings(x %>% pull(i) %>% as.rsi()) # warning will be given later
        print_warning <- TRUE
      }
-      x[, i] <- x %>% pull(i) %>% as.integer()
+      #x[, i] <- x %>% pull(i)
    }
    if (length(rsi_integrity_check) > 0) {
      # this will give a warning for invalid results, of all input columns (so only 1 warning)
      rsi_integrity_check <- as.rsi(rsi_integrity_check)
    }

-    if (include_I == TRUE) {
-      x <- x %>% mutate_all(funs(ifelse(. == 2, type_trans, .)))
-    }
-
-    if (also_single_tested == TRUE) {
-      # THE CHANCE THAT AT LEAST ONE RESULT IS type
-      found <- x %>% filter_all(any_vars(. == type_trans)) %>% nrow()
-      # THE CHANCE THAT AT LEAST ONE RESULT IS type OR ALL ARE TESTED
-      total <- found + x %>% filter_all(all_vars(. %in% type_others)) %>% nrow()
+    # THE CHANCE THAT AT LEAST ONE RESULT IS ab_result
+    #numerator <- x %>% filter_all(any_vars(. %in% ab_result)) %>% nrow()
+    if (only_all_tested == TRUE) {
+      # THE NUMBER OF ISOLATES WHERE *ALL* ABx ARE S/I/R
+      x_filtered <- x %>% filter_all(all_vars(!is.na(.)))
+      numerator <- x_filtered %>% filter_all(any_vars(. %in% ab_result)) %>% nrow()
+      denominator <- x_filtered %>% nrow()
    } else {
-      x <- apply(X = x,
-                 MARGIN = 1,
-                 FUN = min)
-      found <- sum(as.integer(x) == type_trans, na.rm = TRUE)
-      total <- length(x) - sum(is.na(x))
+      # THE NUMBER OF ISOLATES WHERE *ANY* ABx IS S/I/R
+      other_values <- base::setdiff(c(NA, levels(ab_result)), ab_result)
+      other_values_filter <- base::apply(x, 1, function(y) { base::all(y %in% other_values) & base::any(is.na(y)) })
+      numerator <- x %>% filter_all(any_vars(. %in% ab_result)) %>% nrow()
+      denominator <- x %>% filter(!other_values_filter) %>% nrow()
    }
  } else {
+    # x is not a data.frame
    if (!is.rsi(x)) {
      x <- as.rsi(x)
      print_warning <- TRUE
    }
-    x <- as.integer(x)
-    if (include_I == TRUE) {
-      x[x == 2] <- type_trans
-    }
-    found <- sum(x == type_trans, na.rm = TRUE)
-    total <- length(x) - sum(is.na(x))
+    numerator <- sum(x %in% ab_result, na.rm = TRUE)
+    denominator <- sum(x %in% levels(ab_result), na.rm = TRUE)
  }

  if (print_warning == TRUE) {
@@ -153,20 +146,23 @@ rsi_calc <- function(...,
  }

  if (only_count == TRUE) {
-    return(found)
+    return(numerator)
  }

-  if (total < minimum) {
-    warning("Introducing NA: only ", total, " results available for ", data_vars, " (minimum set to ", minimum, ").", call. = FALSE)
-    result <- NA
+  if (denominator < minimum) {
+    if (data_vars != "") {
+      data_vars <- paste(" for", data_vars)
+    }
+    warning("Introducing NA: only ", denominator, " results available", data_vars, " (minimum set to ", minimum, ").", call. = FALSE)
+    fraction <- NA
  } else {
-    result <- found / total
+    fraction <- numerator / denominator
  }

  if (as_percent == TRUE) {
-    percent(result, force_zero = TRUE)
+    percent(fraction, force_zero = TRUE)
  } else {
-    result
+    fraction
  }
 }