bring back antibiogram(), without deps

2025-07-09 04:02:19 +02:00 · 2023-02-10 16:18:00 +01:00
parent 70a7ba0206
commit bc434db835
42 changed files with 11307 additions and 4734 deletions
--- a/R/aa_helper_functions.R
+++ b/R/aa_helper_functions.R
@ -386,7 +386,7 @@ import_fn <- function(name, pkg, error_on_fail = TRUE) {
    getExportedValue(name = name, ns = asNamespace(pkg)),
    error = function(e) {
      if (isTRUE(error_on_fail)) {
-        stop_("function ", name, "() is not an exported object from package '", pkg,
+        stop_("function `", name, "()` is not an exported object from package '", pkg,
          "'. Please create an issue at ", font_url("https://github.com/msberends/AMR/issues"), ". Many thanks!",
          call = FALSE
        )
@ -622,7 +622,7 @@ format_included_data_number <- function(data) {
  } else {
    rounder <- -1 # round on tens
  }
-  paste0("~", format(round(n, rounder), decimal.mark = ".", big.mark = ","))
+  paste0("~", format(round(n, rounder), decimal.mark = ".", big.mark = " "))
 }

 # for eucast_rules() and mdro(), creates markdown output with URLs and names
@ -928,11 +928,9 @@ get_current_data <- function(arg_name, call) {
 get_current_column <- function() {
  # try dplyr::cur_columns() first
  cur_column <- import_fn("cur_column", "dplyr", error_on_fail = FALSE)
-  if (!is.null(cur_column)) {
-    out <- tryCatch(cur_column(), error = function(e) NULL)
-    if (!is.null(out)) {
-      return(out)
-    }
+  out <- tryCatch(cur_column(), error = function(e) NULL)
+  if (!is.null(out)) {
+    return(out)
  }

  # cur_column() doesn't always work (only allowed for certain conditions set by dplyr), but it's probably still possible:
@ -965,8 +963,20 @@ get_current_column <- function() {
 }

 is_null_or_grouped_tbl <- function(x) {
-  # class "grouped_df" might change at one point, so only set in one place; here.
-  is.null(x) || inherits(x, "grouped_df")
+  # class "grouped_data" is from {poorman}, see aa_helper_pm_functions.R
+  # class "grouped_df" is from {dplyr} and might change at one point, so only set in one place; here.
+  is.null(x) || inherits(x, "grouped_data") || inherits(x, "grouped_df")
+}
+
+get_group_names <- function(x) {
+  if ("pm_groups" %in% names(attributes(x))) {
+    pm_get_groups(x)
+  } else if (!is.null(x) && is_null_or_grouped_tbl(x)) {
+    grps <- colnames(attributes(x)$groups)
+    grps[!grps %in% c(".group_id", ".rows")]
+  } else {
+    character(0)
+  }
 }

 unique_call_id <- function(entire_session = FALSE, match_fn = NULL) {
@ -1272,7 +1282,7 @@ create_pillar_column <- function(x, ...) {
  new_pillar_shaft_simple(x, ...)
 }

-as_original_data_class <- function(df, old_class = NULL) {
+as_original_data_class <- function(df, old_class = NULL, extra_class = NULL) {
  if ("tbl_df" %in% old_class && pkg_is_available("tibble", also_load = FALSE)) {
    # this will then also remove groups
    fn <- import_fn("as_tibble", "tibble")
@ -1285,7 +1295,11 @@ as_original_data_class <- function(df, old_class = NULL) {
  } else {
    fn <- function(x) base::as.data.frame(df, stringsAsFactors = FALSE)
  }
-  fn(df)
+  out <- fn(df)
+  if (!is.null(extra_class)) {
+    class(out) <- c(extra_class, class(out))
+  }
+  out
 }

 # works exactly like round(), but rounds `round2(44.55, 1)` to 44.6 instead of 44.5
@ -1425,7 +1439,7 @@ add_MO_lookup_to_AMR_env <- function() {
 }

 trimws2 <- function(..., whitespace = "[\u0009\u000A\u000B\u000C\u000D\u0020\u0085\u00A0\u1680\u180E\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200A\u200B\u200C\u200D\u2028\u2029\u202F\u205F\u2060\u3000\uFEFF]") {
-  # this is even faster than trimws() itself which sets " \t\n\r".
+  # this is even faster than trimws() itself which sets "[ \t\r\n]".
  trimws(..., whitespace = whitespace)
 }

@ -1441,7 +1455,7 @@ readRDS2 <- function(file, refhook = NULL) {
 match <- function(x, table, ...) {
  chmatch <- import_fn("chmatch", "data.table", error_on_fail = FALSE)
  if (!is.null(chmatch) && is.character(x) && is.character(table)) {
-    # data.table::chmatch() is 35% faster than base::match() for character
+    # data.table::chmatch() is much faster than base::match() for character
    chmatch(x, table, ...)
  } else {
    base::match(x, table, ...)
@ -1450,7 +1464,7 @@ match <- function(x, table, ...) {
 `%in%` <- function(x, table) {
  chin <- import_fn("%chin%", "data.table", error_on_fail = FALSE)
  if (!is.null(chin) && is.character(x) && is.character(table)) {
-    # data.table::`%chin%`() is 20-50% faster than base::`%in%`() for character
+    # data.table::`%chin%`() is much faster than base::`%in%`() for character
    chin(x, table)
  } else {
    base::`%in%`(x, table)
--- a/R/ab_property.R
+++ b/R/ab_property.R
@ -335,7 +335,7 @@ ab_url <- function(x, open = FALSE, ...) {
 ab_property <- function(x, property = "name", language = get_AMR_locale(), ...) {
  meet_criteria(x, allow_NA = TRUE)
  meet_criteria(property, is_in = colnames(AMR::antibiotics), has_length = 1)
-  meet_criteria(language, is_in = c(LANGUAGES_SUPPORTED, ""), has_length = 1, allow_NULL = TRUE, allow_NA = TRUE)
+  language <- validate_language(language)
  translate_into_language(ab_validate(x = x, property = property, ...), language = language)
 }

--- a/R/antibiogram.R.bak
+++ b/R/antibiogram.R.bak
@ -170,9 +170,11 @@
 #'             antibiotics = c(aminoglycosides(), carbapenems()),
 #'             syndromic_group = "ward")
 #' 
+#' # now define a data set with only E. coli
+#' ex1 <- example_isolates[which(mo_genus() == "Escherichia"), ]
+#' 
 #' # with a custom language, though this will be determined automatically
 #' # (i.e., this table will be in Spanish on Spanish systems)
-#' ex1 <- example_isolates[which(mo_genus() == "Escherichia"), ]
 #' antibiogram(ex1,
 #'             antibiotics = aminoglycosides(),
 #'             ab_transform = "name",
@ -313,46 +315,48 @@ antibiogram <- function(x,
    }
    antibiotics <- unlist(antibiotics)
  } else {
-    if (identical(select, import_fn("select", "dplyr", error_on_fail = FALSE))) {
-      antibiotics <- suppressWarnings(x %>% select({{ antibiotics }}) %>% colnames())
-    } else {
-      antibiotics <- colnames(x[, antibiotics, drop = FALSE])
-    }
+    antibiotics <- colnames(suppressWarnings(x[, antibiotics, drop = FALSE]))
  }
  
  if (isTRUE(has_syndromic_group)) {
-    out <- x %>% 
-      select(.syndromic_group, .mo, antibiotics) %>% 
-      group_by(.syndromic_group)
+    out <- x %pm>% 
+      pm_select(.syndromic_group, .mo, antibiotics) %pm>% 
+      pm_group_by(.syndromic_group)
  } else {
-    out <- x %>% 
-      select(.mo, antibiotics)
+    out <- x %pm>% 
+      pm_select(.mo, antibiotics)
  }
+  
  # get numbers of S, I, R (per group)
-  out <- out %>% 
+  out <- out %pm>% 
    bug_drug_combinations(col_mo = ".mo",
                          FUN = function(x) x)
  counts <- out
  
-  out$numerator <- ifelse(isTRUE(combine_SI), out$S + out$I, out$S)
-  out$minimum <- minimum
-  
  # regroup for summarising
  if (isTRUE(has_syndromic_group)) {
    colnames(out)[1] <- "syndromic_group"
-    out <- out %>% 
-      group_by(syndromic_group, mo, ab)
+    out <- out %pm>% 
+      pm_group_by(syndromic_group, mo, ab)
  } else {
-    out <- out %>% 
-      group_by(mo, ab)
-  }
-  if (any(out$total < minimum, na.rm = TRUE)) {
-    message_("NOTE: ", sum(out$total < minimum, na.rm = TRUE), " combinations had less than `minimum = ", minimum, "` results and were ignored", add_fn = font_red)
+    out <- out %pm>% 
+      pm_group_by(mo, ab)
  }
  
-  out <- out %>% 
-    summarise(SI = ifelse(total >= minimum, numerator / total, NA_real_)) %>%
-    filter(!is.na(SI))
+  if (isTRUE(combine_SI)) {
+    out$numerator <- out$S + out$I
+  } else {
+    out$numerator <- out$S
+  }
+  out$minimum <- minimum
+  if (any(out$total < out$minimum, na.rm = TRUE)) {
+    message_("NOTE: ", sum(out$total < out$minimum, na.rm = TRUE), " combinations had less than `minimum = ", minimum, "` results and were ignored", add_fn = font_red)
+    out <- out %pm>% 
+      subset(total >= minimum)
+  }
+  
+  out <- out %pm>%
+    pm_summarise(SI = numerator / total)
  
  # transform names of antibiotics
  ab_naming_function <- function(x, t, l, s) {
@ -378,16 +382,20 @@ antibiogram <- function(x,
  
  # transform long to wide
  long_to_wide <- function(object, digs) {
-    object <- object %>% 
-      mutate(SI = round(SI * 100, digits = digs)) %>% 
+    object$SI <- round(object$SI * 100, digits = digs)
+    object <- object %pm>%
      # an unclassed data.frame is required for stats::reshape()
-      as.data.frame(stringsAsFactors = FALSE) %>% 
+      as.data.frame(stringsAsFactors = FALSE) %pm>% 
      stats::reshape(direction = "wide", idvar = "mo", timevar = "ab", v.names = "SI")
    colnames(object) <- gsub("^SI?[.]", "", colnames(object))
    return(object)
  }
  
-  long <- ungroup(out)
+  # ungroup for long -> wide transformation
+  attr(out, "pm_groups") <- NULL
+  attr(out, "groups") <- NULL
+  class(out) <- class(out)[!class(out) %in% c("grouped_df", "grouped_data")]
+  long <- out
  
  if (isTRUE(has_syndromic_group)) {
    grps <- unique(out$syndromic_group)
@ -401,14 +409,14 @@ antibiogram <- function(x,
      }
    }
    # sort rows
-    new_df <- new_df %>% arrange(mo, syndromic_group)
+    new_df <- new_df %pm>% pm_arrange(mo, syndromic_group)
    # sort columns
    new_df <- new_df[, c("syndromic_group", "mo", sort(colnames(new_df)[!colnames(new_df) %in% c("syndromic_group", "mo")])), drop = FALSE]
    colnames(new_df)[1:2] <- translate_AMR(c("Syndromic Group", "Pathogen"), language = language)
  } else {
    new_df <- long_to_wide(out, digs = digits)
    # sort rows
-    new_df <- new_df %>% arrange(mo)
+    new_df <- new_df %pm>% pm_arrange(mo)
    # sort columns
    new_df <- new_df[, c("mo", sort(colnames(new_df)[colnames(new_df) != "mo"])), drop = FALSE]
    colnames(new_df)[1] <- translate_AMR("Pathogen", language = language)
@ -417,16 +425,16 @@ antibiogram <- function(x,
  # add total N if indicated
  if (isTRUE(add_total_n)) {
    if (isTRUE(has_syndromic_group)) {
-      n_per_mo <- counts %>%
-        group_by(mo, .syndromic_group) %>%
-        summarise(paste0(min(total, na.rm = TRUE), "-", max(total, na.rm = TRUE)))
+      n_per_mo <- counts %pm>%
+        pm_group_by(mo, .syndromic_group) %pm>%
+        pm_summarise(paste0(min(total, na.rm = TRUE), "-", max(total, na.rm = TRUE)))
      colnames(n_per_mo) <- c("mo", "syn", "count")
      count_group <- n_per_mo$count[match(paste(new_df[[2]], new_df[[1]]), paste(n_per_mo$mo, n_per_mo$syn))]
      edit_col <- 2
    } else {
-      n_per_mo <- counts %>%
-        group_by(mo) %>%
-        summarise(paste0(min(total, na.rm = TRUE), "-", max(total, na.rm = TRUE)))
+      n_per_mo <- counts %pm>%
+        pm_group_by(mo) %pm>%
+        pm_summarise(paste0(min(total, na.rm = TRUE), "-", max(total, na.rm = TRUE)))
      colnames(n_per_mo) <- c("mo", "count")
      count_group <- n_per_mo$count[match(new_df[[1]], n_per_mo$mo)]
      edit_col <- 1
@ -489,7 +497,7 @@ autoplot.antibiogram <- function(object, ...) {
                                   } else {
                                     NULL
                                   }),
-                      position = "dodge") +
+                      position = ggplot2::position_dodge2(preserve = "single")) +
    ggplot2::facet_wrap("mo") +
    ggplot2::labs(y = ifelse(isTRUE(attributes(object)$combine_SI), "%SI", "%S"),
                  x = NULL,
@ -506,12 +514,14 @@ autoplot.antibiogram <- function(object, ...) {
 #' @rdname antibiogram
 print.antibiogram <- function(x, as_kable = !interactive(), ...) {
  meet_criteria(as_kable, allow_class = "logical", has_length = 1)
-  if (isTRUE(as_kable) && !identical(Sys.getenv("IN_PKGDOWN"), "true")) {
+  if (isTRUE(as_kable) &&
+      # be sure not to run kable in pkgdown for our website generation
+      !identical(Sys.getenv("IN_PKGDOWN"), "true")) {
    stop_ifnot_installed("knitr")
    kable <- import_fn("kable", "knitr", error_on_fail = TRUE)
    kable(x, ...)
  } else {
-    # remove 'antibiogram' class and print as indicated
+    # remove 'antibiogram' class and print with default method
    class(x) <- class(x)[class(x) != "antibiogram"]
    print(x, ...)
  }
--- a/R/av_property.R
+++ b/R/av_property.R
@ -252,7 +252,7 @@ av_url <- function(x, open = FALSE, ...) {
 av_property <- function(x, property = "name", language = get_AMR_locale(), ...) {
  meet_criteria(x, allow_NA = TRUE)
  meet_criteria(property, is_in = colnames(AMR::antivirals), has_length = 1)
-  meet_criteria(language, is_in = c(LANGUAGES_SUPPORTED, ""), has_length = 1, allow_NULL = TRUE, allow_NA = TRUE)
+  language <- validate_language(language)
  translate_into_language(av_validate(x = x, property = property, ...), language = language)
 }

--- a/R/bug_drug_combinations.R
+++ b/R/bug_drug_combinations.R
@ -44,6 +44,10 @@
 #' @rdname bug_drug_combinations
 #' @return The function [bug_drug_combinations()] returns a [data.frame] with columns "mo", "ab", "S", "I", "R" and "total".
 #' @examples
+#' # example_isolates is a data set available in the AMR package.
+#' # run ?example_isolates for more info.
+#' example_isolates
+#' 
 #' \donttest{
 #' x <- bug_drug_combinations(example_isolates)
 #' head(x)
@ -89,7 +93,7 @@ bug_drug_combinations <- function(x,
  # select only groups and antibiotics
  if (is_null_or_grouped_tbl(x.bak)) {
    data_has_groups <- TRUE
-    groups <- setdiff(names(attributes(x.bak)$groups), ".rows")
+    groups <- get_group_names(x.bak)
    x <- x[, c(groups, col_mo, colnames(x)[vapply(FUN.VALUE = logical(1), x, is.sir)]), drop = FALSE]
  } else {
    data_has_groups <- FALSE
@ -161,6 +165,7 @@ bug_drug_combinations <- function(x,
    out <- run_it(x)
  }
  rownames(out) <- NULL
+  out <- out %>% pm_arrange(mo, ab)
  out <- as_original_data_class(out, class(x.bak)) # will remove tibble groups
  structure(out, class = c("bug_drug_combinations", ifelse(data_has_groups, "grouped", character(0)), class(out)))
 }
@ -181,7 +186,7 @@ format.bug_drug_combinations <- function(x,
  meet_criteria(x, allow_class = "data.frame")
  meet_criteria(translate_ab, allow_class = c("character", "logical"), has_length = 1, allow_NA = TRUE)
  language <- validate_language(language)
-  meet_criteria(minimum, allow_class = c("numeric", "integer"), has_length = 1, is_positive = TRUE, is_finite = TRUE)
+  meet_criteria(minimum, allow_class = c("numeric", "integer"), has_length = 1, is_positive_or_zero = TRUE, is_finite = TRUE)
  meet_criteria(combine_SI, allow_class = "logical", has_length = 1)
  meet_criteria(add_ab_group, allow_class = "logical", has_length = 1)
  meet_criteria(remove_intrinsic_resistant, allow_class = "logical", has_length = 1)
--- a/R/data.R
+++ b/R/data.R
@ -27,7 +27,7 @@
 # how to conduct AMR data analysis: https://msberends.github.io/AMR/   #
 # ==================================================================== #

-#' Data Sets with `r format(nrow(antibiotics) + nrow(antivirals), big.mark = ",")` Antimicrobial Drugs
+#' Data Sets with `r format(nrow(antibiotics) + nrow(antivirals), big.mark = " ")` Antimicrobial Drugs
 #'
 #' Two data sets containing all antibiotics/antimycotics and antivirals. Use [as.ab()] or one of the [`ab_*`][ab_property()] functions to retrieve values from the [antibiotics] data set. Three identifiers are included in this data set: an antibiotic ID (`ab`, primarily used in this package) as defined by WHONET/EARS-Net, an ATC code (`atc`) as defined by the WHO, and a Compound ID (`cid`) as found in PubChem. Other properties in this data set are derived from one or more of these codes. Note that some drugs have multiple ATC codes.
 #' @format
@ -82,10 +82,10 @@
 #' @rdname antibiotics
 "antivirals"

-#' Data Set with `r format(nrow(microorganisms), big.mark = ",")` Microorganisms
+#' Data Set with `r format(nrow(microorganisms), big.mark = " ")` Microorganisms
 #'
 #' A data set containing the full microbial taxonomy (**last updated: `r documentation_date(max(TAXONOMY_VERSION$GBIF$accessed_date, TAXONOMY_VERSION$LPSN$accessed_date))`**) of `r nr2char(length(unique(microorganisms$kingdom[!microorganisms$kingdom %like% "unknown"])))` kingdoms from the List of Prokaryotic names with Standing in Nomenclature (LPSN) and the Global Biodiversity Information Facility (GBIF). This data set is the backbone of this `AMR` package. MO codes can be looked up using [as.mo()].
-#' @format A [tibble][tibble::tibble] with `r format(nrow(microorganisms), big.mark = ",")` observations and `r ncol(microorganisms)` variables:
+#' @format A [tibble][tibble::tibble] with `r format(nrow(microorganisms), big.mark = " ")` observations and `r ncol(microorganisms)` variables:
 #' - `mo`\cr ID of microorganism as used by this package
 #' - `fullname`\cr Full name, like `"Escherichia coli"`. For the taxonomic ranks genus, species and subspecies, this is the 'pasted' text of genus, species, and subspecies. For all taxonomic ranks higher than genus, this is the name of the taxon.
 #' - `status` \cr Status of the taxon, either `r vector_or(microorganisms$status)`
@ -150,10 +150,10 @@
 #' microorganisms
 "microorganisms"

-#' Data Set with `r format(nrow(microorganisms.codes), big.mark = ",")` Common Microorganism Codes
+#' Data Set with `r format(nrow(microorganisms.codes), big.mark = " ")` Common Microorganism Codes
 #'
 #' A data set containing commonly used codes for microorganisms, from laboratory systems and WHONET. Define your own with [set_mo_source()]. They will all be searched when using [as.mo()] and consequently all the [`mo_*`][mo_property()] functions.
-#' @format A [tibble][tibble::tibble] with `r format(nrow(microorganisms.codes), big.mark = ",")` observations and `r ncol(microorganisms.codes)` variables:
+#' @format A [tibble][tibble::tibble] with `r format(nrow(microorganisms.codes), big.mark = " ")` observations and `r ncol(microorganisms.codes)` variables:
 #' - `code`\cr Commonly used code of a microorganism
 #' - `mo`\cr ID of the microorganism in the [microorganisms] data set
 #' @details
@ -163,10 +163,10 @@
 #' microorganisms.codes
 "microorganisms.codes"

-#' Data Set with `r format(nrow(example_isolates), big.mark = ",")` Example Isolates
+#' Data Set with `r format(nrow(example_isolates), big.mark = " ")` Example Isolates
 #'
-#' A data set containing `r format(nrow(example_isolates), big.mark = ",")` microbial isolates with their full antibiograms. This data set contains randomised fictitious data, but reflects reality and can be used to practise AMR data analysis. For examples, please read [the tutorial on our website](https://msberends.github.io/AMR/articles/AMR.html).
-#' @format A [tibble][tibble::tibble] with `r format(nrow(example_isolates), big.mark = ",")` observations and `r ncol(example_isolates)` variables:
+#' A data set containing `r format(nrow(example_isolates), big.mark = " ")` microbial isolates with their full antibiograms. This data set contains randomised fictitious data, but reflects reality and can be used to practise AMR data analysis. For examples, please read [the tutorial on our website](https://msberends.github.io/AMR/articles/AMR.html).
+#' @format A [tibble][tibble::tibble] with `r format(nrow(example_isolates), big.mark = " ")` observations and `r ncol(example_isolates)` variables:
 #' - `date`\cr Date of receipt at the laboratory
 #' - `patient`\cr ID of the patient
 #' - `age`\cr Age of the patient
@ -182,8 +182,8 @@

 #' Data Set with Unclean Data
 #'
-#' A data set containing `r format(nrow(example_isolates_unclean), big.mark = ",")` microbial isolates that are not cleaned up and consequently not ready for AMR data analysis. This data set can be used for practice.
-#' @format A [tibble][tibble::tibble] with `r format(nrow(example_isolates_unclean), big.mark = ",")` observations and `r ncol(example_isolates_unclean)` variables:
+#' A data set containing `r format(nrow(example_isolates_unclean), big.mark = " ")` microbial isolates that are not cleaned up and consequently not ready for AMR data analysis. This data set can be used for practice.
+#' @format A [tibble][tibble::tibble] with `r format(nrow(example_isolates_unclean), big.mark = " ")` observations and `r ncol(example_isolates_unclean)` variables:
 #' - `patient_id`\cr ID of the patient
 #' - `date`\cr date of receipt at the laboratory
 #' - `hospital`\cr ID of the hospital, from A to C
@ -195,10 +195,10 @@
 #' example_isolates_unclean
 "example_isolates_unclean"

-#' Data Set with `r format(nrow(WHONET), big.mark = ",")` Isolates - WHONET Example
+#' Data Set with `r format(nrow(WHONET), big.mark = " ")` Isolates - WHONET Example
 #'
 #' This example data set has the exact same structure as an export file from WHONET. Such files can be used with this package, as this example data set shows. The antibiotic results are from our [example_isolates] data set. All patient names are created using online surname generators and are only in place for practice purposes.
-#' @format A [tibble][tibble::tibble] with `r format(nrow(WHONET), big.mark = ",")` observations and `r ncol(WHONET)` variables:
+#' @format A [tibble][tibble::tibble] with `r format(nrow(WHONET), big.mark = " ")` observations and `r ncol(WHONET)` variables:
 #' - `Identification number`\cr ID of the sample
 #' - `Specimen number`\cr ID of the specimen
 #' - `Organism`\cr Name of the microorganism. Before analysis, you should transform this to a valid microbial class, using [as.mo()].
@ -234,7 +234,7 @@
 #' Data Set with Clinical Breakpoints for SIR Interpretation
 #'
 #' Data set containing clinical breakpoints to interpret MIC and disk diffusion to SIR values, according to international guidelines. Currently implemented guidelines are EUCAST (`r min(as.integer(gsub("[^0-9]", "", subset(clinical_breakpoints, guideline %like% "EUCAST")$guideline)))`-`r max(as.integer(gsub("[^0-9]", "", subset(clinical_breakpoints, guideline %like% "EUCAST")$guideline)))`) and CLSI (`r min(as.integer(gsub("[^0-9]", "", subset(clinical_breakpoints, guideline %like% "CLSI")$guideline)))`-`r max(as.integer(gsub("[^0-9]", "", subset(clinical_breakpoints, guideline %like% "CLSI")$guideline)))`). Use [as.sir()] to transform MICs or disks measurements to SIR values.
-#' @format A [tibble][tibble::tibble] with `r format(nrow(clinical_breakpoints), big.mark = ",")` observations and `r ncol(clinical_breakpoints)` variables:
+#' @format A [tibble][tibble::tibble] with `r format(nrow(clinical_breakpoints), big.mark = " ")` observations and `r ncol(clinical_breakpoints)` variables:
 #' - `guideline`\cr Name of the guideline
 #' - `method`\cr Either `r vector_or(clinical_breakpoints$method)`
 #' - `site`\cr Body site, e.g. "Oral" or "Respiratory"
@ -258,7 +258,7 @@
 #' Data Set with Bacterial Intrinsic Resistance
 #'
 #' Data set containing defined intrinsic resistance by EUCAST of all bug-drug combinations.
-#' @format A [tibble][tibble::tibble] with `r format(nrow(intrinsic_resistant), big.mark = ",")` observations and `r ncol(intrinsic_resistant)` variables:
+#' @format A [tibble][tibble::tibble] with `r format(nrow(intrinsic_resistant), big.mark = " ")` observations and `r ncol(intrinsic_resistant)` variables:
 #' - `mo`\cr Microorganism ID
 #' - `ab`\cr Antibiotic ID
 #' @details
@ -275,7 +275,7 @@
 #' Data Set with Treatment Dosages as Defined by EUCAST
 #'
 #' EUCAST breakpoints used in this package are based on the dosages in this data set. They can be retrieved with [eucast_dosage()].
-#' @format A [tibble][tibble::tibble] with `r format(nrow(dosage), big.mark = ",")` observations and `r ncol(dosage)` variables:
+#' @format A [tibble][tibble::tibble] with `r format(nrow(dosage), big.mark = " ")` observations and `r ncol(dosage)` variables:
 #' - `ab`\cr Antibiotic ID as used in this package (such as `AMC`), using the official EARS-Net (European Antimicrobial Resistance Surveillance Network) codes where available
 #' - `name`\cr Official name of the antimicrobial drug as used by WHONET/EARS-Net or the WHO
 #' - `type`\cr Type of the dosage, either `r vector_or(dosage$type)`
--- a/R/first_isolate.R
+++ b/R/first_isolate.R
@ -512,7 +512,7 @@ first_isolate <- function(x = NULL,
  if (!is.null(col_icu)) {
    if (icu_exclude == TRUE) {
      if (isTRUE(info)) {
-        message_("Excluding ", format(sum(col_icu, na.rm = TRUE), big.mark = ","), " isolates from ICU.",
+        message_("Excluding ", format(sum(col_icu, na.rm = TRUE), big.mark = " "), " isolates from ICU.",
                 add_fn = font_black,
                 as_note = FALSE
        )
--- a/R/get_episode.R
+++ b/R/get_episode.R
@ -98,9 +98,27 @@
 #'     )
 #' }
 #' 
+#' # grouping on patients and microorganisms leads to the same
+#' # results as first_isolate() when using 'episode-based':
 #' if (require("dplyr")) {
-#'   # is_new_episode() has a lot more flexibility than first_isolate(),
-#'   # since you can group on anything that seems relevant:
+#'   x <- df %>%
+#'     filter_first_isolate(
+#'       include_unknown = TRUE,
+#'       method = "episode-based"
+#'     )
+#'
+#'   y <- df %>%
+#'     group_by(patient, mo) %>%
+#'     filter(is_new_episode(date, 365)) %>%
+#'     ungroup()
+#'
+#'   identical(x, y)
+#' }
+#' 
+#' # but is_new_episode() has a lot more flexibility than first_isolate(),
+#' # since you can now group on anything that seems relevant:
+#' if (require("dplyr")) {
+#'   
 #'   df %>%
 #'     group_by(patient, mo, ward) %>%
 #'     mutate(flag_episode = is_new_episode(date, 365)) %>%
--- a/R/ggplot_sir.R
+++ b/R/ggplot_sir.R
@ -202,7 +202,7 @@ ggplot_sir <- function(data,
  meet_criteria(limits, allow_class = c("numeric", "integer"), has_length = 2, allow_NULL = TRUE, allow_NA = TRUE)
  meet_criteria(translate_ab, allow_class = c("character", "logical"), has_length = 1, allow_NA = TRUE)
  meet_criteria(combine_SI, allow_class = "logical", has_length = 1)
-  meet_criteria(minimum, allow_class = c("numeric", "integer"), has_length = 1, is_finite = TRUE)
+  meet_criteria(minimum, allow_class = c("numeric", "integer"), has_length = 1, is_positive_or_zero = TRUE, is_finite = TRUE)
  language <- validate_language(language)
  meet_criteria(nrow, allow_class = c("numeric", "integer"), has_length = 1, allow_NULL = TRUE, is_positive = TRUE, is_finite = TRUE)
  meet_criteria(colours, allow_class = c("character", "logical"))
@ -300,7 +300,7 @@ geom_sir <- function(position = NULL,
  meet_criteria(x, allow_class = "character", has_length = 1)
  meet_criteria(fill, allow_class = "character", has_length = 1)
  meet_criteria(translate_ab, allow_class = c("character", "logical"), has_length = 1, allow_NA = TRUE)
-  meet_criteria(minimum, allow_class = c("numeric", "integer"), has_length = 1, is_finite = TRUE)
+  meet_criteria(minimum, allow_class = c("numeric", "integer"), has_length = 1, is_positive_or_zero = TRUE, is_finite = TRUE)
  language <- validate_language(language)
  meet_criteria(combine_SI, allow_class = "logical", has_length = 1)

@ -486,7 +486,7 @@ labels_sir_count <- function(position = NULL,
  meet_criteria(position, allow_class = "character", has_length = 1, is_in = c("fill", "stack", "dodge"), allow_NULL = TRUE)
  meet_criteria(x, allow_class = "character", has_length = 1)
  meet_criteria(translate_ab, allow_class = c("character", "logical"), has_length = 1, allow_NA = TRUE)
-  meet_criteria(minimum, allow_class = c("numeric", "integer"), has_length = 1, is_finite = TRUE)
+  meet_criteria(minimum, allow_class = c("numeric", "integer"), has_length = 1, is_positive_or_zero = TRUE, is_finite = TRUE)
  language <- validate_language(language)
  meet_criteria(combine_SI, allow_class = "logical", has_length = 1)
  meet_criteria(datalabels.size, allow_class = c("numeric", "integer"), has_length = 1, is_positive = TRUE, is_finite = TRUE)
--- a/R/mo.R
+++ b/R/mo.R
@ -158,7 +158,7 @@ as.mo <- function(x,
  meet_criteria(Becker, allow_class = c("logical", "character"), has_length = 1)
  meet_criteria(Lancefield, allow_class = c("logical", "character"), has_length = 1)
  meet_criteria(keep_synonyms, allow_class = "logical", has_length = 1)
-  meet_criteria(minimum_matching_score, allow_class = c("numeric", "integer"), has_length = 1, allow_NULL = TRUE)
+  meet_criteria(minimum_matching_score, allow_class = c("numeric", "integer"), has_length = 1, allow_NULL = TRUE, is_positive_or_zero = TRUE, is_finite = TRUE)
  meet_criteria(reference_df, allow_class = "data.frame", allow_NULL = TRUE)
  meet_criteria(ignore_pattern, allow_class = "character", has_length = 1, allow_NULL = TRUE)
  language <- validate_language(language)
@ -627,7 +627,7 @@ freq.mo <- function(x, ...) {
    .add_header = list(
      `Gram-negative` = paste0(
        format(sum(grams == "Gram-negative", na.rm = TRUE),
-          big.mark = ",",
+          big.mark = " ",
          decimal.mark = "."
        ),
        " (", percentage(sum(grams == "Gram-negative", na.rm = TRUE) / length(grams),
@ -637,7 +637,7 @@ freq.mo <- function(x, ...) {
      ),
      `Gram-positive` = paste0(
        format(sum(grams == "Gram-positive", na.rm = TRUE),
-          big.mark = ",",
+          big.mark = " ",
          decimal.mark = "."
        ),
        " (", percentage(sum(grams == "Gram-positive", na.rm = TRUE) / length(grams),
--- a/R/mo_property.R
+++ b/R/mo_property.R
@ -900,12 +900,16 @@ mo_validate <- function(x, property, language, keep_synonyms = keep_synonyms, ..
  }

  # get property reeaaally fast using match()
-  x <- AMR_env$MO_lookup[[property]][match(x, AMR_env$MO_lookup$mo)]
+  if (property == "snomed") {
+    x <- lapply(x, function(y) unlist(AMR_env$MO_lookup$snomed[match(y, AMR_env$MO_lookup$mo)]))
+  } else {
+    x <- AMR_env$MO_lookup[[property]][match(x, AMR_env$MO_lookup$mo)]
+  }

  if (property == "mo") {
    return(set_clean_class(x, new_class = c("mo", "character")))
  } else if (property == "snomed") {
-    return(sort(as.character(eval(parse(text = x)))))
+    return(x)
  } else if (property == "prevalence") {
    return(as.double(x))
  } else {
--- a/R/proportion.R
+++ b/R/proportion.R
@ -27,7 +27,7 @@
 # how to conduct AMR data analysis: https://msberends.github.io/AMR/   #
 # ==================================================================== #

-#' Calculate Microbial Resistance
+#' Calculate Antimicrobial Resistance
 #'
 #' @description These functions can be used to calculate the (co-)resistance or susceptibility of microbial isolates (i.e. percentage of S, SI, I, IR or R). All functions support quasiquotation with pipes, can be used in `summarise()` from the `dplyr` package and also support grouped variables, see *Examples*.
 #'
@ -49,13 +49,14 @@
 #'
 #' Use [sir_confidence_interval()] to calculate the confidence interval, which relies on [binom.test()], i.e., the Clopper-Pearson method. This function returns a vector of length 2 at default for antimicrobial *resistance*. Change the `side` argument to "left"/"min" or "right"/"max" to return a single value, and change the `ab_result` argument to e.g. `c("S", "I")` to test for antimicrobial *susceptibility*, see Examples.
 #'
-#' **Remember that you should filter your data to let it contain only first isolates!** This is needed to exclude duplicates and to reduce selection bias. Use [first_isolate()] to determine them in your data set.
+#' **Remember that you should filter your data to let it contain only first isolates!** This is needed to exclude duplicates and to reduce selection bias. Use [first_isolate()] to determine them in your data set with one of the four available algorithms.
 #'
 #' These functions are not meant to count isolates, but to calculate the proportion of resistance/susceptibility. Use the [`count()`][AMR::count()] functions to count isolates. The function [susceptibility()] is essentially equal to `count_susceptible() / count_all()`. *Low counts can influence the outcome - the `proportion` functions may camouflage this, since they only return the proportion (albeit being dependent on the `minimum` argument).*
 #'
 #' The function [proportion_df()] takes any variable from `data` that has an [`sir`] class (created with [as.sir()]) and calculates the proportions S, I, and R. It also supports grouped variables. The function [sir_df()] works exactly like [proportion_df()], but adds the number of isolates.
 #' @section Combination Therapy:
 #' When using more than one variable for `...` (= combination therapy), use `only_all_tested` to only count isolates that are tested for all antibiotics/variables that you test them for. See this example for two antibiotics, Drug A and Drug B, about how [susceptibility()] works to calculate the %SI:
+#' 
 #'
 #' ```
 #' --------------------------------------------------------------------
@ -77,11 +78,14 @@
 #' ```
 #'
 #' Please note that, in combination therapies, for `only_all_tested = TRUE` applies that:
+#' 
 #' ```
 #'     count_S()    +   count_I()    +   count_R()    = count_all()
 #'   proportion_S() + proportion_I() + proportion_R() = 1
 #' ```
+#' 
 #' and that, in combination therapies, for `only_all_tested = FALSE` applies that:
+#' 
 #' ```
 #'     count_S()    +   count_I()    +   count_R()    >= count_all()
 #'   proportion_S() + proportion_I() + proportion_R() >= 1
@ -98,7 +102,9 @@
 #' @examples
 #' # example_isolates is a data set available in the AMR package.
 #' # run ?example_isolates for more info.
-#'
+#' example_isolates
+#' 
+#' 
 #' # base R ------------------------------------------------------------
 #' # determines %R
 #' resistance(example_isolates$AMX)
--- a/R/resistance_predict.R
+++ b/R/resistance_predict.R
@ -125,7 +125,7 @@ resistance_predict <- function(x,
  meet_criteria(year_min, allow_class = c("numeric", "integer"), has_length = 1, allow_NULL = TRUE, is_positive = TRUE, is_finite = TRUE)
  meet_criteria(year_max, allow_class = c("numeric", "integer"), has_length = 1, allow_NULL = TRUE, is_positive = TRUE, is_finite = TRUE)
  meet_criteria(year_every, allow_class = c("numeric", "integer"), has_length = 1, is_positive = TRUE, is_finite = TRUE)
-  meet_criteria(minimum, allow_class = c("numeric", "integer"), has_length = 1, is_finite = TRUE)
+  meet_criteria(minimum, allow_class = c("numeric", "integer"), has_length = 1, is_positive_or_zero = TRUE, is_finite = TRUE)
  meet_criteria(model, allow_class = c("character", "function"), has_length = 1, allow_NULL = TRUE)
  meet_criteria(I_as_S, allow_class = "logical", has_length = 1)
  meet_criteria(preserve_measurements, allow_class = "logical", has_length = 1)
--- a/R/sir.R
+++ b/R/sir.R
@ -89,7 +89,7 @@
 #'
 #' ### Machine-Readable Interpretation Guidelines
 #'
-#' The repository of this package [contains a machine-readable version](https://github.com/msberends/AMR/blob/main/data-raw/clinical_breakpoints.txt) of all guidelines. This is a CSV file consisting of `r format(nrow(AMR::clinical_breakpoints), big.mark = ",")` rows and `r ncol(AMR::clinical_breakpoints)` columns. This file is machine-readable, since it contains one row for every unique combination of the test method (MIC or disk diffusion), the antimicrobial drug and the microorganism. **This allows for easy implementation of these rules in laboratory information systems (LIS)**. Note that it only contains interpretation guidelines for humans - interpretation guidelines from CLSI for animals were removed.
+#' The repository of this package [contains a machine-readable version](https://github.com/msberends/AMR/blob/main/data-raw/clinical_breakpoints.txt) of all guidelines. This is a CSV file consisting of `r format(nrow(AMR::clinical_breakpoints), big.mark = " ")` rows and `r ncol(AMR::clinical_breakpoints)` columns. This file is machine-readable, since it contains one row for every unique combination of the test method (MIC or disk diffusion), the antimicrobial drug and the microorganism. **This allows for easy implementation of these rules in laboratory information systems (LIS)**. Note that it only contains interpretation guidelines for humans - interpretation guidelines from CLSI for animals were removed.
 #'
 #' ### Other
 #'
--- a/R/sir_calc.R
+++ b/R/sir_calc.R
@ -31,7 +31,8 @@ dots2vars <- function(...) {
  # this function is to give more informative output about
  # variable names in count_* and proportion_* functions
  dots <- substitute(list(...))
-  as.character(dots)[2:length(dots)]
+  dots <- as.character(dots)[2:length(dots)]
+  paste0(dots[dots != "."], collapse = "+")
 }

 sir_calc <- function(...,
@ -41,7 +42,7 @@ sir_calc <- function(...,
                     only_all_tested = FALSE,
                     only_count = FALSE) {
  meet_criteria(ab_result, allow_class = c("character", "numeric", "integer"), has_length = c(1, 2, 3))
-  meet_criteria(minimum, allow_class = c("numeric", "integer"), has_length = 1, is_finite = TRUE)
+  meet_criteria(minimum, allow_class = c("numeric", "integer"), has_length = 1, is_positive_or_zero = TRUE, is_finite = TRUE)
  meet_criteria(as_percent, allow_class = "logical", has_length = 1)
  meet_criteria(only_all_tested, allow_class = "logical", has_length = 1)
  meet_criteria(only_count, allow_class = "logical", has_length = 1)
@ -133,7 +134,7 @@ sir_calc <- function(...,
    }

    x_transposed <- as.list(as.data.frame(t(x), stringsAsFactors = FALSE))
-    if (only_all_tested == TRUE) {
+    if (isTRUE(only_all_tested)) {
      # no NAs in any column
      y <- apply(
        X = as.data.frame(lapply(x, as.integer), stringsAsFactors = FALSE),
@ -224,8 +225,8 @@ sir_calc_df <- function(type, # "proportion", "count" or "both"
  meet_criteria(type, is_in = c("proportion", "count", "both"), has_length = 1)
  meet_criteria(data, allow_class = "data.frame", contains_column_class = "sir")
  meet_criteria(translate_ab, allow_class = c("character", "logical"), has_length = 1, allow_NA = TRUE)
-  meet_criteria(language, has_length = 1, is_in = c(LANGUAGES_SUPPORTED, ""), allow_NULL = TRUE, allow_NA = TRUE)
-  meet_criteria(minimum, allow_class = c("numeric", "integer"), has_length = 1, is_finite = TRUE)
+  language <- validate_language(language)
+  meet_criteria(minimum, allow_class = c("numeric", "integer"), has_length = 1, is_positive_or_zero = TRUE, is_finite = TRUE)
  meet_criteria(as_percent, allow_class = "logical", has_length = 1)
  meet_criteria(combine_SI, allow_class = "logical", has_length = 1)
  meet_criteria(confidence_level, allow_class = "numeric", has_length = 1)
@ -236,7 +237,7 @@ sir_calc_df <- function(type, # "proportion", "count" or "both"
  # select only groups and antibiotics
  if (is_null_or_grouped_tbl(data)) {
    data_has_groups <- TRUE
-    groups <- setdiff(names(attributes(data)$groups), ".rows")
+    groups <- get_group_names(data)
    data <- data[, c(groups, colnames(data)[vapply(FUN.VALUE = logical(1), data, is.sir)]), drop = FALSE]
  } else {
    data_has_groups <- FALSE
--- a/R/translate.R
+++ b/R/translate.R
@ -246,7 +246,8 @@ translate_into_language <- function(from,
  }

  lapply(
-    seq_len(nrow(df_trans)),
+    # starting from last row, since more general translations are on top, such as 'Group'
+    rev(seq_len(nrow(df_trans))),
    function(i) {
      from_unique_translated <<- gsub(
        pattern = df_trans$pattern[i],
--- a/R/zzz.R
+++ b/R/zzz.R
@ -123,6 +123,7 @@ if (utf8_supported && !is_latex) {
  s3_register("ggplot2::autoplot", "mic")
  s3_register("ggplot2::autoplot", "disk")
  s3_register("ggplot2::autoplot", "resistance_predict")
+  s3_register("ggplot2::autoplot", "antibiogram")
  # Support for fortify from the ggplot2 package
  s3_register("ggplot2::fortify", "sir")
  s3_register("ggplot2::fortify", "mic")