count_all and some fixes

2025-07-19 01:03:17 +02:00 · 2018-10-12 16:35:18 +02:00
parent e7d937d36e
commit 5b5b95a47b
22 changed files with 232 additions and 246 deletions
--- a/4
+++ b/4
@ -1,6 +1,6 @@
 Package: AMR
-Version: 0.4.0.9001
-Date: 2018-10-02
+Version: 0.4.0.9002
+Date: 2018-10-12
 Title: Antimicrobial Resistance Analysis
 Authors@R: c(
    person(
--- a/1
+++ b/1
@ -63,6 +63,7 @@ export(count_IR)
 export(count_R)
 export(count_S)
 export(count_SI)
+export(count_all)
 export(count_df)
 export(facet_rsi)
 export(first_isolate)
--- a/NEWS.md
+++ b/NEWS.md
@ -1,11 +1,23 @@
 # 0.4.0.90xx (latest development version)

 #### New
+* Function `count_all` to get all available isolates (that like all `portion_*` and `count_*` functions also supports `summarise` and `group_by`), the old `n_rsi` is now an alias of `count_all`

 #### Changed
+* Fix for `portion_*(..., as_percent = TRUE)` when minimal amount of isolates would not be met
+* Using `portion_*` functions now throws a warning when total available isolate is below parameter `minimum`
+* `as.mo` will not set package name as attribute anymore
 * Check for `hms::is.hms` in frequency tables
 * Removed diacritics from all authors (columns `microorganisms$ref` and `microorganisms.old$ref`) to comply with CRAN policy to only allow ASCII characters
 * Fix for `mo_property` not working properly
+* Support for class `difftime` in frequency tables
+* Support for named vectors of class `mo`, useful for `top_freq()`
+* AI improvements for `as.mo`:
+  * `"CRS"` -> *Stenotrophomonas maltophilia*
+  * `"CRSM"` -> *Stenotrophomonas maltophilia*
+  * `"MSSA"` -> *Staphylococcus aureus*
+  * `"MSSE"` -> *Staphylococcus epidermidis*
+* Fix for `join` functions

 #### Other
 * Updated vignettes to comply with README
--- a/R/count.R
+++ b/R/count.R
@ -21,15 +21,15 @@
 #' @description These functions can be used to count resistant/susceptible microbial isolates. All functions support quasiquotation with pipes, can be used in \code{dplyr}s \code{\link[dplyr]{summarise}} and support grouped variables, see \emph{Examples}.
 #'
 #' \code{count_R} and \code{count_IR} can be used to count resistant isolates, \code{count_S} and \code{count_SI} can be used to count susceptible isolates.\cr
+#' @param ... one or more vectors (or columns) with antibiotic interpretations. They will be transformed internally with \code{\link{as.rsi}} if needed.
 #' @inheritParams portion
-#' @details \strong{Remember that you should filter your table to let it contain only first isolates!} Use \code{\link{first_isolate}} to determine them in your data set.
+#' @details These functions are meant to count isolates. Use the \code{\link{portion}_*} functions to calculate microbial resistance.
 #'
-#' These functions are meant to count isolates. Use the \code{\link{portion}_*} functions to calculate microbial resistance.
+#' \code{n_rsi} is an alias of \code{count_all}. They can be used to count all available isolates, i.e. where all input antibiotics have an available result (S, I or R). Their use is equal to \code{\link{n_distinct}}. Their function is equal to \code{count_S(...) + count_IR(...)}.
 #'
 #' \code{count_df} takes any variable from \code{data} that has an \code{"rsi"} class (created with \code{\link{as.rsi}}) and counts the amounts of R, I and S. The resulting \emph{tidy data} (see Source) \code{data.frame} will have three rows (S/I/R) and a column for each variable with class \code{"rsi"}.
 #' @source Wickham H. \strong{Tidy Data.} The Journal of Statistical Software, vol. 59, 2014. \url{http://vita.had.co.nz/papers/tidy-data.html}
-#' @seealso \code{\link{portion}_*} to calculate microbial resistance and susceptibility.\cr
-#' \code{\link{n_rsi}} to count all cases where antimicrobial results are available.
+#' @seealso \code{\link{portion}_*} to calculate microbial resistance and susceptibility.
 #' @keywords resistance susceptibility rsi antibiotics isolate isolates
 #' @return Integer
 #' @rdname count
@ -47,6 +47,10 @@
 #' count_S(septic_patients$amox)
 #' count_SI(septic_patients$amox)
 #'
+#' # Count all available isolates
+#' count_all(septic_patients$amox)
+#' n_rsi(septic_patients$amox)
+#'
 #' # Since n_rsi counts available isolates, you can
 #' # calculate back to count e.g. non-susceptible isolates.
 #' # This results in the same:
@ -56,24 +60,25 @@
 #' library(dplyr)
 #' septic_patients %>%
 #'   group_by(hospital_id) %>%
-#'   summarise(R = count_R(cipr),
-#'             I = count_I(cipr),
-#'             S = count_S(cipr),
-#'             n = n_rsi(cipr), # the actual total; sum of all three
-#'             total = n())     # NOT the amount of tested isolates!
+#'   summarise(R  = count_R(cipr),
+#'             I  = count_I(cipr),
+#'             S  = count_S(cipr),
+#'             n1 = count_all(cipr), # the actual total; sum of all three
+#'             n2 = n_rsi(cipr),     # same - analogous to n_distinct
+#'             total = n())          # NOT the amount of tested isolates!
 #'
 #' # Count co-resistance between amoxicillin/clav acid and gentamicin,
 #' # so we can see that combination therapy does a lot more than mono therapy.
 #' # Please mind that `portion_S` calculates percentages right away instead.
-#' count_S(septic_patients$amcl)   # S = 1056 (67.3%)
-#' n_rsi(septic_patients$amcl)     # n = 1570
+#' count_S(septic_patients$amcl)   # S = 1057 (67.1%)
+#' count_all(septic_patients$amcl) # n = 1576
 #'
-#' count_S(septic_patients$gent)   # S = 1363 (74.0%)
-#' n_rsi(septic_patients$gent)     # n = 1842
+#' count_S(septic_patients$gent)   # S = 1372 (74.0%)
+#' count_all(septic_patients$gent) # n = 1855
 #'
 #' with(septic_patients,
-#'      count_S(amcl, gent))       # S = 1385 (92.1%)
-#' with(septic_patients,           # n = 1504
+#'      count_S(amcl, gent))       # S = 1396 (92.0%)
+#' with(septic_patients,           # n = 1517
 #'      n_rsi(amcl, gent))
 #'
 #' # Get portions S/I/R immediately of all rsi columns
@ -140,6 +145,20 @@ count_S <- function(...) {
           only_count = TRUE)
 }

+#' @rdname count
+#' @export
+count_all <- function(...) {
+  # only print warnings once, if needed
+  count_S(...) + suppressWarnings(count_IR(...))
+}
+
+#' @rdname count
+#' @export
+n_rsi <- function(...) {
+  # only print warnings once, if needed
+  count_S(...) + suppressWarnings(count_IR(...))
+}
+
 #' @rdname count
 #' @importFrom dplyr %>% select_if bind_rows summarise_if mutate group_vars select everything
 #' @export
--- a/R/eucast.R
+++ b/R/eucast.R
@ -507,10 +507,10 @@ EUCAST_rules <- function(tbl,
  # overig
  edit_rsi(to = 'R',
           rows = which(tbl$genus %in% c('Leuconostoc', 'Pediococcus')),
-           cols = c(vanc, teic))
+           cols = glycopeptides)
  edit_rsi(to = 'R',
           rows = which(tbl$genus == 'Lactobacillus'),
-           cols = c(vanc, teic))
+           cols = glycopeptides)
  edit_rsi(to = 'R',
           rows = which(tbl$fullname %like% '^Clostridium (ramosum|innocuum)'),
           cols = vanc)
--- a/R/freq.R
+++ b/R/freq.R
@ -336,6 +336,13 @@ frequency_tbl <- function(x,
    header <- header %>% paste0(markdown_line, '\nLongest:   ', x %>% base::nchar() %>% base::max(na.rm = TRUE))
  }

+  if (NROW(x) > 0 & any(class(x) == "difftime")) {
+    header <- header %>% paste0('\n')
+    header <- header %>% paste(markdown_line, '\nUnits:    ', attributes(x)$units)
+    x <- as.double(x)
+    # after this, the numeric header continues
+  }
+
  if (NROW(x) > 0 & any(class(x) %in% c('double', 'integer', 'numeric', 'raw', 'single'))) {
    # right align number
    Tukey_five <- stats::fivenum(x, na.rm = TRUE)
@ -351,7 +358,7 @@ frequency_tbl <- function(x,
    outlier_length <- length(boxplot.stats(x)$out)
    header <- header %>% paste0(markdown_line, '\nOutliers:  ', outlier_length)
    if (outlier_length > 0) {
-      header <- header %>% paste0(' (unique: ', boxplot.stats(x)$out %>% n_distinct(), ')')
+      header <- header %>% paste0(' (unique count: ', boxplot.stats(x)$out %>% n_distinct(), ')')
    }
  }
  if (NROW(x) > 0 & any(class(x) == "rsi")) {
--- a/R/join_microorganisms.R
+++ b/R/join_microorganisms.R
@ -4,14 +4,15 @@
 #' @rdname join
 #' @name join
 #' @aliases join inner_join
-#' @param x existing table to join, also supports character vectors
-#' @param by a variable to join by - could be a column name of \code{x} with values that exist in \code{microorganisms$mo} (like \code{by = "bacteria_id"}), or another column in \code{\link{microorganisms}} (but then it should be named, like \code{by = c("my_genus_species" = "fullname")})
+#' @param x existing table to join, or character vector
+#' @param by a variable to join by - if left empty will search for a column with class \code{mo} (created with \code{\link{as.mo}}) or will be \code{"mo"} if that column name exists in \code{x}, could otherwise be a column name of \code{x} with values that exist in \code{microorganisms$mo} (like \code{by = "bacteria_id"}), or another column in \code{\link{microorganisms}} (but then it should be named, like \code{by = c("my_genus_species" = "fullname")})
 #' @param suffix if there are non-joined duplicate variables in \code{x} and \code{y}, these suffixes will be added to the output to disambiguate them. Should be a character vector of length 2.
 #' @param ... other parameters to pass on to \code{dplyr::\link[dplyr]{join}}.
-#' @details As opposed to the \code{\link[dplyr]{join}} functions of \code{dplyr}, characters vectors are supported and at default existing columns will get a suffix \code{"2"} and the newly joined columns will not get a suffix. See \code{\link[dplyr]{join}} for more information.
+#' @details \strong{Note:} As opposed to the \code{\link[dplyr]{join}} functions of \code{dplyr}, characters vectors are supported and at default existing columns will get a suffix \code{"2"} and the newly joined columns will not get a suffix. See \code{\link[dplyr]{join}} for more information.
 #' @export
 #' @examples
-#' left_join_microorganisms("STAAUR")
+#' left_join_microorganisms(as.mo("K. pneumoniae"))
+#' left_join_microorganisms("B_KLBSL_PNE")
 #'
 #' library(dplyr)
 #' septic_patients %>% left_join_microorganisms()
@ -19,130 +20,117 @@
 #' df <- data.frame(date = seq(from = as.Date("2018-01-01"),
 #'                             to = as.Date("2018-01-07"),
 #'                             by = 1),
-#'                  bacteria_id = c("STAAUR", "STAAUR", "STAAUR", "STAAUR",
-#'                                  "ESCCOL", "ESCCOL", "ESCCOL"),
+#'                  bacteria = as.mo(c("S. aureus", "MRSA", "MSSA", "STAAUR",
+#'                                     "E. coli", "E. coli", "E. coli")),
 #'                  stringsAsFactors = FALSE)
 #' colnames(df)
-#' df2 <- left_join_microorganisms(df, "bacteria_id")
-#' colnames(df2)
-inner_join_microorganisms <- function(x, by = 'mo', suffix = c("2", ""), ...) {
-  if (!any(class(x) %in% c("data.frame", "matrix"))) {
-    x <- data.frame(mo = as.character(x), stringsAsFactors = FALSE)
-  }
-  # no name set to `by` parameter
-  if (is.null(names(by))) {
-    joinby <- colnames(AMR::microorganisms)[1]
-    names(joinby) <- by
-  } else {
-    joinby <- by
-  }
+#' df_joined <- left_join_microorganisms(df, "bacteria")
+#' colnames(df_joined)
+inner_join_microorganisms <- function(x, by = NULL, suffix = c("2", ""), ...) {
+  checked <- joins_check_df(x, by)
+  x <- checked$x
+  by <- checked$by
  join <- suppressWarnings(
-    dplyr::inner_join(x = x, y = AMR::microorganisms, by = joinby, suffix = suffix, ...)
+    dplyr::inner_join(x = x, y = AMR::microorganisms, by = by, suffix = suffix, ...)
  )
  if (nrow(join) > nrow(x)) {
-    warning('the newly joined tbl contains ', nrow(join) - nrow(x), ' rows more that its original')
+    warning('The newly joined tbl contains ', nrow(join) - nrow(x), ' rows more that its original.')
  }
  join
 }

 #' @rdname join
 #' @export
-left_join_microorganisms <- function(x, by = 'mo', suffix = c("2", ""), ...) {
-  if (!any(class(x) %in% c("data.frame", "matrix"))) {
-    x <- data.frame(mo = as.character(x), stringsAsFactors = FALSE)
-  }
-  # no name set to `by` parameter
-  if (is.null(names(by))) {
-    joinby <- colnames(AMR::microorganisms)[1]
-    names(joinby) <- by
-  } else {
-    joinby <- by
-  }
+left_join_microorganisms <- function(x, by = NULL, suffix = c("2", ""), ...) {
+  checked <- joins_check_df(x, by)
+  x <- checked$x
+  by <- checked$by
  join <- suppressWarnings(
-    dplyr::left_join(x = x, y = AMR::microorganisms, by = joinby, suffix = suffix, ...)
+    dplyr::left_join(x = x, y = AMR::microorganisms, by = by, suffix = suffix, ...)
  )
  if (nrow(join) > nrow(x)) {
-    warning('the newly joined tbl contains ', nrow(join) - nrow(x), ' rows more that its original')
+    warning('The newly joined tbl contains ', nrow(join) - nrow(x), ' rows more that its original.')
  }
  join
 }

 #' @rdname join
 #' @export
-right_join_microorganisms <- function(x, by = 'mo', suffix = c("2", ""), ...) {
-  if (!any(class(x) %in% c("data.frame", "matrix"))) {
-    x <- data.frame(mo = as.character(x), stringsAsFactors = FALSE)
-  }
-  # no name set to `by` parameter
-  if (is.null(names(by))) {
-    joinby <- colnames(AMR::microorganisms)[1]
-    names(joinby) <- by
-  } else {
-    joinby <- by
-  }
+right_join_microorganisms <- function(x, by = NULL, suffix = c("2", ""), ...) {
+  checked <- joins_check_df(x, by)
+  x <- checked$x
+  by <- checked$by
  join <- suppressWarnings(
-    dplyr::right_join(x = x, y = AMR::microorganisms, by = joinby, suffix = suffix, ...)
+    dplyr::right_join(x = x, y = AMR::microorganisms, by = by, suffix = suffix, ...)
  )
  if (nrow(join) > nrow(x)) {
-    warning('the newly joined tbl contains ', nrow(join) - nrow(x), ' rows more that its original')
+    warning('The newly joined tbl contains ', nrow(join) - nrow(x), ' rows more that its original.')
  }
  join
 }

 #' @rdname join
 #' @export
-full_join_microorganisms <- function(x, by = 'mo', suffix = c("2", ""), ...) {
-  if (!any(class(x) %in% c("data.frame", "matrix"))) {
-    x <- data.frame(mo = as.character(x), stringsAsFactors = FALSE)
-  }
-  # no name set to `by` parameter
-  if (is.null(names(by))) {
-    joinby <- colnames(AMR::microorganisms)[1]
-    names(joinby) <- by
-  } else {
-    joinby <- by
-  }
+full_join_microorganisms <- function(x, by = NULL, suffix = c("2", ""), ...) {
+  checked <- joins_check_df(x, by)
+  x <- checked$x
+  by <- checked$by
  join <- suppressWarnings(
-    dplyr::full_join(x = x, y = AMR::microorganisms, by = joinby, suffix = suffix, ...)
+    dplyr::full_join(x = x, y = AMR::microorganisms, by = by, suffix = suffix, ...)
  )
  if (nrow(join) > nrow(x)) {
-    warning('the newly joined tbl contains ', nrow(join) - nrow(x), ' rows more that its original')
+    warning('The newly joined tbl contains ', nrow(join) - nrow(x), ' rows more that its original.')
  }
  join
 }

 #' @rdname join
 #' @export
-semi_join_microorganisms <- function(x, by = 'mo', ...) {
-  if (!any(class(x) %in% c("data.frame", "matrix"))) {
-    x <- data.frame(mo = as.character(x), stringsAsFactors = FALSE)
-  }
-  # no name set to `by` parameter
-  if (is.null(names(by))) {
-    joinby <- colnames(AMR::microorganisms)[1]
-    names(joinby) <- by
-  } else {
-    joinby <- by
-  }
+semi_join_microorganisms <- function(x, by = NULL, ...) {
+  checked <- joins_check_df(x, by)
+  x <- checked$x
+  by <- checked$by
  suppressWarnings(
-    dplyr::semi_join(x = x, y = AMR::microorganisms, by = joinby, ...)
+    dplyr::semi_join(x = x, y = AMR::microorganisms, by = by, ...)
  )
 }

 #' @rdname join
 #' @export
-anti_join_microorganisms <- function(x, by = 'mo', ...) {
+anti_join_microorganisms <- function(x, by = NULL, ...) {
+  checked <- joins_check_df(x, by)
+  x <- checked$x
+  by <- checked$by
+  suppressWarnings(
+    dplyr::anti_join(x = x, y = AMR::microorganisms, by = by, ...)
+  )
+}
+
+joins_check_df <- function(x, by) {
  if (!any(class(x) %in% c("data.frame", "matrix"))) {
    x <- data.frame(mo = as.character(x), stringsAsFactors = FALSE)
+    if (is.null(by)) {
+      by <- "mo"
+    }
+  }
+  if (is.null(by)) {
+    # search for column with class `mo` and return first one found
+    by <- colnames(x)[lapply(x, is.mo) == TRUE][1]
+    if (is.na(by)) {
+      if ("mo" %in% colnames(x)) {
+        by <- "mo"
+      } else {
+        stop("Cannot join - no column found with name or class  `mo`.", call. = FALSE)
+      }
+    }
+    message('Joining, by = "', by, '"') # message same as dplyr::join functions
  }
-  # no name set to `by` parameter
  if (is.null(names(by))) {
    joinby <- colnames(AMR::microorganisms)[1]
    names(joinby) <- by
  } else {
    joinby <- by
  }
-  suppressWarnings(
-    dplyr::anti_join(x = x, y = AMR::microorganisms, by = joinby, ...)
-  )
+  list(x = x,
+       by = joinby)
 }
--- a/R/mo.R
+++ b/R/mo.R
@ -272,12 +272,14 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
      # translate known trivial abbreviations to genus + species ----
      if (!is.na(x_trimmed[i])) {
        if (toupper(x_trimmed[i]) == 'MRSA'
+            | toupper(x_trimmed[i]) == 'MSSA'
            | toupper(x_trimmed[i]) == 'VISA'
            | toupper(x_trimmed[i]) == 'VRSA') {
          x[i] <- MOs[mo == 'B_STPHY_AUR', ..property][[1]][1L]
          next
        }
-        if (toupper(x_trimmed[i]) == 'MRSE') {
+        if (toupper(x_trimmed[i]) == 'MRSE'
+            | toupper(x_trimmed[i]) == 'MSSE') {
          x[i] <- MOs[mo == 'B_STPHY_EPI', ..property][[1]][1L]
          next
        }
@ -290,6 +292,12 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
          x[i] <- MOs[mo == 'B_PDMNS_AER', ..property][[1]][1L]
          next
        }
+        if (toupper(x_trimmed[i]) == 'CRS'
+            | toupper(x_trimmed[i]) == 'CRSM') {
+          # co-trim resistant S. maltophilia
+          x[i] <- MOs[mo == 'B_STNTR_MAL', ..property][[1]][1L]
+          next
+        }
        if (toupper(x_trimmed[i]) %in% c('PISP', 'PRSP', 'VISP', 'VRSP')) {
          # peni I, peni R, vanco I, vanco R: S. pneumoniae
          x[i] <- MOs[mo == 'B_STRPTC_PNE', ..property][[1]][1L]
@ -578,7 +586,7 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =

  failures <- failures[!failures %in% c(NA, NULL, NaN)]
  if (length(failures) > 0) {
-    warning("These ", length(failures) , " values could not be coerced (try again with allow_uncertain = TRUE):\n",
+    warning("These ", length(failures) , " values could not be coerced (try again with allow_uncertain = TRUE): ",
            paste('"', unique(failures), '"', sep = "", collapse = ', '),
            ".",
            call. = FALSE)
@ -658,8 +666,6 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =

  if (property == "mo") {
    class(x) <- "mo"
-    attr(x, 'package') <- 'AMR'
-    attr(x, 'ITIS') <- TRUE
  } else if (property == "tsn") {
    x <- as.integer(x)
  }
@ -667,7 +673,6 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
  x
 }

-#' @importFrom dplyr case_when
 renamed_note <- function(name_old, name_new, ref_old = "", ref_new = "") {
  if (!is.na(ref_old)) {
    ref_old <- paste0(" (", ref_old, ")")
@ -687,7 +692,10 @@ renamed_note <- function(name_old, name_new, ref_old = "", ref_new = "") {
 #' @noRd
 print.mo <- function(x, ...) {
  cat("Class 'mo'\n")
-  print.default(as.character(x), quote = FALSE)
+  x_names <- names(x)
+  x <- as.character(x)
+  names(x) <- x_names
+  print.default(x, quote = FALSE)
 }

 #' @exportMethod as.data.frame.mo
--- a/R/n_rsi.R
+++ b/R/n_rsi.R
@ -1,40 +0,0 @@
-# ==================================================================== #
-# TITLE                                                                #
-# Antimicrobial Resistance (AMR) Analysis                              #
-#                                                                      #
-# AUTHORS                                                              #
-# Berends MS (m.s.berends@umcg.nl), Luz CF (c.f.luz@umcg.nl)           #
-#                                                                      #
-# LICENCE                                                              #
-# This program is free software; you can redistribute it and/or modify #
-# it under the terms of the GNU General Public License version 2.0,    #
-# as published by the Free Software Foundation.                        #
-#                                                                      #
-# This program is distributed in the hope that it will be useful,      #
-# but WITHOUT ANY WARRANTY; without even the implied warranty of       #
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the        #
-# GNU General Public License for more details.                         #
-# ==================================================================== #
-
-#' Count cases with antimicrobial results
-#'
-#' This counts all cases where antimicrobial interpretations are available. The way it can be used is equal to \code{\link{n_distinct}}. Its function is equal to \code{count_S(...) + count_IR(...)}.
-#' @inheritParams portion
-#' @export
-#' @seealso \code{\link[AMR]{count}_*} to count resistant and susceptibile isolates per interpretation type.\cr
-#' \code{\link{portion}_*} to calculate microbial resistance and susceptibility.
-#' @examples
-#' library(dplyr)
-#'
-#' septic_patients %>%
-#'   group_by(hospital_id) %>%
-#'   summarise(cipro_p = portion_S(cipr, as_percent = TRUE),
-#'             cipro_n = n_rsi(cipr),
-#'             genta_p = portion_S(gent, as_percent = TRUE),
-#'             genta_n = n_rsi(gent),
-#'             combination_p = portion_S(cipr, gent, as_percent = TRUE),
-#'             combination_n = n_rsi(cipr, gent))
-n_rsi <- function(...) {
-  # only print warnings once, if needed
-  count_S(...) + suppressWarnings(count_IR(...))
-}
--- a/R/portion.R
+++ b/R/portion.R
@ -22,7 +22,7 @@
 #'
 #' \code{portion_R} and \code{portion_IR} can be used to calculate resistance, \code{portion_S} and \code{portion_SI} can be used to calculate susceptibility.\cr
 #' @param ... one or more vectors (or columns) with antibiotic interpretations. They will be transformed internally with \code{\link{as.rsi}} if needed. Use multiple columns to calculate (the lack of) co-resistance: the probability where one of two drugs have a resistant or susceptible result. See Examples.
-#' @param minimum minimal amount of available isolates. Any number lower than \code{minimum} will return \code{NA}. The default number of \code{30} isolates is advised by the CLSI as best practice, see Source.
+#' @param minimum minimal amount of available isolates. Any number lower than \code{minimum} will return \code{NA} with a warning. The default number of \code{30} isolates is advised by the Clinical and Laboratory Standards Institute (CLSI) as best practice, see Source.
 #' @param as_percent logical to indicate whether the output must be returned as a hundred fold with \% sign (a character). A value of \code{0.123456} will then be returned as \code{"12.3\%"}.
 #' @param data a \code{data.frame} containing columns with class \code{rsi} (see \code{\link{as.rsi}})
 #' @param translate_ab a column name of the \code{\link{antibiotics}} data set to translate the antibiotic abbreviations to, using \code{\link{abname}}. This can be set with \code{\link{getOption}("get_antibiotic_names")}.
@ -50,8 +50,7 @@
 #' @source \strong{M39 Analysis and Presentation of Cumulative Antimicrobial Susceptibility Test Data, 4th Edition}, 2014, \emph{Clinical and Laboratory Standards Institute (CLSI)}. \url{https://clsi.org/standards/products/microbiology/documents/m39/}.
 #'
 #' Wickham H. \strong{Tidy Data.} The Journal of Statistical Software, vol. 59, 2014. \url{http://vita.had.co.nz/papers/tidy-data.html}
-#' @seealso \code{\link[AMR]{count}_*} to count resistant and susceptibile isolates.\cr
-#' \code{\link{n_rsi}} to count all cases where antimicrobial results are available.
+#' @seealso \code{\link[AMR]{count}_*} to count resistant and susceptibile isolates.
 #' @keywords resistance susceptibility rsi_df rsi antibiotics isolate isolates
 #' @return Double or, when \code{as_percent = TRUE}, a character.
 #' @rdname portion
@ -92,24 +91,24 @@
 #'
 #' # Calculate co-resistance between amoxicillin/clav acid and gentamicin,
 #' # so we can see that combination therapy does a lot more than mono therapy:
-#' septic_patients %>% portion_S(amcl)       # S = 67.3%
-#' septic_patients %>% n_rsi(amcl)           # n = 1570
+#' septic_patients %>% portion_S(amcl)       # S = 67.1%
+#' septic_patients %>% count_all(amcl)       # n = 1576
 #'
 #' septic_patients %>% portion_S(gent)       # S = 74.0%
-#' septic_patients %>% n_rsi(gent)           # n = 1842
+#' septic_patients %>% count_all(gent)       # n = 1855
 #'
-#' septic_patients %>% portion_S(amcl, gent) # S = 92.1%
-#' septic_patients %>% n_rsi(amcl, gent)     # n = 1504
+#' septic_patients %>% portion_S(amcl, gent) # S = 92.0%
+#' septic_patients %>% count_all(amcl, gent) # n = 1517
 #'
 #'
 #' septic_patients %>%
 #'   group_by(hospital_id) %>%
 #'   summarise(cipro_p = portion_S(cipr, as_percent = TRUE),
-#'             cipro_n = n_rsi(cipr),
+#'             cipro_n = count_all(cipr),
 #'             genta_p = portion_S(gent, as_percent = TRUE),
-#'             genta_n = n_rsi(gent),
+#'             genta_n = count_all(gent),
 #'             combination_p = portion_S(cipr, gent, as_percent = TRUE),
-#'             combination_n = n_rsi(cipr, gent))
+#'             combination_n = count_all(cipr, gent))
 #'
 #' # Get portions S/I/R immediately of all rsi columns
 #' septic_patients %>%
@ -130,7 +129,7 @@
 #'   filter(first_isolate == TRUE,
 #'          genus == "Helicobacter") %>%
 #'   summarise(p = portion_S(amox, metr),  # amoxicillin with metronidazole
-#'             n = n_rsi(amox, metr))
+#'             n = count_all(amox, metr))
 #' }
 portion_R <- function(...,
                      minimum = 30,
@ -273,6 +272,8 @@ rsi <- function(ab1,
                as_percent = FALSE,
                ...) {

+  .Deprecated(new = paste0("portion_", interpretation))
+
  if (all(is.null(ab2))) {
    df <- tibble(ab1 = ab1)
  } else {
@ -280,19 +281,16 @@ rsi <- function(ab1,
                 ab2 = ab2)
  }

+  if (!interpretation %in% c("S", "SI", "IS", "I", "RI", "IR", "R")) {
+    stop("invalid interpretation")
+  }
+
  result <- case_when(
    interpretation == "S"             ~ portion_S(df, minimum = minimum, as_percent = FALSE),
    interpretation %in% c("SI", "IS") ~ portion_SI(df, minimum = minimum, as_percent = FALSE),
    interpretation == "I"             ~ portion_I(df, minimum = minimum, as_percent = FALSE),
    interpretation %in% c("RI", "IR") ~ portion_IR(df, minimum = minimum, as_percent = FALSE),
-    interpretation == "R"             ~ portion_R(df, minimum = minimum, as_percent = FALSE),
-    TRUE ~ -1
-  )
-  if (result == -1) {
-    stop("invalid interpretation")
-  }
-
-  .Deprecated(new = paste0("portion_", interpretation))
+    interpretation == "R"             ~ portion_R(df, minimum = minimum, as_percent = FALSE))

  if (as_percent == TRUE) {
    percent(result, force_zero = TRUE)
--- a/R/rsi_calc.R
+++ b/R/rsi_calc.R
@ -107,12 +107,15 @@ rsi_calc <- function(...,

  total <- length(x) - sum(is.na(x))
  if (total < minimum) {
-    return(NA)
+    warning("Introducing NA: only ", total, " results available (minimum set to ", minimum, ").", call. = FALSE)
+    result <- NA
+  } else {
+    result <- found / total
  }

  if (as_percent == TRUE) {
-    percent(found / total, force_zero = TRUE)
+    percent(result, force_zero = TRUE)
  } else {
-    found / total
+    result
  }
 }
--- a/data/septic_patients.rda
+++ b/data/septic_patients.rda
--- a/man/count.Rd
+++ b/man/count.Rd
@ -7,6 +7,8 @@
 \alias{count_I}
 \alias{count_SI}
 \alias{count_S}
+\alias{count_all}
+\alias{n_rsi}
 \alias{count_df}
 \title{Count isolates}
 \source{
@ -23,11 +25,15 @@ count_SI(...)

 count_S(...)

+count_all(...)
+
+n_rsi(...)
+
 count_df(data, translate_ab = getOption("get_antibiotic_names",
  "official"))
 }
 \arguments{
-\item{...}{one or more vectors (or columns) with antibiotic interpretations. They will be transformed internally with \code{\link{as.rsi}} if needed. Use multiple columns to calculate (the lack of) co-resistance: the probability where one of two drugs have a resistant or susceptible result. See Examples.}
+\item{...}{one or more vectors (or columns) with antibiotic interpretations. They will be transformed internally with \code{\link{as.rsi}} if needed.}

 \item{data}{a \code{data.frame} containing columns with class \code{rsi} (see \code{\link{as.rsi}})}

@ -42,10 +48,10 @@ These functions can be used to count resistant/susceptible microbial isolates. A
 \code{count_R} and \code{count_IR} can be used to count resistant isolates, \code{count_S} and \code{count_SI} can be used to count susceptible isolates.\cr
 }
 \details{
-\strong{Remember that you should filter your table to let it contain only first isolates!} Use \code{\link{first_isolate}} to determine them in your data set.
-
 These functions are meant to count isolates. Use the \code{\link{portion}_*} functions to calculate microbial resistance.

+\code{n_rsi} is an alias of \code{count_all}. They can be used to count all available isolates, i.e. where all input antibiotics have an available result (S, I or R). Their use is equal to \code{\link{n_distinct}}. Their function is equal to \code{count_S(...) + count_IR(...)}.
+
 \code{count_df} takes any variable from \code{data} that has an \code{"rsi"} class (created with \code{\link{as.rsi}}) and counts the amounts of R, I and S. The resulting \emph{tidy data} (see Source) \code{data.frame} will have three rows (S/I/R) and a column for each variable with class \code{"rsi"}.
 }
 \examples{
@ -60,6 +66,10 @@ count_IR(septic_patients$amox)
 count_S(septic_patients$amox)
 count_SI(septic_patients$amox)

+# Count all available isolates
+count_all(septic_patients$amox)
+n_rsi(septic_patients$amox)
+
 # Since n_rsi counts available isolates, you can
 # calculate back to count e.g. non-susceptible isolates.
 # This results in the same:
@ -69,24 +79,25 @@ portion_IR(septic_patients$amox) * n_rsi(septic_patients$amox)
 library(dplyr)
 septic_patients \%>\%
  group_by(hospital_id) \%>\%
-  summarise(R = count_R(cipr),
-            I = count_I(cipr),
-            S = count_S(cipr),
-            n = n_rsi(cipr), # the actual total; sum of all three
-            total = n())     # NOT the amount of tested isolates!
+  summarise(R  = count_R(cipr),
+            I  = count_I(cipr),
+            S  = count_S(cipr),
+            n1 = count_all(cipr), # the actual total; sum of all three
+            n2 = n_rsi(cipr),     # same - analogous to n_distinct
+            total = n())          # NOT the amount of tested isolates!

 # Count co-resistance between amoxicillin/clav acid and gentamicin,
 # so we can see that combination therapy does a lot more than mono therapy.
 # Please mind that `portion_S` calculates percentages right away instead.
-count_S(septic_patients$amcl)   # S = 1056 (67.3\%)
-n_rsi(septic_patients$amcl)     # n = 1570
+count_S(septic_patients$amcl)   # S = 1057 (67.1\%)
+count_all(septic_patients$amcl) # n = 1576

-count_S(septic_patients$gent)   # S = 1363 (74.0\%)
-n_rsi(septic_patients$gent)     # n = 1842
+count_S(septic_patients$gent)   # S = 1372 (74.0\%)
+count_all(septic_patients$gent) # n = 1855

 with(septic_patients,
-     count_S(amcl, gent))       # S = 1385 (92.1\%)
-with(septic_patients,           # n = 1504
+     count_S(amcl, gent))       # S = 1396 (92.0\%)
+with(septic_patients,           # n = 1517
     n_rsi(amcl, gent))

 # Get portions S/I/R immediately of all rsi columns
@ -102,8 +113,7 @@ septic_patients \%>\%

 }
 \seealso{
-\code{\link{portion}_*} to calculate microbial resistance and susceptibility.\cr
-\code{\link{n_rsi}} to count all cases where antimicrobial results are available.
+\code{\link{portion}_*} to calculate microbial resistance and susceptibility.
 }
 \keyword{antibiotics}
 \keyword{isolate}
--- a/man/ggplot_rsi.Rd
+++ b/man/ggplot_rsi.Rd
@ -68,7 +68,7 @@ At default, the names of antibiotics will be shown on the plots using \code{\lin

 \code{scale_y_percent} transforms the y axis to a 0 to 100\% range using \code{\link[ggplot2]{scale_continuous}}.

-\code{scale_rsi_colours} sets colours to the bars: green for S, yellow for I and red for R, using \code{\link[ggplot2]{scale_colour_brewer}}.
+\code{scale_rsi_colours} sets colours to the bars: green for S, yellow for I and red for R, using \code{\link[ggplot2]{scale_brewer}}.

 \code{theme_rsi} is a \code{ggplot \link[ggplot2]{theme}} with minimal distraction.

--- a/man/join.Rd
+++ b/man/join.Rd
@ -11,22 +11,22 @@
 \alias{anti_join_microorganisms}
 \title{Join a table with \code{microorganisms}}
 \usage{
-inner_join_microorganisms(x, by = "mo", suffix = c("2", ""), ...)
+inner_join_microorganisms(x, by = NULL, suffix = c("2", ""), ...)

-left_join_microorganisms(x, by = "mo", suffix = c("2", ""), ...)
+left_join_microorganisms(x, by = NULL, suffix = c("2", ""), ...)

-right_join_microorganisms(x, by = "mo", suffix = c("2", ""), ...)
+right_join_microorganisms(x, by = NULL, suffix = c("2", ""), ...)

-full_join_microorganisms(x, by = "mo", suffix = c("2", ""), ...)
+full_join_microorganisms(x, by = NULL, suffix = c("2", ""), ...)

-semi_join_microorganisms(x, by = "mo", ...)
+semi_join_microorganisms(x, by = NULL, ...)

-anti_join_microorganisms(x, by = "mo", ...)
+anti_join_microorganisms(x, by = NULL, ...)
 }
 \arguments{
-\item{x}{existing table to join, also supports character vectors}
+\item{x}{existing table to join, or character vector}

-\item{by}{a variable to join by - could be a column name of \code{x} with values that exist in \code{microorganisms$mo} (like \code{by = "bacteria_id"}), or another column in \code{\link{microorganisms}} (but then it should be named, like \code{by = c("my_genus_species" = "fullname")})}
+\item{by}{a variable to join by - if left empty will search for a column with class \code{mo} (created with \code{\link{as.mo}}) or will be \code{"mo"} if that column name exists in \code{x}, could otherwise be a column name of \code{x} with values that exist in \code{microorganisms$mo} (like \code{by = "bacteria_id"}), or another column in \code{\link{microorganisms}} (but then it should be named, like \code{by = c("my_genus_species" = "fullname")})}

 \item{suffix}{if there are non-joined duplicate variables in \code{x} and \code{y}, these suffixes will be added to the output to disambiguate them. Should be a character vector of length 2.}

@ -36,10 +36,11 @@ anti_join_microorganisms(x, by = "mo", ...)
 Join the dataset \code{\link{microorganisms}} easily to an existing table or character vector.
 }
 \details{
-As opposed to the \code{\link[dplyr]{join}} functions of \code{dplyr}, characters vectors are supported and at default existing columns will get a suffix \code{"2"} and the newly joined columns will not get a suffix. See \code{\link[dplyr]{join}} for more information.
+\strong{Note:} As opposed to the \code{\link[dplyr]{join}} functions of \code{dplyr}, characters vectors are supported and at default existing columns will get a suffix \code{"2"} and the newly joined columns will not get a suffix. See \code{\link[dplyr]{join}} for more information.
 }
 \examples{
-left_join_microorganisms("STAAUR")
+left_join_microorganisms(as.mo("K. pneumoniae"))
+left_join_microorganisms("B_KLBSL_PNE")

 library(dplyr)
 septic_patients \%>\% left_join_microorganisms()
@ -47,10 +48,10 @@ septic_patients \%>\% left_join_microorganisms()
 df <- data.frame(date = seq(from = as.Date("2018-01-01"),
                            to = as.Date("2018-01-07"),
                            by = 1),
-                 bacteria_id = c("STAAUR", "STAAUR", "STAAUR", "STAAUR",
-                                 "ESCCOL", "ESCCOL", "ESCCOL"),
+                 bacteria = as.mo(c("S. aureus", "MRSA", "MSSA", "STAAUR",
+                                    "E. coli", "E. coli", "E. coli")),
                 stringsAsFactors = FALSE)
 colnames(df)
-df2 <- left_join_microorganisms(df, "bacteria_id")
-colnames(df2)
+df_joined <- left_join_microorganisms(df, "bacteria")
+colnames(df_joined)
 }
--- a/man/n_rsi.Rd
+++ b/man/n_rsi.Rd
@ -1,30 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/n_rsi.R
-\name{n_rsi}
-\alias{n_rsi}
-\title{Count cases with antimicrobial results}
-\usage{
-n_rsi(...)
-}
-\arguments{
-\item{...}{one or more vectors (or columns) with antibiotic interpretations. They will be transformed internally with \code{\link{as.rsi}} if needed. Use multiple columns to calculate (the lack of) co-resistance: the probability where one of two drugs have a resistant or susceptible result. See Examples.}
-}
-\description{
-This counts all cases where antimicrobial interpretations are available. The way it can be used is equal to \code{\link{n_distinct}}. Its function is equal to \code{count_S(...) + count_IR(...)}.
-}
-\examples{
-library(dplyr)
-
-septic_patients \%>\%
-  group_by(hospital_id) \%>\%
-  summarise(cipro_p = portion_S(cipr, as_percent = TRUE),
-            cipro_n = n_rsi(cipr),
-            genta_p = portion_S(gent, as_percent = TRUE),
-            genta_n = n_rsi(gent),
-            combination_p = portion_S(cipr, gent, as_percent = TRUE),
-            combination_n = n_rsi(cipr, gent))
-}
-\seealso{
-\code{\link[AMR]{count}_*} to count resistant and susceptibile isolates per interpretation type.\cr
-\code{\link{portion}_*} to calculate microbial resistance and susceptibility.
-}
--- a/man/portion.Rd
+++ b/man/portion.Rd
@ -31,7 +31,7 @@ portion_df(data, translate_ab = getOption("get_antibiotic_names",
 \arguments{
 \item{...}{one or more vectors (or columns) with antibiotic interpretations. They will be transformed internally with \code{\link{as.rsi}} if needed. Use multiple columns to calculate (the lack of) co-resistance: the probability where one of two drugs have a resistant or susceptible result. See Examples.}

-\item{minimum}{minimal amount of available isolates. Any number lower than \code{minimum} will return \code{NA}. The default number of \code{30} isolates is advised by the CLSI as best practice, see Source.}
+\item{minimum}{minimal amount of available isolates. Any number lower than \code{minimum} will return \code{NA} with a warning. The default number of \code{30} isolates is advised by the Clinical and Laboratory Standards Institute (CLSI) as best practice, see Source.}

 \item{as_percent}{logical to indicate whether the output must be returned as a hundred fold with \% sign (a character). A value of \code{0.123456} will then be returned as \code{"12.3\%"}.}

@ -104,24 +104,24 @@ septic_patients \%>\%

 # Calculate co-resistance between amoxicillin/clav acid and gentamicin,
 # so we can see that combination therapy does a lot more than mono therapy:
-septic_patients \%>\% portion_S(amcl)       # S = 67.3\%
-septic_patients \%>\% n_rsi(amcl)           # n = 1570
+septic_patients \%>\% portion_S(amcl)       # S = 67.1\%
+septic_patients \%>\% count_all(amcl)       # n = 1576

 septic_patients \%>\% portion_S(gent)       # S = 74.0\%
-septic_patients \%>\% n_rsi(gent)           # n = 1842
+septic_patients \%>\% count_all(gent)       # n = 1855

-septic_patients \%>\% portion_S(amcl, gent) # S = 92.1\%
-septic_patients \%>\% n_rsi(amcl, gent)     # n = 1504
+septic_patients \%>\% portion_S(amcl, gent) # S = 92.0\%
+septic_patients \%>\% count_all(amcl, gent) # n = 1517


 septic_patients \%>\%
  group_by(hospital_id) \%>\%
  summarise(cipro_p = portion_S(cipr, as_percent = TRUE),
-            cipro_n = n_rsi(cipr),
+            cipro_n = count_all(cipr),
            genta_p = portion_S(gent, as_percent = TRUE),
-            genta_n = n_rsi(gent),
+            genta_n = count_all(gent),
            combination_p = portion_S(cipr, gent, as_percent = TRUE),
-            combination_n = n_rsi(cipr, gent))
+            combination_n = count_all(cipr, gent))

 # Get portions S/I/R immediately of all rsi columns
 septic_patients \%>\%
@ -142,12 +142,11 @@ my_table \%>\%
  filter(first_isolate == TRUE,
         genus == "Helicobacter") \%>\%
  summarise(p = portion_S(amox, metr),  # amoxicillin with metronidazole
-            n = n_rsi(amox, metr))
+            n = count_all(amox, metr))
 }
 }
 \seealso{
-\code{\link[AMR]{count}_*} to count resistant and susceptibile isolates.\cr
-\code{\link{n_rsi}} to count all cases where antimicrobial results are available.
+\code{\link[AMR]{count}_*} to count resistant and susceptibile isolates.
 }
 \keyword{antibiotics}
 \keyword{isolate}
--- a/man/rsi.Rd
+++ b/man/rsi.Rd
@ -12,7 +12,7 @@ rsi(ab1, ab2 = NULL, interpretation = "IR", minimum = 30,

 \item{interpretation}{antimicrobial interpretation to check for}

-\item{minimum}{minimal amount of available isolates. Any number lower than \code{minimum} will return \code{NA}. The default number of \code{30} isolates is advised by the CLSI as best practice, see Source.}
+\item{minimum}{minimal amount of available isolates. Any number lower than \code{minimum} will return \code{NA} with a warning. The default number of \code{30} isolates is advised by the Clinical and Laboratory Standards Institute (CLSI) as best practice, see Source.}

 \item{as_percent}{logical to indicate whether the output must be returned as a hundred fold with \% sign (a character). A value of \code{0.123456} will then be returned as \code{"12.3\%"}.}

--- a/tests/testthat/test-atc.R
+++ b/tests/testthat/test-atc.R
@ -10,8 +10,8 @@ test_that("atc_property works", {
    expect_equal(atc_property("J01CA04", property = "DDD"),
                 atc_ddd("J01CA04"))

-    expect_identical(atc_property("J01CA04", property = "Groups"),
-                     atc_groups("J01CA04"))
+    # expect_identical(atc_property("J01CA04", property = "Groups"),
+    #                  atc_groups("J01CA04"))

    expect_warning(atc_property("ABCDEFG", property = "DDD"))

--- a/tests/testthat/test-count.R
+++ b/tests/testthat/test-count.R
@ -10,8 +10,13 @@ test_that("counts work", {
  expect_equal(count_S(septic_patients$amox) + count_I(septic_patients$amox),
               count_SI(septic_patients$amox))

+  library(dplyr)
  expect_equal(septic_patients %>% count_S(amcl), 1057)
  expect_equal(septic_patients %>% count_S(amcl, gent), 1396)
+  expect_equal(septic_patients %>% count_all(amcl, gent), 1517)
+  expect_identical(septic_patients %>% count_all(amcl, gent),
+                   septic_patients %>% count_S(amcl, gent) +
+                     septic_patients %>% count_IR(amcl, gent))

  # count of cases
  expect_equal(septic_patients %>%
--- a/tests/testthat/test-freq.R
+++ b/tests/testthat/test-freq.R
@ -43,6 +43,11 @@ test_that("frequency table works", {
  # list
  expect_output(print(freq(list(age = septic_patients$age))))
  expect_output(print(freq(list(age = septic_patients$age, gender = septic_patients$gender))))
+  # difftime
+  expect_output(suppressWarnings(print(
+    freq(difftime(Sys.time(),
+                  Sys.time() - runif(5, min = 0, max = 60 * 60 * 24),
+                  units = "hours")))))

  library(dplyr)
  expect_output(septic_patients %>% select(1:2) %>% freq() %>% print())
@ -119,7 +124,7 @@ test_that("frequency table works", {
  ))
  expect_output(print(
    diff(freq(septic_patients$age),
-         freq(septic_patients$age)) # same
+         freq(septic_patients$age)) # "No differences found."
  ))
  expect_error(print(
    diff(freq(septic_patients$amcl),
--- a/tests/testthat/test-portion.R
+++ b/tests/testthat/test-portion.R
@ -68,11 +68,11 @@ test_that("portions works", {
  expect_error(portion_S("test", as_percent = "test"))

  # check too low amount of isolates
-  expect_identical(portion_R(septic_patients$amox, minimum = nrow(septic_patients) + 1),
+  expect_identical(suppressWarnings(portion_R(septic_patients$amox, minimum = nrow(septic_patients) + 1)),
                   NA)
-  expect_identical(portion_I(septic_patients$amox, minimum = nrow(septic_patients) + 1),
+  expect_identical(suppressWarnings(portion_I(septic_patients$amox, minimum = nrow(septic_patients) + 1)),
                   NA)
-  expect_identical(portion_S(septic_patients$amox, minimum = nrow(septic_patients) + 1),
+  expect_identical(suppressWarnings(portion_S(septic_patients$amox, minimum = nrow(septic_patients) + 1)),
                   NA)

  # warning for speed loss