(v.1.5.0.9000) implementation of EUCAST rules v11 (2021)

2025-07-08 13:21:50 +02:00 · 2021-01-12 22:08:04 +01:00
parent 3b84b8be75
commit d014955ce0
93 changed files with 3631 additions and 374 deletions
--- a/R/aa_helper_functions.R
+++ b/R/aa_helper_functions.R
@ -81,10 +81,13 @@ check_dataset_integrity <- function() {
  overwritten <- data_in_pkg[data_in_pkg %in% data_in_globalenv]
  # exception for example_isolates
  overwritten <- overwritten[overwritten != "example_isolates"]
-  stop_if(length(overwritten) > 0,
-          "the following data set is overwritten by your global environment and prevents the AMR package from working correctly:\n",
-          paste0("'", overwritten, "'", collapse = ", "),
-          ".\nPlease rename your object before using this function.", call = FALSE)
+  if (length(overwritten) > 0) {
+    warning_(ifelse(length(overwritten) == 1,
+                    "The following data set is overwritten by your global environment and prevents the AMR package from working correctly: ",
+                    "The following data sets are overwritten by your global environment and prevent the AMR package from working correctly: "),
+             paste0("'", overwritten, "'", collapse = ", "),
+             ".\nPlease rename your object(s).", call = FALSE)
+  }
  # check if other packages did not overwrite our data sets
  tryCatch({
    check_microorganisms <- all(c("mo", "fullname", "kingdom", "phylum",
@ -439,6 +442,20 @@ create_ab_documentation <- function(ab) {
  out
 }

+vector_or <- function(v, quotes = TRUE, reverse = FALSE) {
+  # makes unique and sorts, and this also removed NAs
+  v <- sort(unique(v))
+   if (length(v) == 1) {
+    return(paste0(ifelse(quotes, '"', ""), v, ifelse(quotes, '"', "")))
+  }
+  if (reverse == TRUE) {
+    v <- rev(v)
+  }
+  # all commas except for last item, so will become '"val1", "val2", "val3" or "val4"'
+  paste0(paste0(ifelse(quotes, '"', ""), v[seq_len(length(v) - 1)], ifelse(quotes, '"', ""), collapse = ", "),
+         " or ", paste0(ifelse(quotes, '"', ""), v[length(v)], ifelse(quotes, '"', "")))
+}
+
 # a check for every single argument in all functions
 meet_criteria <- function(object,
                          allow_class = NULL,
@ -463,15 +480,6 @@ meet_criteria <- function(object,
    return(invisible())
  }

-  vector_or <- function(v, quotes) {
-    if (length(v) == 1) {
-      return(paste0(ifelse(quotes, '"', ""), v, ifelse(quotes, '"', "")))
-    }
-    # all commas except for last item, so will become '"val1", "val2", "val3" or "val4"'
-    paste0(paste0(ifelse(quotes, '"', ""), v[seq_len(length(v) - 1)], ifelse(quotes, '"', ""), collapse = ", "),
-           " or ", paste0(ifelse(quotes, '"', ""), v[length(v)], ifelse(quotes, '"', "")))
-  }
-
  if (!is.null(allow_class)) {
    stop_ifnot(inherits(object, allow_class), "argument `", obj_name,
               "` must ", # ifelse(allow_NULL, "be NULL or must ", ""),
@ -527,24 +535,38 @@ meet_criteria <- function(object,
 }

 get_current_data <- function(arg_name, call) {
+  # try dplyr::cur_data_all() first to support dplyr groups
+  # only useful for e.g. dplyr::filter(), dplyr::mutate() and dplyr::summarise()
+  # not useful (throws error) with e.g. dplyr::select() - but that will be caught later in this function
+  cur_data_all <- import_fn("cur_data_all", "dplyr", error_on_fail = FALSE)
+  if (!is.null(cur_data_all)) {
+    out <- tryCatch(cur_data_all(), error = function(e) NULL)
+    if (is.data.frame(out)) {
+      return(out)
+    }
+  }
+  
  if (as.double(R.Version()$major) + (as.double(R.Version()$minor) / 10) < 3.2) {
+    # R-3.0 and R-3.1 do not have an `x` element in the call stack, rendering this function useless
    if (is.na(arg_name)) {
+      # like in carbapenems() etc.
      warning_("this function can only be used in R >= 3.2", call = call)
      return(data.frame())
    } else {
      stop_("argument `", arg_name, "` is missing with no default", call = call)
    }
  }
-  
+
  # try a (base R) method, by going over the complete system call stack with sys.frames()
  not_set <- TRUE
  frms <- lapply(sys.frames(), function(el) {
-    if (".Generic" %in% names(el)) {
-      if (tryCatch(not_set == TRUE && ".data" %in% names(el) && is.data.frame(el$`.data`), error = function(e) FALSE)) {
+    if (not_set == TRUE && ".Generic" %in% names(el)) {
+      if (tryCatch(".data" %in% names(el) && is.data.frame(el$`.data`), error = function(e) FALSE)) {
        # dplyr? - an element `.data` will be in the system call stack
+        # will be used in dplyr::select() (but not in dplyr::filter(), dplyr::mutate() or dplyr::summarise())
        not_set <<- FALSE
        el$`.data`
-      } else if (tryCatch(not_set == TRUE && any(c("x", "xx") %in% names(el)), error = function(e) FALSE)) {
+      } else if (tryCatch(any(c("x", "xx") %in% names(el)), error = function(e) FALSE)) {
        # otherwise try base R:
        # an element `x` will be in this environment for only cols, e.g. `example_isolates[, carbapenems()]`
        # an element `xx` will be in this environment for rows + cols, e.g. `example_isolates[c(1:3), carbapenems()]`
@ -574,9 +596,7 @@ get_current_data <- function(arg_name, call) {
    stop_("this function must be used inside valid dplyr selection verbs or inside a data.frame call", 
          call = call)
  } else {
-    stop_("argument `", arg_name, "` is missing with no default ",
-          "or function not used inside a valid dplyr verb", 
-          call = call)
+    stop_("argument `", arg_name, "` is missing with no default", call = call)
  }
 }

@ -595,19 +615,19 @@ unique_call_id <- function(entire_session = FALSE) {
 remember_thrown_message <- function(fn, entire_session = FALSE) {
  # this is to prevent that messages/notes will be printed for every dplyr group
  # e.g. this would show a msg 4 times: example_isolates %>% group_by(hospital_id) %>% filter(mo_is_gram_negative())
-  assign(x = paste0("thrown_msg_", fn),
+  assign(x = paste0("thrown_msg.", fn),
         value = unique_call_id(entire_session = entire_session),
         envir = pkg_env)
 }

 message_not_thrown_before <- function(fn, entire_session = FALSE) {
-  is.null(pkg_env[[paste0("thrown_msg_", fn)]]) || !identical(pkg_env[[paste0("thrown_msg_", fn)]], unique_call_id(entire_session))
+  is.null(pkg_env[[paste0("thrown_msg.", fn)]]) || !identical(pkg_env[[paste0("thrown_msg.", fn)]], unique_call_id(entire_session))
 }

 reset_all_thrown_messages <- function() {
  # for unit tests, where the environment and highest system call do not change
  pkg_env_contents <- ls(envir = pkg_env)
-  rm(list = pkg_env_contents[pkg_env_contents %like% "^thrown_msg_"],
+  rm(list = pkg_env_contents[pkg_env_contents %like% "^thrown_msg."],
     envir = pkg_env)
 }

--- a/R/ab.R
+++ b/R/ab.R
@ -160,6 +160,12 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = TRUE, ...) {
      from_text <- character(0)
    }
    
+    # old code for phenoxymethylpenicillin (Peni V)
+    if (x[i] == "PNV") {
+      x_new[i] <- "PHN"
+      next
+    }
+    
    # exact name
    found <- antibiotics[which(AB_lookup$generalised_name == x[i]), ]$ab
    if (length(found) > 0) {
--- a/R/data.R
+++ b/R/data.R
@ -95,7 +95,14 @@
 #' - `source`\cr Either "CoL", "DSMZ" (see Source) or "manually added"
 #' - `prevalence`\cr Prevalence of the microorganism, see [as.mo()]
 #' - `snomed`\cr SNOMED code of the microorganism. Use [mo_snomed()] to retrieve it quickly, see [mo_property()].
-#' @details Manually added were:
+#' @details 
+#' Please note that entries are only based on the Catalogue of Life and the LPSN (see below). Since these sources incorporate entries based on (recent) publications in the International Journal of Systematic and Evolutionary Microbiology (IJSEM), it can happen that the year of publication is sometimes later than one might expect.
+#' 
+#' For example, *Staphylococcus pettenkoferi* was newly named in Diagnostic Microbiology and Infectious Disease in 2002 (PMID 12106949), but it was not before 2007 that a publication in IJSEM followed (PMID 17625191). Consequently, the AMR package returns 2007 for `mo_year("S. pettenkoferi")`.
+#' 
+#' ### Manually additions
+#' For convenience, some entries were added manually:
+#' 
 #' - 11 entries of *Streptococcus* (beta-haemolytic: groups A, B, C, D, F, G, H, K and unspecified; other: viridans, milleri)
 #' - 2 entries of *Staphylococcus* (coagulase-negative (CoNS) and coagulase-positive (CoPS))
 #' - 3 entries of *Trichomonas* (*Trichomonas vaginalis*, and its family and genus)
@ -269,3 +276,21 @@ catalogue_of_life <- list(
 #'   # [1] "Enterococcus casseliflavus" "Enterococcus gallinarum"
 #' }
 "intrinsic_resistant"
+
+#' Data set with treatment dosages as defined by EUCAST
+#'
+#' EUCAST breakpoints used in this package are based on the dosages in this data set. They can be retrieved with [eucast_dosage()].
+#' @format A [data.frame] with `r format(nrow(dosage), big.mark = ",")` observations and `r ncol(dosage)` variables:
+#' - `ab`\cr Antibiotic ID as used in this package (such as `AMC`), using the official EARS-Net (European Antimicrobial Resistance Surveillance Network) codes where available
+#' - `name`\cr Official name of the antimicrobial agent as used by WHONET/EARS-Net or the WHO
+#' - `type`\cr Type of the dosage, either `r vector_or(dosage$type)`
+#' - `dose`\cr Dose, such as "2 g" or "25 mg/kg"
+#' - `dose_times`\cr Dose, such as "2 g" or "25 mg/kg"
+#' - `administration`\cr Route of administration, either `r vector_or(dosage$administration)`
+#' - `notes`\cr Additional dosage notes
+#' - `original_txt`\cr Original text in the PDF file of EUCAST
+#' - `eucast_version`\cr Version number of the EUCAST Clinical Breakpoints guideline to which these dosages apply
+#' @details `r format_eucast_version_nr(11.0)` are based on the dosages in this data set.
+#' @inheritSection AMR Reference data publicly available
+#' @inheritSection AMR Read more on our website!
+"dosage"
--- a/R/eucast_rules.R
+++ b/R/eucast_rules.R
@ -23,12 +23,16 @@
 # how to conduct AMR analysis: https://msberends.github.io/AMR/        #
 # ==================================================================== #

-# add new version numbers here, and add the rules themselves to "data-raw/eucast_rules.tsv"
-# (running "data-raw/internals.R" will process that TSV file)
-EUCAST_VERSION_BREAKPOINTS <- list("10.0" = list(version_txt = "v10.0",
+# add new version numbers here, and add the rules themselves to "data-raw/eucast_rules.tsv" and rsi_translation
+# (running "data-raw/internals.R" will process the TSV file)
+EUCAST_VERSION_BREAKPOINTS <- list("11.0" = list(version_txt = "v11.0",
+                                                 year = 2021, 
+                                                 title = "'EUCAST Clinical Breakpoint Tables'",
+                                                 url = "https://www.eucast.org/clinical_breakpoints/"),
+                                   "10.0" = list(version_txt = "v10.0",
                                                 year = 2020, 
-                                                 title = "'EUCAST Clinical Breakpoints'",
-                                                 url = "https://www.eucast.org/clinical_breakpoints/"))
+                                                 title = "'EUCAST Clinical Breakpoint Tables'",
+                                                 url = "https://www.eucast.org/ast_of_bacteria/previous_versions_of_documents/"))
 EUCAST_VERSION_EXPERT_RULES <- list("3.1" = list(version_txt = "v3.1",
                                                 year = 2016, 
                                                 title = "'EUCAST Expert Rules, Intrinsic Resistance and Exceptional Phenotypes'",
@ -44,17 +48,17 @@ format_eucast_version_nr <- function(version, markdown = TRUE) {
  version <- format(version, nsmall = 1)
  if (markdown == TRUE) {
    paste0("[", lst[[version]]$title, " ", lst[[version]]$version_txt, "](", lst[[version]]$url, ")",
-           " from ", lst[[version]]$year)
+           " (", lst[[version]]$year, ")")
  } else {
    paste0(lst[[version]]$title, " ", lst[[version]]$version_txt,
-           " from ", lst[[version]]$year)
+           " (", lst[[version]]$year, ")")
  }
 }

 #' Apply EUCAST rules
 #' 
 #' @description
-#' Apply rules for clinical breakpoints and intrinsic resistance as defined by the European Committee on Antimicrobial Susceptibility Testing (EUCAST, <https://eucast.org>), see *Source*.
+#' Apply rules for clinical breakpoints and intrinsic resistance as defined by the European Committee on Antimicrobial Susceptibility Testing (EUCAST, <https://eucast.org>), see *Source*. Use [eucast_dosage()] to get advised dosages of a certain bug-drug combination, which is based on the [dosage] data set.
 #' 
 #' To improve the interpretation of the antibiogram before EUCAST rules are applied, some non-EUCAST rules can applied at default, see Details.
 #' @inheritSection lifecycle Stable lifecycle
@ -62,11 +66,12 @@ format_eucast_version_nr <- function(version, markdown = TRUE) {
 #' @param info a logical to indicate whether progress should be printed to the console, defaults to only print while in interactive sessions
 #' @param rules a character vector that specifies which rules should be applied. Must be one or more of `"breakpoints"`, `"expert"`, `"other"`, `"all"`, and defaults to `c("breakpoints", "expert")`. The default value can be set to another value, e.g. using `options(AMR_eucastrules = "all")`.
 #' @param verbose a [logical] to turn Verbose mode on and off (default is off). In Verbose mode, the function does not apply rules to the data, but instead returns a data set in logbook form with extensive info about which rows and columns would be effected and in which way. Using Verbose mode takes a lot more time.
-#' @param version_breakpoints the version number to use for the EUCAST Clinical Breakpoints guideline. Currently supported: `r paste0(names(EUCAST_VERSION_BREAKPOINTS), collapse = ", ")`.
-#' @param version_expertrules the version number to use for the EUCAST Expert Rules and Intrinsic Resistance guideline. Currently supported: `r paste0(names(EUCAST_VERSION_EXPERT_RULES), collapse = ", ")`.
+#' @param version_breakpoints the version number to use for the EUCAST Clinical Breakpoints guideline. Can be either `r vector_or(names(EUCAST_VERSION_BREAKPOINTS), reverse = TRUE)`.
+#' @param version_expertrules the version number to use for the EUCAST Expert Rules and Intrinsic Resistance guideline. Can be either `r vector_or(names(EUCAST_VERSION_EXPERT_RULES), reverse = TRUE)`.
 #' @param ampc_cephalosporin_resistance a character value that should be applied for AmpC de-repressed cephalosporin-resistant mutants, defaults to `NA`. Currently only works when `version_expertrules` is `3.2`; '*EUCAST Expert Rules v3.2 on Enterobacterales*' states that susceptible (S) results of cefotaxime, ceftriaxone and ceftazidime should be reported with a note, or results should be suppressed (emptied) for these agents. A value of `NA` for this argument will remove results for these agents, while e.g. a value of `"R"` will make the results for these agents resistant. Use `NULL` to not alter the results for AmpC de-repressed cephalosporin-resistant mutants. \cr For *EUCAST Expert Rules* v3.2, this rule applies to: *`r gsub("[)(^]", "", gsub("|", ", ", eucast_rules_file[which(eucast_rules_file$reference.version == 3.2 & eucast_rules_file$reference.rule %like% "ampc"), "this_value"][1], fixed = TRUE))`*.
-#' 
 #' @param ... column name of an antibiotic, please see section *Antibiotics* below
+#' @param ab any (vector of) text that can be coerced to a valid antibiotic code with [as.ab()]
+#' @param administration route of administration, either `r vector_or(dosage$administration)`
 #' @inheritParams first_isolate
 #' @details
 #' **Note:** This function does not translate MIC values to RSI values. Use [as.rsi()] for that. \cr
@ -101,6 +106,7 @@ format_eucast_version_nr <- function(version, markdown = TRUE) {
 #' - EUCAST Intrinsic Resistance and Unusual Phenotypes. Version 3.2, 2020. [(link)](https://www.eucast.org/fileadmin/src/media/PDFs/EUCAST_files/Expert_Rules/2020/Intrinsic_Resistance_and_Unusual_Phenotypes_Tables_v3.2_20200225.pdf)
 #' - EUCAST Breakpoint tables for interpretation of MICs and zone diameters. Version 9.0, 2019. [(link)](https://www.eucast.org/fileadmin/src/media/PDFs/EUCAST_files/Breakpoint_tables/v_9.0_Breakpoint_Tables.xlsx)
 #' - EUCAST Breakpoint tables for interpretation of MICs and zone diameters. Version 10.0, 2020. [(link)](https://www.eucast.org/fileadmin/src/media/PDFs/EUCAST_files/Breakpoint_tables/v_10.0_Breakpoint_Tables.xlsx)
+#' - EUCAST Breakpoint tables for interpretation of MICs and zone diameters. Version 11.0, 2021. [(link)](https://www.eucast.org/fileadmin/src/media/PDFs/EUCAST_files/Breakpoint_tables/v_11.0_Breakpoint_Tables.xlsx)
 #' @inheritSection AMR Reference data publicly available
 #' @inheritSection AMR Read more on our website!
 #' @examples
@ -144,12 +150,14 @@ format_eucast_version_nr <- function(version, markdown = TRUE) {
 #' # containing all details about the transformations:
 #' c <- eucast_rules(a, verbose = TRUE)
 #' }
+#' 
+#' eucast_dosage(c("tobra", "genta", "cipro"), "iv")
 eucast_rules <- function(x,
                         col_mo = NULL,
                         info = interactive(),
                         rules = getOption("AMR_eucastrules", default = c("breakpoints", "expert")),
                         verbose = FALSE,
-                         version_breakpoints = 10.0,
+                         version_breakpoints = 11.0,
                         version_expertrules = 3.2,
                         ampc_cephalosporin_resistance = NA,
                         ...) {
@ -1168,3 +1176,26 @@ edit_rsi <- function(x,
  }
  return(track_changes)
 }
+
+#' @rdname eucast_rules
+#' @export
+eucast_dosage <- function(ab, administration = "iv", version_breakpoints = 11.0) {
+  # show used version_breakpoints number once per session (pkg_env will reload every session)
+  if (message_not_thrown_before(paste0("eucast_dosage_v", gsub("[^0-9]", "", version_breakpoints)), entire_session = TRUE)) {
+    message_("Dosages for antimicrobial drugs, as meant for ",
+             format_eucast_version_nr(version_breakpoints, markdown = FALSE), ". ",
+             font_red("This note will be shown once per session."))
+    remember_thrown_message(paste0("eucast_dosage_v", gsub("[^0-9]", "", version_breakpoints)), entire_session = TRUE)
+  }
+  ab <- as.ab(ab)
+  out <- character(length(ab))
+  for (i in seq_len(length(ab))) {
+    df <- data.frame(ab = ab[i], stringsAsFactors = FALSE, administration = administration) %pm>%
+      pm_inner_join(AMR::dosage, by = c("ab", "administration")) %pm>%
+      pm_mutate(txt = paste0(gsub("_", " ", type), ": ", dose_times, "x ", dose, " ", administration), perl = TRUE)
+    out[i] <- paste(df$txt, collapse = ", ")
+  }
+  names(out) <- ab_name(ab, language = NULL)
+  out[out == ""] <- NA_character_
+  out
+}
--- a/R/filter_ab_class.R
+++ b/R/filter_ab_class.R
@ -70,10 +70,11 @@
 #'     filter_aminoglycosides("R", "all") %>%
 #'     filter_fluoroquinolones("R", "all")
 #'   
-#'   # with dplyr 1.0.0 and higher (that adds 'across()'), this is equal:
+#'   # with dplyr 1.0.0 and higher (that adds 'across()'), this is all equal:
 #'   # (though the row names on the first are more correct)
 #'   example_isolates %>% filter_carbapenems("R", "all")
 #'   example_isolates %>% filter(across(carbapenems(), ~. == "R"))
+#'   example_isolates %>% filter(across(carbapenems(), function(x) x == "R"))
 #' }
 #' }
 filter_ab_class <- function(x,
@ -129,7 +130,7 @@ filter_ab_class <- function(x,
  # get the columns with a group names in the chosen ab class
  agents <- ab_in_data[names(ab_in_data) %in% ab_reference$ab]
  if (length(agents) == 0) {
-    message_("NOTE: no antimicrobial agents of class ", ab_group, 
+    message_("no antimicrobial agents of class ", ab_group, 
             " found (such as ", find_ab_names(ab_class, 2), 
             "), data left unchanged.")
    return(x.bak)
--- a/R/first_isolate.R
+++ b/R/first_isolate.R
@ -27,7 +27,7 @@
 #'
 #' Determine first (weighted) isolates of all microorganisms of every patient per episode and (if needed) per specimen type. To determine patient episodes not necessarily based on microorganisms, use [is_new_episode()] that also supports grouping with the `dplyr` package.
 #' @inheritSection lifecycle Stable lifecycle
-#' @param x a [data.frame] containing isolates. Can be left blank when used inside `dplyr` verbs, such as [`filter()`][dplyr::filter()], [`mutate()`][dplyr::mutate()] and [`summarise()`][dplyr::summarise()].
+#' @param x a [data.frame] containing isolates. Can be left blank for automatic determination.
 #' @param col_date column name of the result date (or date that is was received on the lab), defaults to the first column with a date class
 #' @param col_patient_id column name of the unique IDs of the patients, defaults to the first column that starts with 'patient' or 'patid' (case insensitive)
 #' @param col_mo column name of the IDs of the microorganisms (see [as.mo()]), defaults to the first column of class [`mo`]. Values will be coerced using [as.mo()].
--- a/R/isolate_identifier.R
+++ b/R/isolate_identifier.R
@ -0,0 +1,128 @@
+# ==================================================================== #
+# TITLE                                                                #
+# Antimicrobial Resistance (AMR) Analysis for R                        #
+#                                                                      #
+# SOURCE                                                               #
+# https://github.com/msberends/AMR                                     #
+#                                                                      #
+# LICENCE                                                              #
+# (c) 2018-2021 Berends MS, Luz CF et al.                              #
+# Developed at the University of Groningen, the Netherlands, in        #
+# collaboration with non-profit organisations Certe Medical            #
+# Diagnostics & Advice, and University Medical Center Groningen.       # 
+#                                                                      #
+# This R package is free software; you can freely use and distribute   #
+# it for both personal and commercial purposes under the terms of the  #
+# GNU General Public License version 2.0 (GNU GPL-2), as published by  #
+# the Free Software Foundation.                                        #
+# We created this package for both routine data analysis and academic  #
+# research and it was publicly released in the hope that it will be    #
+# useful, but it comes WITHOUT ANY WARRANTY OR LIABILITY.              #
+#                                                                      #
+# Visit our website for the full manual and a complete tutorial about  #
+# how to conduct AMR analysis: https://msberends.github.io/AMR/        #
+# ==================================================================== #
+
+#' Create identifier of an isolate
+#' 
+#' This function will paste the microorganism code with all antimicrobial results into one string for each row in a data set. This is useful to compare isolates, e.g. between institutions or regions, when there is no genotyping available.
+#' @inheritSection lifecycle Maturing lifecycle
+#' @inheritParams eucast_rules
+#' @param cols_ab a character vector of column names of `x`, or (a combination with) an [antibiotic selector function]([ab_class()]), such as [carbapenems()] and [aminoglysides()]
+#' @export
+#' @inheritSection AMR Read more on our website!
+#' @examples 
+#' # automatic selection of microorganism and antibiotics (i.e., all <rsi> columns, see ?as.rsi)
+#' x <- isolate_identifier(example_isolates)
+#' 
+#' # ignore microorganism codes, only use antimicrobial results
+#' x <- isolate_identifier(example_isolates, col_mo = FALSE, cols_ab = c("AMX", "TZP", "GEN", "TOB"))
+#' 
+#' # select antibiotics from certain antibiotic classes
+#' x <- isolate_identifier(example_isolates, cols_ab = c(carbapenems(), aminoglycosides()))
+isolate_identifier <- function(x, col_mo = NULL, cols_ab = NULL) {
+  if (is.null(col_mo)) {
+    col_mo <- search_type_in_df(x, "mo")
+  }
+  if (isFALSE(col_mo)) {
+    # is FALSE then ignore mo column
+    x$col_mo <- ""
+    col_mo <- "col_mo"
+  } else if (!is.null(col_mo)) {
+    x[, col_mo] <- paste0(as.mo(x[, col_mo, drop = TRUE]), "|")
+  }
+  
+  cols_ab <- deparse(substitute(cols_ab)) # support ab class selectors: isolate_identifier(x, cols_ab = carbapenems())
+  if (identical(cols_ab, "NULL")) {
+    cols_ab <- colnames(x)[vapply(FUN.VALUE = logical(1), x, is.rsi)]
+  } else {
+    cols_ab <- tryCatch(colnames(x[, eval(parse(text = cols_ab), envir = parent.frame())]),
+                        # tryCatch adds 4 calls, so total is -5
+                        error = function(e) stop_(e$message, call = -5))
+  }
+  if (length(cols_ab) == 0) {
+    warning_("no columns with antimicrobial agents found", call = TRUE)
+  }
+  
+  out <- x[, c(col_mo, cols_ab), drop = FALSE]
+  out <- do.call(paste, c(out, sep = ""))
+  out <- gsub("NA", ".", out, fixed = TRUE)
+  set_clean_class(out, new_class = c("isolate_identifier", "character"))
+}
+
+#' @method print isolate_identifier
+#' @export
+#' @noRd
+print.isolate_identifier <- function(x, ...) {
+  print(as.character(x), ...)
+}
+
+#' @method [ isolate_identifier
+#' @export
+#' @noRd
+"[.isolate_identifier" <- function(x, ...) {
+  y <- NextMethod()
+  attributes(y) <- attributes(x)
+  y
+}
+#' @method [[ isolate_identifier
+#' @export
+#' @noRd
+"[[.isolate_identifier" <- function(x, ...) {
+  y <- NextMethod()
+  attributes(y) <- attributes(x)
+  y
+}
+#' @method [<- isolate_identifier
+#' @export
+#' @noRd
+"[<-.isolate_identifier" <- function(i, j, ..., value) {
+  y <- NextMethod()
+  attributes(y) <- attributes(i)
+  y
+}
+#' @method [[<- isolate_identifier
+#' @export
+#' @noRd
+"[[<-.isolate_identifier" <- function(i, j, ..., value) {
+  y <- NextMethod()
+  attributes(y) <- attributes(i)
+  y
+}
+#' @method c isolate_identifier
+#' @export
+#' @noRd
+c.isolate_identifier <- function(x, ...) {
+  y <- NextMethod()
+  attributes(y) <- attributes(x)
+  y
+}
+
+#' @method unique isolate_identifier
+#' @export
+#' @noRd
+unique.isolate_identifier <- function(x, incomparables = FALSE, ...) {
+  y <- NextMethod()
+  attributes(y) <- attributes(x)
+  y
+}
--- a/R/mdro.R
+++ b/R/mdro.R
@ -27,7 +27,7 @@
 #'
 #' Determine which isolates are multidrug-resistant organisms (MDRO) according to international and national guidelines.
 #' @inheritSection lifecycle Stable lifecycle
-#' @param x a [data.frame] with antibiotics columns, like `AMX` or `amox`. Can be left blank when used inside `dplyr` verbs, such as [`filter()`][dplyr::filter()], [`mutate()`][dplyr::mutate()] and [`summarise()`][dplyr::summarise()].
+#' @param x a [data.frame] with antibiotics columns, like `AMX` or `amox`. Can be left blank for automatic determination.
 #' @param guideline a specific guideline to follow. When left empty, the publication by Magiorakos *et al.* (2012, Clinical Microbiology and Infection) will be followed, please see *Details*.
 #' @inheritParams eucast_rules
 #' @param pct_required_classes minimal required percentage of antimicrobial classes that must be available per isolate, rounded down. For example, with the default guideline, 17 antimicrobial classes must be available for *S. aureus*. Setting this `pct_required_classes` argument to `0.5` (default) means that for every *S. aureus* isolate at least 8 different classes must be available. Any lower number of available classes will return `NA` for that isolate.
--- a/R/mo.R
+++ b/R/mo.R
@ -756,7 +756,7 @@ exec_as.mo <- function(x,
        x[i] <- lookup(mo == "B_STRPT_HAEM", uncertainty = -1)
        next
      }
-      # CoNS/CoPS in different languages (support for German, Dutch, Spanish, Portuguese) ----
+      # CoNS/CoPS in different languages (support for German, Dutch, Spanish, Portuguese)
      if (x_backup_without_spp[i] %like_case% "[ck]oagulas[ea] negatie?[vf]"
          | x_trimmed[i] %like_case% "[ck]oagulas[ea] negatie?[vf]"
          | x_backup_without_spp[i] %like_case% "[ck]o?ns[^a-z]?$") {
@ -841,8 +841,17 @@ exec_as.mo <- function(x,
        x[i] <- lookup(fullname == "Streptococcus pneumoniae", uncertainty = -1)
        next
      }
-      # }
-
+      
+      if (x_backup[i] %in% pkg_env$mo_failed) {
+        # previously failed already in this session ----
+        # (at this point the latest reference_df has also be checked)
+        x[i] <- lookup(mo == "UNKNOWN")
+        if (initial_search == TRUE) {
+          failures <- c(failures, x_backup[i])
+        }
+        next
+      }
+      
      # NOW RUN THROUGH DIFFERENT PREVALENCE LEVELS
      check_per_prevalence <- function(data_to_check,
                                       data.old_to_check,
@ -1397,6 +1406,7 @@ exec_as.mo <- function(x,
  failures <- failures[!failures %in% c(NA, NULL, NaN)]
  if (length(failures) > 0 & initial_search == TRUE) {
    pkg_env$mo_failures <- sort(unique(failures))
+    pkg_env$mo_failed <- c(pkg_env$mo_failed, pkg_env$mo_failures)
    plural <- c("value", "it", "was")
    if (pm_n_distinct(failures) > 1) {
      plural <- c("values", "them", "were")
@ -1412,7 +1422,7 @@ exec_as.mo <- function(x,
    }
    msg <- paste0(msg,
                  ".\nUse mo_failures() to review ", plural[2], ". Edit the `allow_uncertain` argument if needed (see ?as.mo).\n",
-                  "You can also use your own reference data, e.g.:\n",
+                  "You can also use your own reference data with set_mo_source() or directly, e.g.:\n",
                  '  as.mo("mycode", reference_df = data.frame(own = "mycode", mo = "', MO_lookup$mo[match("Escherichia coli", MO_lookup$fullname)], '"))\n',
                  '  mo_name("mycode", reference_df = data.frame(own = "mycode", mo = "', MO_lookup$mo[match("Escherichia coli", MO_lookup$fullname)], '"))\n')
    warning_(paste0("\n", msg),
@ -1430,7 +1440,7 @@ exec_as.mo <- function(x,
      plural <- c("s", "them", "were")
    }
    msg <- paste0("Translation to ", nr2char(length(uncertainties$input)), " microorganism", plural[1],
-                  " ", plural[3], " guessed with uncertainty. Use mo_uncertainties() to review ", plural[2], ".")
+                  " was guessed with uncertainty. Use mo_uncertainties() to review ", plural[2], ".")
    message_(msg)
  }

@ -1960,12 +1970,12 @@ replace_old_mo_codes <- function(x, property) {
    x[which(!is.na(matched))] <- mo_new[which(!is.na(matched))]
    n_matched <- length(matched[!is.na(matched)])
    if (property != "mo") {
-      message_(font_blue("NOTE: The input contained old microbial codes (from previous package versions). Please update your MO codes with as.mo()."))
+      message_(font_blue("The input contained old microbial codes (from previous package versions). Please update your MO codes with as.mo()."))
    } else {
      if (n_matched == 1) {
-        message_(font_blue("NOTE: 1 old microbial code (from previous package versions) was updated to a current used MO code."))
+        message_(font_blue("1 old microbial code (from previous package versions) was updated to a current used MO code."))
      } else {
-        message_(font_blue("NOTE:", n_matched, "old microbial codes (from previous package versions) were updated to current used MO codes.")) 
+        message_(font_blue(n_matched, "old microbial codes (from previous package versions) were updated to current used MO codes.")) 
      }
    }
  }
--- a/R/mo_matching_score.R
+++ b/R/mo_matching_score.R
@ -33,21 +33,23 @@
 #' @section Matching score for microorganisms:
 #' With ambiguous user input in [as.mo()] and all the [`mo_*`][mo_property()] functions, the returned results are chosen based on their matching score using [mo_matching_score()]. This matching score \eqn{m}, is calculated as:
 #' 
-#' \deqn{m_{(x, n)} = \frac{l_{n} - 0.5 \cdot \min \begin{cases}l_{n} \\ \textrm{lev}(x, n)\end{cases}}{l_{n} \cdot p_{n} \cdot k_{n}}}{m(x, n) = ( l_n * min(l_n, lev(x, n) ) ) / ( l_n * p_n * k_n )}
+#' \ifelse{latex}{\deqn{m_{(x, n)} = \frac{l_{n} - 0.5 \cdot \min \begin{cases}l_{n} \\ \textrm{lev}(x, n)\end{cases}}{l_{n} \cdot p_{n} \cdot k_{n}}}}{\ifelse{html}{\figure{mo_matching_score.png}{options: width="300px" alt="mo matching score"}}{m(x, n) = ( l_n * min(l_n, lev(x, n) ) ) / ( l_n * p_n * k_n )}}
 #' 
 #' where:
 #' 
-#' * \eqn{x} is the user input;
-#' * \eqn{n} is a taxonomic name (genus, species, and subspecies);
-#' * \eqn{l_n}{l_n} is the length of \eqn{n};
-#' * lev is the [Levenshtein distance function](https://en.wikipedia.org/wiki/Levenshtein_distance), which counts any insertion, deletion and substitution as 1 that is needed to change \eqn{x} into \eqn{n};
-#' * \eqn{p_n}{p_n} is the human pathogenic prevalence group of \eqn{n}, as described below;
-#' * \eqn{k_n}{p_n} is the taxonomic kingdom of \eqn{n}, set as Bacteria = 1, Fungi = 2, Protozoa = 3, Archaea = 4, others = 5.
+#' * \ifelse{html}{\out{<i>x</i> is the user input;}}{\eqn{x} is the user input;}
+#' * \ifelse{html}{\out{<i>n</i> is a taxonomic name (genus, species, and subspecies);}}{\eqn{n} is a taxonomic name (genus, species, and subspecies);}
+#' * \ifelse{html}{\out{<i>l<sub>n</sub></i> is the length of <i>n</i>;}}{l_n is the length of \eqn{n};}
+#' * \ifelse{html}{\out{<i>lev</i> is the <a href="https://en.wikipedia.org/wiki/Levenshtein_distance">Levenshtein distance function</a>, which counts any insertion, deletion and substitution as 1 that is needed to change <i>x</i> into <i>n</i>;}}{lev is the Levenshtein distance function, which counts any insertion, deletion and substitution as 1 that is needed to change \eqn{x} into \eqn{n};}
+#' * \ifelse{html}{\out{<i>p<sub>n</sub></i> is the human pathogenic prevalence group of <i>n</i>, as described below;}}{p_n is the human pathogenic prevalence group of \eqn{n}, as described below;}
+#' * \ifelse{html}{\out{<i>k<sub>n</sub></i> is the taxonomic kingdom of <i>n</i>, set as Bacteria = 1, Fungi = 2, Protozoa = 3, Archaea = 4, others = 5.}}{l_n is the taxonomic kingdom of \eqn{n}, set as Bacteria = 1, Fungi = 2, Protozoa = 3, Archaea = 4, others = 5.}
 #' 
 #' The grouping into human pathogenic prevalence (\eqn{p}) is based on experience from several microbiological laboratories in the Netherlands in conjunction with international reports on pathogen prevalence. **Group 1** (most prevalent microorganisms) consists of all microorganisms where the taxonomic class is Gammaproteobacteria or where the taxonomic genus is *Enterococcus*, *Staphylococcus* or *Streptococcus*. This group consequently contains all common Gram-negative bacteria, such as *Pseudomonas* and *Legionella* and all species within the order Enterobacterales. **Group 2** consists of all microorganisms where the taxonomic phylum is Proteobacteria, Firmicutes, Actinobacteria or Sarcomastigophora, or where the taxonomic genus is *Absidia*, *Acremonium*, *Actinotignum*, *Alternaria*, *Anaerosalibacter*, *Apophysomyces*, *Arachnia*, *Aspergillus*, *Aureobacterium*, *Aureobasidium*, *Bacteroides*, *Basidiobolus*, *Beauveria*, *Blastocystis*, *Branhamella*, *Calymmatobacterium*, *Candida*, *Capnocytophaga*, *Catabacter*, *Chaetomium*, *Chryseobacterium*, *Chryseomonas*, *Chrysonilia*, *Cladophialophora*, *Cladosporium*, *Conidiobolus*, *Cryptococcus*, *Curvularia*, *Exophiala*, *Exserohilum*, *Flavobacterium*, *Fonsecaea*, *Fusarium*, *Fusobacterium*, *Hendersonula*, *Hypomyces*, *Koserella*, *Lelliottia*, *Leptosphaeria*, *Leptotrichia*, *Malassezia*, *Malbranchea*, *Mortierella*, *Mucor*, *Mycocentrospora*, *Mycoplasma*, *Nectria*, *Ochroconis*, *Oidiodendron*, *Phoma*, *Piedraia*, *Pithomyces*, *Pityrosporum*, *Prevotella*,\\*Pseudallescheria*, *Rhizomucor*, *Rhizopus*, *Rhodotorula*, *Scolecobasidium*, *Scopulariopsis*, *Scytalidium*,*Sporobolomyces*, *Stachybotrys*, *Stomatococcus*, *Treponema*, *Trichoderma*, *Trichophyton*, *Trichosporon*, *Tritirachium* or *Ureaplasma*. **Group 3** consists of all other microorganisms.
 #' 
 #' All matches are sorted descending on their matching score and for all user input values, the top match will be returned. This will lead to the effect that e.g., `"E. coli"` will return the microbial ID of *Escherichia coli* (\eqn{m = `r round(mo_matching_score("E. coli", "Escherichia coli"), 3)`}, a highly prevalent microorganism found in humans) and not *Entamoeba coli* (\eqn{m = `r round(mo_matching_score("E. coli", "Entamoeba coli"), 3)`}, a less prevalent microorganism in humans), although the latter would alphabetically come first. 
 #' @export
+#' @inheritSection AMR Reference data publicly available
+#' @inheritSection AMR Read more on our website!
 #' @examples 
 #' as.mo("E. coli")
 #' mo_uncertainties()
--- a/R/mo_property.R
+++ b/R/mo_property.R
@ -27,7 +27,7 @@
 #'
 #' Use these functions to return a specific property of a microorganism based on the latest accepted taxonomy. All input values will be evaluated internally with [as.mo()], which makes it possible to use microbial abbreviations, codes and names as input. Please see *Examples*.
 #' @inheritSection lifecycle Stable lifecycle
-#' @param x any character (vector) that can be coerced to a valid microorganism code with [as.mo()]. Can be left blank for auto-guessing the column containing microorganism codes when used inside `dplyr` verbs, such as [`filter()`][dplyr::filter()], [`mutate()`][dplyr::mutate()] and [`summarise()`][dplyr::summarise()], please see *Examples*.
+#' @param x any character (vector) that can be coerced to a valid microorganism code with [as.mo()]. Can be left blank for auto-guessing the column containing microorganism codes if used in a data set, please see *Examples*.
 #' @param property one of the column names of the [microorganisms] data set: `r paste0('"``', colnames(microorganisms), '\``"', collapse = ", ")`, or must be `"shortname"`
 #' @param language language of the returned text, defaults to system language (see [get_locale()]) and can be overwritten by setting the option `AMR_locale`, e.g. `options(AMR_locale = "de")`, see [translate]. Also used to translate text like "no growth". Use `language = NULL` or `language = ""` to prevent translation.
 #' @param ... other arguments passed on to [as.mo()], such as 'allow_uncertain' and 'ignore_pattern'
@ -44,6 +44,8 @@
 #'
 #' The Gram stain - [mo_gramstain()] - will be determined based on the taxonomic kingdom and phylum. According to Cavalier-Smith (2002, [PMID 11837318](https://pubmed.ncbi.nlm.nih.gov/11837318)), who defined subkingdoms Negibacteria and Posibacteria, only these phyla are Posibacteria: Actinobacteria, Chloroflexi, Firmicutes and Tenericutes. These bacteria are considered Gram-positive - all other bacteria are considered Gram-negative. Species outside the kingdom of Bacteria will return a value `NA`. Functions [mo_is_gram_negative()] and [mo_is_gram_positive()] always return `TRUE` or `FALSE` (except when the input is `NA` or the MO code is `UNKNOWN`), thus always return `FALSE` for species outside the taxonomic kingdom of Bacteria.
 #' 
+#' Determination of yeasts - [mo_is_yeast()] - will be based on the taxonomic phylum, class and order. Budding yeasts are true fungi of the phylum Ascomycetes, class Saccharomycetes (also called Hemiascomycetes). The true yeasts are separated into one main order Saccharomycetales. For all microorganisms that are in one of those two groups, the function will return `TRUE`. It returns `FALSE` for all other taxonomic entries.
+#' 
 #' Intrinsic resistance - [mo_is_intrinsic_resistant()] - will be determined based on the [intrinsic_resistant] data set, which is based on `r format_eucast_version_nr(3.2)`. The [mo_is_intrinsic_resistant()] can be vectorised over arguments `x` (input for microorganisms) and over `ab` (input for antibiotics).
 #'
 #' All output will be [translate]d where possible.
@ -145,6 +147,8 @@
 #'
 #' # other --------------------------------------------------------------------
 #' 
+#' mo_is_yeast(c("Candida", "E. coli"))      # TRUE, FALSE
+#' 
 #' # gram stains and intrinsic resistance can also be used as a filter in dplyr verbs
 #' if (require("dplyr")) {
 #'   example_isolates %>%
@ -331,7 +335,10 @@ mo_type <- function(x, language = get_locale(), ...) {
  meet_criteria(x, allow_NA = TRUE)
  meet_criteria(language, has_length = 1, is_in = c(LANGUAGES_SUPPORTED, ""), allow_NULL = TRUE, allow_NA = TRUE)
  
-  translate_AMR(mo_validate(x = x, property = "kingdom", language = language, ...), language = language, only_unknown = FALSE)
+  x.mo <- as.mo(x, language = language, ...)
+  out <- mo_kingdom(x.mo, language = NULL)
+  out[which(mo_is_yeast(x.mo))] <- "Yeasts"
+  translate_AMR(out, language = language, only_unknown = FALSE)
 }

 #' @rdname mo_property
@ -410,6 +417,33 @@ mo_is_gram_positive <- function(x, language = get_locale(), ...) {
  out
 }

+#' @rdname mo_property
+#' @export
+mo_is_yeast <- function(x, language = get_locale(), ...) {
+  if (missing(x)) {
+    # this tries to find the data and an <mo> column
+    x <- find_mo_col(fn = "mo_is_yeast")
+  }
+  meet_criteria(x, allow_NA = TRUE)
+  meet_criteria(language, has_length = 1, is_in = c(LANGUAGES_SUPPORTED, ""), allow_NULL = TRUE, allow_NA = TRUE)
+  
+  x.mo <- as.mo(x, language = language, ...)
+  metadata <- get_mo_failures_uncertainties_renamed()
+  
+  x.kingdom <- mo_kingdom(x.mo, language = NULL)
+  x.phylum <- mo_phylum(x.mo, language = NULL)
+  x.class <- mo_class(x.mo, language = NULL)
+  x.order <- mo_order(x.mo, language = NULL)
+  
+  load_mo_failures_uncertainties_renamed(metadata)
+  
+  out <- rep(FALSE, length(x))
+  out[x.kingdom == "Fungi" &
+        ((x.phylum == "Ascomycetes" & x.class == "Saccharomycetes") | x.order == "Saccharomycetales")] <- TRUE
+  out[x.mo %in% c(NA_character_, "UNKNOWN")] <- NA
+  out
+}
+
 #' @rdname mo_property
 #' @export
 mo_is_intrinsic_resistant <- function(x, ab, language = get_locale(), ...) {
--- a/R/sysdata.rda
+++ b/R/sysdata.rda
--- a/R/zzz.R
+++ b/R/zzz.R
@ -25,6 +25,7 @@

 # set up package environment, used by numerous AMR functions
 pkg_env <- new.env(hash = FALSE)
+pkg_env$mo_failed <- character(0)

 .onLoad <- function(libname, pkgname) {