Updates clinical breakpoints EUCAST/CLSI 2023, fixes #102, fixes #112, fixes #113, fixes #114, fixes #115

2026-07-24 19:10:54 +02:00 · 2023-06-09 22:30:43 +02:00
parent 9591688811
commit 89c447b290
47 changed files with 42982 additions and 18373 deletions
--- a/R/aa_amr-package.R
+++ b/R/aa_amr-package.R
@@ -36,7 +36,7 @@
 #'
 #' This work was published in the Journal of Statistical Software (Volume 104(3); \doi{jss.v104.i03}) and formed the basis of two PhD theses (\doi{10.33612/diss.177417131} and \doi{10.33612/diss.192486375}).
 #'
-#' After installing this package, R knows [**`r format_included_data_number(AMR::microorganisms)` microorganisms**](https://msberends.github.io/AMR/reference/microorganisms.html) (updated `r format(TAXONOMY_VERSION$GBIF$accessed_date, "%B %Y")`) and all [**`r format_included_data_number(nrow(AMR::antibiotics) + nrow(AMR::antivirals))` antibiotic, antimycotic and antiviral drugs**](https://msberends.github.io/AMR/reference/antibiotics.html) by name and code (including ATC, EARS-Net, ASIARS-Net, PubChem, LOINC and SNOMED CT), and knows all about valid SIR and MIC values. The integral breakpoint guidelines from CLSI and EUCAST are included from the last 10 years. It supports and can read any data format, including WHONET data. This package works on Windows, macOS and Linux with all versions of R since R-3.0 (April 2013). **It was designed to work in any setting, including those with very limited resources**. It was created for both routine data analysis and academic research at the Faculty of Medical Sciences of the [University of Groningen](https://www.rug.nl), in collaboration with non-profit organisations [Certe Medical Diagnostics and Advice Foundation](https://www.certe.nl) and [University Medical Center Groningen](https://www.umcg.nl).
+#' After installing this package, R knows [**`r format_included_data_number(AMR::microorganisms)` microorganisms**](https://msberends.github.io/AMR/reference/microorganisms.html) (updated `r format(TAXONOMY_VERSION$GBIF$accessed_date, "%B %Y")`) and all [**`r format_included_data_number(nrow(AMR::antibiotics) + nrow(AMR::antivirals))` antibiotic, antimycotic and antiviral drugs**](https://msberends.github.io/AMR/reference/antibiotics.html) by name and code (including ATC, EARS-Net, ASIARS-Net, PubChem, LOINC and SNOMED CT), and knows all about valid SIR and MIC values. The integral clinical breakpoint guidelines from CLSI and EUCAST are included, even with epidemiological cut-off (ECOFF) values. It supports and can read any data format, including WHONET data. This package works on Windows, macOS and Linux with all versions of R since R-3.0 (April 2013). **It was designed to work in any setting, including those with very limited resources**. It was created for both routine data analysis and academic research at the Faculty of Medical Sciences of the [University of Groningen](https://www.rug.nl), in collaboration with non-profit organisations [Certe Medical Diagnostics and Advice Foundation](https://www.certe.nl) and [University Medical Center Groningen](https://www.umcg.nl).
 #'
 #' The `AMR` package is available in `r vector_and(vapply(FUN.VALUE = character(1), LANGUAGES_SUPPORTED_NAMES, function(x) x$exonym), quotes = FALSE, sort = FALSE)`. Antimicrobial drug (group) names and colloquial microorganism names are provided in these languages.
 #' @section Reference Data Publicly Available:
--- a/R/aa_globals.R
+++ b/R/aa_globals.R
@@ -30,6 +30,12 @@
 # add new version numbers here, and add the rules themselves to "data-raw/eucast_rules.tsv" and clinical_breakpoints
 # (sourcing "data-raw/_pre_commit_hook.R" will process the TSV file)
 EUCAST_VERSION_BREAKPOINTS <- list(
+  "13.0" = list(
+    version_txt = "v13.0",
+    year = 2023,
+    title = "'EUCAST Clinical Breakpoint Tables'",
+    url = "https://www.eucast.org/clinical_breakpoints/"
+  ),
  "12.0" = list(
    version_txt = "v12.0",
    year = 2022,
@@ -50,10 +56,16 @@ EUCAST_VERSION_BREAKPOINTS <- list(
  )
 )
 EUCAST_VERSION_EXPERT_RULES <- list(
-  "3.1" = list(
-    version_txt = "v3.1",
-    year = 2016,
-    title = "'EUCAST Expert Rules, Intrinsic Resistance and Exceptional Phenotypes'",
+  "1.2" = list(
+    version_txt = "v3.3",
+    year = 2023,
+    title = "'EUCAST Expert Rules' and 'Expected Resistant Phenotypes'",
+    url = "https://www.eucast.org/expert_rules_and_expected_phenotypes/"
+  ),
+  "3.3" = list(
+    version_txt = "v3.3",
+    year = 2021,
+    title = "'EUCAST Expert Rules' and 'EUCAST Intrinsic Resistance and Unusual Phenotypes'",
    url = "https://www.eucast.org/expert_rules_and_expected_phenotypes/"
  ),
  "3.2" = list(
@@ -62,10 +74,10 @@ EUCAST_VERSION_EXPERT_RULES <- list(
    title = "'EUCAST Expert Rules' and 'EUCAST Intrinsic Resistance and Unusual Phenotypes'",
    url = "https://www.eucast.org/expert_rules_and_expected_phenotypes/"
  ),
-  "3.3" = list(
-    version_txt = "v3.3",
-    year = 2021,
-    title = "'EUCAST Expert Rules' and 'EUCAST Intrinsic Resistance and Unusual Phenotypes'",
+  "3.1" = list(
+    version_txt = "v3.1",
+    year = 2016,
+    title = "'EUCAST Expert Rules, Intrinsic Resistance and Exceptional Phenotypes'",
    url = "https://www.eucast.org/expert_rules_and_expected_phenotypes/"
  )
 )
--- a/R/ab_selectors.R
+++ b/R/ab_selectors.R
@@ -620,7 +620,7 @@ administrable_iv <- function(only_sir_columns = FALSE, ...) {

 #' @rdname antibiotic_class_selectors
 #' @inheritParams eucast_rules
-#' @details The [not_intrinsic_resistant()] function can be used to only select antibiotic columns that pose no intrinsic resistance for the microorganisms in the data set. For example, if a data set contains only microorganism codes or names of *E. coli* and *K. pneumoniae* and contains a column "vancomycin", this column will be removed (or rather, unselected) using this function. It currently applies `r format_eucast_version_nr(names(EUCAST_VERSION_EXPERT_RULES[length(EUCAST_VERSION_EXPERT_RULES)]))` to determine intrinsic resistance, using the [eucast_rules()] function internally. Because of this determination, this function is quite slow in terms of performance.
+#' @details The [not_intrinsic_resistant()] function can be used to only select antibiotic columns that pose no intrinsic resistance for the microorganisms in the data set. For example, if a data set contains only microorganism codes or names of *E. coli* and *K. pneumoniae* and contains a column "vancomycin", this column will be removed (or rather, unselected) using this function. It currently applies `r format_eucast_version_nr(names(EUCAST_VERSION_EXPERT_RULES[1]))` to determine intrinsic resistance, using the [eucast_rules()] function internally. Because of this determination, this function is quite slow in terms of performance.
 #' @export
 not_intrinsic_resistant <- function(only_sir_columns = FALSE, col_mo = NULL, version_expertrules = 3.3, ...) {
  meet_criteria(only_sir_columns, allow_class = "logical", has_length = 1)
--- a/R/data.R
+++ b/R/data.R
@@ -257,8 +257,11 @@
 #' - `disk_dose`\cr Dose of the used disk diffusion method
 #' - `breakpoint_S`\cr Lowest MIC value or highest number of millimetres that leads to "S"
 #' - `breakpoint_R`\cr Highest MIC value or lowest number of millimetres that leads to "R"
+#' - `ecoff`\cr Epidemiological cut-off (ECOFF) value, used in antimicrobial susceptibility testing to differentiate between wild-type and non-wild-type strains of bacteria or fungi (use [as.sir(..., ecoff = TRUE)] to interpret raw data using ECOFF values)
 #' - `uti`\cr A [logical] value (`TRUE`/`FALSE`) to indicate whether the rule applies to a urinary tract infection (UTI)
 #' @details
+#' Clinical breakpoints are validated through [WHONET](https://whonet.org), a free desktop Windows application developed and supported by the WHO Collaborating Centre for Surveillance of Antimicrobial Resistance. More can be read on [their website](https://whonet.org).
+#' 
 #' Like all data sets in this package, this data set is publicly available for download in the following formats: R, MS Excel, Apache Feather, Apache Parquet, SPSS, SAS, and Stata. Please visit [our website for the download links](https://msberends.github.io/AMR/articles/datasets.html). The actual files are of course available on [our GitHub repository](https://github.com/msberends/AMR/tree/main/data-raw).
 #'
 #' They **allow for machine reading EUCAST and CLSI guidelines**, which is almost impossible with the MS Excel and PDF files distributed by EUCAST and CLSI.
@@ -296,11 +299,8 @@
 #' - `administration`\cr Route of administration, either `r vector_or(dosage$administration)`
 #' - `notes`\cr Additional dosage notes
 #' - `original_txt`\cr Original text in the PDF file of EUCAST
-#' - `eucast_version`\cr Version number of the EUCAST Clinical Breakpoints guideline to which these dosages apply
+#' - `eucast_version`\cr Version number of the EUCAST Clinical Breakpoints guideline to which these dosages apply, either `r vector_or(dosage$eucast_version, quotes = FALSE, sort = TRUE, reverse = TRUE)`
 #' @details
-#' This data set is based on `r format_eucast_version_nr(12.0)` and `r format_eucast_version_nr(11.0)`.
-#'
-#' ### Direct download
 #' Like all data sets in this package, this data set is publicly available for download in the following formats: R, MS Excel, Apache Feather, Apache Parquet, SPSS, SAS, and Stata. Please visit [our website for the download links](https://msberends.github.io/AMR/articles/datasets.html). The actual files are of course available on [our GitHub repository](https://github.com/msberends/AMR/tree/main/data-raw).
 #' @examples
 #' dosage
--- a/R/eucast_rules.R
+++ b/R/eucast_rules.R
@@ -1185,7 +1185,7 @@ edit_sir <- function(x,

 #' @rdname eucast_rules
 #' @export
-eucast_dosage <- function(ab, administration = "iv", version_breakpoints = 12.0) {
+eucast_dosage <- function(ab, administration = "iv", version_breakpoints = 13.0) {
  meet_criteria(ab, allow_class = c("character", "numeric", "integer", "factor"))
  meet_criteria(administration, allow_class = "character", is_in = dosage$administration[!is.na(dosage$administration)], has_length = 1)
  meet_criteria(version_breakpoints, allow_class = c("numeric", "integer"), has_length = 1, is_in = as.double(names(EUCAST_VERSION_BREAKPOINTS)))
--- a/R/mo.R
+++ b/R/mo.R
@@ -171,10 +171,11 @@ as.mo <- function(x,
  meet_criteria(info, allow_class = "logical", has_length = 1)

  add_MO_lookup_to_AMR_env()
-
-  if (tryCatch(all(x %in% c(AMR_env$MO_lookup$mo, NA)) &&
-    isFALSE(Becker) &&
-    isFALSE(Lancefield), error = function(e) FALSE)) {
+  
+  if (tryCatch(all(x %in% c(AMR_env$MO_lookup$mo, NA)), error = function(e) FALSE) &&
+      isFALSE(Becker) &&
+      isFALSE(Lancefield) &&
+      isTRUE(keep_synonyms)) {
    # don't look into valid MO codes, just return them
    # is.mo() won't work - MO codes might change between package versions
    return(set_clean_class(x, new_class = c("mo", "character")))
@@ -297,15 +298,19 @@ as.mo <- function(x,
      } else if (length(x_parts) > 3) {
        first_chars <- paste0("(^| )[", paste(substr(x_parts, 1, 1), collapse = ""), "]")
        filtr <- which(AMR_env$MO_lookup$full_first %like_case% first_chars)
+      } else if (nchar(x_out) == 3) {
+        # no space and 3 characters - probably a code such as SAU or ECO
+        msg <<- c(msg, paste0("Input \"", x_search, "\" was assumed to be a microorganism code - tried to match on \"", substr(x_out, 1, 1), AMR_env$dots, " ", substr(x_out, 2, 3), AMR_env$dots, "\""))
+        filtr <- which(AMR_env$MO_lookup$fullname_lower %like_case% paste0("(^| )", substr(x_out, 1, 1), ".* ", substr(x_out, 2, 3)))
      } else if (nchar(x_out) == 4) {
        # no space and 4 characters - probably a code such as STAU or ESCO
-        msg <- c(msg, paste0("Input \"", x_search, "\" was assumed to be a microorganism code - tried to match on ", vector_and(c(substr(x_out, 1, 2), substr(x_out, 3, 4)), sort = FALSE)))
+        msg <<- c(msg, paste0("Input \"", x_search, "\" was assumed to be a microorganism code - tried to match on \"", substr(x_out, 1, 2), AMR_env$dots, " ", substr(x_out, 3, 4), AMR_env$dots, "\""))
        filtr <- which(AMR_env$MO_lookup$fullname_lower %like_case% paste0("(^| )", substr(x_out, 1, 2), ".* ", substr(x_out, 3, 4)))
      } else if (nchar(x_out) <= 6) {
        # no space and 5-6 characters - probably a code such as STAAUR or ESCCOL
        first_part <- paste0(substr(x_out, 1, 2), "[a-z]*", substr(x_out, 3, 3))
        second_part <- substr(x_out, 4, nchar(x_out))
-        msg <- c(msg, paste0("Input \"", x_search, "\" was assumed to be a microorganism code - tried to match on ", vector_and(c(gsub("[a-z]*", "(...)", first_part, fixed = TRUE), second_part), sort = FALSE)))
+        msg <<- c(msg, paste0("Input \"", x_search, "\" was assumed to be a microorganism code - tried to match on \"", gsub("[a-z]*", AMR_env$dots, first_part, fixed = TRUE), " ", second_part, AMR_env$dots, "\""))
        filtr <- which(AMR_env$MO_lookup$fullname_lower %like_case% paste0("(^| )", first_part, ".* ", second_part))
      } else {
        # for genus or species or subspecies
@@ -328,15 +333,18 @@ as.mo <- function(x,
        minimum_matching_score_current <- minimum_matching_score_current / AMR_env$MO_lookup$prevalence[match(mo_to_search, AMR_env$MO_lookup$fullname)]
        # correct back for kingdom
        minimum_matching_score_current <- minimum_matching_score_current / AMR_env$MO_lookup$kingdom_index[match(mo_to_search, AMR_env$MO_lookup$fullname)]
+        minimum_matching_score_current <- pmax(minimum_matching_score_current, m)
+        if (all(m <= 0.55, na.rm = TRUE)) {
+          # if the highest score is 0.5, we have nothing serious - 0.5 is the lowest for pathogenic group 1
+          # make everything NA so the results will get removed below
+          m[seq_len(length(m))] <- NA_real_
+        }
      } else {
+        # minimum_matching_score was set, so remove everything below it
+        m[m < minimum_matching_score] <- NA_real_
        minimum_matching_score_current <- minimum_matching_score
      }

-      if (sum(m >= minimum_matching_score_current) > 10) {
-        # at least 10 are left over, make the ones under `m` NA
-        m[m < minimum_matching_score_current] <- NA_real_
-      }
-
      top_hits <- mo_to_search[order(m, decreasing = TRUE, na.last = NA)] # na.last = NA will remove the NAs
      if (length(top_hits) == 0) {
        warning_("No hits found for \"", x_search, "\" with minimum_matching_score = ", ifelse(is.null(minimum_matching_score), paste0("NULL (=", round(min(minimum_matching_score_current, na.rm = TRUE), 3), ")"), minimum_matching_score), ". Try setting this value lower or even to 0.", call = FALSE)
@@ -815,7 +823,7 @@ rep.mo <- function(x, ...) {
 print.mo_uncertainties <- function(x, n = 10, ...) {
  more_than_50 <- FALSE
  if (NROW(x) == 0) {
-    cat(word_wrap("No uncertainties to show. Only uncertainties of the last call of `as.mo()` or any `mo_*()` function are stored.\n\n", add_fn = font_blue))
+    cat(word_wrap("No uncertainties to show. Only uncertainties of the last call to `as.mo()` or any `mo_*()` function are stored.\n\n", add_fn = font_blue))
    return(invisible(NULL))
  } else if (NROW(x) > 50) {
    more_than_50 <- TRUE
@@ -833,20 +841,20 @@ print.mo_uncertainties <- function(x, n = 10, ...) {

  if (has_colour()) {
    cat(word_wrap("Colour keys: ",
-      col_red(" 0.000-0.499 "),
-      col_orange(" 0.500-0.599 "),
-      col_yellow(" 0.600-0.699 "),
-      col_green(" 0.700-1.000"),
+      col_red(" 0.000-0.549 "),
+      col_orange(" 0.550-0.649 "),
+      col_yellow(" 0.650-0.749 "),
+      col_green(" 0.750-1.000"),
      add_fn = font_blue
    ), font_green_bg(" "), "\n", sep = "")
  }

  score_set_colour <- function(text, scores) {
    # set colours to scores
-    text[scores >= 0.7] <- col_green(text[scores >= 0.7])
-    text[scores >= 0.6 & scores < 0.7] <- col_yellow(text[scores >= 0.6 & scores < 0.7])
-    text[scores >= 0.5 & scores < 0.6] <- col_orange(text[scores >= 0.5 & scores < 0.6])
-    text[scores < 0.5] <- col_red(text[scores < 0.5])
+    text[scores >= 0.75] <- col_green(text[scores >= 0.75])
+    text[scores >= 0.65 & scores < 0.75] <- col_yellow(text[scores >= 0.65 & scores < 0.75])
+    text[scores >= 0.55 & scores < 0.65] <- col_orange(text[scores >= 0.55 & scores < 0.65])
+    text[scores < 0.55] <- col_red(text[scores < 0.55])
    text
  }

--- a/R/zzz.R
+++ b/R/zzz.R
@@ -83,9 +83,11 @@ if (utf8_supported && !is_latex) {
  # \u2139 is a symbol officially named 'information source'
  AMR_env$info_icon <- "\u2139"
  AMR_env$bullet_icon <- "\u2022"
+  AMR_env$dots <- "\u2026"
 } else {
  AMR_env$info_icon <- "i"
  AMR_env$bullet_icon <- "*"
+  AMR_env$dots <- "..."
 }

 .onLoad <- function(lib, pkg) {