Fix for adding MO's

2025-08-15 16:03:09 +02:00 · 2023-01-14 17:10:10 +01:00
parent 33f269e2f4
commit ca79068604
13 changed files with 147 additions and 86 deletions
--- a/4
+++ b/4
@@ -1,6 +1,6 @@
 Package: AMR
-Version: 1.8.2.9091
-Date: 2023-01-12
+Version: 1.8.2.9092
+Date: 2023-01-14
 Title: Antimicrobial Resistance Data Analysis
 Description: Functions to simplify and standardise antimicrobial resistance (AMR)
  data analysis and to work with microbial and antimicrobial properties by
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,4 +1,4 @@
-# AMR 1.8.2.9091
+# AMR 1.8.2.9092

 *(this beta version will eventually become v2.0! We're happy to reach a new major milestone soon!)*

--- a/R/add_custom_antimicrobials.R
+++ b/R/add_custom_antimicrobials.R
--- a/R/add_custom_microorganisms.R
+++ b/R/add_custom_microorganisms.R
@@ -30,7 +30,7 @@
 #' Add Custom Microorganisms to This Package
 #'
 #' With [add_custom_microorganisms()] you can add your own custom microorganisms to the `AMR` package, such the non-taxonomic outcome of laboratory analysis.
-#' @param x a [data.frame] resembling the [microorganisms] data set, at least containing columns "genus" and "species"
+#' @param x a [data.frame] resembling the [microorganisms] data set, at least containing column "genus" (case-insensitive)
 #' @details This function will fill in missing taxonomy for you, if specific taxonomic columns are missing, see *Examples*.
 #' 
 #' **Important:** Due to how \R works, the [add_custom_microorganisms()] function has to be run in every \R session - added microorganisms are not stored between sessions and are thus lost when \R is exited. 
@@ -39,7 +39,7 @@
 #' 
 #' **Method 1:** Save the microorganisms to a local or remote file (can even be the internet). To use this method:
 #' 
-#'    1. Create a data set in the structure of the [microorganisms] data set (containing at the very least columns "genus" and "species") and save it with [saveRDS()] to a location of choice, e.g. `"~/my_custom_mo.rds"`, or any remote location.
+#'    1. Create a data set in the structure of the [microorganisms] data set (containing at the very least column "genus") and save it with [saveRDS()] to a location of choice, e.g. `"~/my_custom_mo.rds"`, or any remote location.
 #'    
 #'    2. Set the file location to the `AMR_custom_mo` \R option: `options(AMR_custom_mo = "~/my_custom_mo.rds")`. This can even be a remote file location, such as an https URL. Since options are not saved between \R sessions, it is best to save this option to the `.Rprofile` file so that it will loaded on start-up of \R. To do this, open the `.Rprofile` file using e.g. `utils::file.edit("~/.Rprofile")`, add this text and save the file:
 #'
@@ -98,16 +98,27 @@
 #' mo_gramstain("Enterobacter asburiae/cloacae")
 #'
 #' mo_info("Enterobacter asburiae/cloacae")
+#' 
+#' 
+#' # the function tries to be forgiving:
+#' add_custom_microorganisms(
+#'   data.frame(GENUS = "ESCHERICHIA / KLEBSIELLA",
+#'              SPECIES = "SPECIES"))
+#' mo_name("ESCHERICHIA / KLEBSIELLA")
+#' mo_family("Escherichia/Klebsiella")
+#' 
+#' add_custom_microorganisms(
+#'   data.frame(genus = "Citrobacter", species = "freundii complex"))
+#' mo_name("C. freundii complex")
+#' mo_gramstain("C. freundii complex")
 #' }
 add_custom_microorganisms <- function(x) {
  meet_criteria(x, allow_class = "data.frame")
-  stop_ifnot(
-    all(c("genus", "species") %in% colnames(x)),
-    paste0("`x` must contain columns ", vector_and(c("genus", "species"), sort = FALSE), ".")
-  )
+  stop_ifnot("genus" %in% tolower(colnames(x)), paste0("`x` must contain column 'genus'."))
  
  # remove any extra class/type, such as grouped tbl, or data.table:
  x <- as.data.frame(x, stringsAsFactors = FALSE)
+  colnames(x) <- tolower(colnames(x))
  # rename 'name' to 'fullname' if it's in the data set
  if ("name" %in% colnames(x) && !"fullname" %in% colnames(x)) {
    colnames(x)[colnames(x) == "name"] <- "fullname"
@@ -116,21 +127,30 @@ add_custom_microorganisms <- function(x) {
  x <- x[, colnames(AMR_env$MO_lookup)[colnames(AMR_env$MO_lookup) %in% colnames(x)], drop = FALSE]
  
  # clean the input ----
-  if (!"subspecies" %in% colnames(x)) {
-    x$subspecies <- NA_character_
+  for (col in c("genus", "species", "subspecies")) {
+    if (!col %in% colnames(x)) {
+      x[, col] <- ""
+    }
+    if (is.factor(x[, col, drop = TRUE])) {
+      x[, col] <- as.character(x[, col, drop = TRUE])
+    }
+    col_ <- x[, col, drop = TRUE]
+    col_ <- tolower(trimws2(col_))
+    col_[col_ %like% "(sub)?species"] <- ""
+    col_ <- gsub(" *([/-]) *", "\\1", col_, perl = TRUE)
+    # groups are in our taxonomic table with a capital G, and complexes might be added by the user
+    col_ <- gsub(" group( |$)", " Group\\1", col_, perl = TRUE)
+    col_ <- gsub(" complex( |$)", " Complex\\1", col_, perl = TRUE)
+    
+    col_[is.na(col_)] <- ""
+    if (col == "genus") {
+      substr(col_, 1, 1) <- toupper(substr(col_, 1, 1))
+      col_ <- gsub("/([a-z])", "/\\U\\1", col_, perl = TRUE)
+      stop_if(any(col_ == ""), "the 'genus' column cannot be empty")
+      stop_if(any(col_ %like% " "), "the 'genus' column must not contain spaces")
+    }
+    x[, col] <- col_
  }
-  x$genus <- trimws2(x$genus)
-  x$species <- trimws2(x$species)
-  x$subspecies <- trimws2(x$subspecies)
-  x$genus[is.na(x$genus)] <- ""
-  x$species[is.na(x$species)] <- ""
-  x$subspecies[is.na(x$subspecies)] <- ""
-  stop_if(any(x$genus %like% " "),
-          "the 'genus' column must not contain spaces")
-  stop_if(any(x$species %like% " "),
-          "the 'species' column must not contain spaces")
-  stop_if(any(x$subspecies %like% " "),
-          "the 'subspecies' column must not contain spaces")
  
  if ("rank" %in% colnames(x)) {
    stop_ifnot(all(x$rank %in% AMR_env$MO_lookup$rank),
@@ -139,7 +159,7 @@ add_custom_microorganisms <- function(x) {
    x$rank <- ifelse(x$subspecies != "", "subspecies",
                     ifelse(x$species != "", "species",
                            ifelse(x$genus != "", "genus",
-                                   stop("in add_custom_microorganisms(): the 'genus' column cannot be empty",
+                                   stop("in add_custom_microorganisms(): only microorganisms up to the genus level can be added",
                                        call. = FALSE))))
  }
  x$source <- "Added by user"
@@ -158,22 +178,29 @@ add_custom_microorganisms <- function(x) {
  x$family[is.na(x$family)] <- ""
  
  for (col in colnames(x)) {
+    if (is.factor(x[, col, drop = TRUE])) {
+      x[, col] <- as.character(x[, col, drop = TRUE])
+    }
    if (is.list(AMR_env$MO_lookup[, col, drop = TRUE])) {
      x[, col] <- as.list(x[, col, drop = TRUE])
    }
  }
  
-  # fill in other columns
+  # fill in taxonomy based on genus
+  genus_to_check <- gsub("^(.*)[^a-zA-Z].*", "\\1", x$genus, perl = TRUE)
+  x$kingdom[which(x$kingdom == "" & genus_to_check != "")] <- AMR_env$MO_lookup$kingdom[match(genus_to_check[which(x$kingdom == "" & genus_to_check != "")], AMR_env$MO_lookup$genus)]
+  x$phylum[which(x$phylum == "" & genus_to_check != "")] <- AMR_env$MO_lookup$phylum[match(genus_to_check[which(x$phylum == "" & genus_to_check != "")], AMR_env$MO_lookup$genus)]
+  x$class[which(x$class == "" & genus_to_check != "")] <- AMR_env$MO_lookup$class[match(genus_to_check[which(x$class == "" & genus_to_check != "")], AMR_env$MO_lookup$genus)]
+  x$order[which(x$order == "" & genus_to_check != "")] <- AMR_env$MO_lookup$order[match(genus_to_check[which(x$order == "" & genus_to_check != "")], AMR_env$MO_lookup$genus)]
+  x$family[which(x$family == "" & genus_to_check != "")] <- AMR_env$MO_lookup$family[match(genus_to_check[which(x$family == "" & genus_to_check != "")], AMR_env$MO_lookup$genus)]
+  
+  # fill in other columns that are used in internal algorithms
  x$status <- "accepted"
  x$prevalence <- 1
-  
-  x$kingdom[which(x$kingdom == "" & x$genus != "")] <- AMR_env$MO_lookup$kingdom[match(x$genus[which(x$kingdom == "" & x$genus != "")], AMR_env$MO_lookup$genus)]
-  x$phylum[which(x$phylum == "" & x$genus != "")] <- AMR_env$MO_lookup$phylum[match(x$genus[which(x$phylum == "" & x$genus != "")], AMR_env$MO_lookup$genus)]
-  x$class[which(x$class == "" & x$genus != "")] <- AMR_env$MO_lookup$class[match(x$genus[which(x$class == "" & x$genus != "")], AMR_env$MO_lookup$genus)]
-  x$order[which(x$order == "" & x$genus != "")] <- AMR_env$MO_lookup$order[match(x$genus[which(x$order == "" & x$genus != "")], AMR_env$MO_lookup$genus)]
-  x$family[which(x$family == "" & x$genus != "")] <- AMR_env$MO_lookup$family[match(x$genus[which(x$family == "" & x$genus != "")], AMR_env$MO_lookup$genus)]
-  
-  x$kingdom_index <- AMR_env$MO_lookup$kingdom_index[match(x$genus, AMR_env$MO_lookup$genus)]
+  x$ref <- paste("Self-added,", format(Sys.Date(), "%Y"))
+  x$kingdom_index <- AMR_env$MO_lookup$kingdom_index[match(genus_to_check, AMR_env$MO_lookup$genus)]
+  # complete missing kingdom index, so mo_matching_score() will not return NA
+  x$kingdom_index[is.na(x$kingdom_index)] <- 1
  x$fullname_lower <- tolower(x$fullname)
  x$full_first <- substr(x$fullname_lower, 1, 1)
  x$species_first <- tolower(substr(x$species, 1, 1))
@@ -183,18 +210,19 @@ add_custom_microorganisms <- function(x) {
    # create the mo code
    x$mo <- NA_character_
  }
-  x$mo <- trimws2(x$mo)
+  x$mo <- trimws2(as.character(x$mo))
  x$mo[x$mo == ""] <- NA_character_
+  current <- sum(AMR_env$MO_lookup$source == "Added by user", na.rm = TRUE)
  x$mo[is.na(x$mo)] <- paste0("CUSTOM",
-                              seq(from = sum(AMR_env$MO_lookup$source == "Added by user", na.rm = TRUE) + 1, to = nrow(x), by = 1),
+                              seq.int(from = current + 1, to = current + nrow(x), by = 1),
                              "_",
                              toupper(unname(abbreviate(gsub(" +", " _ ",
                                                             gsub("[^A-Za-z0-9-]", " ",
                                                                  trimws2(paste(x$genus, x$species, x$subspecies)))),
                                                        minlength = 10))))
+  stop_if(anyDuplicated(c(as.character(AMR_env$MO_lookup$mo), x$mo)), "MO codes must be unique and not match existing MO codes of the AMR package")
  
  # add to package ----
-  
  AMR_env$custom_mo_codes <- c(AMR_env$custom_mo_codes, x$mo)
  class(AMR_env$MO_lookup$mo) <- "character"
  
@@ -215,7 +243,11 @@ add_custom_microorganisms <- function(x) {
  
  AMR_env$MO_lookup <- unique(rbind(AMR_env$MO_lookup, new_df))
  class(AMR_env$MO_lookup$mo) <- c("mo", "character")
-  message_("Added ", nr2char(nrow(x)), " record", ifelse(nrow(x) > 1, "s", ""), " to the internal `microorganisms` data set.")
+  if (nrow(x) <= 3) {
+    message_("Added ", vector_and(italicise(x$fullname), quotes = FALSE), " to the internal `microorganisms` data set.")
+  } else {
+    message_("Added ", nr2char(nrow(x)), " records to the internal `microorganisms` data set.")
+  }
 }

 #' @rdname add_custom_microorganisms
--- a/R/mo.R
+++ b/R/mo.R
@@ -183,7 +183,7 @@ as.mo <- function(x,
  x_lower <- tolower(x)
  
  complexes <- x[trimws2(x_lower) %like_case% " (complex|group)$"]
-  if (length(complexes) > 0 && identical(remove_from_input, mo_cleaning_regex())) {
+  if (length(complexes) > 0 && identical(remove_from_input, mo_cleaning_regex()) && !any(AMR_env$MO_lookup$fullname[which(AMR_env$MO_lookup$source == "Added by user")] %like% "(group|complex)", na.rm = TRUE)) {
    warning_("in `as.mo()`: 'complex' and 'group' were ignored from the input in ", length(complexes), " case", ifelse(length(complexes) > 1, "s", ""), ", as they are currently not supported.\nYou can add your own microorganism with `add_custom_microorganisms()`.", call = FALSE)
  }
  
@@ -222,7 +222,7 @@ as.mo <- function(x,
  if (isTRUE(info) && message_not_thrown_before("as.mo", old, new, entire_session = TRUE) && any(is.na(old) & !is.na(new), na.rm = TRUE)) {
    message_(
      "Returning previously coerced value", ifelse(sum(is.na(old) & !is.na(new)) > 1, "s", ""),
-      " for ", vector_and(x[is.na(old) & !is.na(new)]), ". Run `mo_reset_session()` to reset this."
+      " for ", vector_and(x[is.na(old) & !is.na(new)]), ". Run `mo_reset_session()` to reset this. This note will be shown once per session for this input."
    )
  }

@@ -236,8 +236,9 @@ as.mo <- function(x,
    x[trimws2(x) %like% translate_into_language("no .*growth", language = language)] <- NA_character_
    x[trimws2(x) %like% paste0("^(", translate_into_language("no|not", language = language), ") ")] <- NA_character_

-    # groups are in our taxonomic table with a capital G
-    x <- gsub(" group ", " Group ", x, fixed = TRUE)
+    # groups are in our taxonomic table with a capital G, and complexes might be added by the user
+    x <- gsub(" group( |$)", " Group\\1", x, perl = TRUE)
+    x <- gsub(" complex( |$)", " Complex\\1", x, perl = TRUE)

    # run over all unique leftovers
    x_unique <- unique(x[is.na(out) & !is.na(x)])
@@ -281,11 +282,11 @@ as.mo <- function(x,
        first_chars <- paste0("(^| )", "[", paste(substr(x_parts, 1, 1), collapse = ""), "]")
        filtr <- which(AMR_env$MO_lookup$full_first %like_case% first_chars)
      } else if (nchar(x_out) == 4) {
-        # no space and 4 characters - probably a code such as STAU or ESCO!
+        # no space and 4 characters - probably a code such as STAU or ESCO
        msg <- c(msg, paste0("Input \"", x_search, "\" was assumed to be a microorganism code - tried to match on ", vector_and(c(substr(x_out, 1, 2), substr(x_out, 3, 4)), sort = FALSE)))
        filtr <- which(AMR_env$MO_lookup$fullname_lower %like_case% paste0("(^| )", substr(x_out, 1, 2), ".* ", substr(x_out, 3, 4)))
      } else if (nchar(x_out) <= 6) {
-        # no space and 5-6 characters - probably a code such as STAAUR or ESCCOL!
+        # no space and 5-6 characters - probably a code such as STAAUR or ESCCOL
        first_part <- paste0(substr(x_out, 1, 2), "[a-z]*", substr(x_out, 3, 3))
        second_part <- substr(x_out, 4, nchar(x_out))
        msg <- c(msg, paste0("Input \"", x_search, "\" was assumed to be a microorganism code - tried to match on ", vector_and(c(gsub("[a-z]*", "(...)", first_part, fixed = TRUE), second_part), sort = FALSE)))
@@ -992,6 +993,9 @@ italicise <- function(x) {
                                                 gsub("Salmonella ", "", x[x %like_case% "Salmonella [A-Z]"]))
  out[x %like_case% "Streptococcus [A-Z]"] <- paste(font_italic("Streptococcus"), 
                                                    gsub("Streptococcus ", "", x[x %like_case% "Streptococcus [A-Z]"]))
+  if (has_colour()) {
+    out <- gsub("(Group|group|Complex|complex)(\033\\[23m)?", "\033[23m\\1", out, perl = TRUE)
+  }
  out
 }

@@ -1009,33 +1013,36 @@ nr2char <- function(x) {

 parse_and_convert <- function(x) {
  if (tryCatch(is.character(x) && all(Encoding(x) == "unknown", na.rm = TRUE), error = function(e) FALSE)) {
-    return(trimws2(x))
-  }
-  tryCatch(
-    {
-      if (!is.null(dim(x))) {
-        if (NCOL(x) > 2) {
-          stop("a maximum of two columns is allowed", call. = FALSE)
-        } else if (NCOL(x) == 2) {
-          # support Tidyverse selection like: df %>% select(colA, colB)
-          # paste these columns together
-          x <- as.data.frame(x, stringsAsFactors = FALSE)
-          colnames(x) <- c("A", "B")
-          x <- paste(x$A, x$B)
-        } else {
-          # support Tidyverse selection like: df %>% select(colA)
-          x <- as.data.frame(x, stringsAsFactors = FALSE)[[1]]
+    out <- x
+  } else {
+    out <- tryCatch(
+      {
+        if (!is.null(dim(x))) {
+          if (NCOL(x) > 2) {
+            stop("a maximum of two columns is allowed", call. = FALSE)
+          } else if (NCOL(x) == 2) {
+            # support Tidyverse selection like: df %>% select(colA, colB)
+            # paste these columns together
+            x <- as.data.frame(x, stringsAsFactors = FALSE)
+            colnames(x) <- c("A", "B")
+            x <- paste(x$A, x$B)
+          } else {
+            # support Tidyverse selection like: df %>% select(colA)
+            x <- as.data.frame(x, stringsAsFactors = FALSE)[[1]]
+          }
        }
-      }
-      parsed <- iconv(as.character(x), to = "UTF-8")
-      parsed[is.na(parsed) & !is.na(x)] <- iconv(x[is.na(parsed) & !is.na(x)], from = "Latin1", to = "ASCII//TRANSLIT")
-      parsed <- gsub('"', "", parsed, fixed = TRUE)
-      parsed <- gsub(" +", " ", parsed, perl = TRUE)
-      parsed
-    },
-    error = function(e) stop(e$message, call. = FALSE)
-  ) # this will also be thrown when running `as.mo(no_existing_object)`
-  trimws2(parsed)
+        parsed <- iconv(as.character(x), to = "UTF-8")
+        parsed[is.na(parsed) & !is.na(x)] <- iconv(x[is.na(parsed) & !is.na(x)], from = "Latin1", to = "ASCII//TRANSLIT")
+        parsed <- gsub('"', "", parsed, fixed = TRUE)
+        parsed
+      },
+      error = function(e) stop(e$message, call. = FALSE)
+    ) # this will also be thrown when running `as.mo(no_existing_object)`
+  }
+  out <- trimws2(out)
+  out <- gsub(" +", " ", out, perl = TRUE)
+  out <- gsub(" ?/ ? ", "/", out, perl = TRUE)
+  out
 }

 replace_old_mo_codes <- function(x, property) {
--- a/R/mo_matching_score.R
+++ b/R/mo_matching_score.R
@@ -39,7 +39,9 @@
 #' @section Matching Score for Microorganisms:
 #' With ambiguous user input in [as.mo()] and all the [`mo_*`][mo_property()] functions, the returned results are chosen based on their matching score using [mo_matching_score()]. This matching score \eqn{m}, is calculated as:
 #'
-#' \ifelse{latex}{\deqn{m_{(x, n)} = \frac{l_{n} - 0.5 \cdot \min \begin{cases}l_{n} \\ \textrm{lev}(x, n)\end{cases}}{l_{n} \cdot p_{n} \cdot k_{n}}}}{\ifelse{html}{\figure{mo_matching_score.png}{options: width="300" alt="mo matching score"}}{m(x, n) = ( l_n * min(l_n, lev(x, n) ) ) / ( l_n * p_n * k_n )}}
+#' \ifelse{latex}{\deqn{m_{(x, n)} = \frac{l_{n} - 0.5 \cdot \min \begin{cases}l_{n} \\ \textrm{lev}(x, n)\end{cases}}{l_{n} \cdot p_{n} \cdot k_{n}}}}{
+#' 
+#' \ifelse{html}{\figure{mo_matching_score.png}{options: width="300" alt="mo matching score"}}{m(x, n) = ( l_n * min(l_n, lev(x, n) ) ) / ( l_n * p_n * k_n )}}
 #'
 #' where:
 #'
@@ -47,7 +49,7 @@
 #' * \eqn{n} is a taxonomic name (genus, species, and subspecies);
 #' * \eqn{l_n} is the length of \eqn{n};
 #' * \eqn{lev} is the [Levenshtein distance function](https://en.wikipedia.org/wiki/Levenshtein_distance) (counting any insertion as 1, and any deletion or substitution as 2) that is needed to change \eqn{x} into \eqn{n};
-#' * \eqn{p_{n}} is the human pathogenic prevalence group of \eqn{n}, as described below;
+#' * \eqn{p_n} is the human pathogenic prevalence group of \eqn{n}, as described below;
 #' * \eqn{k_n} is the taxonomic kingdom of \eqn{n}, set as Bacteria = 1, Fungi = 2, Protozoa = 3, Archaea = 4, others = 5.
 #'
 #' The grouping into human pathogenic prevalence \eqn{p} is based on recent work from Bartlett *et al.* (2022, \doi{10.1099/mic.0.001269}) who extensively studied medical-scientific literature to categorise all bacterial species into these groups:
--- a/R/mo_property.R
+++ b/R/mo_property.R
@@ -777,6 +777,7 @@ mo_info <- function(x, language = get_AMR_locale(), keep_synonyms = getOption("A

  info <- lapply(x, function(y) {
    c(
+      list(identifier = x),
      mo_taxonomy(y, language = language, keep_synonyms = keep_synonyms),
      list(
        status = mo_status(y, language = language, keep_synonyms = keep_synonyms),
--- a/index.md
+++ b/index.md
@@ -15,11 +15,11 @@

 ### Introduction

-The `AMR` package is a [free and open-source](#copyright) R package with [zero dependencies](https://en.wikipedia.org/wiki/Dependency_hell) to simplify the analysis and prediction of Antimicrobial Resistance (AMR) and to work with microbial and antimicrobial data and properties, by using evidence-based methods. **Our aim is to provide a standard** for clean and reproducible AMR data analysis, that can therefore empower epidemiological analyses to continuously enable surveillance and treatment evaluation in any setting. 
+The `AMR` package is a [free and open-source](#copyright) R package with [zero dependencies](https://en.wikipedia.org/wiki/Dependency_hell) to simplify the analysis and prediction of Antimicrobial Resistance (AMR) and to work with microbial and antimicrobial data and properties, by using evidence-based methods. **Our aim is to provide a standard** for clean and reproducible AMR data analysis, that can therefore empower epidemiological analyses to continuously enable surveillance and treatment evaluation in any setting. [Many different researchers](./authors.html) from around the globe are continually helping us to make this a successful and durable project!

 This work was published in the Journal of Statistical Software (Volume 104(3); [DOI 10.18637/jss.v104.i03](https://doi.org/10.18637/jss.v104.i03)) and formed the basis of two PhD theses ([DOI 10.33612/diss.177417131](https://doi.org/10.33612/diss.177417131) and [DOI 10.33612/diss.192486375](https://doi.org/10.33612/diss.192486375)).

-After installing this package, R knows [**~52,000 distinct microbial species**](./reference/microorganisms.html) (updated December 2022) and all [**~600 antibiotic, antimycotic and antiviral drugs**](./reference/antibiotics.html) by name and code (including ATC, EARS-Net, ASIARS-Net, PubChem, LOINC and SNOMED CT), and knows all about valid R/SI and MIC values. The integral breakpoint guidelines from CLSI and EUCAST are included from the last 10 years. It supports and can read any data format, including WHONET data. This package works on Windows, macOS and Linux with all versions of R since R-3.0 (April 2013). **It was designed to work in any setting, including those with very limited resources**. It was created for both routine data analysis and academic research at the Faculty of Medical Sciences of the [University of Groningen](https://www.rug.nl), in collaboration with non-profit organisations [Certe Medical Diagnostics and Advice Foundation](https://www.certe.nl) and [University Medical Center Groningen](https://www.umcg.nl), and is being [actively and durably maintained](./news) by two public healthcare organisations in the Netherlands.
+After installing this package, R knows [**~52,000 distinct microbial species**](./reference/microorganisms.html) (updated December 2022) and all [**~600 antibiotic, antimycotic and antiviral drugs**](./reference/antibiotics.html) by name and code (including ATC, EARS-Net, ASIARS-Net, PubChem, LOINC and SNOMED CT), and knows all about valid R/SI and MIC values. The integral breakpoint guidelines from CLSI and EUCAST are included from the last 10 years. It supports and can read any data format, including WHONET data. This package works on Windows, macOS and Linux with all versions of R since R-3.0 (April 2013). **It was designed to work in any setting, including those with very limited resources**. It was created for both routine data analysis and academic research at the Faculty of Medical Sciences of the [University of Groningen](https://www.rug.nl), in collaboration with non-profit organisations [Certe Medical Diagnostics and Advice Foundation](https://www.certe.nl) and [University Medical Center Groningen](https://www.umcg.nl).

 ##### Used in 175 countries, translated to 16 languages

@@ -27,7 +27,7 @@ After installing this package, R knows [**~52,000 distinct microbial species**](

 Since its first public release in early 2018, this R package has been used in almost all countries in the world. Click the map to enlarge and to see the country names.

-The `AMR` package is available in <img src="lang_en.svg" style="height: 13px !important; border: 1px solid #cccccc; vertical-align: initial !important;"> English, <img src="lang_zh.svg" style="height: 13px !important; border: 1px solid #cccccc; vertical-align: initial !important;"> Chinese, <img src="lang_da.svg" style="height: 13px !important; border: 1px solid #cccccc; vertical-align: initial !important;"> Danish, <img src="lang_nl.svg" style="height: 13px !important; border: 1px solid #cccccc; vertical-align: initial !important;"> Dutch, <img src="lang_fr.svg" style="height: 13px !important; border: 1px solid #cccccc; vertical-align: initial !important;"> French, <img src="lang_de.svg" style="height: 13px !important; border: 1px solid #cccccc; vertical-align: initial !important;"> German, <img src="lang_el.svg" style="height: 13px !important; border: 1px solid #cccccc; vertical-align: initial !important;"> Greek, <img src="lang_it.svg" style="height: 13px !important; border: 1px solid #cccccc; vertical-align: initial !important;"> Italian, <img src="lang_ja.svg" style="height: 13px !important; border: 1px solid #cccccc; vertical-align: initial !important;"> Japanese, <img src="lang_pl.svg" style="height: 13px !important; border: 1px solid #cccccc; vertical-align: initial !important;"> Polish, <img src="lang_pt.svg" style="height: 13px !important; border: 1px solid #cccccc; vertical-align: initial !important;"> Portuguese, <img src="lang_ru.svg" style="height: 13px !important; border: 1px solid #cccccc; vertical-align: initial !important;"> Russian, <img src="lang_es.svg" style="height: 13px !important; border: 1px solid #cccccc; vertical-align: initial !important;"> Spanish, <img src="lang_sv.svg" style="height: 13px !important; border: 1px solid #cccccc; vertical-align: initial !important;"> Swedish, <img src="lang_tr.svg" style="height: 13px !important; border: 1px solid #cccccc; vertical-align: initial !important;"> Turkish, and <img src="lang_uk.svg" style="height: 13px !important; border: 1px solid #cccccc; vertical-align: initial !important;"> Ukrainian. Antimicrobial drug (group) names and colloquial microorganism names are provided in these languages.
+With the help of contributors from all corners of the world, the `AMR` package is available in <img src="lang_en.svg" style="height: 13px !important; border: 1px solid #cccccc; vertical-align: initial !important;"> English, <img src="lang_zh.svg" style="height: 13px !important; border: 1px solid #cccccc; vertical-align: initial !important;"> Chinese, <img src="lang_da.svg" style="height: 13px !important; border: 1px solid #cccccc; vertical-align: initial !important;"> Danish, <img src="lang_nl.svg" style="height: 13px !important; border: 1px solid #cccccc; vertical-align: initial !important;"> Dutch, <img src="lang_fr.svg" style="height: 13px !important; border: 1px solid #cccccc; vertical-align: initial !important;"> French, <img src="lang_de.svg" style="height: 13px !important; border: 1px solid #cccccc; vertical-align: initial !important;"> German, <img src="lang_el.svg" style="height: 13px !important; border: 1px solid #cccccc; vertical-align: initial !important;"> Greek, <img src="lang_it.svg" style="height: 13px !important; border: 1px solid #cccccc; vertical-align: initial !important;"> Italian, <img src="lang_ja.svg" style="height: 13px !important; border: 1px solid #cccccc; vertical-align: initial !important;"> Japanese, <img src="lang_pl.svg" style="height: 13px !important; border: 1px solid #cccccc; vertical-align: initial !important;"> Polish, <img src="lang_pt.svg" style="height: 13px !important; border: 1px solid #cccccc; vertical-align: initial !important;"> Portuguese, <img src="lang_ru.svg" style="height: 13px !important; border: 1px solid #cccccc; vertical-align: initial !important;"> Russian, <img src="lang_es.svg" style="height: 13px !important; border: 1px solid #cccccc; vertical-align: initial !important;"> Spanish, <img src="lang_sv.svg" style="height: 13px !important; border: 1px solid #cccccc; vertical-align: initial !important;"> Swedish, <img src="lang_tr.svg" style="height: 13px !important; border: 1px solid #cccccc; vertical-align: initial !important;"> Turkish, and <img src="lang_uk.svg" style="height: 13px !important; border: 1px solid #cccccc; vertical-align: initial !important;"> Ukrainian. Antimicrobial drug (group) names and colloquial microorganism names are provided in these languages.

 ### Practical examples

--- a/man/add_custom_antimicrobials.Rd
+++ b/man/add_custom_antimicrobials.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/add_custom_antimicrobials.R
+% Please edit documentation in R/custom_antimicrobials.R
 \name{add_custom_antimicrobials}
 \alias{add_custom_antimicrobials}
 \alias{clear_custom_antimicrobials}
--- a/man/add_custom_microorganisms.Rd
+++ b/man/add_custom_microorganisms.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/add_custom_microorganisms.R
+% Please edit documentation in R/custom_microorganisms.R
 \name{add_custom_microorganisms}
 \alias{add_custom_microorganisms}
 \alias{clear_custom_microorganisms}
@@ -10,7 +10,7 @@ add_custom_microorganisms(x)
 clear_custom_microorganisms()
 }
 \arguments{
-\item{x}{a \link{data.frame} resembling the \link{microorganisms} data set, at least containing columns "genus" and "species"}
+\item{x}{a \link{data.frame} resembling the \link{microorganisms} data set, at least containing column "genus" (case-insensitive)}
 }
 \description{
 With \code{\link[=add_custom_microorganisms]{add_custom_microorganisms()}} you can add your own custom microorganisms to the \code{AMR} package, such the non-taxonomic outcome of laboratory analysis.
@@ -24,7 +24,7 @@ There are two ways to automate this process:

 \strong{Method 1:} Save the microorganisms to a local or remote file (can even be the internet). To use this method:
 \enumerate{
-\item Create a data set in the structure of the \link{microorganisms} data set (containing at the very least columns "genus" and "species") and save it with \code{\link[=saveRDS]{saveRDS()}} to a location of choice, e.g. \code{"~/my_custom_mo.rds"}, or any remote location.
+\item Create a data set in the structure of the \link{microorganisms} data set (containing at the very least column "genus") and save it with \code{\link[=saveRDS]{saveRDS()}} to a location of choice, e.g. \code{"~/my_custom_mo.rds"}, or any remote location.
 \item Set the file location to the \code{AMR_custom_mo} \R option: \code{options(AMR_custom_mo = "~/my_custom_mo.rds")}. This can even be a remote file location, such as an https URL. Since options are not saved between \R sessions, it is best to save this option to the \code{.Rprofile} file so that it will loaded on start-up of \R. To do this, open the \code{.Rprofile} file using e.g. \code{utils::file.edit("~/.Rprofile")}, add this text and save the file:

 \if{html}{\out{<div class="sourceCode r">}}\preformatted{# Add custom microorganism codes:
@@ -79,6 +79,19 @@ mo_family("E. asburiae/cloacae")
 mo_gramstain("Enterobacter asburiae/cloacae")

 mo_info("Enterobacter asburiae/cloacae")
+
+
+# the function tries to be forgiving:
+add_custom_microorganisms(
+  data.frame(GENUS = "ESCHERICHIA / KLEBSIELLA",
+             SPECIES = "SPECIES"))
+mo_name("ESCHERICHIA / KLEBSIELLA")
+mo_family("Escherichia/Klebsiella")
+
+add_custom_microorganisms(
+  data.frame(genus = "Citrobacter", species = "freundii complex"))
+mo_name("C. freundii complex")
+mo_gramstain("C. freundii complex")
 }
 }
 \seealso{
--- a/man/as.mo.Rd
+++ b/man/as.mo.Rd
@@ -131,7 +131,9 @@ The coercion rules consider the prevalence of microorganisms in humans, which is

 With ambiguous user input in \code{\link[=as.mo]{as.mo()}} and all the \code{\link[=mo_property]{mo_*}} functions, the returned results are chosen based on their matching score using \code{\link[=mo_matching_score]{mo_matching_score()}}. This matching score \eqn{m}, is calculated as:

-\ifelse{latex}{\deqn{m_{(x, n)} = \frac{l_{n} - 0.5 \cdot \min \begin{cases}l_{n} \\ \textrm{lev}(x, n)\end{cases}}{l_{n} \cdot p_{n} \cdot k_{n}}}}{\ifelse{html}{\figure{mo_matching_score.png}{options: width="300" alt="mo matching score"}}{m(x, n) = ( l_n * min(l_n, lev(x, n) ) ) / ( l_n * p_n * k_n )}}
+\ifelse{latex}{\deqn{m_{(x, n)} = \frac{l_{n} - 0.5 \cdot \min \begin{cases}l_{n} \\ \textrm{lev}(x, n)\end{cases}}{l_{n} \cdot p_{n} \cdot k_{n}}}}{
+
+\ifelse{html}{\figure{mo_matching_score.png}{options: width="300" alt="mo matching score"}}{m(x, n) = ( l_n * min(l_n, lev(x, n) ) ) / ( l_n * p_n * k_n )}}

 where:
 \itemize{
@@ -139,7 +141,7 @@ where:
 \item \eqn{n} is a taxonomic name (genus, species, and subspecies);
 \item \eqn{l_n} is the length of \eqn{n};
 \item \eqn{lev} is the \href{https://en.wikipedia.org/wiki/Levenshtein_distance}{Levenshtein distance function} (counting any insertion as 1, and any deletion or substitution as 2) that is needed to change \eqn{x} into \eqn{n};
-\item \eqn{p_{n}} is the human pathogenic prevalence group of \eqn{n}, as described below;
+\item \eqn{p_n} is the human pathogenic prevalence group of \eqn{n}, as described below;
 \item \eqn{k_n} is the taxonomic kingdom of \eqn{n}, set as Bacteria = 1, Fungi = 2, Protozoa = 3, Archaea = 4, others = 5.
 }

--- a/man/mo_matching_score.Rd
+++ b/man/mo_matching_score.Rd
@@ -23,7 +23,9 @@ Later, the work of Bartlett A \emph{et al.} about bacterial pathogens infecting

 With ambiguous user input in \code{\link[=as.mo]{as.mo()}} and all the \code{\link[=mo_property]{mo_*}} functions, the returned results are chosen based on their matching score using \code{\link[=mo_matching_score]{mo_matching_score()}}. This matching score \eqn{m}, is calculated as:

-\ifelse{latex}{\deqn{m_{(x, n)} = \frac{l_{n} - 0.5 \cdot \min \begin{cases}l_{n} \\ \textrm{lev}(x, n)\end{cases}}{l_{n} \cdot p_{n} \cdot k_{n}}}}{\ifelse{html}{\figure{mo_matching_score.png}{options: width="300" alt="mo matching score"}}{m(x, n) = ( l_n * min(l_n, lev(x, n) ) ) / ( l_n * p_n * k_n )}}
+\ifelse{latex}{\deqn{m_{(x, n)} = \frac{l_{n} - 0.5 \cdot \min \begin{cases}l_{n} \\ \textrm{lev}(x, n)\end{cases}}{l_{n} \cdot p_{n} \cdot k_{n}}}}{
+
+\ifelse{html}{\figure{mo_matching_score.png}{options: width="300" alt="mo matching score"}}{m(x, n) = ( l_n * min(l_n, lev(x, n) ) ) / ( l_n * p_n * k_n )}}

 where:
 \itemize{
@@ -31,7 +33,7 @@ where:
 \item \eqn{n} is a taxonomic name (genus, species, and subspecies);
 \item \eqn{l_n} is the length of \eqn{n};
 \item \eqn{lev} is the \href{https://en.wikipedia.org/wiki/Levenshtein_distance}{Levenshtein distance function} (counting any insertion as 1, and any deletion or substitution as 2) that is needed to change \eqn{x} into \eqn{n};
-\item \eqn{p_{n}} is the human pathogenic prevalence group of \eqn{n}, as described below;
+\item \eqn{p_n} is the human pathogenic prevalence group of \eqn{n}, as described below;
 \item \eqn{k_n} is the taxonomic kingdom of \eqn{n}, set as Bacteria = 1, Fungi = 2, Protozoa = 3, Archaea = 4, others = 5.
 }

--- a/pkgdown/extra.js
+++ b/pkgdown/extra.js
@@ -34,12 +34,14 @@ $(document).ready(function() {
  // Replace 'Developers' with 'Maintainers' on the main page, and "Contributors" on the Authors page
  $(".developers h2").text("Maintainers");
  $(".citation h2:first").text("All contributors");
+  $(".citation h2:second").text("How to Cite");

-  // replace \donttest and \dontrun texts in Examples
+  // remove \donttest and \dontrun texts in Examples
  if ($("#ref-examples ~ div pre").length > 0) {
-    $("#ref-examples ~ div pre").html($("#ref-examples ~ div pre").html().replaceAll("# \\donttest{", ""));
-    $("#ref-examples ~ div pre").html($("#ref-examples ~ div pre").html().replaceAll("# \\dontrun{", ""));
-    $("#ref-examples ~ div pre").html($("#ref-examples ~ div pre").html().replaceAll("# }", ""));
+    const regex1 = /# \\dont(test|run)\{(\n|<br>)*/ig;
+    const regex2 = /(\n|<br>)*# \}/ig;
+    $("#ref-examples ~ div pre").html($("#ref-examples ~ div pre").html().replaceAll(regex1, ""));
+    $("#ref-examples ~ div pre").html($("#ref-examples ~ div pre").html().replaceAll(regex2, ""));
  }

  // remove leading newline in code examples on changelog