add oxygen tolerance

2026-03-07 16:41:35 +01:00 · 2023-05-11 21:56:27 +02:00
parent bf08d136a0
commit 91fa73dedf
28 changed files with 52310 additions and 52203 deletions
--- a/R/aa_helper_functions.R
+++ b/R/aa_helper_functions.R
@@ -505,7 +505,7 @@ word_wrap <- function(...,

  # clean introduced whitespace between fullstops
  msg <- gsub("[.] +[.]", "..", msg)
-  # remove extra space that was introduced (e.g. "Smith et al., 2022")
+  # remove extra space that was introduced (e.g. "Smith et al. , 2022")
  msg <- gsub(". ,", ".,", msg, fixed = TRUE)
  msg <- gsub("[ ,", "[,", msg, fixed = TRUE)
  msg <- gsub("/ /", "//", msg, fixed = TRUE)
--- a/R/custom_microorganisms.R
+++ b/R/custom_microorganisms.R
@@ -247,19 +247,14 @@ add_custom_microorganisms <- function(x) {
    "CUSTOM",
    seq.int(from = current + 1, to = current + nrow(x), by = 1),
    "_",
-    toupper(unname(abbreviate(
-      gsub(
-        " +", " _ ",
-        gsub(
-          "[^A-Za-z0-9-]", " ",
-          trimws2(paste(x$genus, x$species, x$subspecies))
-        )
-      ),
-      minlength = 10
-    )))
-  )
+    trimws(
+      paste(abbreviate_mo(x$genus, 5),
+            abbreviate_mo(x$species, 4, hyphen_as_space = TRUE),
+            abbreviate_mo(x$subspecies, 4, hyphen_as_space = TRUE),
+            sep = "_"),
+      whitespace = "_"))
  stop_if(anyDuplicated(c(as.character(AMR_env$MO_lookup$mo), x$mo)), "MO codes must be unique and not match existing MO codes of the AMR package")
-
+  
  # add to package ----
  AMR_env$custom_mo_codes <- c(AMR_env$custom_mo_codes, x$mo)
  class(AMR_env$MO_lookup$mo) <- "character"
@@ -306,3 +301,26 @@ clear_custom_microorganisms <- function() {
  AMR_env$mo_uncertainties <- AMR_env$mo_uncertainties[0, , drop = FALSE]
  message_("Cleared ", nr2char(n - n2), " custom record", ifelse(n - n2 > 1, "s", ""), " from the internal `microorganisms` data set.")
 }
+
+abbreviate_mo <- function(x, minlength = 5, prefix = "", hyphen_as_space = FALSE, ...) {
+  if (hyphen_as_space == TRUE) {
+    x <- gsub("-", " ", x, fixed = TRUE)
+  }
+  # keep a starting Latin ae
+  suppressWarnings(
+    gsub("(\u00C6|\u00E6)+",
+         "AE",
+         toupper(
+           paste0(prefix,
+                  abbreviate(
+                    gsub("^ae",
+                         "\u00E6\u00E6",
+                         x,
+                         ignore.case = TRUE),
+                    minlength = minlength,
+                    use.classes = TRUE,
+                    method = "both.sides",
+                    ...
+                  ))))
+  )
+}
--- a/R/data.R
+++ b/R/data.R
@@ -93,6 +93,7 @@
 #' - `rank`\cr Text of the taxonomic rank of the microorganism, such as `"species"` or `"genus"`
 #' - `ref`\cr Author(s) and year of related scientific publication. This contains only the *first surname* and year of the *latest* authors, e.g. "Wallis *et al.* 2006 *emend.* Smith and Jones 2018" becomes "Smith *et al.*, 2018". This field is directly retrieved from the source specified in the column `source`. Moreover, accents were removed to comply with CRAN that only allows ASCII characters, e.g. "V`r "\u00e1\u0148ov\u00e1"`" becomes "Vanova".
 #' - `lpsn`\cr Identifier ('Record number') of the List of Prokaryotic names with Standing in Nomenclature (LPSN). This will be the first/highest LPSN identifier to keep one identifier per row. For example, *Acetobacter ascendens* has LPSN Record number 7864 and 11011. Only the first is available in the `microorganisms` data set.
+#' - `oxygen_tolerance` \cr Oxygen tolerance, either `r vector_or(microorganisms$oxygen_tolerance)`. These data were retrieved from BacDive (see *Source*). Items that contain "likely" are missing from BacDive and were extrapolated from other species within the same genus to guess the oxygen tolerance. Currently `r round(length(microorganisms$oxygen_tolerance[which(!is.na(microorganisms$oxygen_tolerance))]) / nrow(microorganisms[which(microorganisms$kingdom == "Bacteria"), ]) * 100, 1)`% of all `r format_included_data_number(nrow(microorganisms[which(microorganisms$kingdom == "Bacteria"), ]))` bacteria in the data set contain an oxygen tolerance.
 #' - `lpsn_parent`\cr LPSN identifier of the parent taxon
 #' - `lpsn_renamed_to`\cr LPSN identifier of the currently valid taxon
 #' - `gbif`\cr Identifier ('taxonID') of the Global Biodiversity Information Facility (GBIF)
@@ -145,6 +146,8 @@
 #' * Grimont *et al.* (2007). Antigenic Formulae of the Salmonella Serovars, 9th Edition. WHO Collaborating Centre for Reference and Research on *Salmonella* (WHOCC-SALM).
 #'
 #' * Bartlett *et al.* (2022). **A comprehensive list of bacterial pathogens infecting humans** *Microbiology* 168:001269; \doi{10.1099/mic.0.001269}
+#' 
+#' * Reimer *et al.* (2022). ***BacDive* in 2022: the knowledge base for standardized bacterial and archaeal data.**  *Nucleic Acids Res.* 2022 Jan 7;50(D1):D741-D746; \doi{10.1093/nar/gkab961}
 #' @seealso [as.mo()], [mo_property()], [microorganisms.codes], [intrinsic_resistant]
 #' @examples
 #' microorganisms
--- a/R/mo.R
+++ b/R/mo.R
@@ -95,13 +95,14 @@
 #' 1. Berends MS *et al.* (2022). **AMR: An R Package for Working with Antimicrobial Resistance Data**. *Journal of Statistical Software*, 104(3), 1-31; \doi{10.18637/jss.v104.i03}
 #' 2. Becker K *et al.* (2014). **Coagulase-Negative Staphylococci.** *Clin Microbiol Rev.* 27(4): 870-926; \doi{10.1128/CMR.00109-13}
 #' 3. Becker K *et al.* (2019). **Implications of identifying the recently defined members of the *S. aureus* complex, *S. argenteus* and *S. schweitzeri*: A position paper of members of the ESCMID Study Group for staphylococci and Staphylococcal Diseases (ESGS).** *Clin Microbiol Infect*; \doi{10.1016/j.cmi.2019.02.028}
-#' 4. Becker K *et al.* (2020). **Emergence of coagulase-negative staphylococci** *Expert Rev Anti Infect Ther.* 18(4):349-366; \doi{10.1080/14787210.2020.1730813}
-#' 5. Lancefield RC (1933). **A serological differentiation of human and other groups of hemolytic streptococci**. *J Exp Med.* 57(4): 571-95; \doi{10.1084/jem.57.4.571}
-#' 6. Berends MS *et al.* (2022). **Trends in Occurrence and Phenotypic Resistance of Coagulase-Negative Staphylococci (CoNS) Found in Human Blood in the Northern Netherlands between 2013 and 2019** *Microorganisms* 10(9), 1801; \doi{10.3390/microorganisms10091801}
+#' 4. Becker K *et al.* (2020). **Emergence of coagulase-negative staphylococci.** *Expert Rev Anti Infect Ther.* 18(4):349-366; \doi{10.1080/14787210.2020.1730813}
+#' 5. Lancefield RC (1933). **A serological differentiation of human and other groups of hemolytic streptococci.** *J Exp Med.* 57(4): 571-95; \doi{10.1084/jem.57.4.571}
+#' 6. Berends MS *et al.* (2022). **Trends in Occurrence and Phenotypic Resistance of Coagulase-Negative Staphylococci (CoNS) Found in Human Blood in the Northern Netherlands between 2013 and 2019/** *Micro.rganisms* 10(9), 1801; \doi{10.3390/microorganisms10091801}
 #' 7. `r TAXONOMY_VERSION$LPSN$citation` Accessed from <`r TAXONOMY_VERSION$LPSN$url`> on `r documentation_date(TAXONOMY_VERSION$LPSN$accessed_date)`.
 #' 8. `r TAXONOMY_VERSION$GBIF$citation` Accessed from <`r TAXONOMY_VERSION$GBIF$url`> on `r documentation_date(TAXONOMY_VERSION$GBIF$accessed_date)`.
 #' 9. `r TAXONOMY_VERSION$SNOMED$citation` URL: <`r TAXONOMY_VERSION$SNOMED$url`>
 #' 10. Bartlett A *et al.* (2022). **A comprehensive list of bacterial pathogens infecting humans** *Microbiology* 168:001269; \doi{10.1099/mic.0.001269}
+#' 11. Reimer *et al.* (2022). ***BacDive* in 2022: the knowledge base for standardized bacterial and archaeal data.**  *Nucleic Acids Res.* 2022 Jan 7;50(D1):D741-D746; \doi{10.1093/nar/gkab961}
 #' @export
 #' @return A [character] [vector] with additional class [`mo`]
 #' @seealso [microorganisms] for the [data.frame] that is being used to determine ID's.
@@ -888,8 +889,6 @@ print.mo_uncertainties <- function(x, n = 10, ...) {
        ),
        collapse = "\n"
      ),
-      # Add "Based on {input}" text if it differs from the original input
-      ifelse(x[i, ]$original_input != x[i, ]$input, paste0(strrep(" ", nchar(x[i, ]$original_input) + 6), "Based on input \"", x[i, ]$input, "\""), ""),
      # Add note if result was coerced to accepted taxonomic name
      ifelse(x[i, ]$keep_synonyms == FALSE & x[i, ]$mo %in% AMR_env$MO_lookup$mo[which(AMR_env$MO_lookup$status == "synonym")],
        paste0(
--- a/R/mo_property.R
+++ b/R/mo_property.R
@@ -53,6 +53,8 @@
 #' Determination of yeasts ([mo_is_yeast()]) will be based on the taxonomic kingdom and class. *Budding yeasts* are fungi of the phylum Ascomycota, class Saccharomycetes (also called Hemiascomycetes). *True yeasts* are aggregated into the underlying order Saccharomycetales. Thus, for all microorganisms that are member of the taxonomic class Saccharomycetes, the function will return `TRUE`. It returns `FALSE` otherwise (or `NA` when the input is `NA` or the MO code is `UNKNOWN`).
 #'
 #' Determination of intrinsic resistance ([mo_is_intrinsic_resistant()]) will be based on the [intrinsic_resistant] data set, which is based on `r format_eucast_version_nr(3.3)`. The [mo_is_intrinsic_resistant()] function can be vectorised over both argument `x` (input for microorganisms) and `ab` (input for antibiotics).
+#' 
+#' Determination of bacterial oxygen tolerance ([mo_oxygen_tolerance()]) will be based on BacDive, see *Source*. The function [mo_is_anaerobic()] only returns `TRUE` if the oxygen tolerance is `"anaerobe"`, indicting an obligate anaerobic species or genus. It always returns `FALSE` for species outside the taxonomic kingdom of Bacteria.
 #'
 #' The function [mo_url()] will return the direct URL to the online database entry, which also shows the scientific reference of the concerned species.
 #'
@@ -589,6 +591,40 @@ mo_is_intrinsic_resistant <- function(x, ab, language = get_AMR_locale(), keep_s
  paste(x, ab) %in% AMR_env$intrinsic_resistant
 }

+#' @rdname mo_property
+#' @export
+mo_oxygen_tolerance <- function(x, language = get_AMR_locale(), keep_synonyms = getOption("AMR_keep_synonyms", FALSE), ...) {
+  if (missing(x)) {
+    # this tries to find the data and an 'mo' column
+    x <- find_mo_col(fn = "mo_oxygen_tolerance")
+  }
+  meet_criteria(x, allow_NA = TRUE)
+  language <- validate_language(language)
+  meet_criteria(keep_synonyms, allow_class = "logical", has_length = 1)
+  
+  mo_validate(x = x, property = "oxygen_tolerance", language = language, keep_synonyms = keep_synonyms, ...)
+}
+
+#' @rdname mo_property
+#' @export
+mo_is_anaerobic <- function(x, language = get_AMR_locale(), keep_synonyms = getOption("AMR_keep_synonyms", FALSE), ...) {
+  if (missing(x)) {
+    # this tries to find the data and an 'mo' column
+    x <- find_mo_col(fn = "mo_is_anaerobic")
+  }
+  meet_criteria(x, allow_NA = TRUE)
+  language <- validate_language(language)
+  meet_criteria(keep_synonyms, allow_class = "logical", has_length = 1)
+  
+  x.mo <- as.mo(x, language = language, keep_synonyms = keep_synonyms, ...)
+  metadata <- get_mo_uncertainties()
+  oxygen <- mo_oxygen_tolerance(x.mo, language = NULL, keep_synonyms = keep_synonyms)
+  load_mo_uncertainties(metadata)
+  out <- oxygen == "anaerobe" & !is.na(oxygen)
+  out[x.mo %in% c(NA_character_, "UNKNOWN")] <- NA
+  out
+}
+
 #' @rdname mo_property
 #' @export
 mo_snomed <- function(x, language = get_AMR_locale(), keep_synonyms = getOption("AMR_keep_synonyms", FALSE), ...) {
@@ -791,9 +827,12 @@ mo_info <- function(x, language = get_AMR_locale(), keep_synonyms = getOption("A
        status = mo_status(y, language = language, keep_synonyms = keep_synonyms),
        synonyms = mo_synonyms(y, keep_synonyms = keep_synonyms),
        gramstain = mo_gramstain(y, language = language, keep_synonyms = keep_synonyms),
+        oxygen_tolerance = mo_oxygen_tolerance(y, language = language, keep_synonyms = keep_synonyms),
        url = unname(mo_url(y, open = FALSE, keep_synonyms = keep_synonyms)),
        ref = mo_ref(y, keep_synonyms = keep_synonyms),
-        snomed = unlist(mo_snomed(y, keep_synonyms = keep_synonyms))
+        snomed = unlist(mo_snomed(y, keep_synonyms = keep_synonyms)),
+        lpsn = mo_lpsn(y, language = language, keep_synonyms = keep_synonyms),
+        gbif = mo_gbif(y, language = language, keep_synonyms = keep_synonyms)
      )
    )
  })
--- a/R/sysdata.rda
+++ b/R/sysdata.rda