add oxygen tolerance

2025-06-07 14:34:01 +02:00 · 2023-05-11 21:56:27 +02:00 · 2023-05-11 21:56:27 +02:00 · 91fa73dedf
commit 91fa73dedf
parent bf08d136a0
28 changed files with 52310 additions and 52203 deletions
--- a/4
+++ b/4
@ -1,6 +1,6 @@
 Package: AMR
-Version: 2.0.0.9014
-Date: 2023-05-08
+Version: 2.0.0.9015
+Date: 2023-05-11
 Title: Antimicrobial Resistance Data Analysis
 Description: Functions to simplify and standardise antimicrobial resistance (AMR)
  data analysis and to work with microbial and antimicrobial properties by
--- a/2
+++ b/2
@ -330,6 +330,7 @@ export(mo_gbif)
 export(mo_genus)
 export(mo_gramstain)
 export(mo_info)
+export(mo_is_anaerobic)
 export(mo_is_gram_negative)
 export(mo_is_gram_positive)
 export(mo_is_intrinsic_resistant)
@ -339,6 +340,7 @@ export(mo_lpsn)
 export(mo_matching_score)
 export(mo_name)
 export(mo_order)
+export(mo_oxygen_tolerance)
 export(mo_pathogenicity)
 export(mo_phylum)
 export(mo_property)
--- a/NEWS.md
+++ b/NEWS.md
@ -1,13 +1,16 @@
-# AMR 2.0.0.9014
+# AMR 2.0.0.9015

 ## Changed
+* Added oxygen tolerance to over 25,000 bacteria in the `microorganisms` data set
+  * Added `mo_oxygen_tolerance()` to retrieve the values
+  * Added `mo_is_anaerobic()` to determine which species are obligate anaerobic bacteria
+* Added LPSN and GBIF identifiers, and oxygen tolerance to `mo_info()`
 * formatting fix for `sir_interpretation_history()`
 * Fixed some WHONET codes for microorganisms and consequently a couple of entries in `clinical_breakpoints`
 * Added microbial codes for Gram-negative/positive anaerobic bacteria
 * `mo_rank()` now returns `NA` for 'unknown' microorganisms (`B_ANAER`, `B_ANAER-NEG`, `B_ANAER-POS`, `B_GRAMN`, `B_GRAMP`, `F_FUNGUS`, `F_YEAST`, and `UNKNOWN`)
 * Fixed a bug for `as.mo()` that led to coercion of `NA` values when using custom microorganism codes

-
 # AMR 2.0.0

 This is a new major release of the AMR package, with great new additions but also some breaking changes for current users. These are all listed below.
--- a/R/aa_helper_functions.R
+++ b/R/aa_helper_functions.R
@ -505,7 +505,7 @@ word_wrap <- function(...,

  # clean introduced whitespace between fullstops
  msg <- gsub("[.] +[.]", "..", msg)
-  # remove extra space that was introduced (e.g. "Smith et al., 2022")
+  # remove extra space that was introduced (e.g. "Smith et al. , 2022")
  msg <- gsub(". ,", ".,", msg, fixed = TRUE)
  msg <- gsub("[ ,", "[,", msg, fixed = TRUE)
  msg <- gsub("/ /", "//", msg, fixed = TRUE)
--- a/R/custom_microorganisms.R
+++ b/R/custom_microorganisms.R
@ -247,19 +247,14 @@ add_custom_microorganisms <- function(x) {
    "CUSTOM",
    seq.int(from = current + 1, to = current + nrow(x), by = 1),
    "_",
-    toupper(unname(abbreviate(
-      gsub(
-        " +", " _ ",
-        gsub(
-          "[^A-Za-z0-9-]", " ",
-          trimws2(paste(x$genus, x$species, x$subspecies))
-        )
-      ),
-      minlength = 10
-    )))
-  )
+    trimws(
+      paste(abbreviate_mo(x$genus, 5),
+            abbreviate_mo(x$species, 4, hyphen_as_space = TRUE),
+            abbreviate_mo(x$subspecies, 4, hyphen_as_space = TRUE),
+            sep = "_"),
+      whitespace = "_"))
  stop_if(anyDuplicated(c(as.character(AMR_env$MO_lookup$mo), x$mo)), "MO codes must be unique and not match existing MO codes of the AMR package")
-
+  
  # add to package ----
  AMR_env$custom_mo_codes <- c(AMR_env$custom_mo_codes, x$mo)
  class(AMR_env$MO_lookup$mo) <- "character"
@ -306,3 +301,26 @@ clear_custom_microorganisms <- function() {
  AMR_env$mo_uncertainties <- AMR_env$mo_uncertainties[0, , drop = FALSE]
  message_("Cleared ", nr2char(n - n2), " custom record", ifelse(n - n2 > 1, "s", ""), " from the internal `microorganisms` data set.")
 }
+
+abbreviate_mo <- function(x, minlength = 5, prefix = "", hyphen_as_space = FALSE, ...) {
+  if (hyphen_as_space == TRUE) {
+    x <- gsub("-", " ", x, fixed = TRUE)
+  }
+  # keep a starting Latin ae
+  suppressWarnings(
+    gsub("(\u00C6|\u00E6)+",
+         "AE",
+         toupper(
+           paste0(prefix,
+                  abbreviate(
+                    gsub("^ae",
+                         "\u00E6\u00E6",
+                         x,
+                         ignore.case = TRUE),
+                    minlength = minlength,
+                    use.classes = TRUE,
+                    method = "both.sides",
+                    ...
+                  ))))
+  )
+}
--- a/R/data.R
+++ b/R/data.R
@ -93,6 +93,7 @@
 #' - `rank`\cr Text of the taxonomic rank of the microorganism, such as `"species"` or `"genus"`
 #' - `ref`\cr Author(s) and year of related scientific publication. This contains only the *first surname* and year of the *latest* authors, e.g. "Wallis *et al.* 2006 *emend.* Smith and Jones 2018" becomes "Smith *et al.*, 2018". This field is directly retrieved from the source specified in the column `source`. Moreover, accents were removed to comply with CRAN that only allows ASCII characters, e.g. "V`r "\u00e1\u0148ov\u00e1"`" becomes "Vanova".
 #' - `lpsn`\cr Identifier ('Record number') of the List of Prokaryotic names with Standing in Nomenclature (LPSN). This will be the first/highest LPSN identifier to keep one identifier per row. For example, *Acetobacter ascendens* has LPSN Record number 7864 and 11011. Only the first is available in the `microorganisms` data set.
+#' - `oxygen_tolerance` \cr Oxygen tolerance, either `r vector_or(microorganisms$oxygen_tolerance)`. These data were retrieved from BacDive (see *Source*). Items that contain "likely" are missing from BacDive and were extrapolated from other species within the same genus to guess the oxygen tolerance. Currently `r round(length(microorganisms$oxygen_tolerance[which(!is.na(microorganisms$oxygen_tolerance))]) / nrow(microorganisms[which(microorganisms$kingdom == "Bacteria"), ]) * 100, 1)`% of all `r format_included_data_number(nrow(microorganisms[which(microorganisms$kingdom == "Bacteria"), ]))` bacteria in the data set contain an oxygen tolerance.
 #' - `lpsn_parent`\cr LPSN identifier of the parent taxon
 #' - `lpsn_renamed_to`\cr LPSN identifier of the currently valid taxon
 #' - `gbif`\cr Identifier ('taxonID') of the Global Biodiversity Information Facility (GBIF)
@ -145,6 +146,8 @@
 #' * Grimont *et al.* (2007). Antigenic Formulae of the Salmonella Serovars, 9th Edition. WHO Collaborating Centre for Reference and Research on *Salmonella* (WHOCC-SALM).
 #'
 #' * Bartlett *et al.* (2022). **A comprehensive list of bacterial pathogens infecting humans** *Microbiology* 168:001269; \doi{10.1099/mic.0.001269}
+#' 
+#' * Reimer *et al.* (2022). ***BacDive* in 2022: the knowledge base for standardized bacterial and archaeal data.**  *Nucleic Acids Res.* 2022 Jan 7;50(D1):D741-D746; \doi{10.1093/nar/gkab961}
 #' @seealso [as.mo()], [mo_property()], [microorganisms.codes], [intrinsic_resistant]
 #' @examples
 #' microorganisms
--- a/R/mo.R
+++ b/R/mo.R
@ -95,13 +95,14 @@
 #' 1. Berends MS *et al.* (2022). **AMR: An R Package for Working with Antimicrobial Resistance Data**. *Journal of Statistical Software*, 104(3), 1-31; \doi{10.18637/jss.v104.i03}
 #' 2. Becker K *et al.* (2014). **Coagulase-Negative Staphylococci.** *Clin Microbiol Rev.* 27(4): 870-926; \doi{10.1128/CMR.00109-13}
 #' 3. Becker K *et al.* (2019). **Implications of identifying the recently defined members of the *S. aureus* complex, *S. argenteus* and *S. schweitzeri*: A position paper of members of the ESCMID Study Group for staphylococci and Staphylococcal Diseases (ESGS).** *Clin Microbiol Infect*; \doi{10.1016/j.cmi.2019.02.028}
-#' 4. Becker K *et al.* (2020). **Emergence of coagulase-negative staphylococci** *Expert Rev Anti Infect Ther.* 18(4):349-366; \doi{10.1080/14787210.2020.1730813}
-#' 5. Lancefield RC (1933). **A serological differentiation of human and other groups of hemolytic streptococci**. *J Exp Med.* 57(4): 571-95; \doi{10.1084/jem.57.4.571}
-#' 6. Berends MS *et al.* (2022). **Trends in Occurrence and Phenotypic Resistance of Coagulase-Negative Staphylococci (CoNS) Found in Human Blood in the Northern Netherlands between 2013 and 2019** *Microorganisms* 10(9), 1801; \doi{10.3390/microorganisms10091801}
+#' 4. Becker K *et al.* (2020). **Emergence of coagulase-negative staphylococci.** *Expert Rev Anti Infect Ther.* 18(4):349-366; \doi{10.1080/14787210.2020.1730813}
+#' 5. Lancefield RC (1933). **A serological differentiation of human and other groups of hemolytic streptococci.** *J Exp Med.* 57(4): 571-95; \doi{10.1084/jem.57.4.571}
+#' 6. Berends MS *et al.* (2022). **Trends in Occurrence and Phenotypic Resistance of Coagulase-Negative Staphylococci (CoNS) Found in Human Blood in the Northern Netherlands between 2013 and 2019.** *Microorganisms* 10(9), 1801; \doi{10.3390/microorganisms10091801}
 #' 7. `r TAXONOMY_VERSION$LPSN$citation` Accessed from <`r TAXONOMY_VERSION$LPSN$url`> on `r documentation_date(TAXONOMY_VERSION$LPSN$accessed_date)`.
 #' 8. `r TAXONOMY_VERSION$GBIF$citation` Accessed from <`r TAXONOMY_VERSION$GBIF$url`> on `r documentation_date(TAXONOMY_VERSION$GBIF$accessed_date)`.
 #' 9. `r TAXONOMY_VERSION$SNOMED$citation` URL: <`r TAXONOMY_VERSION$SNOMED$url`>
 #' 10. Bartlett A *et al.* (2022). **A comprehensive list of bacterial pathogens infecting humans** *Microbiology* 168:001269; \doi{10.1099/mic.0.001269}
+#' 11. Reimer *et al.* (2022). ***BacDive* in 2022: the knowledge base for standardized bacterial and archaeal data.**  *Nucleic Acids Res.* 2022 Jan 7;50(D1):D741-D746; \doi{10.1093/nar/gkab961}
 #' @export
 #' @return A [character] [vector] with additional class [`mo`]
 #' @seealso [microorganisms] for the [data.frame] that is being used to determine ID's.
@ -888,8 +889,6 @@ print.mo_uncertainties <- function(x, n = 10, ...) {
        ),
        collapse = "\n"
      ),
-      # Add "Based on {input}" text if it differs from the original input
-      ifelse(x[i, ]$original_input != x[i, ]$input, paste0(strrep(" ", nchar(x[i, ]$original_input) + 6), "Based on input \"", x[i, ]$input, "\""), ""),
      # Add note if result was coerced to accepted taxonomic name
      ifelse(x[i, ]$keep_synonyms == FALSE & x[i, ]$mo %in% AMR_env$MO_lookup$mo[which(AMR_env$MO_lookup$status == "synonym")],
        paste0(
--- a/R/mo_property.R
+++ b/R/mo_property.R
@ -53,6 +53,8 @@
 #' Determination of yeasts ([mo_is_yeast()]) will be based on the taxonomic kingdom and class. *Budding yeasts* are fungi of the phylum Ascomycota, class Saccharomycetes (also called Hemiascomycetes). *True yeasts* are aggregated into the underlying order Saccharomycetales. Thus, for all microorganisms that are member of the taxonomic class Saccharomycetes, the function will return `TRUE`. It returns `FALSE` otherwise (or `NA` when the input is `NA` or the MO code is `UNKNOWN`).
 #'
 #' Determination of intrinsic resistance ([mo_is_intrinsic_resistant()]) will be based on the [intrinsic_resistant] data set, which is based on `r format_eucast_version_nr(3.3)`. The [mo_is_intrinsic_resistant()] function can be vectorised over both argument `x` (input for microorganisms) and `ab` (input for antibiotics).
+#' 
+#' Determination of bacterial oxygen tolerance ([mo_oxygen_tolerance()]) will be based on BacDive, see *Source*. The function [mo_is_anaerobic()] only returns `TRUE` if the oxygen tolerance is `"anaerobe"`, indicating an obligate anaerobic species or genus. It always returns `FALSE` for species outside the taxonomic kingdom of Bacteria.
 #'
 #' The function [mo_url()] will return the direct URL to the online database entry, which also shows the scientific reference of the concerned species.
 #'
@ -589,6 +591,40 @@ mo_is_intrinsic_resistant <- function(x, ab, language = get_AMR_locale(), keep_s
  paste(x, ab) %in% AMR_env$intrinsic_resistant
 }

+#' @rdname mo_property
+#' @export
+mo_oxygen_tolerance <- function(x, language = get_AMR_locale(), keep_synonyms = getOption("AMR_keep_synonyms", FALSE), ...) {
+  if (missing(x)) {
+    # this tries to find the data and an 'mo' column
+    x <- find_mo_col(fn = "mo_oxygen_tolerance")
+  }
+  meet_criteria(x, allow_NA = TRUE)
+  language <- validate_language(language)
+  meet_criteria(keep_synonyms, allow_class = "logical", has_length = 1)
+  
+  mo_validate(x = x, property = "oxygen_tolerance", language = language, keep_synonyms = keep_synonyms, ...)
+}
+
+#' @rdname mo_property
+#' @export
+mo_is_anaerobic <- function(x, language = get_AMR_locale(), keep_synonyms = getOption("AMR_keep_synonyms", FALSE), ...) {
+  if (missing(x)) {
+    # this tries to find the data and an 'mo' column
+    x <- find_mo_col(fn = "mo_is_anaerobic")
+  }
+  meet_criteria(x, allow_NA = TRUE)
+  language <- validate_language(language)
+  meet_criteria(keep_synonyms, allow_class = "logical", has_length = 1)
+  
+  x.mo <- as.mo(x, language = language, keep_synonyms = keep_synonyms, ...)
+  metadata <- get_mo_uncertainties()
+  oxygen <- mo_oxygen_tolerance(x.mo, language = NULL, keep_synonyms = keep_synonyms)
+  load_mo_uncertainties(metadata)
+  out <- oxygen == "anaerobe" & !is.na(oxygen)
+  out[x.mo %in% c(NA_character_, "UNKNOWN")] <- NA
+  out
+}
+
 #' @rdname mo_property
 #' @export
 mo_snomed <- function(x, language = get_AMR_locale(), keep_synonyms = getOption("AMR_keep_synonyms", FALSE), ...) {
@ -791,9 +827,12 @@ mo_info <- function(x, language = get_AMR_locale(), keep_synonyms = getOption("A
        status = mo_status(y, language = language, keep_synonyms = keep_synonyms),
        synonyms = mo_synonyms(y, keep_synonyms = keep_synonyms),
        gramstain = mo_gramstain(y, language = language, keep_synonyms = keep_synonyms),
+        oxygen_tolerance = mo_oxygen_tolerance(y, language = language, keep_synonyms = keep_synonyms),
        url = unname(mo_url(y, open = FALSE, keep_synonyms = keep_synonyms)),
        ref = mo_ref(y, keep_synonyms = keep_synonyms),
-        snomed = unlist(mo_snomed(y, keep_synonyms = keep_synonyms))
+        snomed = unlist(mo_snomed(y, keep_synonyms = keep_synonyms)),
+        lpsn = mo_lpsn(y, language = language, keep_synonyms = keep_synonyms),
+        gbif = mo_gbif(y, language = language, keep_synonyms = keep_synonyms)
      )
    )
  })
--- a/R/sysdata.rda
+++ b/R/sysdata.rda
--- a/data-raw/microorganisms.dta
+++ b/data-raw/microorganisms.dta
--- a/data-raw/microorganisms.feather
+++ b/data-raw/microorganisms.feather
--- a/data-raw/microorganisms.md5
+++ b/data-raw/microorganisms.md5
@ -1 +1 @@
-20bb7a68431826bce777a6c239f0fed0
+63cc9e5166dc50c7b474bb809557c392
--- a/data-raw/microorganisms.parquet
+++ b/data-raw/microorganisms.parquet
--- a/data-raw/microorganisms.rds
+++ b/data-raw/microorganisms.rds
--- a/data-raw/microorganisms.sas
+++ b/data-raw/microorganisms.sas
--- a/data-raw/microorganisms.sav
+++ b/data-raw/microorganisms.sav
--- a/data-raw/microorganisms.txt
+++ b/data-raw/microorganisms.txt
--- a/data-raw/microorganisms.xlsx
+++ b/data-raw/microorganisms.xlsx
--- a/data-raw/reproduction_of_microorganisms.R
+++ b/data-raw/reproduction_of_microorganisms.R
@ -1342,11 +1342,13 @@ bacdive <- vroom::vroom("data-raw/bacdive.csv", skip = 2) %>%
 bacdive <- bacdive %>% 
  # fill in missing species from previous rows
  mutate(species = ifelse(is.na(species), lag(species), species)) %>%
-  filter(!is.na(species), !is.na(oxygen), oxygen %unlike% "tolerant")
+  filter(!is.na(species), !is.na(oxygen), oxygen %unlike% "tolerant", species %unlike% "unclassified") %>% 
+  mutate(mo = as.mo(species, keep_synonyms = FALSE))
 bacdive <- bacdive %>% 
  # now determine type per species
-  group_by(species) %>%
-  summarise(oxygen_tolerance = case_when(any(oxygen %like% "facultative") ~ "facultative anaerobe",
+  group_by(mo) %>%
+  summarise(species = first(species),
+            oxygen_tolerance = case_when(any(oxygen %like% "facultative") ~ "facultative anaerobe",
                                         all(oxygen == "microaerophile") ~ "microaerophile",
                                         all(oxygen %in% c("anaerobe", "obligate anaerobe")) ~ "anaerobe",
                                         all(oxygen %in% c("anaerobe", "obligate anaerobe", "microaerophile")) ~ "anaerobe/microaerophile",
@ -1354,10 +1356,25 @@ bacdive <- bacdive %>%
                                         all(!oxygen %in% c("anaerobe", "obligate anaerobe")) ~ "aerobe",
                                         all(c("aerobe", "anaerobe") %in% oxygen) ~ "facultative anaerobe",
                                         TRUE ~ NA_character_))
+# now find all synonyms and copy them from their current taxonomic names
+synonyms <- as.mo(unique(unlist(mo_synonyms(bacdive$mo, keep_synonyms = TRUE))),
+                  keep_synonyms = TRUE)
+syns <- tibble(species = synonyms,
+               mo = synonyms %>% mo_current() %>% as.mo()) %>% 
+  filter(species != mo) %>% 
+  mutate(species = mo_name(species, keep_synonyms = TRUE)) %>% 
+  left_join(bacdive %>% select(mo, oxygen_tolerance)) %>% 
+  # set mo to mo of the synonym
+  mutate(mo = as.mo(species, keep_synonyms = TRUE)) %>% 
+  select(all_of(colnames(bacdive)))
+
+bacdive <- bacdive %>% 
+  bind_rows(syns) %>% 
+  distinct()

 bacdive_genus <- bacdive %>%
-  mutate(genus = gsub("^([A-Za-z]+) .*", "\\1", species), oxygen = oxygen_tolerance) %>%
-  group_by(species = genus) %>% 
+  mutate(oxygen = oxygen_tolerance) %>% 
+  group_by(species = mo_genus(mo)) %>% 
  summarise(oxygen_tolerance = case_when(any(oxygen == "facultative anaerobe") ~ "facultative anaerobe",
                                         any(oxygen == "anaerobe/microaerophile") ~ "anaerobe/microaerophile",
                                         all(oxygen == "microaerophile") ~ "microaerophile",
@ -1369,7 +1386,7 @@ bacdive <- bacdive %>%
  filter(species %unlike% " sp[.]") %>% 
  bind_rows(bacdive_genus) %>% 
  arrange(species) %>% 
-  mutate(mo = as.mo(species, keep_synonyms = FALSE))
+  mutate(mo = as.mo(species, keep_synonyms = TRUE))

 other_species <- microorganisms %>%
  filter(kingdom == "Bacteria", rank == "species", !mo %in% bacdive$mo, genus %in% bacdive$species) %>%
@ -1378,22 +1395,20 @@ other_species <- microorganisms %>%
  mutate(oxygen_tolerance = ifelse(oxygen_tolerance %in% c("aerobe", "anaerobe", "microaerophile", "anaerobe/microaerophile"),
                                   oxygen_tolerance,
                                   paste("likely", oxygen_tolerance))) %>% 
-  select(species, oxygen_tolerance, mo = mo2)
+  select(species, oxygen_tolerance, mo = mo2) %>% 
+  distinct(species, .keep_all = TRUE)

 bacdive <- bacdive %>% 
  bind_rows(other_species) %>% 
-  arrange(species)
+  arrange(species) %>% 
+  distinct(mo, .keep_all = TRUE) %>% 
+  select(-species)

 taxonomy <- taxonomy %>%
-  left_join(
-    bacdive %>% 
-      select(-species),
-    by = "mo") %>% 
-  
+  left_join(bacdive, by = "mo") %>% 
+  relocate(oxygen_tolerance, .after = ref)


-# TODO look up synonyms and fill them in as well
-
 # Clean data set ----------------------------------------------------------

 # format to tibble and check again for invalid characters
--- a/data/microorganisms.rda
+++ b/data/microorganisms.rda
--- a/inst/tinytest/test-add_custom_antimicrobials.R
+++ b/inst/tinytest/test-add_custom_antimicrobials.R
--- a/inst/tinytest/test-add_custom_microorganisms.R
+++ b/inst/tinytest/test-add_custom_microorganisms.R
@ -41,3 +41,6 @@ suppressMessages(
 expect_identical(as.character(as.mo("ENT_ASB_CLO")), "ENT_ASB_CLO")
 expect_identical(mo_name("ENT_ASB_CLO"), "Enterobacter asburiae/cloacae")
 expect_identical(mo_gramstain("ENT_ASB_CLO", language = NULL), "Gram-negative")
+
+expect_identical(paste("B", AMR:::abbreviate_mo("Klebsiella"), AMR:::abbreviate_mo("pneumoniae", 4), sep = "_"),
+                 as.character(as.mo("Klebsiella pneumoniae")))
--- a/inst/tinytest/test-mo_property.R
+++ b/inst/tinytest/test-mo_property.R
@ -98,11 +98,14 @@ expect_equal(names(mo_info("Escherichia coli")), c(
  "mo",
  "kingdom", "phylum", "class", "order",
  "family", "genus", "species", "subspecies",
-  "status", "synonyms", "gramstain", "url", "ref",
-  "snomed"
+  "status", "synonyms", "gramstain", "oxygen_tolerance",
+  "url", "ref", "snomed"
 ))
 expect_inherits(mo_info(c("Escherichia coli", "Staphylococcus aureus")), "list")

+expect_identical(mo_oxygen_tolerance(c("Klebsiella pneumoniae", "Clostridioides difficile")),
+                 c("aerobe", "anaerobe"))
+
 expect_equal(as.character(table(mo_pathogenicity(example_isolates$mo))),
             c("1561", "422", "1", "16"))

--- a/man/antibiogram.Rd
+++ b/man/antibiogram.Rd
@ -47,7 +47,7 @@ antibiogram(

 \item{antibiotics}{vector of any antibiotic name or code (will be evaluated with \code{\link[=as.ab]{as.ab()}}, column name of \code{x}, or (any combinations of) \link[=antibiotic_class_selectors]{antibiotic selectors} such as \code{\link[=aminoglycosides]{aminoglycosides()}} or \code{\link[=carbapenems]{carbapenems()}}. For combination antibiograms, this can also be set to values separated with \code{"+"}, such as "TZP+TOB" or "cipro + genta", given that columns resembling such antibiotics exist in \code{x}. See \emph{Examples}.}

-\item{mo_transform}{a character to transform microorganism input - must be "name", "shortname", "gramstain", or one of the column names of the \link{microorganisms} data set: "mo", "fullname", "status", "kingdom", "phylum", "class", "order", "family", "genus", "species", "subspecies", "rank", "ref", "source", "lpsn", "lpsn_parent", "lpsn_renamed_to", "gbif", "gbif_parent", "gbif_renamed_to", "prevalence", or "snomed". Can also be \code{NULL} to not transform the input.}
+\item{mo_transform}{a character to transform microorganism input - must be "name", "shortname", "gramstain", or one of the column names of the \link{microorganisms} data set: "mo", "fullname", "status", "kingdom", "phylum", "class", "order", "family", "genus", "species", "subspecies", "rank", "ref", "oxygen_tolerance", "source", "lpsn", "lpsn_parent", "lpsn_renamed_to", "gbif", "gbif_parent", "gbif_renamed_to", "prevalence", or "snomed". Can also be \code{NULL} to not transform the input.}

 \item{ab_transform}{a character to transform antibiotic input - must be one of the column names of the \link{antibiotics} data set: "ab", "cid", "name", "group", "atc", "atc_group1", "atc_group2", "abbreviations", "synonyms", "oral_ddd", "oral_units", "iv_ddd", "iv_units", or "loinc". Can also be \code{NULL} to not transform the input.}

--- a/man/as.mo.Rd
+++ b/man/as.mo.Rd
@ -121,13 +121,14 @@ The coercion rules consider the prevalence of microorganisms in humans, which is
 \item Berends MS \emph{et al.} (2022). \strong{AMR: An R Package for Working with Antimicrobial Resistance Data}. \emph{Journal of Statistical Software}, 104(3), 1-31; \doi{10.18637/jss.v104.i03}
 \item Becker K \emph{et al.} (2014). \strong{Coagulase-Negative Staphylococci.} \emph{Clin Microbiol Rev.} 27(4): 870-926; \doi{10.1128/CMR.00109-13}
 \item Becker K \emph{et al.} (2019). \strong{Implications of identifying the recently defined members of the \emph{S. aureus} complex, \emph{S. argenteus} and \emph{S. schweitzeri}: A position paper of members of the ESCMID Study Group for staphylococci and Staphylococcal Diseases (ESGS).} \emph{Clin Microbiol Infect}; \doi{10.1016/j.cmi.2019.02.028}
-\item Becker K \emph{et al.} (2020). \strong{Emergence of coagulase-negative staphylococci} \emph{Expert Rev Anti Infect Ther.} 18(4):349-366; \doi{10.1080/14787210.2020.1730813}
-\item Lancefield RC (1933). \strong{A serological differentiation of human and other groups of hemolytic streptococci}. \emph{J Exp Med.} 57(4): 571-95; \doi{10.1084/jem.57.4.571}
-\item Berends MS \emph{et al.} (2022). \strong{Trends in Occurrence and Phenotypic Resistance of Coagulase-Negative Staphylococci (CoNS) Found in Human Blood in the Northern Netherlands between 2013 and 2019} \emph{Microorganisms} 10(9), 1801; \doi{10.3390/microorganisms10091801}
+\item Becker K \emph{et al.} (2020). \strong{Emergence of coagulase-negative staphylococci.} \emph{Expert Rev Anti Infect Ther.} 18(4):349-366; \doi{10.1080/14787210.2020.1730813}
+\item Lancefield RC (1933). \strong{A serological differentiation of human and other groups of hemolytic streptococci.} \emph{J Exp Med.} 57(4): 571-95; \doi{10.1084/jem.57.4.571}
+\item Berends MS \emph{et al.} (2022). \strong{Trends in Occurrence and Phenotypic Resistance of Coagulase-Negative Staphylococci (CoNS) Found in Human Blood in the Northern Netherlands between 2013 and 2019.} \emph{Microorganisms} 10(9), 1801; \doi{10.3390/microorganisms10091801}
 \item Parte, AC \emph{et al.} (2020). \strong{List of Prokaryotic names with Standing in Nomenclature (LPSN) moves to the DSMZ.} International Journal of Systematic and Evolutionary Microbiology, 70, 5607-5612; \doi{10.1099/ijsem.0.004332}. Accessed from \url{https://lpsn.dsmz.de} on 11 December, 2022.
 \item GBIF Secretariat (2022). GBIF Backbone Taxonomy. Checklist dataset \doi{10.15468/39omei}. Accessed from \url{https://www.gbif.org} on 11 December, 2022.
 \item Public Health Information Network Vocabulary Access and Distribution System (PHIN VADS). US Edition of SNOMED CT from 1 September 2020. Value Set Name 'Microoganism', OID 2.16.840.1.114222.4.11.1009 (v12). URL: \url{https://phinvads.cdc.gov}
 \item Bartlett A \emph{et al.} (2022). \strong{A comprehensive list of bacterial pathogens infecting humans} \emph{Microbiology} 168:001269; \doi{10.1099/mic.0.001269}
+\item Reimer \emph{et al.} (2022). \strong{\emph{BacDive} in 2022: the knowledge base for standardized bacterial and archaeal data.}  \emph{Nucleic Acids Res.} 2022 Jan 7;50(D1):D741-D746; \doi{10.1093/nar/gkab961}
 }
 }

--- a/man/custom_eucast_rules.Rd
+++ b/man/custom_eucast_rules.Rd
@ -60,7 +60,7 @@ eucast_rules(df, rules = "custom", custom_rules = x, info = FALSE)

 \subsection{Using taxonomic properties in rules}{

-There is one exception in columns used for the rules: all column names of the \link{microorganisms} data set can also be used, but do not have to exist in the data set. These column names are: "mo", "fullname", "status", "kingdom", "phylum", "class", "order", "family", "genus", "species", "subspecies", "rank", "ref", "source", "lpsn", "lpsn_parent", "lpsn_renamed_to", "gbif", "gbif_parent", "gbif_renamed_to", "prevalence", and "snomed". Thus, this next example will work as well, despite the fact that the \code{df} data set does not contain a column \code{genus}:
+There is one exception in columns used for the rules: all column names of the \link{microorganisms} data set can also be used, but do not have to exist in the data set. These column names are: "mo", "fullname", "status", "kingdom", "phylum", "class", "order", "family", "genus", "species", "subspecies", "rank", "ref", "oxygen_tolerance", "source", "lpsn", "lpsn_parent", "lpsn_renamed_to", "gbif", "gbif_parent", "gbif_renamed_to", "prevalence", and "snomed". Thus, this next example will work as well, despite the fact that the \code{df} data set does not contain a column \code{genus}:

 \if{html}{\out{<div class="sourceCode r">}}\preformatted{y <- custom_eucast_rules(TZP == "S" & genus == "Klebsiella" ~ aminopenicillins == "S",
                         TZP == "R" & genus == "Klebsiella" ~ aminopenicillins == "R")
--- a/man/microorganisms.Rd
+++ b/man/microorganisms.Rd
@ -5,7 +5,7 @@
 \alias{microorganisms}
 \title{Data Set with 52 151 Microorganisms}
 \format{
-A \link[tibble:tibble]{tibble} with 52 151 observations and 22 variables:
+A \link[tibble:tibble]{tibble} with 52 151 observations and 23 variables:
 \itemize{
 \item \code{mo}\cr ID of microorganism as used by this package
 \item \code{fullname}\cr Full name, like \code{"Escherichia coli"}. For the taxonomic ranks genus, species and subspecies, this is the 'pasted' text of genus, species, and subspecies. For all taxonomic ranks higher than genus, this is the name of the taxon.
@ -14,6 +14,7 @@ A \link[tibble:tibble]{tibble} with 52 151 observations and 22 variables:
 \item \code{rank}\cr Text of the taxonomic rank of the microorganism, such as \code{"species"} or \code{"genus"}
 \item \code{ref}\cr Author(s) and year of related scientific publication. This contains only the \emph{first surname} and year of the \emph{latest} authors, e.g. "Wallis \emph{et al.} 2006 \emph{emend.} Smith and Jones 2018" becomes "Smith \emph{et al.}, 2018". This field is directly retrieved from the source specified in the column \code{source}. Moreover, accents were removed to comply with CRAN that only allows ASCII characters, e.g. "Váňová" becomes "Vanova".
 \item \code{lpsn}\cr Identifier ('Record number') of the List of Prokaryotic names with Standing in Nomenclature (LPSN). This will be the first/highest LPSN identifier to keep one identifier per row. For example, \emph{Acetobacter ascendens} has LPSN Record number 7864 and 11011. Only the first is available in the \code{microorganisms} data set.
+\item \code{oxygen_tolerance} \cr Oxygen tolerance, either "aerobe", "anaerobe", "anaerobe/microaerophile", "facultative anaerobe", "likely facultative anaerobe", or "microaerophile". These data were retrieved from BacDive (see \emph{Source}). Items that contain "likely" are missing from BacDive and were extrapolated from other species within the same genus to guess the oxygen tolerance. Currently 73.4\% of all ~36 000 bacteria in the data set contain an oxygen tolerance.
 \item \code{lpsn_parent}\cr LPSN identifier of the parent taxon
 \item \code{lpsn_renamed_to}\cr LPSN identifier of the currently valid taxon
 \item \code{gbif}\cr Identifier ('taxonID') of the Global Biodiversity Information Facility (GBIF)
@ -31,6 +32,7 @@ A \link[tibble:tibble]{tibble} with 52 151 observations and 22 variables:
 \item Public Health Information Network Vocabulary Access and Distribution System (PHIN VADS). US Edition of SNOMED CT from 1 September 2020. Value Set Name 'Microoganism', OID 2.16.840.1.114222.4.11.1009 (v12). URL: \url{https://phinvads.cdc.gov}
 \item Grimont \emph{et al.} (2007). Antigenic Formulae of the Salmonella Serovars, 9th Edition. WHO Collaborating Centre for Reference and Research on \emph{Salmonella} (WHOCC-SALM).
 \item Bartlett \emph{et al.} (2022). \strong{A comprehensive list of bacterial pathogens infecting humans} \emph{Microbiology} 168:001269; \doi{10.1099/mic.0.001269}
+\item Reimer \emph{et al.} (2022). \strong{\emph{BacDive} in 2022: the knowledge base for standardized bacterial and archaeal data.}  \emph{Nucleic Acids Res.} 2022 Jan 7;50(D1):D741-D746; \doi{10.1093/nar/gkab961}
 }
 }
 \usage{
--- a/man/mo_property.Rd
+++ b/man/mo_property.Rd
@ -22,6 +22,8 @@
 \alias{mo_is_gram_positive}
 \alias{mo_is_yeast}
 \alias{mo_is_intrinsic_resistant}
+\alias{mo_oxygen_tolerance}
+\alias{mo_is_anaerobic}
 \alias{mo_snomed}
 \alias{mo_ref}
 \alias{mo_authors}
@ -177,6 +179,20 @@ mo_is_intrinsic_resistant(
  ...
 )

+mo_oxygen_tolerance(
+  x,
+  language = get_AMR_locale(),
+  keep_synonyms = getOption("AMR_keep_synonyms", FALSE),
+  ...
+)
+
+mo_is_anaerobic(
+  x,
+  language = get_AMR_locale(),
+  keep_synonyms = getOption("AMR_keep_synonyms", FALSE),
+  ...
+)
+
 mo_snomed(
  x,
  language = get_AMR_locale(),
@ -278,7 +294,7 @@ mo_property(

 \item{open}{browse the URL using \code{\link[utils:browseURL]{browseURL()}}}

-\item{property}{one of the column names of the \link{microorganisms} data set: "mo", "fullname", "status", "kingdom", "phylum", "class", "order", "family", "genus", "species", "subspecies", "rank", "ref", "source", "lpsn", "lpsn_parent", "lpsn_renamed_to", "gbif", "gbif_parent", "gbif_renamed_to", "prevalence", or "snomed", or must be \code{"shortname"}}
+\item{property}{one of the column names of the \link{microorganisms} data set: "mo", "fullname", "status", "kingdom", "phylum", "class", "order", "family", "genus", "species", "subspecies", "rank", "ref", "oxygen_tolerance", "source", "lpsn", "lpsn_parent", "lpsn_renamed_to", "gbif", "gbif_parent", "gbif_renamed_to", "prevalence", or "snomed", or must be \code{"shortname"}}
 }
 \value{
 \itemize{
@ -313,6 +329,8 @@ Determination of yeasts (\code{\link[=mo_is_yeast]{mo_is_yeast()}}) will be base

 Determination of intrinsic resistance (\code{\link[=mo_is_intrinsic_resistant]{mo_is_intrinsic_resistant()}}) will be based on the \link{intrinsic_resistant} data set, which is based on \href{https://www.eucast.org/expert_rules_and_expected_phenotypes/}{'EUCAST Expert Rules' and 'EUCAST Intrinsic Resistance and Unusual Phenotypes' v3.3} (2021). The \code{\link[=mo_is_intrinsic_resistant]{mo_is_intrinsic_resistant()}} function can be vectorised over both argument \code{x} (input for microorganisms) and \code{ab} (input for antibiotics).

+Determination of bacterial oxygen tolerance (\code{\link[=mo_oxygen_tolerance]{mo_oxygen_tolerance()}}) will be based on BacDive, see \emph{Source}. The function \code{\link[=mo_is_anaerobic]{mo_is_anaerobic()}} only returns \code{TRUE} if the oxygen tolerance is \code{"anaerobe"}, indicating an obligate anaerobic species or genus. It always returns \code{FALSE} for species outside the taxonomic kingdom of Bacteria.
+
 The function \code{\link[=mo_url]{mo_url()}} will return the direct URL to the online database entry, which also shows the scientific reference of the concerned species.

 SNOMED codes (\code{\link[=mo_snomed]{mo_snomed()}}) are from the version of 1 July, 2021. See \emph{Source} and the \link{microorganisms} data set for more info.
@ -332,13 +350,14 @@ This function uses \code{\link[=as.mo]{as.mo()}} internally, which uses an advan
 \item Berends MS \emph{et al.} (2022). \strong{AMR: An R Package for Working with Antimicrobial Resistance Data}. \emph{Journal of Statistical Software}, 104(3), 1-31; \doi{10.18637/jss.v104.i03}
 \item Becker K \emph{et al.} (2014). \strong{Coagulase-Negative Staphylococci.} \emph{Clin Microbiol Rev.} 27(4): 870-926; \doi{10.1128/CMR.00109-13}
 \item Becker K \emph{et al.} (2019). \strong{Implications of identifying the recently defined members of the \emph{S. aureus} complex, \emph{S. argenteus} and \emph{S. schweitzeri}: A position paper of members of the ESCMID Study Group for staphylococci and Staphylococcal Diseases (ESGS).} \emph{Clin Microbiol Infect}; \doi{10.1016/j.cmi.2019.02.028}
-\item Becker K \emph{et al.} (2020). \strong{Emergence of coagulase-negative staphylococci} \emph{Expert Rev Anti Infect Ther.} 18(4):349-366; \doi{10.1080/14787210.2020.1730813}
-\item Lancefield RC (1933). \strong{A serological differentiation of human and other groups of hemolytic streptococci}. \emph{J Exp Med.} 57(4): 571-95; \doi{10.1084/jem.57.4.571}
-\item Berends MS \emph{et al.} (2022). \strong{Trends in Occurrence and Phenotypic Resistance of Coagulase-Negative Staphylococci (CoNS) Found in Human Blood in the Northern Netherlands between 2013 and 2019} \emph{Microorganisms} 10(9), 1801; \doi{10.3390/microorganisms10091801}
+\item Becker K \emph{et al.} (2020). \strong{Emergence of coagulase-negative staphylococci.} \emph{Expert Rev Anti Infect Ther.} 18(4):349-366; \doi{10.1080/14787210.2020.1730813}
+\item Lancefield RC (1933). \strong{A serological differentiation of human and other groups of hemolytic streptococci.} \emph{J Exp Med.} 57(4): 571-95; \doi{10.1084/jem.57.4.571}
+\item Berends MS \emph{et al.} (2022). \strong{Trends in Occurrence and Phenotypic Resistance of Coagulase-Negative Staphylococci (CoNS) Found in Human Blood in the Northern Netherlands between 2013 and 2019.} \emph{Microorganisms} 10(9), 1801; \doi{10.3390/microorganisms10091801}
 \item Parte, AC \emph{et al.} (2020). \strong{List of Prokaryotic names with Standing in Nomenclature (LPSN) moves to the DSMZ.} International Journal of Systematic and Evolutionary Microbiology, 70, 5607-5612; \doi{10.1099/ijsem.0.004332}. Accessed from \url{https://lpsn.dsmz.de} on 11 December, 2022.
 \item GBIF Secretariat (2022). GBIF Backbone Taxonomy. Checklist dataset \doi{10.15468/39omei}. Accessed from \url{https://www.gbif.org} on 11 December, 2022.
 \item Public Health Information Network Vocabulary Access and Distribution System (PHIN VADS). US Edition of SNOMED CT from 1 September 2020. Value Set Name 'Microorganism', OID 2.16.840.1.114222.4.11.1009 (v12). URL: \url{https://phinvads.cdc.gov}
 \item Bartlett A \emph{et al.} (2022). \strong{A comprehensive list of bacterial pathogens infecting humans} \emph{Microbiology} 168:001269; \doi{10.1099/mic.0.001269}
+\item Reimer \emph{et al.} (2022). \strong{\emph{BacDive} in 2022: the knowledge base for standardized bacterial and archaeal data.} \emph{Nucleic Acids Res.} 2022 Jan 7;50(D1):D741-D746; \doi{10.1093/nar/gkab961}
 }
 }