1
0
mirror of https://github.com/msberends/AMR.git synced 2025-07-10 03:02:05 +02:00

add oxygen tolerance

This commit is contained in:
2023-05-11 21:56:27 +02:00
parent bf08d136a0
commit 91fa73dedf
28 changed files with 52310 additions and 52203 deletions

View File

@ -505,7 +505,7 @@ word_wrap <- function(...,
# clean introduced whitespace between fullstops
msg <- gsub("[.] +[.]", "..", msg)
# remove extra space that was introduced (e.g. "Smith et al., 2022")
# remove extra space that was introduced (e.g. "Smith et al. , 2022")
msg <- gsub(". ,", ".,", msg, fixed = TRUE)
msg <- gsub("[ ,", "[,", msg, fixed = TRUE)
msg <- gsub("/ /", "//", msg, fixed = TRUE)

View File

@ -247,19 +247,14 @@ add_custom_microorganisms <- function(x) {
"CUSTOM",
seq.int(from = current + 1, to = current + nrow(x), by = 1),
"_",
toupper(unname(abbreviate(
gsub(
" +", " _ ",
gsub(
"[^A-Za-z0-9-]", " ",
trimws2(paste(x$genus, x$species, x$subspecies))
)
),
minlength = 10
)))
)
trimws(
paste(abbreviate_mo(x$genus, 5),
abbreviate_mo(x$species, 4, hyphen_as_space = TRUE),
abbreviate_mo(x$subspecies, 4, hyphen_as_space = TRUE),
sep = "_"),
whitespace = "_"))
stop_if(anyDuplicated(c(as.character(AMR_env$MO_lookup$mo), x$mo)), "MO codes must be unique and not match existing MO codes of the AMR package")
# add to package ----
AMR_env$custom_mo_codes <- c(AMR_env$custom_mo_codes, x$mo)
class(AMR_env$MO_lookup$mo) <- "character"
@ -306,3 +301,26 @@ clear_custom_microorganisms <- function() {
AMR_env$mo_uncertainties <- AMR_env$mo_uncertainties[0, , drop = FALSE]
message_("Cleared ", nr2char(n - n2), " custom record", ifelse(n - n2 > 1, "s", ""), " from the internal `microorganisms` data set.")
}
# Build an upper-case abbreviation of (a part of) a taxonomic name.
#
# x               character vector of name parts to abbreviate
# minlength       passed on to abbreviate() as its `minlength`
# prefix          text pasted in front of every abbreviation
# hyphen_as_space if TRUE, hyphens in `x` are turned into spaces first
# ...             forwarded to abbreviate()
#
# Returns a character vector of upper-cased abbreviations.
abbreviate_mo <- function(x, minlength = 5, prefix = "", hyphen_as_space = FALSE, ...) {
  if (hyphen_as_space == TRUE) {
    x <- gsub("-", " ", x, fixed = TRUE)
  }
  # Warnings raised by any of the steps below are silenced as a whole.
  suppressWarnings({
    # keep a starting Latin ae: swap it for ligature characters before
    # abbreviating, so it can be restored as "AE" afterwards
    shielded <- gsub("^ae", "\u00E6\u00E6", x, ignore.case = TRUE)
    shortened <- abbreviate(
      shielded,
      minlength = minlength,
      use.classes = TRUE,
      method = "both.sides",
      ...
    )
    code <- toupper(paste0(prefix, shortened))
    # any run of ligatures (upper or lower case) collapses into a single "AE"
    gsub("(\u00C6|\u00E6)+", "AE", code)
  })
}

View File

@ -93,6 +93,7 @@
#' - `rank`\cr Text of the taxonomic rank of the microorganism, such as `"species"` or `"genus"`
#' - `ref`\cr Author(s) and year of related scientific publication. This contains only the *first surname* and year of the *latest* authors, e.g. "Wallis *et al.* 2006 *emend.* Smith and Jones 2018" becomes "Smith *et al.*, 2018". This field is directly retrieved from the source specified in the column `source`. Moreover, accents were removed to comply with CRAN that only allows ASCII characters, e.g. "V`r "\u00e1\u0148ov\u00e1"`" becomes "Vanova".
#' - `lpsn`\cr Identifier ('Record number') of the List of Prokaryotic names with Standing in Nomenclature (LPSN). This will be the first/highest LPSN identifier to keep one identifier per row. For example, *Acetobacter ascendens* has LPSN Record number 7864 and 11011. Only the first is available in the `microorganisms` data set.
#' - `oxygen_tolerance` \cr Oxygen tolerance, either `r vector_or(microorganisms$oxygen_tolerance)`. These data were retrieved from BacDive (see *Source*). Items that contain "likely" are missing from BacDive and were extrapolated from other species within the same genus to guess the oxygen tolerance. Currently `r round(length(microorganisms$oxygen_tolerance[which(!is.na(microorganisms$oxygen_tolerance))]) / nrow(microorganisms[which(microorganisms$kingdom == "Bacteria"), ]) * 100, 1)`% of all `r format_included_data_number(nrow(microorganisms[which(microorganisms$kingdom == "Bacteria"), ]))` bacteria in the data set contain an oxygen tolerance.
#' - `lpsn_parent`\cr LPSN identifier of the parent taxon
#' - `lpsn_renamed_to`\cr LPSN identifier of the currently valid taxon
#' - `gbif`\cr Identifier ('taxonID') of the Global Biodiversity Information Facility (GBIF)
@ -145,6 +146,8 @@
#' * Grimont *et al.* (2007). Antigenic Formulae of the Salmonella Serovars, 9th Edition. WHO Collaborating Centre for Reference and Research on *Salmonella* (WHOCC-SALM).
#'
#' * Bartlett *et al.* (2022). **A comprehensive list of bacterial pathogens infecting humans** *Microbiology* 168:001269; \doi{10.1099/mic.0.001269}
#'
#' * Reimer *et al.* (2022). ***BacDive* in 2022: the knowledge base for standardized bacterial and archaeal data.** *Nucleic Acids Res.* 2022 Jan 7;50(D1):D741-D746; \doi{10.1093/nar/gkab961}
#' @seealso [as.mo()], [mo_property()], [microorganisms.codes], [intrinsic_resistant]
#' @examples
#' microorganisms

9
R/mo.R
View File

@ -95,13 +95,14 @@
#' 1. Berends MS *et al.* (2022). **AMR: An R Package for Working with Antimicrobial Resistance Data**. *Journal of Statistical Software*, 104(3), 1-31; \doi{10.18637/jss.v104.i03}
#' 2. Becker K *et al.* (2014). **Coagulase-Negative Staphylococci.** *Clin Microbiol Rev.* 27(4): 870-926; \doi{10.1128/CMR.00109-13}
#' 3. Becker K *et al.* (2019). **Implications of identifying the recently defined members of the *S. aureus* complex, *S. argenteus* and *S. schweitzeri*: A position paper of members of the ESCMID Study Group for staphylococci and Staphylococcal Diseases (ESGS).** *Clin Microbiol Infect*; \doi{10.1016/j.cmi.2019.02.028}
#' 4. Becker K *et al.* (2020). **Emergence of coagulase-negative staphylococci** *Expert Rev Anti Infect Ther.* 18(4):349-366; \doi{10.1080/14787210.2020.1730813}
#' 5. Lancefield RC (1933). **A serological differentiation of human and other groups of hemolytic streptococci**. *J Exp Med.* 57(4): 571-95; \doi{10.1084/jem.57.4.571}
#' 6. Berends MS *et al.* (2022). **Trends in Occurrence and Phenotypic Resistance of Coagulase-Negative Staphylococci (CoNS) Found in Human Blood in the Northern Netherlands between 2013 and 2019** *Microorganisms* 10(9), 1801; \doi{10.3390/microorganisms10091801}
#' 4. Becker K *et al.* (2020). **Emergence of coagulase-negative staphylococci.** *Expert Rev Anti Infect Ther.* 18(4):349-366; \doi{10.1080/14787210.2020.1730813}
#' 5. Lancefield RC (1933). **A serological differentiation of human and other groups of hemolytic streptococci.** *J Exp Med.* 57(4): 571-95; \doi{10.1084/jem.57.4.571}
#' 6. Berends MS *et al.* (2022). **Trends in Occurrence and Phenotypic Resistance of Coagulase-Negative Staphylococci (CoNS) Found in Human Blood in the Northern Netherlands between 2013 and 2019.** *Microorganisms* 10(9), 1801; \doi{10.3390/microorganisms10091801}
#' 7. `r TAXONOMY_VERSION$LPSN$citation` Accessed from <`r TAXONOMY_VERSION$LPSN$url`> on `r documentation_date(TAXONOMY_VERSION$LPSN$accessed_date)`.
#' 8. `r TAXONOMY_VERSION$GBIF$citation` Accessed from <`r TAXONOMY_VERSION$GBIF$url`> on `r documentation_date(TAXONOMY_VERSION$GBIF$accessed_date)`.
#' 9. `r TAXONOMY_VERSION$SNOMED$citation` URL: <`r TAXONOMY_VERSION$SNOMED$url`>
#' 10. Bartlett A *et al.* (2022). **A comprehensive list of bacterial pathogens infecting humans.** *Microbiology* 168:001269; \doi{10.1099/mic.0.001269}
#' 11. Reimer *et al.* (2022). ***BacDive* in 2022: the knowledge base for standardized bacterial and archaeal data.** *Nucleic Acids Res.* 2022 Jan 7;50(D1):D741-D746; \doi{10.1093/nar/gkab961}
#' @export
#' @return A [character] [vector] with additional class [`mo`]
#' @seealso [microorganisms] for the [data.frame] that is being used to determine ID's.
@ -888,8 +889,6 @@ print.mo_uncertainties <- function(x, n = 10, ...) {
),
collapse = "\n"
),
# Add "Based on {input}" text if it differs from the original input
ifelse(x[i, ]$original_input != x[i, ]$input, paste0(strrep(" ", nchar(x[i, ]$original_input) + 6), "Based on input \"", x[i, ]$input, "\""), ""),
# Add note if result was coerced to accepted taxonomic name
ifelse(x[i, ]$keep_synonyms == FALSE & x[i, ]$mo %in% AMR_env$MO_lookup$mo[which(AMR_env$MO_lookup$status == "synonym")],
paste0(

View File

@ -53,6 +53,8 @@
#' Determination of yeasts ([mo_is_yeast()]) will be based on the taxonomic kingdom and class. *Budding yeasts* are fungi of the phylum Ascomycota, class Saccharomycetes (also called Hemiascomycetes). *True yeasts* are aggregated into the underlying order Saccharomycetales. Thus, for all microorganisms that are member of the taxonomic class Saccharomycetes, the function will return `TRUE`. It returns `FALSE` otherwise (or `NA` when the input is `NA` or the MO code is `UNKNOWN`).
#'
#' Determination of intrinsic resistance ([mo_is_intrinsic_resistant()]) will be based on the [intrinsic_resistant] data set, which is based on `r format_eucast_version_nr(3.3)`. The [mo_is_intrinsic_resistant()] function can be vectorised over both argument `x` (input for microorganisms) and `ab` (input for antibiotics).
#'
#' Determination of bacterial oxygen tolerance ([mo_oxygen_tolerance()]) will be based on BacDive, see *Source*. The function [mo_is_anaerobic()] only returns `TRUE` if the oxygen tolerance is `"anaerobe"`, indicating an obligate anaerobic species or genus. It always returns `FALSE` for species outside the taxonomic kingdom of Bacteria.
#'
#' The function [mo_url()] will return the direct URL to the online database entry, which also shows the scientific reference of the concerned species.
#'
@ -589,6 +591,40 @@ mo_is_intrinsic_resistant <- function(x, ab, language = get_AMR_locale(), keep_s
paste(x, ab) %in% AMR_env$intrinsic_resistant
}
#' @rdname mo_property
#' @export
mo_oxygen_tolerance <- function(x, language = get_AMR_locale(), keep_synonyms = getOption("AMR_keep_synonyms", FALSE), ...) {
  # When no input is given, try to locate the data and an 'mo' column in it.
  x <- if (missing(x)) find_mo_col(fn = "mo_oxygen_tolerance") else x

  # Argument checks; NA values are allowed in `x`.
  meet_criteria(x, allow_NA = TRUE)
  lang <- validate_language(language)
  meet_criteria(keep_synonyms, allow_class = "logical", has_length = 1)

  # The property lookup itself is delegated to mo_validate().
  mo_validate(
    x = x,
    property = "oxygen_tolerance",
    language = lang,
    keep_synonyms = keep_synonyms,
    ...
  )
}
#' @rdname mo_property
#' @export
mo_is_anaerobic <- function(x, language = get_AMR_locale(), keep_synonyms = getOption("AMR_keep_synonyms", FALSE), ...) {
  # When no input is given, try to locate the data and an 'mo' column in it.
  x <- if (missing(x)) find_mo_col(fn = "mo_is_anaerobic") else x

  # Argument checks; NA values are allowed in `x`.
  meet_criteria(x, allow_NA = TRUE)
  language <- validate_language(language)
  meet_criteria(keep_synonyms, allow_class = "logical", has_length = 1)

  mo_codes <- as.mo(x, language = language, keep_synonyms = keep_synonyms, ...)

  # NOTE(review): the uncertainties are fetched before and loaded back after
  # the nested property lookup -- presumably so the inner call does not
  # overwrite what as.mo() recorded for the user's input; confirm.
  saved_uncertainties <- get_mo_uncertainties()
  tolerance <- mo_oxygen_tolerance(mo_codes, language = NULL, keep_synonyms = keep_synonyms)
  load_mo_uncertainties(saved_uncertainties)

  # TRUE only for an exact "anaerobe"; a missing tolerance yields FALSE
  is_anaerobe <- !is.na(tolerance) & tolerance == "anaerobe"
  # missing or unknown microorganisms cannot be classified, so they become NA
  is_anaerobe[mo_codes %in% c(NA_character_, "UNKNOWN")] <- NA
  is_anaerobe
}
#' @rdname mo_property
#' @export
mo_snomed <- function(x, language = get_AMR_locale(), keep_synonyms = getOption("AMR_keep_synonyms", FALSE), ...) {
@ -791,9 +827,12 @@ mo_info <- function(x, language = get_AMR_locale(), keep_synonyms = getOption("A
status = mo_status(y, language = language, keep_synonyms = keep_synonyms),
synonyms = mo_synonyms(y, keep_synonyms = keep_synonyms),
gramstain = mo_gramstain(y, language = language, keep_synonyms = keep_synonyms),
oxygen_tolerance = mo_oxygen_tolerance(y, language = language, keep_synonyms = keep_synonyms),
url = unname(mo_url(y, open = FALSE, keep_synonyms = keep_synonyms)),
ref = mo_ref(y, keep_synonyms = keep_synonyms),
snomed = unlist(mo_snomed(y, keep_synonyms = keep_synonyms))
snomed = unlist(mo_snomed(y, keep_synonyms = keep_synonyms)),
lpsn = mo_lpsn(y, language = language, keep_synonyms = keep_synonyms),
gbif = mo_gbif(y, language = language, keep_synonyms = keep_synonyms)
)
)
})

Binary file not shown.