Add oxygen tolerance

This commit is contained in:
Dr. Matthijs Berends 2023-05-12 10:37:07 +02:00 committed by GitHub
commit 5f9769a4f7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
36 changed files with 52323 additions and 52212 deletions

View File

@ -1,6 +1,6 @@
Package: AMR
Version: 2.0.0.9014
Date: 2023-05-08
Version: 2.0.0.9015
Date: 2023-05-12
Title: Antimicrobial Resistance Data Analysis
Description: Functions to simplify and standardise antimicrobial resistance (AMR)
data analysis and to work with microbial and antimicrobial properties by

View File

@ -330,6 +330,7 @@ export(mo_gbif)
export(mo_genus)
export(mo_gramstain)
export(mo_info)
export(mo_is_anaerobic)
export(mo_is_gram_negative)
export(mo_is_gram_positive)
export(mo_is_intrinsic_resistant)
@ -339,6 +340,7 @@ export(mo_lpsn)
export(mo_matching_score)
export(mo_name)
export(mo_order)
export(mo_oxygen_tolerance)
export(mo_pathogenicity)
export(mo_phylum)
export(mo_property)

View File

@ -1,11 +1,16 @@
# AMR 2.0.0.9014
# AMR 2.0.0.9015
## Changed
* Added oxygen tolerance to over 25,000 bacteria in the `microorganisms` data set
* Added `mo_oxygen_tolerance()` to retrieve the values
* Added `mo_is_anaerobic()` to determine which species are obligate anaerobic bacteria
* Added LPSN and GBIF identifiers, and oxygen tolerance to `mo_info()`
* Formatting fix for `sir_interpretation_history()`
* Fixed some WHONET codes for microorganisms and consequently a couple of entries in `clinical_breakpoints`
* Added microbial codes for Gram-negative/positive anaerobic bacteria
* `mo_rank()` now returns `NA` for 'unknown' microorganisms (`B_ANAER`, `B_ANAER-NEG`, `B_ANAER-POS`, `B_GRAMN`, `B_GRAMP`, `F_FUNGUS`, `F_YEAST`, and `UNKNOWN`)
* Fixed a bug for `as.mo()` that led to coercion of `NA` values when using custom microorganism codes
* Added SAS Transport files (file extension `.xpt`) to [our download page](https://msberends.github.io/AMR/articles/datasets.html) to use in SAS software
# AMR 2.0.0

View File

@ -505,7 +505,7 @@ word_wrap <- function(...,
# clean introduced whitespace between fullstops
msg <- gsub("[.] +[.]", "..", msg)
# remove extra space that was introduced (e.g. "Smith et al., 2022")
# remove extra space that was introduced (e.g. "Smith et al. , 2022")
msg <- gsub(". ,", ".,", msg, fixed = TRUE)
msg <- gsub("[ ,", "[,", msg, fixed = TRUE)
msg <- gsub("/ /", "//", msg, fixed = TRUE)

View File

@ -247,19 +247,14 @@ add_custom_microorganisms <- function(x) {
"CUSTOM",
seq.int(from = current + 1, to = current + nrow(x), by = 1),
"_",
toupper(unname(abbreviate(
gsub(
" +", " _ ",
gsub(
"[^A-Za-z0-9-]", " ",
trimws2(paste(x$genus, x$species, x$subspecies))
)
),
minlength = 10
)))
)
trimws(
paste(abbreviate_mo(x$genus, 5),
abbreviate_mo(x$species, 4, hyphen_as_space = TRUE),
abbreviate_mo(x$subspecies, 4, hyphen_as_space = TRUE),
sep = "_"),
whitespace = "_"))
stop_if(anyDuplicated(c(as.character(AMR_env$MO_lookup$mo), x$mo)), "MO codes must be unique and not match existing MO codes of the AMR package")
# add to package ----
AMR_env$custom_mo_codes <- c(AMR_env$custom_mo_codes, x$mo)
class(AMR_env$MO_lookup$mo) <- "character"
@ -306,3 +301,26 @@ clear_custom_microorganisms <- function() {
AMR_env$mo_uncertainties <- AMR_env$mo_uncertainties[0, , drop = FALSE]
message_("Cleared ", nr2char(n - n2), " custom record", ifelse(n - n2 > 1, "s", ""), " from the internal `microorganisms` data set.")
}
abbreviate_mo <- function(x, minlength = 5, prefix = "", hyphen_as_space = FALSE, ...) {
  # Build an upper-case abbreviation of `x` with base::abbreviate().
  #
  # x               character vector to abbreviate (e.g. a genus or species name)
  # minlength       minimum length handed to abbreviate()
  # prefix          text pasted in front of every abbreviation before upper-casing
  # hyphen_as_space when TRUE, hyphens in `x` are replaced by spaces first
  # ...             forwarded to abbreviate()
  #
  # Returns a character vector with one upper-case abbreviation per element.
  if (hyphen_as_space == TRUE) {
    x <- gsub("-", " ", x, fixed = TRUE)
  }
  # all steps stay inside one suppressWarnings() so that no warning of any step
  # reaches the caller
  suppressWarnings({
    # keep a starting Latin ae: swap it for two ligature characters
    # (presumably so abbreviate() does not drop them as ordinary vowels)
    ligature_protected <- gsub("^ae", "\u00E6\u00E6", x, ignore.case = TRUE)
    shortened <- abbreviate(
      ligature_protected,
      minlength = minlength,
      use.classes = TRUE,
      method = "both.sides",
      ...
    )
    upper_code <- toupper(paste0(prefix, shortened))
    # collapse any run of upper/lower case ligatures back into a single "AE"
    gsub("(\u00C6|\u00E6)+", "AE", upper_code)
  })
}

View File

@ -93,6 +93,7 @@
#' - `rank`\cr Text of the taxonomic rank of the microorganism, such as `"species"` or `"genus"`
#' - `ref`\cr Author(s) and year of related scientific publication. This contains only the *first surname* and year of the *latest* authors, e.g. "Wallis *et al.* 2006 *emend.* Smith and Jones 2018" becomes "Smith *et al.*, 2018". This field is directly retrieved from the source specified in the column `source`. Moreover, accents were removed to comply with CRAN that only allows ASCII characters, e.g. "V`r "\u00e1\u0148ov\u00e1"`" becomes "Vanova".
#' - `lpsn`\cr Identifier ('Record number') of the List of Prokaryotic names with Standing in Nomenclature (LPSN). This will be the first/highest LPSN identifier to keep one identifier per row. For example, *Acetobacter ascendens* has LPSN Record number 7864 and 11011. Only the first is available in the `microorganisms` data set.
#' - `oxygen_tolerance` \cr Oxygen tolerance, either `r vector_or(microorganisms$oxygen_tolerance)`. These data were retrieved from BacDive (see *Source*). Items that contain "likely" are missing from BacDive and were extrapolated from other species within the same genus to guess the oxygen tolerance. Currently `r round(length(microorganisms$oxygen_tolerance[which(!is.na(microorganisms$oxygen_tolerance))]) / nrow(microorganisms[which(microorganisms$kingdom == "Bacteria"), ]) * 100, 1)`% of all `r format_included_data_number(nrow(microorganisms[which(microorganisms$kingdom == "Bacteria"), ]))` bacteria in the data set contain an oxygen tolerance.
#' - `lpsn_parent`\cr LPSN identifier of the parent taxon
#' - `lpsn_renamed_to`\cr LPSN identifier of the currently valid taxon
#' - `gbif`\cr Identifier ('taxonID') of the Global Biodiversity Information Facility (GBIF)
@ -145,6 +146,8 @@
#' * Grimont *et al.* (2007). Antigenic Formulae of the Salmonella Serovars, 9th Edition. WHO Collaborating Centre for Reference and Research on *Salmonella* (WHOCC-SALM).
#'
#' * Bartlett *et al.* (2022). **A comprehensive list of bacterial pathogens infecting humans** *Microbiology* 168:001269; \doi{10.1099/mic.0.001269}
#'
#' * Reimer *et al.* (2022). ***BacDive* in 2022: the knowledge base for standardized bacterial and archaeal data.** *Nucleic Acids Res.* 2022 Jan 7;50(D1):D741-D746; \doi{10.1093/nar/gkab961}
#' @seealso [as.mo()], [mo_property()], [microorganisms.codes], [intrinsic_resistant]
#' @examples
#' microorganisms

9
R/mo.R
View File

@ -95,13 +95,14 @@
#' 1. Berends MS *et al.* (2022). **AMR: An R Package for Working with Antimicrobial Resistance Data**. *Journal of Statistical Software*, 104(3), 1-31; \doi{10.18637/jss.v104.i03}
#' 2. Becker K *et al.* (2014). **Coagulase-Negative Staphylococci.** *Clin Microbiol Rev.* 27(4): 870-926; \doi{10.1128/CMR.00109-13}
#' 3. Becker K *et al.* (2019). **Implications of identifying the recently defined members of the *S. aureus* complex, *S. argenteus* and *S. schweitzeri*: A position paper of members of the ESCMID Study Group for staphylococci and Staphylococcal Diseases (ESGS).** *Clin Microbiol Infect*; \doi{10.1016/j.cmi.2019.02.028}
#' 4. Becker K *et al.* (2020). **Emergence of coagulase-negative staphylococci** *Expert Rev Anti Infect Ther.* 18(4):349-366; \doi{10.1080/14787210.2020.1730813}
#' 5. Lancefield RC (1933). **A serological differentiation of human and other groups of hemolytic streptococci**. *J Exp Med.* 57(4): 571-95; \doi{10.1084/jem.57.4.571}
#' 6. Berends MS *et al.* (2022). **Trends in Occurrence and Phenotypic Resistance of Coagulase-Negative Staphylococci (CoNS) Found in Human Blood in the Northern Netherlands between 2013 and 2019** *Microorganisms* 10(9), 1801; \doi{10.3390/microorganisms10091801}
#' 4. Becker K *et al.* (2020). **Emergence of coagulase-negative staphylococci.** *Expert Rev Anti Infect Ther.* 18(4):349-366; \doi{10.1080/14787210.2020.1730813}
#' 5. Lancefield RC (1933). **A serological differentiation of human and other groups of hemolytic streptococci.** *J Exp Med.* 57(4): 571-95; \doi{10.1084/jem.57.4.571}
#' 6. Berends MS *et al.* (2022). **Trends in Occurrence and Phenotypic Resistance of Coagulase-Negative Staphylococci (CoNS) Found in Human Blood in the Northern Netherlands between 2013 and 2019.** *Microorganisms* 10(9), 1801; \doi{10.3390/microorganisms10091801}
#' 7. `r TAXONOMY_VERSION$LPSN$citation` Accessed from <`r TAXONOMY_VERSION$LPSN$url`> on `r documentation_date(TAXONOMY_VERSION$LPSN$accessed_date)`.
#' 8. `r TAXONOMY_VERSION$GBIF$citation` Accessed from <`r TAXONOMY_VERSION$GBIF$url`> on `r documentation_date(TAXONOMY_VERSION$GBIF$accessed_date)`.
#' 9. `r TAXONOMY_VERSION$SNOMED$citation` URL: <`r TAXONOMY_VERSION$SNOMED$url`>
#' 10. Bartlett A *et al.* (2022). **A comprehensive list of bacterial pathogens infecting humans** *Microbiology* 168:001269; \doi{10.1099/mic.0.001269}
#' 11. Reimer *et al.* (2022). ***BacDive* in 2022: the knowledge base for standardized bacterial and archaeal data.** *Nucleic Acids Res.* 2022 Jan 7;50(D1):D741-D746; \doi{10.1093/nar/gkab961}
#' @export
#' @return A [character] [vector] with additional class [`mo`]
#' @seealso [microorganisms] for the [data.frame] that is being used to determine ID's.
@ -888,8 +889,6 @@ print.mo_uncertainties <- function(x, n = 10, ...) {
),
collapse = "\n"
),
# Add "Based on {input}" text if it differs from the original input
ifelse(x[i, ]$original_input != x[i, ]$input, paste0(strrep(" ", nchar(x[i, ]$original_input) + 6), "Based on input \"", x[i, ]$input, "\""), ""),
# Add note if result was coerced to accepted taxonomic name
ifelse(x[i, ]$keep_synonyms == FALSE & x[i, ]$mo %in% AMR_env$MO_lookup$mo[which(AMR_env$MO_lookup$status == "synonym")],
paste0(

View File

@ -53,6 +53,8 @@
#' Determination of yeasts ([mo_is_yeast()]) will be based on the taxonomic kingdom and class. *Budding yeasts* are fungi of the phylum Ascomycota, class Saccharomycetes (also called Hemiascomycetes). *True yeasts* are aggregated into the underlying order Saccharomycetales. Thus, for all microorganisms that are member of the taxonomic class Saccharomycetes, the function will return `TRUE`. It returns `FALSE` otherwise (or `NA` when the input is `NA` or the MO code is `UNKNOWN`).
#'
#' Determination of intrinsic resistance ([mo_is_intrinsic_resistant()]) will be based on the [intrinsic_resistant] data set, which is based on `r format_eucast_version_nr(3.3)`. The [mo_is_intrinsic_resistant()] function can be vectorised over both argument `x` (input for microorganisms) and `ab` (input for antibiotics).
#'
#' Determination of bacterial oxygen tolerance ([mo_oxygen_tolerance()]) will be based on BacDive, see *Source*. The function [mo_is_anaerobic()] only returns `TRUE` if the oxygen tolerance is `"anaerobe"`, indicating an obligate anaerobic species or genus. It always returns `FALSE` for species outside the taxonomic kingdom of Bacteria.
#'
#' The function [mo_url()] will return the direct URL to the online database entry, which also shows the scientific reference of the concerned species.
#'
@ -589,6 +591,40 @@ mo_is_intrinsic_resistant <- function(x, ab, language = get_AMR_locale(), keep_s
paste(x, ab) %in% AMR_env$intrinsic_resistant
}
#' @rdname mo_property
#' @export
mo_oxygen_tolerance <- function(x, language = get_AMR_locale(), keep_synonyms = getOption("AMR_keep_synonyms", FALSE), ...) {
# Retrieve the "oxygen_tolerance" property for the microorganism input `x`
# by delegating to mo_validate(); that result is returned unchanged.
if (missing(x)) {
# this tries to find the data and an 'mo' column
x <- find_mo_col(fn = "mo_oxygen_tolerance")
}
# validate the input; NA values in `x` are allowed
meet_criteria(x, allow_NA = TRUE)
# use the value returned by validate_language() from here on
language <- validate_language(language)
# `keep_synonyms` must be a single logical value
meet_criteria(keep_synonyms, allow_class = "logical", has_length = 1)
# extra arguments in `...` are forwarded to mo_validate()
mo_validate(x = x, property = "oxygen_tolerance", language = language, keep_synonyms = keep_synonyms, ...)
}
#' @rdname mo_property
#' @export
mo_is_anaerobic <- function(x, language = get_AMR_locale(), keep_synonyms = getOption("AMR_keep_synonyms", FALSE), ...) {
  # Flag which elements of `x` have an oxygen tolerance of exactly "anaerobe".
  # Returns a logical vector: TRUE for "anaerobe", FALSE for any other or
  # missing oxygen tolerance, and NA where the coerced code is NA or "UNKNOWN".
  if (missing(x)) {
    # no input supplied: try to find the data and an 'mo' column
    x <- find_mo_col(fn = "mo_is_anaerobic")
  }
  meet_criteria(x, allow_NA = TRUE)
  language <- validate_language(language)
  meet_criteria(keep_synonyms, allow_class = "logical", has_length = 1)

  mo_codes <- as.mo(x, language = language, keep_synonyms = keep_synonyms, ...)

  # Keep the uncertainties registered by the as.mo() call above and put them
  # back after the nested lookup (presumably the nested call would otherwise
  # replace them - TODO confirm).
  saved_uncertainties <- get_mo_uncertainties()
  tolerance <- mo_oxygen_tolerance(mo_codes, language = NULL, keep_synonyms = keep_synonyms)
  load_mo_uncertainties(saved_uncertainties)

  # a missing oxygen tolerance counts as "not anaerobic" (FALSE), not as NA
  is_anaerobe <- tolerance == "anaerobe" & !is.na(tolerance)
  # only a missing or unknown microorganism yields NA
  is_anaerobe[mo_codes %in% c(NA_character_, "UNKNOWN")] <- NA
  is_anaerobe
}
#' @rdname mo_property
#' @export
mo_snomed <- function(x, language = get_AMR_locale(), keep_synonyms = getOption("AMR_keep_synonyms", FALSE), ...) {
@ -791,9 +827,12 @@ mo_info <- function(x, language = get_AMR_locale(), keep_synonyms = getOption("A
status = mo_status(y, language = language, keep_synonyms = keep_synonyms),
synonyms = mo_synonyms(y, keep_synonyms = keep_synonyms),
gramstain = mo_gramstain(y, language = language, keep_synonyms = keep_synonyms),
oxygen_tolerance = mo_oxygen_tolerance(y, language = language, keep_synonyms = keep_synonyms),
url = unname(mo_url(y, open = FALSE, keep_synonyms = keep_synonyms)),
ref = mo_ref(y, keep_synonyms = keep_synonyms),
snomed = unlist(mo_snomed(y, keep_synonyms = keep_synonyms))
snomed = unlist(mo_snomed(y, keep_synonyms = keep_synonyms)),
lpsn = mo_lpsn(y, language = language, keep_synonyms = keep_synonyms),
gbif = mo_gbif(y, language = language, keep_synonyms = keep_synonyms)
)
)
})

Binary file not shown.

View File

@ -366,7 +366,7 @@ if (changed_md5(clin_break)) {
write_md5(clin_break)
try(saveRDS(clin_break, "data-raw/clinical_breakpoints.rds", version = 2, compress = "xz"), silent = TRUE)
try(write.table(clin_break, "data-raw/clinical_breakpoints.txt", sep = "\t", na = "", row.names = FALSE), silent = TRUE)
try(haven::write_sas(clin_break, "data-raw/clinical_breakpoints.sas"), silent = TRUE)
try(haven::write_xpt(clin_break, "data-raw/clinical_breakpoints.xpt"), silent = TRUE)
try(haven::write_sav(clin_break, "data-raw/clinical_breakpoints.sav"), silent = TRUE)
try(haven::write_dta(clin_break, "data-raw/clinical_breakpoints.dta"), silent = TRUE)
try(openxlsx::write.xlsx(clin_break, "data-raw/clinical_breakpoints.xlsx"), silent = TRUE)
@ -382,7 +382,7 @@ if (changed_md5(microorganisms)) {
mo <- microorganisms
mo$snomed <- max_50_snomed
mo <- dplyr::mutate_if(mo, ~ !is.numeric(.), as.character)
try(haven::write_sas(mo, "data-raw/microorganisms.sas"), silent = TRUE)
try(haven::write_xpt(mo, "data-raw/microorganisms.xpt"), silent = TRUE)
try(haven::write_sav(mo, "data-raw/microorganisms.sav"), silent = TRUE)
try(haven::write_dta(mo, "data-raw/microorganisms.dta"), silent = TRUE)
mo_all_snomed <- microorganisms %>% mutate_if(is.list, function(x) sapply(x, paste, collapse = ","))
@ -397,7 +397,7 @@ if (changed_md5(ab)) {
usethis::ui_info(paste0("Saving {usethis::ui_value('antibiotics')} to {usethis::ui_value('data-raw/')}"))
write_md5(ab)
try(saveRDS(antibiotics, "data-raw/antibiotics.rds", version = 2, compress = "xz"), silent = TRUE)
try(haven::write_sas(ab, "data-raw/antibiotics.sas"), silent = TRUE)
try(haven::write_xpt(ab, "data-raw/antibiotics.xpt"), silent = TRUE)
try(haven::write_sav(ab, "data-raw/antibiotics.sav"), silent = TRUE)
try(haven::write_dta(ab, "data-raw/antibiotics.dta"), silent = TRUE)
ab_lists <- antibiotics %>% mutate_if(is.list, function(x) sapply(x, paste, collapse = ","))
@ -412,7 +412,7 @@ if (changed_md5(av)) {
usethis::ui_info(paste0("Saving {usethis::ui_value('antivirals')} to {usethis::ui_value('data-raw/')}"))
write_md5(av)
try(saveRDS(antivirals, "data-raw/antivirals.rds", version = 2, compress = "xz"), silent = TRUE)
try(haven::write_sas(av, "data-raw/antivirals.sas"), silent = TRUE)
try(haven::write_xpt(av, "data-raw/antivirals.xpt"), silent = TRUE)
try(haven::write_sav(av, "data-raw/antivirals.sav"), silent = TRUE)
try(haven::write_dta(av, "data-raw/antivirals.dta"), silent = TRUE)
av_lists <- antivirals %>% mutate_if(is.list, function(x) sapply(x, paste, collapse = ","))
@ -433,7 +433,7 @@ if (changed_md5(intrinsicR)) {
write_md5(intrinsicR)
try(saveRDS(intrinsicR, "data-raw/intrinsic_resistant.rds", version = 2, compress = "xz"), silent = TRUE)
try(write.table(intrinsicR, "data-raw/intrinsic_resistant.txt", sep = "\t", na = "", row.names = FALSE), silent = TRUE)
try(haven::write_sas(intrinsicR, "data-raw/intrinsic_resistant.sas"), silent = TRUE)
try(haven::write_xpt(intrinsicR, "data-raw/intrinsic_resistant.xpt"), silent = TRUE)
try(haven::write_sav(intrinsicR, "data-raw/intrinsic_resistant.sav"), silent = TRUE)
try(haven::write_dta(intrinsicR, "data-raw/intrinsic_resistant.dta"), silent = TRUE)
try(openxlsx::write.xlsx(intrinsicR, "data-raw/intrinsic_resistant.xlsx"), silent = TRUE)
@ -446,7 +446,7 @@ if (changed_md5(dosage)) {
write_md5(dosage)
try(saveRDS(dosage, "data-raw/dosage.rds", version = 2, compress = "xz"), silent = TRUE)
try(write.table(dosage, "data-raw/dosage.txt", sep = "\t", na = "", row.names = FALSE), silent = TRUE)
try(haven::write_sas(dosage, "data-raw/dosage.sas"), silent = TRUE)
try(haven::write_xpt(dosage, "data-raw/dosage.xpt"), silent = TRUE)
try(haven::write_sav(dosage, "data-raw/dosage.sav"), silent = TRUE)
try(haven::write_dta(dosage, "data-raw/dosage.dta"), silent = TRUE)
try(openxlsx::write.xlsx(dosage, "data-raw/dosage.xlsx"), silent = TRUE)

BIN
data-raw/antibiotics.xpt Normal file

Binary file not shown.

BIN
data-raw/antivirals.xpt Normal file

Binary file not shown.

Binary file not shown.

BIN
data-raw/dosage.xpt Normal file

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -1 +1 @@
20bb7a68431826bce777a6c239f0fed0
63cc9e5166dc50c7b474bb809557c392

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

File diff suppressed because one or more lines are too long

Binary file not shown.

BIN
data-raw/microorganisms.xpt Normal file

Binary file not shown.

View File

@ -1342,11 +1342,13 @@ bacdive <- vroom::vroom("data-raw/bacdive.csv", skip = 2) %>%
bacdive <- bacdive %>%
# fill in missing species from previous rows
mutate(species = ifelse(is.na(species), lag(species), species)) %>%
filter(!is.na(species), !is.na(oxygen), oxygen %unlike% "tolerant")
filter(!is.na(species), !is.na(oxygen), oxygen %unlike% "tolerant", species %unlike% "unclassified") %>%
mutate(mo = as.mo(species, keep_synonyms = FALSE))
bacdive <- bacdive %>%
# now determine type per species
group_by(species) %>%
summarise(oxygen_tolerance = case_when(any(oxygen %like% "facultative") ~ "facultative anaerobe",
group_by(mo) %>%
summarise(species = first(species),
oxygen_tolerance = case_when(any(oxygen %like% "facultative") ~ "facultative anaerobe",
all(oxygen == "microaerophile") ~ "microaerophile",
all(oxygen %in% c("anaerobe", "obligate anaerobe")) ~ "anaerobe",
all(oxygen %in% c("anaerobe", "obligate anaerobe", "microaerophile")) ~ "anaerobe/microaerophile",
@ -1354,10 +1356,25 @@ bacdive <- bacdive %>%
all(!oxygen %in% c("anaerobe", "obligate anaerobe")) ~ "aerobe",
all(c("aerobe", "anaerobe") %in% oxygen) ~ "facultative anaerobe",
TRUE ~ NA_character_))
# now find all synonyms and copy them from their current taxonomic names
synonyms <- as.mo(unique(unlist(mo_synonyms(bacdive$mo, keep_synonyms = TRUE))),
keep_synonyms = TRUE)
syns <- tibble(species = synonyms,
mo = synonyms %>% mo_current() %>% as.mo()) %>%
filter(species != mo) %>%
mutate(species = mo_name(species, keep_synonyms = TRUE)) %>%
left_join(bacdive %>% select(mo, oxygen_tolerance)) %>%
# set mo to mo of the synonym
mutate(mo = as.mo(species, keep_synonyms = TRUE)) %>%
select(all_of(colnames(bacdive)))
bacdive <- bacdive %>%
bind_rows(syns) %>%
distinct()
bacdive_genus <- bacdive %>%
mutate(genus = gsub("^([A-Za-z]+) .*", "\\1", species), oxygen = oxygen_tolerance) %>%
group_by(species = genus) %>%
mutate(oxygen = oxygen_tolerance) %>%
group_by(species = mo_genus(mo)) %>%
summarise(oxygen_tolerance = case_when(any(oxygen == "facultative anaerobe") ~ "facultative anaerobe",
any(oxygen == "anaerobe/microaerophile") ~ "anaerobe/microaerophile",
all(oxygen == "microaerophile") ~ "microaerophile",
@ -1369,7 +1386,7 @@ bacdive <- bacdive %>%
filter(species %unlike% " sp[.]") %>%
bind_rows(bacdive_genus) %>%
arrange(species) %>%
mutate(mo = as.mo(species, keep_synonyms = FALSE))
mutate(mo = as.mo(species, keep_synonyms = TRUE))
other_species <- microorganisms %>%
filter(kingdom == "Bacteria", rank == "species", !mo %in% bacdive$mo, genus %in% bacdive$species) %>%
@ -1378,22 +1395,20 @@ other_species <- microorganisms %>%
mutate(oxygen_tolerance = ifelse(oxygen_tolerance %in% c("aerobe", "anaerobe", "microaerophile", "anaerobe/microaerophile"),
oxygen_tolerance,
paste("likely", oxygen_tolerance))) %>%
select(species, oxygen_tolerance, mo = mo2)
select(species, oxygen_tolerance, mo = mo2) %>%
distinct(species, .keep_all = TRUE)
bacdive <- bacdive %>%
bind_rows(other_species) %>%
arrange(species)
arrange(species) %>%
distinct(mo, .keep_all = TRUE) %>%
select(-species)
taxonomy <- taxonomy %>%
left_join(
bacdive %>%
select(-species),
by = "mo") %>%
left_join(bacdive, by = "mo") %>%
relocate(oxygen_tolerance, .after = ref)
# TODO look up synonyms and fill them in as well
# Clean data set ----------------------------------------------------------
# format to tibble and check again for invalid characters

Binary file not shown.

View File

@ -41,3 +41,6 @@ suppressMessages(
expect_identical(as.character(as.mo("ENT_ASB_CLO")), "ENT_ASB_CLO")
expect_identical(mo_name("ENT_ASB_CLO"), "Enterobacter asburiae/cloacae")
expect_identical(mo_gramstain("ENT_ASB_CLO", language = NULL), "Gram-negative")
expect_identical(paste("B", AMR:::abbreviate_mo("Klebsiella"), AMR:::abbreviate_mo("pneumoniae", 4), sep = "_"),
as.character(as.mo("Klebsiella pneumoniae")))

View File

@ -96,13 +96,15 @@ expect_true(length(mo_synonyms("Candida albicans")) > 1)
expect_inherits(mo_synonyms(c("Candida albicans", "Escherichia coli")), "list")
expect_equal(names(mo_info("Escherichia coli")), c(
"mo",
"kingdom", "phylum", "class", "order",
"family", "genus", "species", "subspecies",
"status", "synonyms", "gramstain", "url", "ref",
"snomed"
"kingdom", "phylum", "class", "order", "family", "genus", "species", "subspecies",
"status", "synonyms", "gramstain", "oxygen_tolerance",
"url", "ref", "snomed", "lpsn", "gbif"
))
expect_inherits(mo_info(c("Escherichia coli", "Staphylococcus aureus")), "list")
expect_identical(mo_oxygen_tolerance(c("Klebsiella pneumoniae", "Clostridioides difficile")),
c("aerobe", "anaerobe"))
expect_equal(as.character(table(mo_pathogenicity(example_isolates$mo))),
c("1561", "422", "1", "16"))

View File

@ -47,7 +47,7 @@ antibiogram(
\item{antibiotics}{vector of any antibiotic name or code (will be evaluated with \code{\link[=as.ab]{as.ab()}}, column name of \code{x}, or (any combinations of) \link[=antibiotic_class_selectors]{antibiotic selectors} such as \code{\link[=aminoglycosides]{aminoglycosides()}} or \code{\link[=carbapenems]{carbapenems()}}. For combination antibiograms, this can also be set to values separated with \code{"+"}, such as "TZP+TOB" or "cipro + genta", given that columns resembling such antibiotics exist in \code{x}. See \emph{Examples}.}
\item{mo_transform}{a character to transform microorganism input - must be "name", "shortname", "gramstain", or one of the column names of the \link{microorganisms} data set: "mo", "fullname", "status", "kingdom", "phylum", "class", "order", "family", "genus", "species", "subspecies", "rank", "ref", "source", "lpsn", "lpsn_parent", "lpsn_renamed_to", "gbif", "gbif_parent", "gbif_renamed_to", "prevalence", or "snomed". Can also be \code{NULL} to not transform the input.}
\item{mo_transform}{a character to transform microorganism input - must be "name", "shortname", "gramstain", or one of the column names of the \link{microorganisms} data set: "mo", "fullname", "status", "kingdom", "phylum", "class", "order", "family", "genus", "species", "subspecies", "rank", "ref", "oxygen_tolerance", "source", "lpsn", "lpsn_parent", "lpsn_renamed_to", "gbif", "gbif_parent", "gbif_renamed_to", "prevalence", or "snomed". Can also be \code{NULL} to not transform the input.}
\item{ab_transform}{a character to transform antibiotic input - must be one of the column names of the \link{antibiotics} data set: "ab", "cid", "name", "group", "atc", "atc_group1", "atc_group2", "abbreviations", "synonyms", "oral_ddd", "oral_units", "iv_ddd", "iv_units", or "loinc". Can also be \code{NULL} to not transform the input.}

View File

@ -121,13 +121,14 @@ The coercion rules consider the prevalence of microorganisms in humans, which is
\item Berends MS \emph{et al.} (2022). \strong{AMR: An R Package for Working with Antimicrobial Resistance Data}. \emph{Journal of Statistical Software}, 104(3), 1-31; \doi{10.18637/jss.v104.i03}
\item Becker K \emph{et al.} (2014). \strong{Coagulase-Negative Staphylococci.} \emph{Clin Microbiol Rev.} 27(4): 870-926; \doi{10.1128/CMR.00109-13}
\item Becker K \emph{et al.} (2019). \strong{Implications of identifying the recently defined members of the \emph{S. aureus} complex, \emph{S. argenteus} and \emph{S. schweitzeri}: A position paper of members of the ESCMID Study Group for staphylococci and Staphylococcal Diseases (ESGS).} \emph{Clin Microbiol Infect}; \doi{10.1016/j.cmi.2019.02.028}
\item Becker K \emph{et al.} (2020). \strong{Emergence of coagulase-negative staphylococci} \emph{Expert Rev Anti Infect Ther.} 18(4):349-366; \doi{10.1080/14787210.2020.1730813}
\item Lancefield RC (1933). \strong{A serological differentiation of human and other groups of hemolytic streptococci}. \emph{J Exp Med.} 57(4): 571-95; \doi{10.1084/jem.57.4.571}
\item Berends MS \emph{et al.} (2022). \strong{Trends in Occurrence and Phenotypic Resistance of Coagulase-Negative Staphylococci (CoNS) Found in Human Blood in the Northern Netherlands between 2013 and 2019} \emph{Microorganisms} 10(9), 1801; \doi{10.3390/microorganisms10091801}
\item Becker K \emph{et al.} (2020). \strong{Emergence of coagulase-negative staphylococci.} \emph{Expert Rev Anti Infect Ther.} 18(4):349-366; \doi{10.1080/14787210.2020.1730813}
\item Lancefield RC (1933). \strong{A serological differentiation of human and other groups of hemolytic streptococci.} \emph{J Exp Med.} 57(4): 571-95; \doi{10.1084/jem.57.4.571}
\item Berends MS \emph{et al.} (2022). \strong{Trends in Occurrence and Phenotypic Resistance of Coagulase-Negative Staphylococci (CoNS) Found in Human Blood in the Northern Netherlands between 2013 and 2019.} \emph{Microorganisms} 10(9), 1801; \doi{10.3390/microorganisms10091801}
\item Parte, AC \emph{et al.} (2020). \strong{List of Prokaryotic names with Standing in Nomenclature (LPSN) moves to the DSMZ.} International Journal of Systematic and Evolutionary Microbiology, 70, 5607-5612; \doi{10.1099/ijsem.0.004332}. Accessed from \url{https://lpsn.dsmz.de} on 11 December, 2022.
\item GBIF Secretariat (2022). GBIF Backbone Taxonomy. Checklist dataset \doi{10.15468/39omei}. Accessed from \url{https://www.gbif.org} on 11 December, 2022.
\item Public Health Information Network Vocabulary Access and Distribution System (PHIN VADS). US Edition of SNOMED CT from 1 September 2020. Value Set Name 'Microoganism', OID 2.16.840.1.114222.4.11.1009 (v12). URL: \url{https://phinvads.cdc.gov}
\item Bartlett A \emph{et al.} (2022). \strong{A comprehensive list of bacterial pathogens infecting humans} \emph{Microbiology} 168:001269; \doi{10.1099/mic.0.001269}
\item Reimer \emph{et al.} (2022). \strong{\emph{BacDive} in 2022: the knowledge base for standardized bacterial and archaeal data.} \emph{Nucleic Acids Res.} 2022 Jan 7;50(D1):D741-D746; \doi{10.1093/nar/gkab961}
}
}

View File

@ -60,7 +60,7 @@ eucast_rules(df, rules = "custom", custom_rules = x, info = FALSE)
\subsection{Using taxonomic properties in rules}{
There is one exception in columns used for the rules: all column names of the \link{microorganisms} data set can also be used, but do not have to exist in the data set. These column names are: "mo", "fullname", "status", "kingdom", "phylum", "class", "order", "family", "genus", "species", "subspecies", "rank", "ref", "source", "lpsn", "lpsn_parent", "lpsn_renamed_to", "gbif", "gbif_parent", "gbif_renamed_to", "prevalence", and "snomed". Thus, this next example will work as well, despite the fact that the \code{df} data set does not contain a column \code{genus}:
There is one exception in columns used for the rules: all column names of the \link{microorganisms} data set can also be used, but do not have to exist in the data set. These column names are: "mo", "fullname", "status", "kingdom", "phylum", "class", "order", "family", "genus", "species", "subspecies", "rank", "ref", "oxygen_tolerance", "source", "lpsn", "lpsn_parent", "lpsn_renamed_to", "gbif", "gbif_parent", "gbif_renamed_to", "prevalence", and "snomed". Thus, this next example will work as well, despite the fact that the \code{df} data set does not contain a column \code{genus}:
\if{html}{\out{<div class="sourceCode r">}}\preformatted{y <- custom_eucast_rules(TZP == "S" & genus == "Klebsiella" ~ aminopenicillins == "S",
TZP == "R" & genus == "Klebsiella" ~ aminopenicillins == "R")

View File

@ -5,7 +5,7 @@
\alias{microorganisms}
\title{Data Set with 52 151 Microorganisms}
\format{
A \link[tibble:tibble]{tibble} with 52 151 observations and 22 variables:
A \link[tibble:tibble]{tibble} with 52 151 observations and 23 variables:
\itemize{
\item \code{mo}\cr ID of microorganism as used by this package
\item \code{fullname}\cr Full name, like \code{"Escherichia coli"}. For the taxonomic ranks genus, species and subspecies, this is the 'pasted' text of genus, species, and subspecies. For all taxonomic ranks higher than genus, this is the name of the taxon.
@ -14,6 +14,7 @@ A \link[tibble:tibble]{tibble} with 52 151 observations and 22 variables:
\item \code{rank}\cr Text of the taxonomic rank of the microorganism, such as \code{"species"} or \code{"genus"}
\item \code{ref}\cr Author(s) and year of related scientific publication. This contains only the \emph{first surname} and year of the \emph{latest} authors, e.g. "Wallis \emph{et al.} 2006 \emph{emend.} Smith and Jones 2018" becomes "Smith \emph{et al.}, 2018". This field is directly retrieved from the source specified in the column \code{source}. Moreover, accents were removed to comply with CRAN that only allows ASCII characters, e.g. "Váňová" becomes "Vanova".
\item \code{lpsn}\cr Identifier ('Record number') of the List of Prokaryotic names with Standing in Nomenclature (LPSN). This will be the first/highest LPSN identifier to keep one identifier per row. For example, \emph{Acetobacter ascendens} has LPSN Record number 7864 and 11011. Only the first is available in the \code{microorganisms} data set.
\item \code{oxygen_tolerance} \cr Oxygen tolerance, either "aerobe", "anaerobe", "anaerobe/microaerophile", "facultative anaerobe", "likely facultative anaerobe", or "microaerophile". These data were retrieved from BacDive (see \emph{Source}). Items that contain "likely" are missing from BacDive and were extrapolated from other species within the same genus to guess the oxygen tolerance. Currently 73.4\% of all ~36 000 bacteria in the data set contain an oxygen tolerance.
\item \code{lpsn_parent}\cr LPSN identifier of the parent taxon
\item \code{lpsn_renamed_to}\cr LPSN identifier of the currently valid taxon
\item \code{gbif}\cr Identifier ('taxonID') of the Global Biodiversity Information Facility (GBIF)
@ -31,6 +32,7 @@ A \link[tibble:tibble]{tibble} with 52 151 observations and 22 variables:
\item Public Health Information Network Vocabulary Access and Distribution System (PHIN VADS). US Edition of SNOMED CT from 1 September 2020. Value Set Name 'Microoganism', OID 2.16.840.1.114222.4.11.1009 (v12). URL: \url{https://phinvads.cdc.gov}
\item Grimont \emph{et al.} (2007). Antigenic Formulae of the Salmonella Serovars, 9th Edition. WHO Collaborating Centre for Reference and Research on \emph{Salmonella} (WHOCC-SALM).
\item Bartlett \emph{et al.} (2022). \strong{A comprehensive list of bacterial pathogens infecting humans} \emph{Microbiology} 168:001269; \doi{10.1099/mic.0.001269}
\item Reimer \emph{et al.} (2022). \strong{\emph{BacDive} in 2022: the knowledge base for standardized bacterial and archaeal data.} \emph{Nucleic Acids Res.} 2022 Jan 7;50(D1):D741-D746; \doi{10.1093/nar/gkab961}
}
}
\usage{

View File

@ -22,6 +22,8 @@
\alias{mo_is_gram_positive}
\alias{mo_is_yeast}
\alias{mo_is_intrinsic_resistant}
\alias{mo_oxygen_tolerance}
\alias{mo_is_anaerobic}
\alias{mo_snomed}
\alias{mo_ref}
\alias{mo_authors}
@ -177,6 +179,20 @@ mo_is_intrinsic_resistant(
...
)
mo_oxygen_tolerance(
x,
language = get_AMR_locale(),
keep_synonyms = getOption("AMR_keep_synonyms", FALSE),
...
)
mo_is_anaerobic(
x,
language = get_AMR_locale(),
keep_synonyms = getOption("AMR_keep_synonyms", FALSE),
...
)
mo_snomed(
x,
language = get_AMR_locale(),
@ -278,7 +294,7 @@ mo_property(
\item{open}{browse the URL using \code{\link[utils:browseURL]{browseURL()}}}
\item{property}{one of the column names of the \link{microorganisms} data set: "mo", "fullname", "status", "kingdom", "phylum", "class", "order", "family", "genus", "species", "subspecies", "rank", "ref", "source", "lpsn", "lpsn_parent", "lpsn_renamed_to", "gbif", "gbif_parent", "gbif_renamed_to", "prevalence", or "snomed", or must be \code{"shortname"}}
\item{property}{one of the column names of the \link{microorganisms} data set: "mo", "fullname", "status", "kingdom", "phylum", "class", "order", "family", "genus", "species", "subspecies", "rank", "ref", "oxygen_tolerance", "source", "lpsn", "lpsn_parent", "lpsn_renamed_to", "gbif", "gbif_parent", "gbif_renamed_to", "prevalence", or "snomed", or must be \code{"shortname"}}
}
\value{
\itemize{
@ -313,6 +329,8 @@ Determination of yeasts (\code{\link[=mo_is_yeast]{mo_is_yeast()}}) will be base
Determination of intrinsic resistance (\code{\link[=mo_is_intrinsic_resistant]{mo_is_intrinsic_resistant()}}) will be based on the \link{intrinsic_resistant} data set, which is based on \href{https://www.eucast.org/expert_rules_and_expected_phenotypes/}{'EUCAST Expert Rules' and 'EUCAST Intrinsic Resistance and Unusual Phenotypes' v3.3} (2021). The \code{\link[=mo_is_intrinsic_resistant]{mo_is_intrinsic_resistant()}} function can be vectorised over both argument \code{x} (input for microorganisms) and \code{ab} (input for antibiotics).
Determination of bacterial oxygen tolerance (\code{\link[=mo_oxygen_tolerance]{mo_oxygen_tolerance()}}) will be based on BacDive, see \emph{Source}. The function \code{\link[=mo_is_anaerobic]{mo_is_anaerobic()}} only returns \code{TRUE} if the oxygen tolerance is \code{"anaerobe"}, indicating an obligate anaerobic species or genus. It always returns \code{FALSE} for species outside the taxonomic kingdom of Bacteria.
The function \code{\link[=mo_url]{mo_url()}} will return the direct URL to the online database entry, which also shows the scientific reference of the concerned species.
SNOMED codes (\code{\link[=mo_snomed]{mo_snomed()}}) are from the version of 1 July, 2021. See \emph{Source} and the \link{microorganisms} data set for more info.
@ -332,13 +350,14 @@ This function uses \code{\link[=as.mo]{as.mo()}} internally, which uses an advan
\item Berends MS \emph{et al.} (2022). \strong{AMR: An R Package for Working with Antimicrobial Resistance Data}. \emph{Journal of Statistical Software}, 104(3), 1-31; \doi{10.18637/jss.v104.i03}
\item Becker K \emph{et al.} (2014). \strong{Coagulase-Negative Staphylococci.} \emph{Clin Microbiol Rev.} 27(4): 870-926; \doi{10.1128/CMR.00109-13}
\item Becker K \emph{et al.} (2019). \strong{Implications of identifying the recently defined members of the \emph{S. aureus} complex, \emph{S. argenteus} and \emph{S. schweitzeri}: A position paper of members of the ESCMID Study Group for staphylococci and Staphylococcal Diseases (ESGS).} \emph{Clin Microbiol Infect}; \doi{10.1016/j.cmi.2019.02.028}
\item Becker K \emph{et al.} (2020). \strong{Emergence of coagulase-negative staphylococci} \emph{Expert Rev Anti Infect Ther.} 18(4):349-366; \doi{10.1080/14787210.2020.1730813}
\item Lancefield RC (1933). \strong{A serological differentiation of human and other groups of hemolytic streptococci}. \emph{J Exp Med.} 57(4): 571-95; \doi{10.1084/jem.57.4.571}
\item Berends MS \emph{et al.} (2022). \strong{Trends in Occurrence and Phenotypic Resistance of Coagulase-Negative Staphylococci (CoNS) Found in Human Blood in the Northern Netherlands between 2013 and 2019} \emph{Microorganisms} 10(9), 1801; \doi{10.3390/microorganisms10091801}
\item Becker K \emph{et al.} (2020). \strong{Emergence of coagulase-negative staphylococci.} \emph{Expert Rev Anti Infect Ther.} 18(4):349-366; \doi{10.1080/14787210.2020.1730813}
\item Lancefield RC (1933). \strong{A serological differentiation of human and other groups of hemolytic streptococci.} \emph{J Exp Med.} 57(4): 571-95; \doi{10.1084/jem.57.4.571}
\item Berends MS \emph{et al.} (2022). \strong{Trends in Occurrence and Phenotypic Resistance of Coagulase-Negative Staphylococci (CoNS) Found in Human Blood in the Northern Netherlands between 2013 and 2019.} \emph{Microorganisms} 10(9), 1801; \doi{10.3390/microorganisms10091801}
\item Parte, AC \emph{et al.} (2020). \strong{List of Prokaryotic names with Standing in Nomenclature (LPSN) moves to the DSMZ.} International Journal of Systematic and Evolutionary Microbiology, 70, 5607-5612; \doi{10.1099/ijsem.0.004332}. Accessed from \url{https://lpsn.dsmz.de} on 11 December, 2022.
\item GBIF Secretariat (2022). GBIF Backbone Taxonomy. Checklist dataset \doi{10.15468/39omei}. Accessed from \url{https://www.gbif.org} on 11 December, 2022.
\item Public Health Information Network Vocabulary Access and Distribution System (PHIN VADS). US Edition of SNOMED CT from 1 September 2020. Value Set Name 'Microoganism', OID 2.16.840.1.114222.4.11.1009 (v12). URL: \url{https://phinvads.cdc.gov}
\item Bartlett A \emph{et al.} (2022). \strong{A comprehensive list of bacterial pathogens infecting humans} \emph{Microbiology} 168:001269; \doi{10.1099/mic.0.001269}
\item Reimer \emph{et al.} (2022). \strong{\emph{BacDive} in 2022: the knowledge base for standardized bacterial and archaeal data.} \emph{Nucleic Acids Res.} 2022 Jan 7;50(D1):D741-D746; \doi{10.1093/nar/gkab961}
}
}

View File

@ -50,6 +50,7 @@ download_txt <- function(filename) {
feather <- paste0(filename, ".feather")
parquet <- paste0(filename, ".parquet")
sas <- paste0(filename, ".sas")
xpt <- paste0(filename, ".xpt")
spss <- paste0(filename, ".sav")
stata <- paste0(filename, ".dta")
create_txt <- function(filename, type, software, exists) {
@ -70,6 +71,7 @@ download_txt <- function(filename) {
file.exists(feather),
file.exists(parquet),
file.exists(sas),
file.exists(xpt),
file.exists(spss),
file.exists(stata)
)) {
@ -80,7 +82,8 @@ download_txt <- function(filename) {
create_txt(excel, "xlsx", "Microsoft Excel workbook", file.exists(excel)),
create_txt(feather, "feather", "Apache Feather file", file.exists(feather)),
create_txt(parquet, "parquet", "Apache Parquet file", file.exists(parquet)),
create_txt(sas, "sas", "SAS data file", file.exists(sas)),
create_txt(sas, "sas", "SAS data (SAS) file", file.exists(sas)),
create_txt(xpt, "xpt", "SAS transport (XPT) file", file.exists(xpt)),
create_txt(spss, "sav", "IBM SPSS Statistics data file", file.exists(spss)),
create_txt(stata, "dta", "Stata DTA file", file.exists(stata))
)
@ -123,7 +126,7 @@ This data set is in R available as `microorganisms`, after you load the `AMR` pa
`r download_txt("microorganisms")`
**NOTE: The exported files for SAS, SPSS and Stata contain only the first 50 SNOMED codes per record, as their file size would otherwise exceed 100 MB; the file size limit of GitHub.** Advice? Use R instead.
**NOTE: The exported files for SAS, SPSS and Stata contain only the first 50 SNOMED codes per record, as their file size would otherwise exceed 100 MB; the file size limit of GitHub.** Their file structures and compression techniques are very inefficient. Advice? Use R instead. It's free and much better in many ways.
The tab-separated text file and Microsoft Excel workbook both contain all SNOMED codes as comma-separated values.