(v1.1.0.9020) updated taxonomy

This commit is contained in:
dr. M.S. (Matthijs) Berends 2020-05-27 16:37:49 +02:00
parent ae1969b941
commit 86d44054f0
55 changed files with 68063 additions and 70233 deletions

View File

@ -1,6 +1,6 @@
Package: AMR
Version: 1.1.0.9019
Date: 2020-05-25
Version: 1.1.0.9020
Date: 2020-05-27
Title: Antimicrobial Resistance Analysis
Authors@R: c(
person(role = c("aut", "cre"),

16
NEWS.md
View File

@ -1,5 +1,5 @@
# AMR 1.1.0.9019
## <small>Last updated: 25-May-2020</small>
# AMR 1.1.0.9020
## <small>Last updated: 27-May-2020</small>
### Breaking
* Removed code dependency on all other R packages, making this package fully independent of the development process of others. This is a major code change, but will probably not be noticeable by most users.
@ -13,8 +13,17 @@
* For developers: classes `mo` and `ab` now both also inherit class `character`, to support any data transformation. This change invalidates code that checks for class length == 1.
### Changed
* Taxonomy:
* Updated the taxonomy of microorganisms to May 2020, using the Catalogue of Life (CoL), the Global Biodiversity Information Facility (GBIF) and the List of Prokaryotic names with Standing in Nomenclature (LPSN, hosted by DSMZ since February 2020)
* Removed the Catalogue of Life IDs (like 776351), since they now work with a species ID (hexadecimal string)
* EUCAST rules:
* The `eucast_rules()` function no longer applies "other" rules at default that are made available by this package (like setting ampicillin = R when ampicillin + enzym inhibitor = R). The default input value for `rules` is now `c("breakpoints", "expert")` instead of `"all"`, but this can be changed by the user. To return to the old behaviour, set `options(AMR.eucast_rules = "all")`.
* The `eucast_rules()` function no longer applies "other" rules at default that are made available by this package (like setting ampicillin = R when ampicillin + enzyme inhibitor = R). The default input value for `rules` is now `c("breakpoints", "expert")` instead of `"all"`, but this can be changed by the user. To return to the old behaviour, set `options(AMR.eucast_rules = "all")`.
* Fixed a bug where antimicrobial results in the original data were not regarded as valid R/SI values when they were checked
* The "other" rules now apply these two rules to all drug combinations in the `antibiotics` data set:
1. A drug **with** enzyme inhibitor will be set to S if the drug **without** enzyme inhibitor is S
2. A drug **without** enzyme inhibitor will be set to R if the drug **with** enzyme inhibitor is R
This works for all drug combinations, such as ampicillin/sulbactam, ceftazidime/avibactam, trimethoprim/sulfamethoxazole, etc.
* Added official drug names to verbose output of `eucast_rules()`
* Added function `ab_url()` to return the direct URL of an antimicrobial agent from the official WHO website
* Improvements for algorithm in `as.ab()`, so that e.g. `as.ab("ampi sul")` and `ab_name("ampi sul")` work
@ -22,6 +31,7 @@
* Small fix for some text input that could not be coerced as valid MIC values
* Fix for interpretation of generic CLSI interpretation rules (thanks to Anthony Underwood)
* Fix for `set_mo_source()` to make sure that column `mo` will always be the second column
* Added abbreviation "cfsc" for Cefoxitin and "cfav" for Ceftazidime/avibactam
### Other
* Removed previously deprecated function `p.symbol()` - it was replaced with `p_symbol()`

View File

@ -78,8 +78,7 @@ check_dataset_integrity <- function() {
check_microorganisms <- all(c("mo", "fullname", "kingdom", "phylum",
"class", "order", "family", "genus",
"species", "subspecies", "rank",
"col_id", "species_id", "source",
"ref", "prevalence", "snomed") %in% colnames(microorganisms),
"species_id", "source", "ref", "prevalence") %in% colnames(microorganisms),
na.rm = TRUE) & NROW(microorganisms) == NROW(MO_lookup)
check_antibiotics <- all(c("ab", "atc", "cid", "name", "group",
"atc_group1", "atc_group2", "abbreviations",

2
R/ab.R
View File

@ -347,7 +347,7 @@ is.ab <- function(x) {
#' @export
#' @noRd
print.ab <- function(x, ...) {
cat("Class 'ab'\n")
cat("Class <ab>\n")
print(as.character(x), quote = FALSE)
}

View File

@ -50,9 +50,9 @@
#'
#'
#' # Get a note when a species was renamed
#' mo_shortname("Chlamydia psittaci")
#' # Note: 'Chlamydia psittaci' (Page, 1968) was renamed
#' # 'Chlamydophila psittaci' (Everett et al., 1999)
#' mo_shortname("Chlamydophila psittaci")
#' # Note: 'Chlamydophila psittaci' (Everett et al., 1999) was renamed back to
#' # 'Chlamydia psittaci' (Page, 1968)
#' # [1] "C. psittaci"
#'
#' # Get any property from the entire taxonomic tree for all included species
@ -70,9 +70,9 @@
#'
#' # Do not get mistaken - this package is about microorganisms
#' mo_kingdom("C. elegans")
#' # [1] "Bacteria" # Bacteria?!
#' # [1] "Fungi" # Fungi?!
#' mo_name("C. elegans")
#' # [1] "Chroococcus limneticus elegans" # Because a microorganism was found
#' # [1] "Cladosporium elegans" # Because a microorganism was found
NULL
#' Version info of included Catalogue of Life

View File

@ -82,7 +82,6 @@
#' @inheritSection catalogue_of_life Catalogue of Life
#' @format A [`data.frame`] with `r format(nrow(microorganisms), big.mark = ",")` observations and `r ncol(microorganisms)` variables:
#' - `mo`\cr ID of microorganism as used by this package
#' - `col_id`\cr Catalogue of Life ID
#' - `fullname`\cr Full name, like `"Escherichia coli"`
#' - `kingdom`, `phylum`, `class`, `order`, `family`, `genus`, `species`, `subspecies`\cr Taxonomic rank of the microorganism
#' - `rank`\cr Text of the taxonomic rank of the microorganism, like `"species"` or `"genus"`
@ -113,6 +112,8 @@
#'
#' From: <https://www.dsmz.de/services/online-tools/prokaryotic-nomenclature-up-to-date/complete-list-readme>
#' @source Catalogue of Life: Annual Checklist (public online taxonomic database), <http://www.catalogueoflife.org> (check included annual version with [catalogue_of_life_version()]).
#'
#' Parte, A.C. (2018). LPSN — List of Prokaryotic names with Standing in Nomenclature (bacterio.net), 20 years on. International Journal of Systematic and Evolutionary Microbiology, 68, 1825-1829; doi: 10.1099/ijsem.0.002786
#'
#' Leibniz Institute DSMZ-German Collection of Microorganisms and Cell Cultures, Germany, Prokaryotic Nomenclature Up-to-Date, <https://www.dsmz.de/services/online-tools/prokaryotic-nomenclature-up-to-date> (check included version with [catalogue_of_life_version()]).
#' @inheritSection AMR Read more on our website!
@ -120,11 +121,11 @@
"microorganisms"
catalogue_of_life <- list(
year = 2018,
year = 2019,
version = "Catalogue of Life: {year} Annual Checklist",
url_CoL = "http://www.catalogueoflife.org/annual-checklist/{year}/",
url_DSMZ = "https://www.dsmz.de/services/online-tools/prokaryotic-nomenclature-up-to-date/prokaryotic-nomenclature-up-to-date/genus-search",
yearmonth_DSMZ = "August 2019"
url_CoL = "http://www.catalogueoflife.org/col/",
url_DSMZ = "https://lpsn.dsmz.de",
yearmonth_DSMZ = "May 2020"
)
#' Data set with previously accepted taxonomic names
@ -132,17 +133,18 @@ catalogue_of_life <- list(
#' A data set containing old (previously valid or accepted) taxonomic names according to the Catalogue of Life. This data set is used internally by [as.mo()].
#' @inheritSection catalogue_of_life Catalogue of Life
#' @format A [`data.frame`] with `r format(nrow(microorganisms.old), big.mark = ",")` observations and `r ncol(microorganisms.old)` variables:
#' - `col_id`\cr Catalogue of Life ID that was originally given
#' - `col_id_new`\cr New Catalogue of Life ID that responds to an entry in the [microorganisms] data set
#' - `fullname`\cr Old full taxonomic name of the microorganism
#' - `fullname_new`\cr New full taxonomic name of the microorganism
#' - `ref`\cr Author(s) and year of concerning scientific publication
#' - `prevalence`\cr Prevalence of the microorganism, see [as.mo()]
#' @source Catalogue of Life: Annual Checklist (public online taxonomic database), <http://www.catalogueoflife.org> (check included annual version with [catalogue_of_life_version()]).
#'
#' Parte, A.C. (2018). LPSN — List of Prokaryotic names with Standing in Nomenclature (bacterio.net), 20 years on. International Journal of Systematic and Evolutionary Microbiology, 68, 1825-1829; doi: 10.1099/ijsem.0.002786
#' @inheritSection AMR Read more on our website!
#' @seealso [as.mo()] [mo_property()] [microorganisms]
"microorganisms.old"
#' Translation table for common microorganism codes
#' Translation table with `r format(nrow(microorganisms.codes), big.mark = ",")` common microorganism codes
#'
#' A data set containing commonly used codes for microorganisms, from laboratory systems and WHONET. Define your own with [set_mo_source()]. They will all be searched when using [as.mo()] and consequently all the [`mo_*`][mo_property()] functions.
#' @format A [`data.frame`] with `r format(nrow(microorganisms.codes), big.mark = ",")` observations and `r ncol(microorganisms.codes)` variables:

View File

@ -99,7 +99,7 @@ is.disk <- function(x) {
#' @export
#' @noRd
print.disk <- function(x, ...) {
cat("Class 'disk'\n")
cat("Class <disk>\n")
print(as.integer(x), quote = FALSE)
}

View File

@ -245,6 +245,7 @@ eucast_rules <- function(x,
}
warned <- FALSE
warn_lacking_rsi_class <- FALSE
txt_error <- function() {
if (info == TRUE) cat("", font_red_bg(font_white(" ERROR ")), "\n\n")
@ -410,6 +411,7 @@ eucast_rules <- function(x,
RID <- cols_ab["RID"]
RIF <- cols_ab["RIF"]
RXT <- cols_ab["RXT"]
SAM <- cols_ab["SAM"]
SIS <- cols_ab["SIS"]
SXT <- cols_ab["SXT"]
TCY <- cols_ab["TCY"]
@ -440,7 +442,9 @@ eucast_rules <- function(x,
cols <- unique(cols[!is.na(cols) & !is.null(cols)])
if (length(rows) > 0 & length(cols) > 0) {
before_df <- x_original
if (any(!sapply(x[, cols, drop = FALSE], is.rsi), na.rm = TRUE)) {
warn_lacking_rsi_class <<- TRUE
}
tryCatch(
# insert into original table
x_original[rows, cols] <<- to,
@ -599,14 +603,79 @@ eucast_rules <- function(x,
}
}
if (info == TRUE & !any(c("other", "all") %in% rules, na.rm = TRUE)) {
cat(font_red("\nSkipping inheritance rules defined by this package, such as setting trimethoprim (TMP) = R where trimethoprim/sulfamethoxazole (SXT) = R.\nUse eucast_rules(..., rules = \"all\") to also apply those rules.\n"))
}
eucast_notification_shown <- FALSE
eucast_rules_df <- eucast_rules_file # internal data file
as.rsi_no_warning <- function(x) suppressWarnings(as.rsi(x))
no_added <- 0
no_changed <- 0
# Other rules: enzyme inhibitors ------------------------------------------
if (any(c("all", "other") %in% rules)) {
if (info == TRUE) {
cat(font_bold(paste0("\nRules by this AMR package (",
font_red(paste0("v", utils::packageVersion("AMR"), ", ",
format(utils::packageDate("AMR"), "%Y"))), ")\n")))
}
ab_enzyme <- subset(antibiotics, name %like% "/")[, c("ab", "name")]
ab_enzyme$base_name <- gsub("^([a-zA-Z0-9]+).*", "\\1", ab_enzyme$name)
ab_enzyme$base_ab <- as.ab(ab_enzyme$base_name)
for (i in seq_len(nrow(ab_enzyme))) {
if (all(c(ab_enzyme[i, ]$ab, ab_enzyme[i, ]$base_ab) %in% names(cols_ab), na.rm = TRUE)) {
ab_name_base <- ab_name(cols_ab[ab_enzyme[i, ]$base_ab], language = NULL, tolower = TRUE)
ab_name_enzyme <- ab_name(cols_ab[ab_enzyme[i, ]$ab], language = NULL, tolower = TRUE)
# Set base to R where base + enzyme inhibitor is R
rule_current <- paste0("Set ", ab_name_base, " (", cols_ab[ab_enzyme[i, ]$base_ab], ") = R where ",
ab_name_enzyme, " (", cols_ab[ab_enzyme[i, ]$ab], ") = R")
if (info == TRUE) {
cat(rule_current)
}
run_changes <- edit_rsi(to = "R",
rule = c(rule_current, "Other rules", ""),
rows = which(as.rsi_no_warning(x[, cols_ab[ab_enzyme[i, ]$ab]]) == "R"),
cols = cols_ab[ab_enzyme[i, ]$base_ab])
no_added <- no_added + run_changes$added
no_changed <- no_changed + run_changes$changed
# Print number of new changes
if (info == TRUE) {
# print only on last one of rules in this group
txt_ok(no_added = no_added, no_changed = no_changed)
# and reset counters
no_added <- 0
no_changed <- 0
}
# Set base + enzyme inhibitor to S where base is S
rule_current <- paste0("Set ", ab_name_enzyme, " (", cols_ab[ab_enzyme[i, ]$ab], ") = S where ",
ab_name_base, " (", cols_ab[ab_enzyme[i, ]$base_ab], ") = S")
if (info == TRUE) {
cat(rule_current)
}
run_changes <- edit_rsi(to = "S",
rule = c(rule_current, "Other rules", ""),
rows = which(as.rsi_no_warning(x[, cols_ab[ab_enzyme[i, ]$base_ab]]) == "S"),
cols = cols_ab[ab_enzyme[i, ]$ab])
no_added <- no_added + run_changes$added
no_changed <- no_changed + run_changes$changed
# Print number of new changes
if (info == TRUE) {
# print only on last one of rules in this group
txt_ok(no_added = no_added, no_changed = no_changed)
# and reset counters
no_added <- 0
no_changed <- 0
}
}
}
} else {
if (info == TRUE) {
cat(font_red("\nSkipping inheritance rules defined by this package, such as setting trimethoprim (TMP) = R where trimethoprim/sulfamethoxazole (SXT) = R.\nUse eucast_rules(..., rules = \"all\") to also apply those rules.\n"))
}
}
# Official EUCAST rules ---------------------------------------------------
eucast_notification_shown <- FALSE
eucast_rules_df <- eucast_rules_file # internal data file
for (i in seq_len(nrow(eucast_rules_df))) {
rule_previous <- eucast_rules_df[max(1, i - 1), "reference.rule"]
@ -637,18 +706,14 @@ eucast_rules <- function(x,
if (rule_group_current %like% "expert" & !any(c("all", "expert") %in% rules)) {
next
}
if (rule_group_current %like% "other" & !any(c("all", "other") %in% rules)) {
next
}
if (info == TRUE & !rule_group_current %like% "other" & eucast_notification_shown == FALSE) {
cat(paste0(
"\n----\nRules by the ", font_bold("European Committee on Antimicrobial Susceptibility Testing (EUCAST)"),
cat(paste0("\n", font_grey(strrep("-", options()$width - 1)),
"\nRules by the ", font_bold("European Committee on Antimicrobial Susceptibility Testing (EUCAST)"),
"\n", font_blue("http://eucast.org/"), "\n"))
eucast_notification_shown <- TRUE
}
if (info == TRUE) {
# Print rule (group) ------------------------------------------------------
if (rule_group_current != rule_group_previous) {
@ -662,7 +727,7 @@ eucast_rules <- function(x,
rule_group_current %like% "expert",
paste0("\nEUCAST Expert Rules, Intrinsic Resistance and Exceptional Phenotypes (",
font_red(paste0("v", EUCAST_VERSION_EXPERT_RULES)), ")\n"),
"\nOther rules by this AMR package\n"))))
""))))
}
# Print rule -------------------------------------------------------------
if (rule_current != rule_previous) {
@ -733,18 +798,18 @@ eucast_rules <- function(x,
rows <- integer(0)
} else if (length(source_antibiotics) == 1) {
rows <- tryCatch(which(x[, col_mo_property] %like% mo_value
& x[, source_antibiotics[1L]] == source_value[1L]),
& as.rsi_no_warning(x[, source_antibiotics[1L]]) == source_value[1L]),
error = function(e) integer(0))
} else if (length(source_antibiotics) == 2) {
rows <- tryCatch(which(x[, col_mo_property] %like% mo_value
& x[, source_antibiotics[1L]] == source_value[1L]
& x[, source_antibiotics[2L]] == source_value[2L]),
& as.rsi_no_warning(x[, source_antibiotics[1L]]) == source_value[1L]
& as.rsi_no_warning(x[, source_antibiotics[2L]]) == source_value[2L]),
error = function(e) integer(0))
} else if (length(source_antibiotics) == 3) {
rows <- tryCatch(which(x[, col_mo_property] %like% mo_value
& x[, source_antibiotics[1L]] == source_value[1L]
& x[, source_antibiotics[2L]] == source_value[2L]
& x[, source_antibiotics[3L]] == source_value[3L]),
& as.rsi_no_warning(x[, source_antibiotics[1L]]) == source_value[1L]
& as.rsi_no_warning(x[, source_antibiotics[2L]]) == source_value[2L]
& as.rsi_no_warning(x[, source_antibiotics[3L]]) == source_value[3L]),
error = function(e) integer(0))
} else {
stop("only 3 antibiotics supported for source_antibiotics ", call. = FALSE)
@ -784,7 +849,7 @@ eucast_rules <- function(x,
arrange(row, rule_group, rule_name, col)
cat(paste0("\n", font_grey(strrep("-", options()$width - 1)), "\n"))
cat(font_bold(paste("EUCAST rules", paste0(wouldve, "affected"),
cat(font_bold(paste("The rules", paste0(wouldve, "affected"),
formatnr(n_distinct(verbose_info$row)),
"out of", formatnr(nrow(x_original)),
"rows, making a total of", formatnr(nrow(verbose_info)), "edits\n")))
@ -846,6 +911,12 @@ eucast_rules <- function(x,
}
}
if (isTRUE(warn_lacking_rsi_class)) {
warning("Not all columns with antimicrobial results are of class <rsi>.\n",
"Transform eligible columns to class <rsi> on beforehand: your_data %>% mutate_if(is.rsi.eligible, as.rsi)",
call. = FALSE)
}
# Return data set ---------------------------------------------------------
if (verbose == TRUE) {
rownames(verbose_info) <- NULL

View File

@ -154,7 +154,7 @@ joins_check_df <- function(x, by) {
by <- "mo"
x[, "mo"] <- as.mo(x[, "mo"])
} else {
stop("Cannot join - no column found with name or class `mo`.", call. = FALSE)
stop("Cannot join - no column found with name or class <mo>.", call. = FALSE)
}
}
message('Joining, by = "', by, '"') # message same as dplyr::join functions

View File

@ -174,7 +174,7 @@ droplevels.mic <- function(x, exclude = ifelse(anyNA(levels(x)), NULL, NA), ...)
#' @export
#' @noRd
print.mic <- function(x, ...) {
cat("Class 'mic'\n")
cat("Class <mic>\n")
print(as.character(x), quote = FALSE)
}

96
R/mo.R
View File

@ -126,7 +126,6 @@
#' as.mo("MRSA") # Methicillin Resistant S. aureus
#' as.mo("VISA") # Vancomycin Intermediate S. aureus
#' as.mo("VRSA") # Vancomycin Resistant S. aureus
#' as.mo(22242419) # Catalogue of Life ID
#' as.mo(115329001) # SNOMED CT code
#'
#' # Dyslexia is no problem - these all work:
@ -556,20 +555,44 @@ exec_as.mo <- function(x,
if (initial_search == TRUE) {
progress$tick()
}
if (x_backup[i] %like_case% "\\(unknown [a-z]+\\)" | tolower(x_backup_without_spp[i]) %in% c("other", "none", "unknown")) {
# empty and nonsense values, ignore without warning
x[i] <- lookup(mo == "UNKNOWN")
next
}
# valid MO code ---
# valid MO code ----
found <- lookup(mo == toupper(x_backup[i]))
if (!is.na(found)) {
x[i] <- found[1L]
next
}
# valid fullname ----
found <- lookup(fullname_lower %in% gsub("[^a-zA-Z0-9_. -]", "", tolower(c(x_backup[i], x_backup_without_spp[i]))))
# added the gsub() for "(unknown fungus)", since fullname_lower does not contain brackets
if (!is.na(found)) {
x[i] <- found[1L]
next
}
# old fullname ----
found <- lookup(fullname_lower %in% tolower(c(x_backup[i], x_backup_without_spp[i])),
column = NULL, # all columns
haystack = MO.old_lookup)
if (!all(is.na(found))) {
# when property is "ref" (which is the case in mo_ref, mo_authors and mo_year), return the old value, so:
# mo_ref() of "Chlamydia psittaci" will be "Page, 1968" (with warning)
# mo_ref() of "Chlamydophila psittaci" will be "Everett et al., 1999"
if (property == "ref") {
x[i] <- found["ref"]
} else {
x[i] <- lookup(fullname == found["fullname_new"], haystack = MO_lookup)
}
options(mo_renamed_last_run = found["fullname"])
was_renamed(name_old = found["fullname"],
name_new = lookup(fullname == found["fullname_new"], "fullname", haystack = MO_lookup),
ref_old = found["ref"],
ref_new = lookup(fullname == found["fullname_new"], "ref", haystack = MO_lookup),
mo = lookup(fullname == found["fullname_new"], "mo", haystack = MO_lookup))
next
}
# old mo code, used in previous versions of this package ----
if (x_backup[i] %in% microorganisms.translation$mo_old) {
old_mo_warning <- TRUE
@ -582,10 +605,9 @@ exec_as.mo <- function(x,
}
}
found <- lookup(fullname_lower %in% tolower(c(x_backup[i], x_backup_without_spp[i])))
# most probable: is exact match in fullname
if (!is.na(found)) {
x[i] <- found[1L]
if (x_backup[i] %like_case% "\\(unknown [a-z]+\\)" | tolower(x_backup_without_spp[i]) %in% c("other", "none", "unknown")) {
# empty and nonsense values, ignore without warning
x[i] <- lookup(mo == "UNKNOWN")
next
}
@ -614,13 +636,6 @@ exec_as.mo <- function(x,
next
}
# valid Catalogue of Life ID ---
found <- lookup(col_id == x_backup[i])
if (!is.na(found)) {
x[i] <- found[1L]
next
}
# WHONET and other common LIS codes ----
found <- lookup(code %in% toupper(c(x_backup_untouched[i], x_backup[i], x_backup_without_spp[i])),
column = "mo",
@ -943,21 +958,20 @@ exec_as.mo <- function(x,
column = NULL, # all columns
haystack = data.old_to_check)
if (!all(is.na(found))) {
col_id_new <- found["col_id_new"]
# when property is "ref" (which is the case in mo_ref, mo_authors and mo_year), return the old value, so:
# mo_ref() of "Chlamydia psittaci" will be "Page, 1968" (with warning)
# mo_ref() of "Chlamydophila psittaci" will be "Everett et al., 1999"
if (property == "ref") {
x[i] <- found["ref"]
} else {
x[i] <- lookup(col_id == found["col_id_new"], haystack = MO_lookup)
x[i] <- lookup(fullname == found["fullname_new"], haystack = MO_lookup)
}
options(mo_renamed_last_run = found["fullname"])
was_renamed(name_old = found["fullname"],
name_new = lookup(col_id == found["col_id_new"], "fullname", haystack = MO_lookup),
name_new = lookup(fullname == found["fullname_new"], "fullname", haystack = MO_lookup),
ref_old = found["ref"],
ref_new = lookup(col_id == found["col_id_new"], "ref", haystack = MO_lookup),
mo = lookup(col_id == found["col_id_new"], "mo", haystack = MO_lookup))
ref_new = lookup(fullname == found["fullname_new"], "ref", haystack = MO_lookup),
mo = lookup(fullname == found["fullname_new"], "mo", haystack = MO_lookup))
return(x[i])
}
@ -997,18 +1011,18 @@ exec_as.mo <- function(x,
# mo_ref("Chlamydophila psittaci") = "Everett et al., 1999"
x <- found["ref"]
} else {
x <- lookup(col_id == found["col_id_new"], haystack = MO_lookup)
x <- lookup(fullname == found["fullname_new"], haystack = MO_lookup)
}
was_renamed(name_old = found["fullname"],
name_new = lookup(col_id == found["col_id_new"], "fullname", haystack = MO_lookup),
name_new = lookup(fullname == found["fullname_new"], "fullname", haystack = MO_lookup),
ref_old = found["ref"],
ref_new = lookup(col_id == found["col_id_new"], "ref", haystack = MO_lookup),
mo = lookup(col_id == found["col_id_new"], "mo", haystack = MO_lookup))
ref_new = lookup(fullname == found["fullname_new"], "ref", haystack = MO_lookup),
mo = lookup(fullname == found["fullname_new"], "mo", haystack = MO_lookup))
options(mo_renamed_last_run = found["fullname"])
uncertainties <<- rbind(uncertainties,
format_uncertainty_as_df(uncertainty_level = now_checks_for_uncertainty_level,
input = a.x_backup,
result_mo = lookup(col_id == found["col_id_new"], "mo", haystack = MO_lookup)))
result_mo = lookup(fullname == found["fullname_new"], "mo", haystack = MO_lookup)))
return(x)
}
@ -1366,6 +1380,10 @@ exec_as.mo <- function(x,
failures <- c(failures, x_backup[i])
}
}
if (initial_search == TRUE) {
close(progress)
}
}
# handling failures ----
@ -1494,7 +1512,7 @@ exec_as.mo <- function(x,
if (property == "mo") {
x <- to_class_mo(x)
}
if (length(mo_renamed()) > 0) {
print(mo_renamed())
}
@ -1552,7 +1570,7 @@ format_uncertainty_as_df <- function(uncertainty_level,
#' @export
#' @noRd
print.mo <- function(x, ...) {
cat("Class 'mo'\n")
cat("Class <mo>\n")
x_names <- names(x)
x <- as.character(x)
names(x) <- x_names
@ -1711,6 +1729,9 @@ print.mo_renamed <- function(x, ...) {
font_italic(x$old_name[i]), ifelse(x$old_ref[i] %in% c("", NA), "",
paste0(" (", gsub("et al.", font_italic("et al."), x$old_ref[i]), ")")),
" was renamed ",
ifelse(as.integer(gsub("[^0-9]", "", x$new_ref[i])) < as.integer(gsub("[^0-9]", "", x$old_ref[i])),
font_bold("back to "),
""),
font_italic(x$new_name[i]), ifelse(x$new_ref[i] %in% c("", NA), "",
paste0(" (", gsub("et al.", font_italic("et al."), x$new_ref[i]), ")")),
" [", x$mo[i], "]")))
@ -1747,9 +1768,14 @@ translate_allow_uncertain <- function(allow_uncertain) {
}
get_mo_failures_uncertainties_renamed <- function() {
list(failures = getOption("mo_failures"),
uncertainties = getOption("mo_uncertainties"),
renamed = getOption("mo_renamed"))
remember <- list(failures = getOption("mo_failures"),
uncertainties = getOption("mo_uncertainties"),
renamed = getOption("mo_renamed"))
# empty them, otherwise mo_shortname("Chlamydophila psittaci") will give 3 notes
options("mo_failures" = NULL)
options("mo_uncertainties" = NULL)
options("mo_renamed" = NULL)
remember
}
load_mo_failures_uncertainties_renamed <- function(metadata) {

View File

@ -149,6 +149,7 @@ mo_fullname <- mo_name
#' @export
mo_shortname <- function(x, language = get_locale(), ...) {
x.mo <- as.mo(x, ...)
metadata <- get_mo_failures_uncertainties_renamed()
replace_empty <- function(x) {
@ -158,7 +159,7 @@ mo_shortname <- function(x, language = get_locale(), ...) {
# get first char of genus and complete species in English
shortnames <- paste0(substr(mo_genus(x.mo, language = NULL), 1, 1), ". ", replace_empty(mo_species(x.mo, language = NULL)))
# exceptions for Staphylococci
shortnames[shortnames == "S. coagulase-negative"] <- "CoNS"
shortnames[shortnames == "S. coagulase-positive"] <- "CoPS"
@ -315,9 +316,9 @@ mo_synonyms <- function(x, ...) {
x <- as.mo(x, ...)
metadata <- get_mo_failures_uncertainties_renamed()
IDs <- mo_property(x = x, property = "col_id", language = NULL)
syns <- lapply(IDs, function(col_id) {
res <- sort(microorganisms.old[which(microorganisms.old$col_id_new == col_id), "fullname"])
IDs <- mo_name(x = x, language = NULL)
syns <- lapply(IDs, function(newname) {
res <- sort(microorganisms.old[which(microorganisms.old$fullname_new == newname), "fullname"])
if (length(res) == 0) {
NULL
} else {
@ -368,14 +369,9 @@ mo_url <- function(x, open = FALSE, ...) {
df <- data.frame(mo, stringsAsFactors = FALSE) %>%
left_join(select(microorganisms, mo, source, species_id), by = "mo")
df$url <- ifelse(df$source == "CoL",
paste0(gsub("{year}",
catalogue_of_life$year,
catalogue_of_life$url_CoL,
fixed = TRUE),
"details/species/id/",
df$species_id),
paste0(catalogue_of_life$url_CoL, "details/species/id/", df$species_id, "/"),
ifelse(df$source == "DSMZ",
paste0(catalogue_of_life$url_DSMZ, "/", unlist(lapply(strsplit(mo_names, ""), function(x) x[1]))),
paste0(catalogue_of_life$url_DSMZ, "/advanced_search?adv[taxon-name]=", gsub(" ", "+", mo_names), "/"),
NA_character_))
u <- df$url
names(u) <- mo_names

View File

@ -533,7 +533,7 @@ is.rsi.eligible <- function(x, threshold = 0.05) {
#' @export
#' @noRd
print.rsi <- function(x, ...) {
cat("Class 'rsi'\n")
cat("Class <rsi>\n")
print(as.character(x), quote = FALSE)
}

View File

@ -128,7 +128,7 @@ rsi_calc <- function(...,
}
if (print_warning == TRUE) {
warning("Increase speed by transforming to class `rsi` on beforehand: df %>% mutate_if(is.rsi.eligible, as.rsi)",
warning("Increase speed by transforming to class <rsi> on beforehand: your_data %>% mutate_if(is.rsi.eligible, as.rsi)",
call. = FALSE)
}
@ -177,7 +177,7 @@ rsi_calc_df <- function(type, # "proportion", "count" or "both"
}
if (!any(sapply(data, is.rsi), na.rm = TRUE)) {
stop("No columns with class 'rsi' found. See ?as.rsi.", call. = FALSE)
stop("No columns with class <rsi> found. See ?as.rsi.", call. = FALSE)
}
if (as.character(translate_ab) %in% c("TRUE", "official")) {

Binary file not shown.

View File

@ -89,7 +89,7 @@
"CTF" "J01DC07" 43708 "Cefotiam" "Cephalosporins (2nd gen.)" "Other beta-lactam antibacterials" "Second-generation cephalosporins" "" "c(\"cefotiam\", \"cefotiam?\", \"cefotiamum\", \"ceradolan\", \"ceradon\", \"haloapor\")" 1.2 "g" 4 "g"
"CHE" 125846 "Cefotiam hexetil" "Cephalosporins (3rd gen.)" "" "c(\"cefotiam cilexetil\", \"pansporin t\")"
"FOV" 9578573 "Cefovecin" "Cephalosporins (3rd gen.)" "" ""
"FOX" "J01DC01" 441199 "Cefoxitin" "Cephalosporins (2nd gen.)" "Other beta-lactam antibacterials" "Second-generation cephalosporins" "c(\"cfox\", \"cfx\", \"cfxt\", \"cx\", \"fox\", \"fx\")" "c(\"cefoxitin\", \"cefoxitina\", \"cefoxitine\", \"cefoxitinum\", \"cefoxotin\", \"cephoxitin\", \"mefoxin\", \"mefoxitin\", \"rephoxitin\")" 6 "g" "c(\"25240-3\", \"3448-8\")"
"FOX" "J01DC01" 441199 "Cefoxitin" "Cephalosporins (2nd gen.)" "Other beta-lactam antibacterials" "Second-generation cephalosporins" "c(\"cfox\", \"cfsc\", \"cfx\", \"cfxt\", \"cx\", \"fox\", \"fx\")" "c(\"cefoxitin\", \"cefoxitina\", \"cefoxitine\", \"cefoxitinum\", \"cefoxotin\", \"cephoxitin\", \"mefoxin\", \"mefoxitin\", \"rephoxitin\")" 6 "g" "c(\"25240-3\", \"3448-8\")"
"ZOP" 9571080 "Cefozopran" "Cephalosporins (4th gen.)" "" "cefozopran"
"CFZ" 68597 "Cefpimizole" "Cephalosporins (3rd gen.)" "" "c(\"cefpimizol\", \"cefpimizole\", \"cefpimizole sodium\", \"cefpimizolum\")"
"CPM" "J01DD11" 636405 "Cefpiramide" "Cephalosporins (3rd gen.)" "Other beta-lactam antibacterials" "Third-generation cephalosporins" "" "c(\"cefpiramide\", \"cefpiramide acid\", \"cefpiramido\", \"cefpiramidum\")" 2 "g"
@ -105,7 +105,7 @@
"CPT" "J01DI02" 56841980 "Ceftaroline" "Cephalosporins (5th gen.)" "c(\"\", \"cfro\")" "c(\"teflaro\", \"zinforo\")"
"CPA" "Ceftaroline/avibactam" "Cephalosporins (5th gen.)" "" ""
"CAZ" "J01DD02" 5481173 "Ceftazidime" "Cephalosporins (3rd gen.)" "Other beta-lactam antibacterials" "Third-generation cephalosporins" "c(\"caz\", \"cefta\", \"cfta\", \"cftz\", \"taz\", \"tz\", \"xtz\")" "c(\"ceftazidim\", \"ceftazidima\", \"ceftazidime\", \"ceftazidimum\", \"ceptaz\", \"fortaz\", \"fortum\", \"pentacef\", \"tazicef\", \"tazidime\")" 4 "g" "c(\"21151-6\", \"3449-6\", \"80960-8\")"
"CZA" "Ceftazidime/avibactam" "Cephalosporins (3rd gen.)" "" ""
"CZA" "Ceftazidime/avibactam" "Cephalosporins (3rd gen.)" "c(\"\", \"cfav\")" ""
"CCV" "J01DD52" 9575352 "Ceftazidime/clavulanic acid" "Cephalosporins (3rd gen.)" "Other beta-lactam antibacterials" "Third-generation cephalosporins" "c(\"czcl\", \"xtzl\")" ""
"CEM" 6537431 "Cefteram" "Cephalosporins (3rd gen.)" "" "c(\"cefteram\", \"cefterame\", \"cefteramum\", \"ceftetrame\")"
"CPL" 5362114 "Cefteram pivoxil" "Cephalosporins (3rd gen.)" "" "c(\"cefteram pivoxil\", \"tomiron\")"

BIN
data-raw/data_dsmz.rds Normal file

Binary file not shown.

View File

@ -9,22 +9,6 @@
# >>>>> IF YOU WANT TO IMPORT THIS FILE INTO YOUR OWN SOFTWARE, HAVE THE FIRST 10 LINES SKIPPED <<<<<
# -------------------------------------------------------------------------------------------------------------------------------
if_mo_property like.is.one_of this_value and_these_antibiotics have_these_values then_change_these_antibiotics to_value reference.rule reference.rule_group
genus like .* AMP S AMX S Non-EUCAST: inherit ampicillin results for unavailable amoxicillin Other rules
genus like .* AMP I AMX I Non-EUCAST: inherit ampicillin results for unavailable amoxicillin Other rules
genus like .* AMP R AMX R Non-EUCAST: inherit ampicillin results for unavailable amoxicillin Other rules
genus like .* AMX S AMP S Non-EUCAST: inherit amoxicillin results for unavailable ampicillin Other rules
genus like .* AMX I AMP I Non-EUCAST: inherit amoxicillin results for unavailable ampicillin Other rules
genus like .* AMX R AMP R Non-EUCAST: inherit amoxicillin results for unavailable ampicillin Other rules
genus like .* AMC R AMP, AMX R Non-EUCAST: set ampicillin = R where amoxicillin/clav acid = R Other rules
genus like .* SAM R AMP, AMX R Non-EUCAST: set ampicillin = R where ampicillin/sulbactam = R Other rules
genus like .* TZP R PIP R Non-EUCAST: set piperacillin = R where piperacillin/tazobactam = R Other rules
genus like .* SXT R TMP R Non-EUCAST: set trimethoprim = R where trimethoprim/sulfa = R Other rules
genus like .* AMP S AMC S Non-EUCAST: set amoxicillin/clav acid = S where ampicillin = S Other rules
genus like .* AMX S AMC S Non-EUCAST: set amoxicillin/clav acid = S where ampicillin = S Other rules
genus like .* AMP S SAM S Non-EUCAST: set ampicillin/sulbactam = S where ampicillin = S Other rules
genus like .* AMX S SAM S Non-EUCAST: set ampicillin/sulbactam = S where ampicillin = S Other rules
genus like .* PIP S TZP S Non-EUCAST: set piperacillin/tazobactam = S where piperacillin = S Other rules
genus like .* TMP S SXT S Non-EUCAST: set trimethoprim/sulfa = S where trimethoprim = S Other rules
order is Enterobacterales AMP S AMX S Enterobacterales (Order) Breakpoints
order is Enterobacterales AMP I AMX I Enterobacterales (Order) Breakpoints
order is Enterobacterales AMP R AMX R Enterobacterales (Order) Breakpoints

Can't render this file because it contains an unexpected character in line 6 and column 96.

File diff suppressed because it is too large Load Diff

View File

@ -322,7 +322,7 @@ antibiotics[which(antibiotics$ab == as.ab("cefuroxim")), "abbreviations"][[1]] <
antibiotics[which(antibiotics$ab == as.ab("cefotaxim")), "abbreviations"][[1]] <- list(c(antibiotics[which(antibiotics$ab == as.ab("cefotaxim")), "abbreviations"][[1]], "cftx"))
antibiotics[which(antibiotics$ab == as.ab("ceftazidime")), "abbreviations"][[1]] <- list(c(antibiotics[which(antibiotics$ab == as.ab("ceftazidime")), "abbreviations"][[1]], "cftz"))
antibiotics[which(antibiotics$ab == as.ab("cefepime")), "abbreviations"][[1]] <- list(c(antibiotics[which(antibiotics$ab == as.ab("cefepime")), "abbreviations"][[1]], "cfpi"))
antibiotics[which(antibiotics$ab == as.ab("cefoxitin")), "abbreviations"][[1]] <- list(c(antibiotics[which(antibiotics$ab == as.ab("cefoxitin")), "abbreviations"][[1]], "cfxt"))
antibiotics[which(antibiotics$ab == as.ab("cefoxitin")), "abbreviations"][[1]] <- list(c(antibiotics[which(antibiotics$ab == as.ab("cefoxitin")), "abbreviations"][[1]], "cfxt", "cfsc"))
# More GLIMS codes
antibiotics[which(antibiotics$ab == "CAZ"), "abbreviations"][[1]] <- list(c(antibiotics[which(antibiotics$ab == "CAZ"), "abbreviations"][[1]], "cftz"))
antibiotics[which(antibiotics$ab == "CRO"), "abbreviations"][[1]] <- list(c(antibiotics[which(antibiotics$ab == "CRO"), "abbreviations"][[1]], "cftr"))
@ -377,6 +377,7 @@ antibiotics[which(antibiotics$ab == "CTX"), "abbreviations"][[1]] <- list(c(anti
antibiotics[which(antibiotics$ab == "CAZ"), "abbreviations"][[1]] <- list(c(antibiotics[which(antibiotics$ab == "CAZ"), "abbreviations"][[1]], "cftz"))
antibiotics[which(antibiotics$ab == "CFM"), "abbreviations"][[1]] <- list(c(antibiotics[which(antibiotics$ab == "CFM"), "abbreviations"][[1]], "cfxm"))
antibiotics[which(antibiotics$ab == "FOX"), "abbreviations"][[1]] <- list(c(antibiotics[which(antibiotics$ab == "FOX"), "abbreviations"][[1]], "cfxt"))
antibiotics[which(antibiotics$ab == "CZA"), "abbreviations"][[1]] <- list(c(antibiotics[which(antibiotics$ab == "CZA"), "abbreviations"][[1]], "cfav"))
antibiotics[which(antibiotics$ab == "CZO"), "abbreviations"][[1]] <- list(c(antibiotics[which(antibiotics$ab == "CZO"), "abbreviations"][[1]], "cfzl"))
antibiotics[which(antibiotics$ab == "CZX"), "abbreviations"][[1]] <- list(c(antibiotics[which(antibiotics$ab == "CZX"), "abbreviations"][[1]], "cfzx"))
antibiotics[which(antibiotics$ab == "CHL"), "abbreviations"][[1]] <- list(c(antibiotics[which(antibiotics$ab == "CHL"), "abbreviations"][[1]], "chlo"))
@ -577,10 +578,10 @@ antibiotics <- antibiotics %>%
# set as data.frame again
antibiotics <- as.data.frame(antibiotics, stringsAsFactors = FALSE)
class(antibiotics$ab) <- "ab"
class(antibiotics$ab) <- c("ab", "character")
antibiotics <- antibiotics %>% arrange(name)
# make all abbreviations and synonyms lower case, unique and alphabetically sorted
# make all abbreviations and synonyms lower case, unique and alphabetically sorted ----
for (i in 1:nrow(antibiotics)) {
abb <- sort(unique(tolower(antibiotics[i, "abbreviations"][[1]])))
syn <- sort(unique(tolower(antibiotics[i, "synonyms"][[1]])))

View File

@ -23,94 +23,134 @@
# Data retrieved from the Catalogue of Life (CoL) through the Encyclopaedia of Life:
# https://opendata.eol.org/dataset/catalogue-of-life/
# Data retrieved from the Global Biodiversity Information Facility (GBIF):
# https://doi.org/10.15468/rffz4x
# (download the resource file with a name like "Catalogue of Life yyyy-mm-dd")
# and from the Leibniz Institute DSMZ-German Collection of Microorganisms and Cell Cultures
# https://www.dsmz.de/support/bacterial-nomenclature-up-to-date-downloads.html
# (download the latest "Complete List" as xlsx file)
#
# And from the Leibniz Institute: German Collection of Microorganisms and Cell Cultures (DSMZ)
# (register first at https://bacdive.dsmz.de/api/pnu/registration/register/ and use API as done below)
library(dplyr)
library(AMR)
# also needed: data.table, httr, jsonlite, cleaner, stringr
# unzip and extract taxon.tab (around 1.5 GB) from the CoL archive, then:
# data_col <- data.table::fread("data-raw/taxon.tab")
data_col <- data.table::fread("data-raw/taxa.txt", quote = "")
# unzip and extract taxon.tab (CoL) and taxa.txt (GBIF) (both around 1.5 GB, 3.7-3.9M rows), then:
data_col_raw <- data.table::fread("data-raw/taxon.tab", quote = "")
data_gbif <- data.table::fread("data-raw/taxa.txt", quote = "")
# read the xlsx file from DSMZ (only around 2.5 MB):
data_dsmz <- readxl::read_xlsx("data-raw/DSMZ_bactnames.xlsx")
# merge the two
data_col <- data_gbif %>%
rename(referenceID = identifier) %>%
bind_rows(data_col_raw) %>%
distinct(scientificName, kingdom, genus, specificEpithet, infraspecificEpithet, .keep_all = TRUE)
rm(data_col_raw)
rm(data_gbif)
# read the data from the DSMZ API (around 19000 rows)
dsmz_username <- ""
dsmz_password <- ""
# Fetch `url` with HTTP basic authentication (using the `dsmz_username` and
# `dsmz_password` variables defined above) and return the parsed JSON response.
# An HTTP error status stops the script via httr::stop_for_status().
GET_df <- function(url) {
  response <- httr::GET(url, httr::authenticate(dsmz_username, dsmz_password))
  httr::stop_for_status(response)
  # read the body as UTF-8 text, then parse it; nested structures are flattened
  body_text <- httr::content(response, type = "text", encoding = "UTF-8")
  jsonlite::fromJSON(body_text, flatten = TRUE)
}
# Download all DSMZ species records, page by page.
dsmz_first <- GET_df("https://bacdive.dsmz.de/api/pnu/species?page=1&format=json")
# number of pages to retrieve; the division by 100 presumably reflects the API
# page size (TODO confirm). ceiling() is used because round(x + 0.5) rounds
# half to even and would request a non-existing extra page when the record
# count is an exact multiple such as 100 or 300.
dsmz_pages <- max(1L, as.integer(ceiling(dsmz_first$count / 100)))
# collect every page in a preallocated list and bind once at the end
dsmz_results <- vector("list", dsmz_pages)
dsmz_results[[1]] <- dsmz_first$results
# this next process will take appr. `dsmz_first$count / 100 * 5 / 60` minutes
for (i in seq_len(dsmz_pages)[-1]) {
  dsmz_results[[i]] <- GET_df(paste0("https://bacdive.dsmz.de/api/pnu/species/?page=", i, "&format=json"))$results
  cat(i, "-", AMR:::percentage(i / dsmz_pages), "\n")
}
data_dsmz <- do.call(rbind, dsmz_results)
rm(dsmz_first)
rm(dsmz_pages)
rm(dsmz_results)
# the CoL data is over 3.7M rows:
data_col %>% freq(kingdom)
data_col %>% cleaner::freq(kingdom)
# Item Count Percent Cum. Count Cum. Percent
# --- ---------- ---------- -------- ----------- -------------
# 1 Animalia 2,225,627 59.1% 2,225,627 59.1%
# 2 Plantae 1,177,412 31.3% 3,403,039 90.4%
# 3 Fungi 290,145 7.7% 3,693,184 98.1%
# 4 Chromista 47,126 1.3% 3,740,310 99.3%
# 5 Bacteria 14,478 0.4% 3,754,788 99.7%
# 6 Protozoa 6,060 0.2% 3,760,848 99.9%
# 7 Viruses 3,827 0.1% 3,764,675 100.0%
# 8 Archaea 610 0.0% 3,765,285 100.0%
# 1 Animalia 2,494,992 55.43% 2,494,992 55.43%
# 2 Plantae 1,379,674 30.65% 3,874,666 86.08%
# 3 Fungi 547,619 12.17% 4,422,285 98.24%
# 4 Chromista 51,475 1.14% 4,473,760 99.39%
# 5 Bacteria 14,442 0.32% 4,488,202 99.71%
# 6 Protozoa 8,750 0.19% 4,496,952 99.90%
# 7 Viruses 3,805 0.08% 4,500,757 99.99%
# 8 Archaea 609 0.01% 4,501,366 100.00%
# clean data_col
data_col <- data_col %>%
data_col.bak <- data_col
data_col_old <- data_col %>%
# filter: has new accepted name
filter(!is.na(acceptedNameUsageID)) %>%
as_tibble() %>%
select(col_id = taxonID,
col_id_new = acceptedNameUsageID,
fullname = scientificName,
kingdom,
phylum,
class,
order,
family,
genus,
species = specificEpithet,
subspecies = infraspecificEpithet,
rank = taxonRank,
ref = scientificNameAuthorship,
species_id = references)
data_col$source <- "CoL"
transmute(fullname = trimws(stringr::str_replace(scientificName,
pattern = stringr::fixed(scientificNameAuthorship),
replacement = "")),
fullname_new = trimws(paste(ifelse(is.na(genus), "", genus),
ifelse(is.na(specificEpithet), "", specificEpithet),
ifelse(is.na(infraspecificEpithet), "", infraspecificEpithet))),
ref = scientificNameAuthorship,
prevalence = NA_integer_)
data_col <- data_col %>%
# filter: has no new accepted name
filter(is.na(acceptedNameUsageID)) %>%
as_tibble() %>%
transmute(fullname = "",
kingdom,
phylum,
class,
order,
family,
genus,
species = specificEpithet,
subspecies = infraspecificEpithet,
rank = taxonRank,
ref = scientificNameAuthorship,
species_id = referenceID,
source = "CoL")
# clean data_dsmz
data_dsmz <- data_dsmz %>%
data_dsmz.bak <- data_dsmz
data_dsmz_old <- data_dsmz %>%
# filter: correct name is not NULL
filter(!sapply(correct_name, is.null)) %>%
as_tibble() %>%
transmute(col_id = NA_integer_,
col_id_new = NA_integer_,
fullname = "",
# kingdom = "",
# phylum = "",
# class = "",
# order = "",
# family = "",
genus = ifelse(is.na(GENUS), "", GENUS),
species = ifelse(is.na(SPECIES), "", SPECIES),
subspecies = ifelse(is.na(SUBSPECIES), "", SUBSPECIES),
transmute(fullname = trimws(paste(ifelse(is.na(genus), "", genus),
ifelse(is.na(species_epithet), "", species_epithet),
ifelse(is.na(subspecies_epithet), "", subspecies_epithet))),
fullname_new = sapply(correct_name, function(x) x[2L]),
ref = authors,
prevalence = NA_integer_)
data_dsmz <- data_dsmz %>%
# filter: correct name is NULL
filter(sapply(correct_name, is.null)) %>%
as_tibble() %>%
transmute(fullname = "",
kingdom = regio,
phylum,
class = classis,
# order = "", # does not contain order, will add later based on CoL
family = familia,
genus = ifelse(is.na(genus), "", genus),
species = ifelse(is.na(species_epithet), "", species_epithet),
subspecies = ifelse(is.na(subspecies_epithet), "", subspecies_epithet),
rank = ifelse(species == "", "genus", "species"),
ref = AUTHORS,
species_id = as.character(RECORD_NO),
ref = authors,
species_id = as.character(pnu_no),
source = "DSMZ")
# DSMZ only contains genus/(sub)species, try to find taxonomic properties based on genus and data_col
ref_taxonomy <- data_col %>%
filter(genus %in% data_dsmz$genus,
kingdom %in% c("Bacteria", "Chromista", "Archaea", "Protozoa", "Fungi"),
family != "") %>%
mutate(kingdom = factor(kingdom,
# in the left_join following, try Bacteria first, then Chromista, ...
levels = c("Bacteria", "Chromista", "Archaea", "Protozoa", "Fungi"),
ordered = TRUE)) %>%
filter(family %in% data_dsmz$family & family != "") %>%
arrange(kingdom) %>%
distinct(genus, .keep_all = TRUE) %>%
select(kingdom, phylum, class, order, family, genus)
distinct(family, .keep_all = TRUE) %>%
select(family, order)
data_dsmz <- data_dsmz %>%
left_join(ref_taxonomy, by = "genus") %>%
mutate(kingdom = "Bacteria",
phylum = ifelse(is.na(phylum), "(unknown phylum)", phylum),
class = ifelse(is.na(class), "(unknown class)", class),
order = ifelse(is.na(order), "(unknown order)", order),
family = ifelse(is.na(family), "(unknown family)", family),
)
left_join(ref_taxonomy, by = "family") # NAs will later become "(unknown ...)"
# combine everything
data_total <- data_col %>%
@ -119,6 +159,8 @@ data_total <- data_col %>%
rm(data_col)
rm(data_dsmz)
rm(ref_taxonomy)
rm(data_col.bak)
rm(data_dsmz.bak)
mo_found_in_NL <- c("Absidia", "Acremonium", "Actinotignum", "Aedes", "Alternaria", "Anaerosalibacter", "Ancylostoma",
"Angiostrongylus", "Anisakis", "Anopheles", "Apophysomyces", "Arachnia", "Ascaris", "Aspergillus",
@ -158,8 +200,6 @@ MOs <- data_total %>%
)
# or the genus has to be one of the genera we found in our hospitals last decades (Northern Netherlands, 2002-2018)
| genus %in% mo_found_in_NL
# or the taxonomic entry is old - the species was renamed
| !is.na(col_id_new)
) %>%
# really no Plantae (e.g. Dracunculus exist both as worm and as plant)
filter(kingdom != "Plantae") %>%
@ -174,59 +214,56 @@ MOs <- MOs %>% bind_rows(data_total %>%
| (family %in% MOs$family & rank == "family")
| (genus %in% MOs$genus & rank == "genus")))
# filter old taxonomic names so only the ones with an existing reference will be kept
MOs <- MOs %>%
filter(is.na(col_id_new) | (!is.na(col_id_new) & col_id_new %in% MOs$col_id))
# Condense taxonomic authorship strings to the form "First author [et al.], year".
#
# `ref` is a character vector of authorship strings; the result is a character
# vector of the same length. Elements without a trailing 4-digit year (or with a
# year later than the current year) are returned without a year.
get_author_year <- function(ref) {
  # Only keep first author, e.g. transform 'Smith, Jones, 2011' to 'Smith et al., 2011'
  authors2 <- iconv(ref, from = "UTF-8", to = "ASCII//TRANSLIT")
  # remove leading and trailing brackets
  authors2 <- gsub("^[(](.*)[)]$", "\\1", authors2)
  # only take part after brackets if there's a name
  authors2 <- ifelse(grepl(".*[)] [a-zA-Z]+.*", authors2),
                     gsub(".*[)] (.*)", "\\1", authors2),
                     authors2)
  # get year from last 4 digits; only convert the strings that really end in a
  # year, so that year-less references do not raise coercion warnings
  has_year <- grepl("[0-9]{4}$", authors2)
  lastyear <- rep(NA_integer_, length(authors2))
  lastyear[has_year] <- as.integer(gsub(".*([0-9]{4})$", "\\1", authors2[has_year]))
  # can never be later than now
  current_year <- as.integer(format(Sys.Date(), "%Y"))
  lastyear[!is.na(lastyear) & lastyear > current_year] <- NA_integer_
  # get authors without last year
  authors <- gsub("(.*)[0-9]{4}$", "\\1", authors2)
  # remove nonsense characters from names
  authors <- gsub("[^a-zA-Z,'& -]", "", authors)
  # remove trailing and leading spaces
  authors <- trimws(authors)
  # only keep first author and replace all others by 'et al'
  authors <- gsub("(,| and| et| &| ex| emend\\.?) .*", " et al.", authors)
  # et al. always with ending dot
  authors <- gsub(" et al\\.?", " et al.", authors)
  authors <- gsub(" ?,$", "", authors)
  # don't start with 'sensu' or 'ehrenb'
  authors <- gsub("^(sensu|Ehrenb.?) ", "", authors, ignore.case = TRUE)
  # no initials, only surname
  authors <- gsub("^([A-Z]+ )+", "", authors, ignore.case = FALSE)
  # combine author and year if year is available
  ref <- ifelse(!is.na(lastyear),
                paste0(authors, ", ", lastyear),
                authors)
  # fix beginning and ending
  ref <- gsub(", $", "", ref)
  ref <- gsub("^, ", "", ref)
  ref <- gsub("^(emend|et al.,?)", "", ref)
  ref <- trimws(ref)
  # a lot start with a lowercase character - fix that
  # (names like 'dOrbigny' are excluded here and handled separately below)
  is_d_name <- grepl("^d[A-Z]", ref)
  ref[!is_d_name] <- gsub("^([a-z])", "\\U\\1", ref[!is_d_name], perl = TRUE)
  # specific one for the French that are named dOrbigny
  ref[is_d_name] <- gsub("^d", "d'", ref[is_d_name])
  # collapse repeated spaces
  ref <- gsub(" +", " ", ref)
  ref
}
MOs <- MOs %>%
# remove text if it contains 'Not assigned' like phylum in viruses
mutate_all(~gsub("(Not assigned|\\[homonym\\]|\\[mistake\\])", "", ., ignore.case = TRUE))
MOs <- MOs %>%
# Only keep first author, e.g. transform 'Smith, Jones, 2011' to 'Smith et al., 2011':
mutate(authors2 = iconv(ref, from = "UTF-8", to = "ASCII//TRANSLIT"),
# remove leading and trailing brackets
authors2 = gsub("^[(](.*)[)]$", "\\1", authors2),
# only take part after brackets if there's a name
authors2 = ifelse(grepl(".*[)] [a-zA-Z]+.*", authors2),
gsub(".*[)] (.*)", "\\1", authors2),
authors2),
# get year from last 4 digits
lastyear = as.integer(gsub(".*([0-9]{4})$", "\\1", authors2)),
# can never be later than now
lastyear = ifelse(lastyear > as.integer(format(Sys.Date(), "%Y")),
NA,
lastyear),
# get authors without last year
authors = gsub("(.*)[0-9]{4}$", "\\1", authors2),
# remove nonsense characters from names
authors = gsub("[^a-zA-Z,'& -]", "", authors),
# remove trailing and leading spaces
authors = trimws(authors),
# only keep first author and replace all others by 'et al'
authors = gsub("(,| and| et| &| ex| emend\\.?) .*", " et al.", authors),
# et al. always with ending dot
authors = gsub(" et al\\.?", " et al.", authors),
authors = gsub(" ?,$", "", authors),
# don't start with 'sensu' or 'ehrenb'
authors = gsub("^(sensu|Ehrenb.?) ", "", authors, ignore.case = TRUE),
# no initials, only surname
authors = gsub("^([A-Z]+ )+", "", authors, ignore.case = FALSE),
# combine author and year if year is available
ref = ifelse(!is.na(lastyear),
paste0(authors, ", ", lastyear),
authors),
# fix beginning and ending
ref = gsub(", $", "", ref),
ref = gsub("^, ", "", ref),
ref = gsub("^(emend|et al.,?)", "", ref),
ref = trimws(ref)
)
# a lot start with a lowercase character - fix that
MOs$ref[!grepl("^d[A-Z]", MOs$ref)] <- gsub("^([a-z])", "\\U\\1", MOs$ref[!grepl("^d[A-Z]", MOs$ref)], perl = TRUE)
# specific one for the French that are named dOrbigny
MOs$ref[grepl("^d[A-Z]", MOs$ref)] <- gsub("^d", "d'", MOs$ref[grepl("^d[A-Z]", MOs$ref)])
MOs <- MOs %>% mutate(ref = gsub(" +", " ", ref))
MOs <- MOs %>% mutate(ref = get_author_year(ref))
# Remove non-ASCII characters (these are not allowed by CRAN)
MOs <- MOs %>%
@ -235,53 +272,58 @@ MOs <- MOs %>%
# remove invalid characters
mutate_all(~gsub("[\"'`]+", "", .))
# Split old taxonomic names - they refer in the original data to a new `taxonID` with `acceptedNameUsageID`
MOs.old <- MOs %>%
filter(!is.na(col_id_new),
ref != "",
source != "DSMZ") %>%
transmute(col_id,
col_id_new,
fullname =
trimws(
gsub("(.*)[(].*", "\\1",
stringr::str_replace(
string = fullname,
pattern = stringr::fixed(authors2),
replacement = "")) %>%
gsub(" (var|f|subsp)[.]", "", .)),
ref) %>%
filter(!is.na(fullname)) %>%
distinct(fullname, .keep_all = TRUE) %>%
arrange(col_id)
# set new fullnames
MOs <- MOs %>%
mutate(fullname = trimws(case_when(rank == "family" ~ family,
rank == "order" ~ order,
rank == "class" ~ class,
rank == "phylum" ~ phylum,
rank == "kingdom" ~ kingdom,
TRUE ~ paste(genus, species, subspecies))),
fullname = gsub(" (var|f|subsp)[.]", "", fullname)) %>%
# remove text if it contains 'Not assigned', etc.
mutate_all(function(x) ifelse(x %like% "(not assigned|homonym|mistake)", NA, x)) %>%
# clean taxonomy
mutate(kingdom = ifelse(is.na(kingdom) | trimws(kingdom) == "", "(unknown kingdom)", trimws(kingdom)),
phylum = ifelse(is.na(phylum) | trimws(phylum) == "", "(unknown phylum)", trimws(phylum)),
class = ifelse(is.na(class) | trimws(class) == "", "(unknown class)", trimws(class)),
order = ifelse(is.na(order) | trimws(order) == "", "(unknown order)", trimws(order)),
family = ifelse(is.na(family) | trimws(family) == "", "(unknown family)", trimws(family)))
MO.bak <- MOs
# Split old taxonomic names
MOs.old <- data_col_old %>%
filter(!gsub(" (var|f|subsp)[.]", "", fullname_new) %in% data_dsmz_old$fullname) %>%
bind_rows(data_dsmz_old) %>%
mutate(fullname_new = gsub(" (var|f|subsp)[.]", "", fullname_new),
fullname = gsub(" (var|f|subsp)[.]", "", fullname)) %>%
# for cases like Chlamydia pneumoniae -> Chlamydophila pneumoniae -> Chlamydia pneumoniae:
filter(!fullname %in% fullname_new &
fullname_new %in% MOs$fullname &
!is.na(fullname) &
fullname != fullname_new) %>%
distinct(fullname, .keep_all = TRUE) %>%
arrange(fullname) %>%
mutate(ref = get_author_year(ref))
MOs <- MOs %>%
filter(is.na(col_id_new) | source == "DSMZ") %>%
transmute(col_id,
fullname = trimws(case_when(rank == "family" ~ family,
rank == "order" ~ order,
rank == "class" ~ class,
rank == "phylum" ~ phylum,
rank == "kingdom" ~ kingdom,
TRUE ~ paste(genus, species, subspecies))),
# remove entries that are old and in MOs.old
filter(!fullname %in% MOs.old$fullname) %>%
# mark up
transmute(fullname,
kingdom,
phylum,
class,
order,
family,
genus = gsub(":", "", genus),
genus,
species,
subspecies,
rank,
ref,
species_id = gsub(".*/([a-f0-9]+)", "\\1", species_id),
species_id = gsub("[^a-zA-Z0-9].*", "", species_id),
source) %>%
#distinct(fullname, .keep_all = TRUE) %>%
filter(!grepl("unassigned", fullname, ignore.case = TRUE)) %>%
# prefer DSMZ over CoL, since that's more recent
arrange(desc(source)) %>%
# prefer known taxonomy over unknown taxonomy, then DSMZ over CoL (= desc)
arrange(desc(kingdom, genus, species, source)) %>%
distinct(kingdom, fullname, .keep_all = TRUE)
# remove all genera that have no species - they are irrelevant for microbiology and almost all from the kingdom of Animalia
@ -296,43 +338,45 @@ to_remove <- MOs %>%
MOs <- MOs %>% filter(!(paste(kingdom, genus) %in% to_remove))
rm(to_remove)
# add CoL's col_id, source and ref from MOs.bak, for the cases where DSMZ took preference
# add all missing genera, families and orders
MOs <- MOs %>%
mutate(kingdom_fullname = paste(kingdom, fullname)) %>%
left_join(MO.bak %>%
filter(is.na(col_id_new), !is.na(col_id)) %>%
transmute(col_id, species_id, source, ref, kingdom_fullname = trimws(paste(kingdom, genus, species, subspecies))),
by = "kingdom_fullname",
suffix = c("_dsmz", "_col")) %>%
mutate(col_id = col_id_col,
species_id = ifelse(!is.na(species_id_col) & ref_col == ref_dsmz,
gsub(".*/(.*)$", "\\1", species_id_col),
species_id_dsmz),
source = ifelse(!is.na(species_id_col) & ref_col == ref_dsmz,
source_col,
source_dsmz),
ref = ifelse(!is.na(species_id_col) & ref_col == ref_dsmz,
ref_col,
ref_dsmz)) %>%
select(-matches("(_col|_dsmz|kingdom_fullname)"))
bind_rows(MOs %>%
arrange(genus, species) %>%
distinct(genus, .keep_all = TRUE) %>%
filter(rank == "species") %>%
mutate(fullname = genus,
species = "",
rank = "genus",
species_id = "",
ref = NA_character_)) %>%
bind_rows(MOs %>%
arrange(family, genus) %>%
distinct(family, .keep_all = TRUE) %>%
filter(rank == "genus") %>%
mutate(fullname = family,
genus = "",
rank = "family",
species_id = "",
ref = NA_character_)) %>%
bind_rows(MOs %>%
arrange(order, family) %>%
distinct(family, .keep_all = TRUE) %>%
filter(rank == "family") %>%
mutate(fullname = order,
family = "",
rank = "order",
species_id = "",
ref = NA_character_))
MOs.old <- MOs.old %>%
# remove the ones that are in the MOs data set
filter(col_id_new %in% MOs$col_id) %>%
# and remove the ones that have the exact same fullname in the MOs data set, like Moraxella catarrhalis
left_join(MOs, by = "fullname") %>%
filter(col_id_new != col_id.y | is.na(col_id.y)) %>%
select(col_id = col_id.x, col_id_new, fullname, ref = ref.x)
# remove the records that are in MOs.old
sum(MOs.old$fullname %in% MOs$fullname)
MOs <- MOs %>% filter(!fullname %in% MOs.old$fullname)
sum(MOs.old$fullname %in% MOs$fullname)
# remove the empty ones
MOs <- MOs %>%
mutate(fullname = gsub(",.*", "", fullname)) %>%
distinct(kingdom, fullname, .keep_all = TRUE) %>%
filter(fullname != "")
# what characters are in the fullnames?
table(sort(unlist(strsplit(x = paste(MOs$fullname, collapse = ""), split = ""))))
MOs %>% filter(!fullname %like% "^[a-z ]+$") %>% View()
MOs %>% filter(!fullname %like% "^[a-z ]+$") %>% arrange(fullname) %>% View()
table(MOs$kingdom, MOs$rank)
table(AMR::microorganisms$kingdom, AMR::microorganisms$rank)
@ -436,7 +480,6 @@ MOs <- MOs %>%
bind_rows(
# Unknowns
data.frame(mo = "UNKNOWN",
col_id = NA_integer_,
fullname = "(unknown name)",
kingdom = "(unknown kingdom)",
phylum = "(unknown phylum)",
@ -453,7 +496,6 @@ MOs <- MOs %>%
prevalence = 1,
stringsAsFactors = FALSE),
data.frame(mo = "B_GRAMN",
col_id = NA_integer_,
fullname = "(unknown Gram-negatives)",
kingdom = "Bacteria",
phylum = "(unknown phylum)",
@ -470,7 +512,6 @@ MOs <- MOs %>%
prevalence = 1,
stringsAsFactors = FALSE),
data.frame(mo = "B_GRAMP",
col_id = NA_integer_,
fullname = "(unknown Gram-positives)",
kingdom = "Bacteria",
phylum = "(unknown phylum)",
@ -487,7 +528,6 @@ MOs <- MOs %>%
prevalence = 1,
stringsAsFactors = FALSE),
data.frame(mo = "F_YEAST",
col_id = NA_integer_,
fullname = "(unknown yeast)",
kingdom = "Fungi",
phylum = "(unknown phylum)",
@ -504,7 +544,6 @@ MOs <- MOs %>%
prevalence = 2,
stringsAsFactors = FALSE),
data.frame(mo = "F_FUNGUS",
col_id = NA_integer_,
fullname = "(unknown fungus)",
kingdom = "Fungi",
phylum = "(unknown phylum)",
@ -524,7 +563,6 @@ MOs <- MOs %>%
MOs %>%
filter(genus == "Staphylococcus", species == "epidermidis") %>% .[1,] %>%
mutate(mo = gsub("(.*)_(.*)_.*", "\\1_\\2_CONS", mo),
col_id = NA_integer_,
species = "coagulase-negative",
fullname = "Coagulase-negative Staphylococcus (CoNS)",
ref = NA_character_,
@ -534,7 +572,6 @@ MOs <- MOs %>%
MOs %>%
filter(genus == "Staphylococcus", species == "epidermidis") %>% .[1,] %>%
mutate(mo = gsub("(.*)_(.*)_.*", "\\1_\\2_COPS", mo),
col_id = NA_integer_,
species = "coagulase-positive",
fullname = "Coagulase-positive Staphylococcus (CoPS)",
ref = NA_character_,
@ -558,7 +595,6 @@ MOs <- MOs %>%
MOs %>%
filter(genus == "Streptococcus", species == "dysgalactiae") %>% .[1,] %>%
mutate(mo = gsub("(.*)_(.*)_.*", "\\1_\\2_GRPC", mo),
col_id = NA_integer_,
species = "group C" ,
fullname = "Streptococcus group C",
ref = NA_character_,
@ -567,7 +603,6 @@ MOs <- MOs %>%
MOs %>%
filter(genus == "Streptococcus", species == "agalactiae") %>% .[1,] %>%
mutate(mo = gsub("(.*)_(.*)_.*", "\\1_\\2_GRPD", mo),
col_id = NA_integer_,
species = "group D" ,
fullname = "Streptococcus group D",
ref = NA_character_,
@ -576,7 +611,6 @@ MOs <- MOs %>%
MOs %>%
filter(genus == "Streptococcus", species == "agalactiae") %>% .[1,] %>%
mutate(mo = gsub("(.*)_(.*)_.*", "\\1_\\2_GRPF", mo),
col_id = NA_integer_,
species = "group F" ,
fullname = "Streptococcus group F",
ref = NA_character_,
@ -585,7 +619,6 @@ MOs <- MOs %>%
MOs %>%
filter(genus == "Streptococcus", species == "agalactiae") %>% .[1,] %>%
mutate(mo = gsub("(.*)_(.*)_.*", "\\1_\\2_GRPG", mo),
col_id = NA_integer_,
species = "group G" ,
fullname = "Streptococcus group G",
ref = NA_character_,
@ -594,7 +627,6 @@ MOs <- MOs %>%
MOs %>%
filter(genus == "Streptococcus", species == "agalactiae") %>% .[1,] %>%
mutate(mo = gsub("(.*)_(.*)_.*", "\\1_\\2_GRPH", mo),
col_id = NA_integer_,
species = "group H" ,
fullname = "Streptococcus group H",
ref = NA_character_,
@ -603,7 +635,6 @@ MOs <- MOs %>%
MOs %>%
filter(genus == "Streptococcus", species == "agalactiae") %>% .[1,] %>%
mutate(mo = gsub("(.*)_(.*)_.*", "\\1_\\2_GRPK", mo),
col_id = NA_integer_,
species = "group K" ,
fullname = "Streptococcus group K",
ref = NA_character_,
@ -613,7 +644,6 @@ MOs <- MOs %>%
MOs %>%
filter(genus == "Streptococcus", species == "agalactiae") %>% .[1,] %>%
mutate(mo = gsub("(.*)_(.*)_.*", "\\1_\\2_HAEM", mo),
col_id = NA_integer_,
species = "beta-haemolytic" ,
fullname = "Beta-haemolytic Streptococcus",
ref = NA_character_,
@ -623,7 +653,6 @@ MOs <- MOs %>%
MOs %>%
filter(genus == "Streptococcus", species == "agalactiae") %>% .[1,] %>%
mutate(mo = gsub("(.*)_(.*)_.*", "\\1_\\2_VIRI", mo),
col_id = NA_integer_,
species = "viridans" ,
fullname = "Viridans Group Streptococcus (VGS)",
ref = NA_character_,
@ -633,7 +662,6 @@ MOs <- MOs %>%
MOs %>%
filter(genus == "Streptococcus", species == "agalactiae") %>% .[1,] %>%
mutate(mo = gsub("(.*)_(.*)_.*", "\\1_\\2_MILL", mo),
col_id = NA_integer_,
species = "milleri" ,
fullname = "Milleri Group Streptococcus (MGS)",
ref = NA_character_,
@ -646,7 +674,6 @@ MOs <- MOs %>%
mutate(mo = paste0(mo, "_HMNS"),
fullname = paste(fullname, "hominis"),
species = "hominis",
col_id = NA,
source = "manually added",
ref = NA_character_,
species_id = ""),
@ -654,7 +681,6 @@ MOs <- MOs %>%
MOs %>%
filter(fullname == "Dientamoeba") %>%
mutate(mo = gsub("(.*?)_.*", "\\1_THMNS", mo),
col_id = NA,
fullname = "Trichomonas",
family = "Trichomonadidae",
genus = "Trichomonas",
@ -664,7 +690,6 @@ MOs <- MOs %>%
MOs %>%
filter(fullname == "Dientamoeba fragilis") %>%
mutate(mo = gsub("(.*?)_.*", "\\1_THMNS_VAG", mo),
col_id = NA,
fullname = "Trichomonas vaginalis",
family = "Trichomonadidae",
genus = "Trichomonas",
@ -675,7 +700,6 @@ MOs <- MOs %>%
MOs %>% # add family as such too
filter(fullname == "Monocercomonadidae") %>%
mutate(mo = gsub("(.*)_(.*)_.*", "\\1_\\2_TRCHMNDD", mo),
col_id = NA,
fullname = "Trichomonadidae",
family = "Trichomonadidae",
rank = "family",
@ -760,33 +784,37 @@ new_families <- MOs %>%
filter(order == "Enterobacterales") %>%
pull(family) %>%
unique()
class(MOs$mo) <- "character"
MOs <- rbind(MOs %>% filter(!(rank == "family" & fullname %in% new_families)),
AMR::microorganisms %>%
select(-snomed) %>%
filter(family == "Enterobacteriaceae" & rank == "family") %>%
rbind(., ., ., ., ., ., .) %>%
mutate(fullname = new_families,
source = "manually added",
ref = "Adeolu et al., 2016",
family = fullname, mo = paste0("B_[FAM]_",
toupper(abbreviate(new_families,
minlength = 8,
use.classes = TRUE,
method = "both.sides",
strict = FALSE)))))
MOs <- MOs %>%
filter(!(rank == "family" & fullname %in% new_families)) %>%
bind_rows(tibble(mo = paste0("B_[FAM]_",
toupper(abbreviate(new_families,
minlength = 8,
use.classes = TRUE,
method = "both.sides",
strict = FALSE))),
fullname = new_families,
kingdom = "Bacteria",
phylum = "Proteobacteria",
class = "Gammaproteobacteria",
order = "Enterobacterales",
family = new_families,
genus = "",
species = "",
subspecies = "",
rank = "family",
ref = "Adeolu et al., 2016",
species_id = NA_character_,
source = "manually added",
prevalence = 1))
MOs[which(MOs$order == "Enterobacteriales"), "order"] <- "Enterobacterales"
MOs[which(MOs$fullname == "Enterobacteriales"), "fullname"] <- "Enterobacterales"
MOs <- MOs %>%
group_by(kingdom) %>%
distinct(fullname, .keep_all = TRUE) %>%
ungroup() %>%
filter(fullname != "")
# add prevalence to old taxonomic names
MOs.old <- MOs.old %>%
left_join(MOs %>% select(col_id, prevalence), by = c("col_id_new" = "col_id"))
select(-prevalence) %>%
left_join(MOs %>% select(fullname, prevalence), by = c("fullname_new" = "fullname"))
# everything distinct?
sum(duplicated(MOs$mo))
@ -797,18 +825,105 @@ colnames(MOs)
MOs %>% arrange(fullname) %>% filter(!fullname %in% AMR::microorganisms$fullname) %>% View()
MOs.old %>% arrange(fullname) %>% filter(!fullname %in% AMR::microorganisms.old$fullname) %>% View()
# and the ones we lost:
AMR::microorganisms %>% filter(!fullname %in% MOs$fullname) %>% View() # based on fullname
AMR::microorganisms %>% filter(!mo %in% MOs$mo) %>% View() # based on mo
AMR::microorganisms %>% filter(!mo %in% MOs$mo & !fullname %in% MOs$fullname) %>% View()
# AMR::microorganisms %>% filter(!fullname %in% MOs$fullname) %>% View() # based on fullname
AMR::microorganisms %>% filter(!fullname %in% c(MOs$fullname, MOs.old$fullname)) %>% View() # excluding renamed ones
# AMR::microorganisms %>% filter(!mo %in% MOs$mo) %>% View() # based on mo
# AMR::microorganisms %>% filter(!mo %in% MOs$mo & !fullname %in% MOs$fullname) %>% View()
# and these IDs have changed:
old_new <- MOs %>%
mutate(kingdom_fullname = paste(kingdom, fullname)) %>%
filter(kingdom_fullname %in% (AMR::microorganisms %>% mutate(kingdom_fullname = paste(kingdom, fullname)) %>% pull(kingdom_fullname))) %>%
left_join(AMR::microorganisms %>% mutate(kingdom_fullname = paste(kingdom, fullname)) %>% select(mo, kingdom_fullname), by = "kingdom_fullname", suffix = c("_new", "_old")) %>%
filter(kingdom_fullname %in% (AMR::microorganisms %>%
mutate(kingdom_fullname = paste(kingdom, fullname)) %>%
pull(kingdom_fullname))) %>%
left_join(AMR::microorganisms %>%
mutate(kingdom_fullname = paste(kingdom, fullname)) %>%
select(mo, kingdom_fullname), by = "kingdom_fullname", suffix = c("_new", "_old")) %>%
filter(mo_new != mo_old) %>%
select(mo_old, mo_new, everything())
View(old_new)
# set new MO codes as names to existing data sets
rsi_translation$mo <- mo_name(rsi_translation$mo, language = NULL)
microorganisms.codes$mo <- mo_name(microorganisms.codes$mo, language = NULL)
microorganisms.translation <- AMR:::microorganisms.translation %>%
bind_rows(tibble(mo_old = AMR:::microorganisms.translation$mo_new, mo_new = mo_old)) %>%
filter(!mo_old %in% MOs$mo) %>%
mutate(mo_new = mo_name(mo_new, language = NULL)) %>%
bind_rows(old_new %>% select(mo_old, mo_new)) %>%
distinct(mo_old, .keep_all = TRUE)
# arrange the data sets to save
MOs <- MOs %>% arrange(fullname)
MOs.old <- MOs.old %>% arrange(fullname)
# transform
MOs <- as.data.frame(MOs, stringsAsFactors = FALSE)
MOs.old <- as.data.frame(MOs.old, stringsAsFactors = FALSE)
microorganisms.codes <- as.data.frame(microorganisms.codes, stringsAsFactors = FALSE)
class(MOs$mo) <- c("mo", "character")
# SAVE
### for same server
microorganisms <- dataset_UTF8_to_ASCII(MOs)
microorganisms.old <- dataset_UTF8_to_ASCII(MOs.old)
### for other server
saveRDS(MOs, "microorganisms.rds")
saveRDS(MOs.old, "microorganisms.old.rds")
saveRDS(microorganisms.codes, "microorganisms.codes.rds")
# on the server, do:
usethis::use_data(microorganisms, overwrite = TRUE, version = 2)
usethis::use_data(microorganisms.old, overwrite = TRUE, version = 2)
rm(microorganisms)
rm(microorganisms.old)
# load new data sets
devtools::load_all(".")
# reset previously changed mo codes
rsi_translation$mo <- as.mo(rsi_translation$mo)
microorganisms.codes$mo <- as.mo(microorganisms.codes$mo)
class(microorganisms.codes$mo) <- c("mo", "character")
microorganisms.translation <- microorganisms.translation %>%
left_join(microorganisms.old[, c("fullname", "fullname_new")], # microorganisms.old is now new and loaded
by = c("mo_new" = "fullname")) %>%
mutate(name = ifelse(!is.na(fullname_new), fullname_new, mo_new)) %>%
left_join(microorganisms[, c("fullname", "mo")], # as is microorganisms
by = c("name" = "fullname")) %>%
select(mo_old, mo_new = mo) %>%
filter(!is.na(mo_old), !is.na(mo_new))
class(microorganisms.translation$mo_old) <- "character" # no class <mo> since those aren't valid MO codes
class(microorganisms.translation$mo_new) <- c("mo", "character")
# save those to the package
usethis::use_data(rsi_translation, overwrite = TRUE, version = 2)
usethis::use_data(microorganisms.codes, overwrite = TRUE, version = 2)
saveRDS(microorganisms.translation, file = "data-raw/microorganisms.translation.rds", version = 2)
# to save microorganisms.translation internally to the package
source("data-raw/internals.R")
# load new data sets again
devtools::load_all(".")
# and check: these codes should not be missing (will otherwise throw a unit test error):
AMR::microorganisms.codes %>% filter(!mo %in% MOs$mo)
AMR::rsi_translation %>% filter(!mo %in% MOs$mo)
AMR:::microorganisms.translation %>% filter(!mo_new %in% MOs$mo)
# update the example_isolates data set
example_isolates$mo <- as.mo(example_isolates$mo)
usethis::use_data(example_isolates, overwrite = TRUE)
# Don't forget to add SNOMED codes! (data-raw/snomed.R)
# run the unit tests
testthat::test_file("tests/testthat/test-data.R")
testthat::test_file("tests/testthat/test-mo.R")
testthat::test_file("tests/testthat/test-mo_property.R")
# OLD CODE ----------------------------------------------------------------
# to keep all the old IDs:
# MOs <- MOs %>% filter(!mo %in% old_new$mo_new) %>%
# rbind(microorganisms %>%
@ -816,79 +931,32 @@ View(old_new)
# select(mo, fullname) %>%
# left_join(MOs %>%
# select(-mo), by = "fullname"))
# and these codes are now missing (which will throw a unit test error):
AMR::microorganisms.codes %>% filter(!mo %in% MOs$mo)
AMR::rsi_translation %>% filter(!mo %in% MOs$mo)
AMR:::microorganisms.translation %>% filter(!mo_new %in% MOs$mo) %>% View()
# this is how to fix it
microorganisms.codes <- AMR::microorganisms.codes %>%
left_join(MOs %>%
mutate(kingdom_fullname = paste(kingdom, fullname)) %>%
left_join(AMR::microorganisms %>%
transmute(mo, kingdom_fullname = paste(kingdom, fullname)),
by = "kingdom_fullname", suffix = c("_new", "_old")) %>%
select(mo_old, mo_new),
by = c("mo" = "mo_old")) %>%
select(code, mo = mo_new) %>%
filter(!is.na(mo))
microorganisms.codes %>% filter(!mo %in% MOs$mo)
# and for microorganisms.translation:
microorganisms.translation <- AMR:::microorganisms.translation %>%
select(mo = mo_new) %>%
left_join(AMR::microorganisms %>%
transmute(mo, kingdom_fullname = paste(kingdom, fullname)),
by = "kingdom_fullname", suffix = c("_new", "_old")) %>%
select(mo_old, mo_new)
left_join(MOs %>%
mutate(kingdom_fullname = paste(kingdom, fullname)) %>%
left_join(AMR::microorganisms %>%
transmute(mo, kingdom_fullname = paste(kingdom, fullname)),
by = "kingdom_fullname", suffix = c("_new", "_old")) %>%
select(mo_old, mo_new),
by = c("mo" = "mo_old")) %>%
select(code, mo = mo_new) %>%
filter(!is.na(mo))
microorganisms.codes %>% filter(!mo %in% MOs$mo)
# arrange
MOs <- MOs %>% arrange(fullname)
MOs.old <- MOs.old %>% arrange(fullname)
microorganisms.codes <- microorganisms.codes %>% arrange(code)
# transform
MOs <- as.data.frame(MOs, stringsAsFactors = FALSE)
MOs.old <- as.data.frame(MOs.old, stringsAsFactors = FALSE)
microorganisms.codes <- as.data.frame(microorganisms.codes, stringsAsFactors = FALSE)
class(MOs$mo) <- "mo"
class(microorganisms.codes$mo) <- "mo"
MOs$col_id <- as.integer(MOs$col_id)
MOs.old$col_id <- as.integer(MOs.old$col_id)
MOs.old$col_id_new <- as.integer(MOs.old$col_id_new)
# SAVE
### for other server
saveRDS(MOs, "microorganisms.rds")
saveRDS(MOs.old, "microorganisms.old.rds")
saveRDS(microorganisms.codes, "microorganisms.codes.rds")
### for same server
microorganisms <- MOs
microorganisms.old <- MOs.old
microorganisms.translation <- old_new %>% select(mo_old, mo_new) %>% as.data.frame()
class(microorganisms.translation$mo_old) <- "mo"
class(microorganisms.translation$mo_new) <- "mo"
# on the server, do:
usethis::use_data(microorganisms, overwrite = TRUE, version = 2)
usethis::use_data(microorganisms.old, overwrite = TRUE, version = 2)
usethis::use_data(microorganisms.codes, overwrite = TRUE, version = 2)
saveRDS(microorganisms.translation, file = "data-raw/microorganisms.translation.rds", version = 2) # this one will be covered in data-raw/internals.R
rm(microorganisms)
rm(microorganisms.old)
rm(microorganisms.codes)
rm(microorganisms.translation)
devtools::load_all(".")
# TO DO AFTER THIS
# * Rerun data-raw/reproduction_of_rsi_translation.R
# * Run unit tests
# microorganisms.codes <- AMR::microorganisms.codes %>%
# left_join(MOs %>%
# mutate(kingdom_fullname = paste(kingdom, fullname)) %>%
# left_join(AMR::microorganisms %>%
# transmute(mo, kingdom_fullname = paste(kingdom, fullname)),
# by = "kingdom_fullname", suffix = c("_new", "_old")) %>%
# select(mo_old, mo_new),
# by = c("mo" = "mo_old")) %>%
# select(code, mo = mo_new) %>%
# filter(!is.na(mo))
# microorganisms.codes %>% filter(!mo %in% MOs$mo)
# # and for microorganisms.translation:
# microorganisms.translation <- AMR:::microorganisms.translation %>%
# select(mo = mo_new) %>%
# left_join(AMR::microorganisms %>%
# transmute(mo, kingdom_fullname = paste(kingdom, fullname)),
# by = "kingdom_fullname", suffix = c("_new", "_old")) %>%
# select(mo_old, mo_new)
# left_join(MOs %>%
# mutate(kingdom_fullname = paste(kingdom, fullname)) %>%
# left_join(AMR::microorganisms %>%
# transmute(mo, kingdom_fullname = paste(kingdom, fullname)),
# by = "kingdom_fullname", suffix = c("_new", "_old")) %>%
# select(mo_old, mo_new),
# by = c("mo" = "mo_old")) %>%
# select(code, mo = mo_new) %>%
# filter(!is.na(mo))
# microorganisms.codes %>% filter(!mo %in% MOs$mo)

File diff suppressed because it is too large Load Diff

View File

@ -22,16 +22,17 @@
library(AMR)
library(tidyverse)
# go to https://www.nictiz.nl/standaardisatie/terminologiecentrum/referentielijsten/micro-organismen/
# go to https://www.nictiz.nl/standaardisatie/terminologiecentrum/referentielijsten/micro-organismen/ (Ctrl/Cmd + A in table)
# read the table from clipboard
snomed <- clipr::read_clip_tbl()
# snomed <- snomed %>%
# transmute(fullname = trimws(gsub("^genus", "", Omschrijving, ignore.case = TRUE)),
# snomed = as.integer(Id))
snomed <- clipr::read_clip_tbl(skip = 2)
snomed <- snomed %>%
transmute(fullname = mo_name(Omschrijving),
dplyr::filter(gsub("(^genus |^familie |^stam |ss.? |subsp.? |subspecies )", "",
Omschrijving.,
ignore.case = TRUE) %in% c(microorganisms$fullname,
microorganisms.old$fullname)) %>%
dplyr::transmute(fullname = mo_name(Omschrijving.),
snomed = as.integer(Id)) %>%
filter(!fullname %like% "unknown")
dplyr::filter(!fullname %like% "unknown")
snomed_trans <- snomed %>%
group_by(fullname) %>%
mutate(snomed_list = list(snomed)) %>%
@ -51,59 +52,59 @@ rm(microorganisms)
# OLD ---------------------------------------------------------------------
baseUrl <- 'https://browser.ihtsdotools.org/snowstorm/snomed-ct'
edition <- 'MAIN'
version <- '2019-07-31'
microorganisms.snomed <- data.frame(conceptid = character(0),
mo = character(0),
stringsAsFactors = FALSE)
microorganisms$snomed <- ""
# for (i in 1:50) {
for (i in 1:1000) {
if (i %% 10 == 0) {
cat(paste0(i, " - ", cleaner::percentage(i / nrow(microorganisms)), "\n"))
}
mo_data <- microorganisms %>%
filter(mo == microorganisms$mo[i]) %>%
as.list()
if (!mo_data$rank %in% c("genus", "species")) {
next
}
searchTerm <- paste0(
ifelse(mo_data$rank == "genus", "Genus ", ""),
mo_data$fullname,
" (organism)")
url <- paste0(baseUrl, '/browser/',
edition, '/',
version,
'/descriptions?term=', curl::curl_escape(searchTerm),
'&mode=fullText&activeFilter=true&limit=', 250)
results <- url %>%
httr::GET() %>%
httr::content(type = "text", encoding = "UTF-8") %>%
jsonlite::fromJSON(flatten = TRUE) %>%
.$items
if (NROW(results) == 0) {
next
} else {
message("Adding ", crayon::italic(mo_data$fullname))
}
tryCatch(
microorganisms$snomed[i] <- results %>% filter(term == searchTerm) %>% pull(concept.conceptId),
error = function(e) invisible()
)
if (nrow(results) > 1) {
microorganisms.snomed <- microorganisms.snomed %>%
bind_rows(tibble(conceptid = results %>% filter(term != searchTerm) %>% pull(concept.conceptId) %>% unique(),
mo = as.character(mo_data$mo)))
}
}
# baseUrl <- 'https://browser.ihtsdotools.org/snowstorm/snomed-ct'
# edition <- 'MAIN'
# version <- '2019-07-31'
#
# microorganisms.snomed <- data.frame(conceptid = character(0),
# mo = character(0),
# stringsAsFactors = FALSE)
# microorganisms$snomed <- ""
#
# # for (i in 1:50) {
# for (i in 1:1000) {
#
# if (i %% 10 == 0) {
# cat(paste0(i, " - ", cleaner::percentage(i / nrow(microorganisms)), "\n"))
# }
#
# mo_data <- microorganisms %>%
# filter(mo == microorganisms$mo[i]) %>%
# as.list()
#
# if (!mo_data$rank %in% c("genus", "species")) {
# next
# }
#
# searchTerm <- paste0(
# ifelse(mo_data$rank == "genus", "Genus ", ""),
# mo_data$fullname,
# " (organism)")
#
# url <- paste0(baseUrl, '/browser/',
# edition, '/',
# version,
# '/descriptions?term=', curl::curl_escape(searchTerm),
# '&mode=fullText&activeFilter=true&limit=', 250)
# results <- url %>%
# httr::GET() %>%
# httr::content(type = "text", encoding = "UTF-8") %>%
# jsonlite::fromJSON(flatten = TRUE) %>%
# .$items
# if (NROW(results) == 0) {
# next
# } else {
# message("Adding ", crayon::italic(mo_data$fullname))
# }
#
# tryCatch(
# microorganisms$snomed[i] <- results %>% filter(term == searchTerm) %>% pull(concept.conceptId),
# error = function(e) invisible()
# )
#
# if (nrow(results) > 1) {
# microorganisms.snomed <- microorganisms.snomed %>%
# bind_rows(tibble(conceptid = results %>% filter(term != searchTerm) %>% pull(concept.conceptId) %>% unique(),
# mo = as.character(mo_data$mo)))
# }
# }

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -81,7 +81,7 @@
</button>
<span class="navbar-brand">
<a class="navbar-link" href="https://msberends.gitlab.io/AMR/index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.1.0.9019</span>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.1.0.9020</span>
</span>
</div>

View File

@ -81,7 +81,7 @@
</button>
<span class="navbar-brand">
<a class="navbar-link" href="index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.1.0.9019</span>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.1.0.9020</span>
</span>
</div>

View File

@ -81,7 +81,7 @@
</button>
<span class="navbar-brand">
<a class="navbar-link" href="../index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.1.0.9019</span>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.1.0.9020</span>
</span>
</div>

View File

@ -81,7 +81,7 @@
</button>
<span class="navbar-brand">
<a class="navbar-link" href="index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.1.0.9019</span>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.1.0.9020</span>
</span>
</div>

View File

@ -43,7 +43,7 @@
</button>
<span class="navbar-brand">
<a class="navbar-link" href="index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.1.0.9019</span>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.1.0.9020</span>
</span>
</div>
@ -221,7 +221,7 @@ A methods paper about this package has been preprinted at bioRxiv (DOI: 10.1101/
<a href="#what-can-you-do-with-this-package" class="anchor"></a>What can you do with this package?</h3>
<p>This package can be used for:</p>
<ul>
<li>Reference for the taxonomy of microorganisms, since the package contains all microbial (sub)species from the <a href="http://www.catalogueoflife.org">Catalogue of Life</a> (<a href="./reference/mo_property.html">manual</a>)</li>
<li>Reference for the taxonomy of microorganisms, since the package contains all microbial (sub)species from the <a href="http://www.catalogueoflife.org">Catalogue of Life</a> and <a href="https://lpsn.dsmz.de">List of Prokaryotic names with Standing in Nomenclature</a> (<a href="./reference/mo_property.html">manual</a>)</li>
<li>Interpreting raw MIC and disk diffusion values, based on the latest CLSI or EUCAST guidelines (<a href="./reference/as.rsi.html">manual</a>)</li>
<li>Determining first isolates to be used for AMR analysis (<a href="./reference/first_isolate.html">manual</a>)</li>
<li>Calculating antimicrobial resistance (<a href="./articles/AMR.html">tutorial</a>)</li>
@ -268,7 +268,7 @@ A methods paper about this package has been preprinted at bioRxiv (DOI: 10.1101/
<div id="microbial-taxonomic-reference-data" class="section level4">
<h4 class="hasAnchor">
<a href="#microbial-taxonomic-reference-data" class="anchor"></a>Microbial (taxonomic) reference data</h4>
<p>This package contains the complete taxonomic tree of almost all ~70,000 microorganisms from the authoritative and comprehensive Catalogue of Life (CoL, <a href="http://www.catalogueoflife.org">www.catalogueoflife.org</a>), supplemented by data from the Deutsche Sammlung von Mikroorganismen und Zellkulturen (DSMZ, <a href="https://www.dsmz.de">www.dsmz.de</a>). This supplementation is needed until the <a href="https://github.com/Sp2000/colplus">CoL+ project</a> is finished, which we await. With <code><a href="reference/catalogue_of_life_version.html">catalogue_of_life_version()</a></code> can be checked which version of the CoL is included in this package.</p>
<p>This package contains the complete taxonomic tree of almost all ~70,000 microorganisms from the authoritative and comprehensive Catalogue of Life (CoL, <a href="http://www.catalogueoflife.org">www.catalogueoflife.org</a>), supplemented by data from the List of Prokaryotic names with Standing in Nomenclature (LPSN, <a href="https://lpsn.dsmz.de">lpsn.dsmz.de</a>). This supplementation is needed until the <a href="https://github.com/Sp2000/colplus">CoL+ project</a> is finished, which we await. Use <code><a href="reference/catalogue_of_life_version.html">catalogue_of_life_version()</a></code> to check which version of the CoL is included in this package.</p>
<p>Read more about the data from the Catalogue of Life <a href="./reference/catalogue_of_life.html">in our manual</a>.</p>
</div>
<div id="antimicrobial-reference-data" class="section level4">

View File

@ -81,7 +81,7 @@
</button>
<span class="navbar-brand">
<a class="navbar-link" href="../index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.1.0.9019</span>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.1.0.9020</span>
</span>
</div>
@ -229,13 +229,13 @@
<small>Source: <a href='https://gitlab.com/msberends/AMR/blob/master/NEWS.md'><code>NEWS.md</code></a></small>
</div>
<div id="amr-1-1-0-9019" class="section level1">
<h1 class="page-header" data-toc-text="1.1.0.9019">
<a href="#amr-1-1-0-9019" class="anchor"></a>AMR 1.1.0.9019<small> Unreleased </small>
<div id="amr-1-1-0-9020" class="section level1">
<h1 class="page-header" data-toc-text="1.1.0.9020">
<a href="#amr-1-1-0-9020" class="anchor"></a>AMR 1.1.0.9020<small> Unreleased </small>
</h1>
<div id="last-updated-25-may-2020" class="section level2">
<div id="last-updated-27-may-2020" class="section level2">
<h2 class="hasAnchor">
<a href="#last-updated-25-may-2020" class="anchor"></a><small>Last updated: 25-May-2020</small>
<a href="#last-updated-27-may-2020" class="anchor"></a><small>Last updated: 27-May-2020</small>
</h2>
<div id="breaking" class="section level3">
<h3 class="hasAnchor">
@ -258,9 +258,22 @@ Negative effects of this change are:
<h3 class="hasAnchor">
<a href="#changed" class="anchor"></a>Changed</h3>
<ul>
<li>Taxonomy:
<ul>
<li>Updated the taxonomy of microorganisms to May 2020, using the Catalogue of Life (CoL), the Global Biodiversity Information Facility (GBIF) and the List of Prokaryotic names with Standing in Nomenclature (LPSN, hosted by DSMZ since February 2020)</li>
<li>Removed the Catalogue of Life IDs (like 776351), since they now work with a species ID (hexadecimal string)</li>
</ul>
</li>
<li>EUCAST rules:
<ul>
<li>The <code><a href="../reference/eucast_rules.html">eucast_rules()</a></code> function no longer applies “other” rules at default that are made available by this package (like setting ampicillin = R when ampicillin + enzym inhibitor = R). The default input value for <code>rules</code> is now <code><a href="https://rdrr.io/r/base/c.html">c("breakpoints", "expert")</a></code> instead of <code>"all"</code>, but this can be changed by the user. To return to the old behaviour, set <code><a href="https://rdrr.io/r/base/options.html">options(AMR.eucast_rules = "all")</a></code>.</li>
<li>The <code><a href="../reference/eucast_rules.html">eucast_rules()</a></code> function no longer applies “other” rules at default that are made available by this package (like setting ampicillin = R when ampicillin + enzyme inhibitor = R). The default input value for <code>rules</code> is now <code><a href="https://rdrr.io/r/base/c.html">c("breakpoints", "expert")</a></code> instead of <code>"all"</code>, but this can be changed by the user. To return to the old behaviour, set <code><a href="https://rdrr.io/r/base/options.html">options(AMR.eucast_rules = "all")</a></code>.</li>
<li>Fixed a bug where checking antimicrobial results in the original data were not regarded as valid R/SI values</li>
<li>The “other” rules now apply the following two rules to all drug combinations in the <code>antibiotics</code> data set:
<ol>
<li>A drug <strong>with</strong> enzyme inhibitor will be set to S if the drug <strong>without</strong> enzyme inhibitor is S</li>
<li>A drug <strong>without</strong> enzyme inhibitor will be set to R if the drug <strong>with</strong> enzyme inhibitor is R</li>
</ol>
This works for all drug combinations, such as ampicillin/sulbactam, ceftazidime/avibactam, trimethoprim/sulfamethoxazole, etc.</li>
<li>Added official drug names to verbose output of <code><a href="../reference/eucast_rules.html">eucast_rules()</a></code>
</li>
</ul>
@ -271,6 +284,7 @@ Negative effects of this change are:
<li>Small fix for some text input that could not be coerced as valid MIC values</li>
<li>Fix for interpretation of generic CLSI interpretation rules (thanks to Anthony Underwood)</li>
<li>Fix for <code><a href="../reference/mo_source.html">set_mo_source()</a></code> to make sure that column <code>mo</code> will always be the second column</li>
<li>Added abbreviation “cfsc” for Cefoxitin and “cfav” for Ceftazidime/avibactam</li>
</ul>
</div>
<div id="other" class="section level3">

View File

@ -10,7 +10,7 @@ articles:
WHONET: WHONET.html
benchmarks: benchmarks.html
resistance_predict: resistance_predict.html
last_built: 2020-05-24T22:55Z
last_built: 2020-05-27T14:37Z
urls:
reference: https://msberends.gitlab.io/AMR/reference
article: https://msberends.gitlab.io/AMR/articles

View File

@ -82,7 +82,7 @@
</button>
<span class="navbar-brand">
<a class="navbar-link" href="../index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.1.0.9019</span>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.1.0.9020</span>
</span>
</div>

View File

@ -82,7 +82,7 @@
</button>
<span class="navbar-brand">
<a class="navbar-link" href="../index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.1.0.9019</span>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.1.0.9020</span>
</span>
</div>

View File

@ -82,7 +82,7 @@
</button>
<span class="navbar-brand">
<a class="navbar-link" href="../index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.1.0.9019</span>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.1.0.9020</span>
</span>
</div>
@ -399,7 +399,6 @@ This package contains the complete taxonomic tree of almost all microorganisms (
<span class='fu'>as.mo</span>(<span class='st'>"MRSA"</span>) <span class='co'># Methicillin Resistant S. aureus</span>
<span class='fu'>as.mo</span>(<span class='st'>"VISA"</span>) <span class='co'># Vancomycin Intermediate S. aureus</span>
<span class='fu'>as.mo</span>(<span class='st'>"VRSA"</span>) <span class='co'># Vancomycin Resistant S. aureus</span>
<span class='fu'>as.mo</span>(<span class='fl'>22242419</span>) <span class='co'># Catalogue of Life ID</span>
<span class='fu'>as.mo</span>(<span class='fl'>115329001</span>) <span class='co'># SNOMED CT code</span>
<span class='co'># Dyslexia is no problem - these all work:</span>

View File

@ -82,7 +82,7 @@
</button>
<span class="navbar-brand">
<a class="navbar-link" href="../index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.1.0.9019</span>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.1.0.9020</span>
</span>
</div>

View File

@ -82,7 +82,7 @@
</button>
<span class="navbar-brand">
<a class="navbar-link" href="../index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.1.0</span>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.1.0.9020</span>
</span>
</div>
@ -275,9 +275,9 @@ Function <code><a href='as.mo.html'>as.mo()</a></code> to use the data for intel
<span class='co'># Get a note when a species was renamed</span>
<span class='fu'><a href='mo_property.html'>mo_shortname</a></span>(<span class='st'>"Chlamydia psittaci"</span>)
<span class='co'># Note: 'Chlamydia psittaci' (Page, 1968) was renamed</span>
<span class='co'># 'Chlamydophila psittaci' (Everett et al., 1999)</span>
<span class='fu'><a href='mo_property.html'>mo_shortname</a></span>(<span class='st'>"Chlamydophila psittaci"</span>)
<span class='co'># Note: 'Chlamydophila psittaci' (Everett et al., 1999) was renamed back to</span>
<span class='co'># 'Chlamydia psittaci' (Page, 1968)</span>
<span class='co'># [1] "C. psittaci"</span>
<span class='co'># Get any property from the entire taxonomic tree for all included species</span>
@ -295,9 +295,9 @@ Function <code><a href='as.mo.html'>as.mo()</a></code> to use the data for intel
<span class='co'># Do not get mistaken - this package is about microorganisms</span>
<span class='fu'><a href='mo_property.html'>mo_kingdom</a></span>(<span class='st'>"C. elegans"</span>)
<span class='co'># [1] "Bacteria" # Bacteria?!</span>
<span class='co'># [1] "Fungi" # Fungi?!</span>
<span class='fu'><a href='mo_property.html'>mo_name</a></span>(<span class='st'>"C. elegans"</span>)
<span class='co'># [1] "Chroococcus limneticus elegans" # Because a microorganism was found</span></pre>
<span class='co'># [1] "Cladosporium elegans" # Because a microorganism was found</span></pre>
</div>
<div class="col-md-3 hidden-xs hidden-sm" id="pkgdown-sidebar">
<nav id="toc" data-toggle="toc" class="sticky-top">
@ -313,7 +313,7 @@ Function <code><a href='as.mo.html'>as.mo()</a></code> to use the data for intel
</div>
<div class="pkgdown">
<p>Site built with <a href="https://pkgdown.r-lib.org/">pkgdown</a> 1.5.0.</p>
<p>Site built with <a href="https://pkgdown.r-lib.org/">pkgdown</a> 1.5.1.</p>
</div>
</footer>

View File

@ -81,7 +81,7 @@
</button>
<span class="navbar-brand">
<a class="navbar-link" href="../index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.1.0.9019</span>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.1.0.9020</span>
</span>
</div>
@ -472,7 +472,7 @@
<td>
<p><code><a href="microorganisms.html">microorganisms</a></code> </p>
</td>
<td><p>Data set with 69,447 microorganisms</p></td>
<td><p>Data set with 67,107 microorganisms</p></td>
</tr><tr>
<td>
@ -502,7 +502,7 @@
<td>
<p><code><a href="microorganisms.codes.html">microorganisms.codes</a></code> </p>
</td>
<td><p>Translation table for common microorganism codes</p></td>
<td><p>Translation table with 5,582 common microorganism codes</p></td>
</tr><tr>
<td>

View File

@ -6,7 +6,7 @@
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Translation table for common microorganism codes — microorganisms.codes • AMR (for R)</title>
<title>Translation table with 5,582 common microorganism codes — microorganisms.codes • AMR (for R)</title>
<!-- favicons -->
<link rel="icon" type="image/png" sizes="16x16" href="../favicon-16x16.png">
@ -48,7 +48,7 @@
<link href="../extra.css" rel="stylesheet">
<script src="../extra.js"></script>
<meta property="og:title" content="Translation table for common microorganism codes — microorganisms.codes" />
<meta property="og:title" content="Translation table with 5,582 common microorganism codes — microorganisms.codes" />
<meta property="og:description" content="A data set containing commonly used codes for microorganisms, from laboratory systems and WHONET. Define your own with set_mo_source(). They will all be searched when using as.mo() and consequently all the mo_* functions." />
<meta property="og:image" content="https://msberends.gitlab.io/AMR/logo.svg" />
@ -82,7 +82,7 @@
</button>
<span class="navbar-brand">
<a class="navbar-link" href="../index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.1.0</span>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.1.0.9020</span>
</span>
</div>
@ -226,7 +226,7 @@
<div class="row">
<div class="col-md-9 contents">
<div class="page-header">
<h1>Translation table for common microorganism codes</h1>
<h1>Translation table with 5,582 common microorganism codes</h1>
<small class="dont-index">Source: <a href='https://gitlab.com/msberends/AMR/blob/master/R/data.R'><code>R/data.R</code></a></small>
<div class="hidden name"><code>microorganisms.codes.Rd</code></div>
</div>
@ -240,7 +240,7 @@
<h2 class="hasAnchor" id="format"><a class="anchor" href="#format"></a>Format</h2>
<p>A <code><a href='https://rdrr.io/r/base/data.frame.html'>data.frame</a></code> with 5,585 observations and 2 variables:</p><ul>
<p>A <code><a href='https://rdrr.io/r/base/data.frame.html'>data.frame</a></code> with 5,582 observations and 2 variables:</p><ul>
<li><p><code>code</code><br /> Commonly used code of a microorganism</p></li>
<li><p><code>mo</code><br /> ID of the microorganism in the <a href='microorganisms.html'>microorganisms</a> data set</p></li>
</ul>
@ -276,7 +276,7 @@ This package contains the complete taxonomic tree of almost all microorganisms (
</div>
<div class="pkgdown">
<p>Site built with <a href="https://pkgdown.r-lib.org/">pkgdown</a> 1.5.0.</p>
<p>Site built with <a href="https://pkgdown.r-lib.org/">pkgdown</a> 1.5.1.</p>
</div>
</footer>

View File

@ -6,7 +6,7 @@
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Data set with 69,447 microorganisms — microorganisms • AMR (for R)</title>
<title>Data set with 67,107 microorganisms — microorganisms • AMR (for R)</title>
<!-- favicons -->
<link rel="icon" type="image/png" sizes="16x16" href="../favicon-16x16.png">
@ -48,7 +48,7 @@
<link href="../extra.css" rel="stylesheet">
<script src="../extra.js"></script>
<meta property="og:title" content="Data set with 69,447 microorganisms — microorganisms" />
<meta property="og:title" content="Data set with 67,107 microorganisms — microorganisms" />
<meta property="og:description" content="A data set containing the microbial taxonomy of six kingdoms from the Catalogue of Life. MO codes can be looked up using as.mo()." />
<meta property="og:image" content="https://msberends.gitlab.io/AMR/logo.svg" />
@ -82,7 +82,7 @@
</button>
<span class="navbar-brand">
<a class="navbar-link" href="../index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.1.0.9004</span>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.1.0.9020</span>
</span>
</div>
@ -226,7 +226,7 @@
<div class="row">
<div class="col-md-9 contents">
<div class="page-header">
<h1>Data set with 69,447 microorganisms</h1>
<h1>Data set with 67,107 microorganisms</h1>
<small class="dont-index">Source: <a href='https://gitlab.com/msberends/AMR/blob/master/R/data.R'><code>R/data.R</code></a></small>
<div class="hidden name"><code>microorganisms.Rd</code></div>
</div>
@ -240,9 +240,8 @@
<h2 class="hasAnchor" id="format"><a class="anchor" href="#format"></a>Format</h2>
<p>A <code><a href='https://rdrr.io/r/base/data.frame.html'>data.frame</a></code> with 69,447 observations and 17 variables:</p><ul>
<p>A <code><a href='https://rdrr.io/r/base/data.frame.html'>data.frame</a></code> with 67,107 observations and 16 variables:</p><ul>
<li><p><code>mo</code><br /> ID of microorganism as used by this package</p></li>
<li><p><code>col_id</code><br /> Catalogue of Life ID</p></li>
<li><p><code>fullname</code><br /> Full name, like <code>"Escherichia coli"</code></p></li>
<li><p><code>kingdom</code>, <code>phylum</code>, <code>class</code>, <code>order</code>, <code>family</code>, <code>genus</code>, <code>species</code>, <code>subspecies</code><br /> Taxonomic rank of the microorganism</p></li>
<li><p><code>rank</code><br /> Text of the taxonomic rank of the microorganism, like <code>"species"</code> or <code>"genus"</code></p></li>
@ -256,6 +255,7 @@
<h2 class="hasAnchor" id="source"><a class="anchor" href="#source"></a>Source</h2>
<p>Catalogue of Life: Annual Checklist (public online taxonomic database), <a href='http://www.catalogueoflife.org'>http://www.catalogueoflife.org</a> (check included annual version with <code><a href='catalogue_of_life_version.html'>catalogue_of_life_version()</a></code>).</p>
<p>Parte, A.C. (2018). LPSN — List of Prokaryotic names with Standing in Nomenclature (bacterio.net), 20 years on. International Journal of Systematic and Evolutionary Microbiology, 68, 1825-1829; doi: 10.1099/ijsem.0.002786</p>
<p>Leibniz Institute DSMZ-German Collection of Microorganisms and Cell Cultures, Germany, Prokaryotic Nomenclature Up-to-Date, <a href='https://www.dsmz.de/services/online-tools/prokaryotic-nomenclature-up-to-date'>https://www.dsmz.de/services/online-tools/prokaryotic-nomenclature-up-to-date</a> (check included version with <code><a href='catalogue_of_life_version.html'>catalogue_of_life_version()</a></code>).</p>
<h2 class="hasAnchor" id="details"><a class="anchor" href="#details"></a>Details</h2>
@ -266,7 +266,7 @@
<li><p>1 entry of <em>Blastocystis</em> (<em>Blastocystis hominis</em>), although it officially does not exist (Noel <em>et al.</em> 2005, PMID 15634993)</p></li>
<li><p>5 other 'undefined' entries (unknown, unknown Gram negatives, unknown Gram positives, unknown yeast and unknown fungus)</p></li>
<li><p>6 families under the Enterobacterales order, according to Adeolu <em>et al.</em> (2016, PMID 27620848), that are not (yet) in the Catalogue of Life</p></li>
<li><p>12,600 species from the DSMZ (Deutsche Sammlung von Mikroorganismen und Zellkulturen) since the DSMZ contain the latest taxonomic information based on recent publications</p></li>
<li><p>7,368 species from the DSMZ (Deutsche Sammlung von Mikroorganismen und Zellkulturen) since the DSMZ contains the latest taxonomic information based on recent publications</p></li>
</ul>
<h3>Direct download</h3>

View File

@ -82,7 +82,7 @@
</button>
<span class="navbar-brand">
<a class="navbar-link" href="../index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.1.0</span>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.1.0.9020</span>
</span>
</div>
@ -240,10 +240,9 @@
<h2 class="hasAnchor" id="format"><a class="anchor" href="#format"></a>Format</h2>
<p>A <code><a href='https://rdrr.io/r/base/data.frame.html'>data.frame</a></code> with 24,253 observations and 5 variables:</p><ul>
<li><p><code>col_id</code><br /> Catalogue of Life ID that was originally given</p></li>
<li><p><code>col_id_new</code><br /> New Catalogue of Life ID that responds to an entry in the <a href='microorganisms.html'>microorganisms</a> data set</p></li>
<p>A <code><a href='https://rdrr.io/r/base/data.frame.html'>data.frame</a></code> with 12,709 observations and 4 variables:</p><ul>
<li><p><code>fullname</code><br /> Old full taxonomic name of the microorganism</p></li>
<li><p><code>fullname_new</code><br /> New full taxonomic name of the microorganism</p></li>
<li><p><code>ref</code><br /> Author(s) and year of the related scientific publication</p></li>
<li><p><code>prevalence</code><br /> Prevalence of the microorganism, see <code><a href='as.mo.html'>as.mo()</a></code></p></li>
</ul>
@ -251,6 +250,7 @@
<h2 class="hasAnchor" id="source"><a class="anchor" href="#source"></a>Source</h2>
<p>Catalogue of Life: Annual Checklist (public online taxonomic database), <a href='http://www.catalogueoflife.org'>http://www.catalogueoflife.org</a> (check included annual version with <code><a href='catalogue_of_life_version.html'>catalogue_of_life_version()</a></code>).</p>
<p>Parte, A.C. (2018). LPSN — List of Prokaryotic names with Standing in Nomenclature (bacterio.net), 20 years on. International Journal of Systematic and Evolutionary Microbiology, 68, 1825-1829; doi: 10.1099/ijsem.0.002786</p>
<h2 class="hasAnchor" id="catalogue-of-life"><a class="anchor" href="#catalogue-of-life"></a>Catalogue of Life</h2>
@ -282,7 +282,7 @@ This package contains the complete taxonomic tree of almost all microorganisms (
</div>
<div class="pkgdown">
<p>Site built with <a href="https://pkgdown.r-lib.org/">pkgdown</a> 1.5.0.</p>
<p>Site built with <a href="https://pkgdown.r-lib.org/">pkgdown</a> 1.5.1.</p>
</div>
</footer>

View File

@ -36,7 +36,7 @@ The development of this package is part of, related to, or made possible by:
This package can be used for:
* Reference for the taxonomy of microorganisms, since the package contains all microbial (sub)species from the [Catalogue of Life](http://www.catalogueoflife.org) ([manual](./reference/mo_property.html))
* Reference for the taxonomy of microorganisms, since the package contains all microbial (sub)species from the [Catalogue of Life](http://www.catalogueoflife.org) and [List of Prokaryotic names with Standing in Nomenclature](https://lpsn.dsmz.de) ([manual](./reference/mo_property.html))
* Interpreting raw MIC and disk diffusion values, based on the latest CLSI or EUCAST guidelines ([manual](./reference/as.rsi.html))
* Determining first isolates to be used for AMR analysis ([manual](./reference/first_isolate.html))
* Calculating antimicrobial resistance ([tutorial](./articles/AMR.html))
@ -82,7 +82,7 @@ To find out how to conduct AMR analysis, please [continue reading here to get st
#### Microbial (taxonomic) reference data
This package contains the complete taxonomic tree of almost all ~70,000 microorganisms from the authoritative and comprehensive Catalogue of Life (CoL, [www.catalogueoflife.org](http://www.catalogueoflife.org)), supplemented by data from the Deutsche Sammlung von Mikroorganismen und Zellkulturen (DSMZ, [www.dsmz.de](https://www.dsmz.de)). This supplementation is needed until the [CoL+ project](https://github.com/Sp2000/colplus) is finished, which we await. With `catalogue_of_life_version()` can be checked which version of the CoL is included in this package.
This package contains the complete taxonomic tree of almost all ~70,000 microorganisms from the authoritative and comprehensive Catalogue of Life (CoL, [www.catalogueoflife.org](http://www.catalogueoflife.org)), supplemented by data from the List of Prokaryotic names with Standing in Nomenclature (LPSN, [lpsn.dsmz.de](https://lpsn.dsmz.de)). This supplementation is needed until the [CoL+ project](https://github.com/Sp2000/colplus) is finished, which we await. Use `catalogue_of_life_version()` to check which version of the CoL is included in this package.
Read more about which data from the Catalogue of Life is included [in our manual](./reference/catalogue_of_life.html).

View File

@ -166,7 +166,6 @@ as.mo("Zthafilokkoockus oureuz") # handles incorrect spelling
as.mo("MRSA") # Methicillin Resistant S. aureus
as.mo("VISA") # Vancomycin Intermediate S. aureus
as.mo("VRSA") # Vancomycin Resistant S. aureus
as.mo(22242419) # Catalogue of Life ID
as.mo(115329001) # SNOMED CT code
# Dyslexia is no problem - these all work:

View File

@ -42,9 +42,9 @@ catalogue_of_life_version()
# Get a note when a species was renamed
mo_shortname("Chlamydia psittaci")
# Note: 'Chlamydia psittaci' (Page, 1968) was renamed
# 'Chlamydophila psittaci' (Everett et al., 1999)
mo_shortname("Chlamydophila psittaci")
# Note: 'Chlamydophila psittaci' (Everett et al., 1999) was renamed back to
# 'Chlamydia psittaci' (Page, 1968)
# [1] "C. psittaci"
# Get any property from the entire taxonomic tree for all included species
@ -62,9 +62,9 @@ mo_ref("E. coli")
# Do not get mistaken - this package is about microorganisms
mo_kingdom("C. elegans")
# [1] "Bacteria" # Bacteria?!
# [1] "Fungi" # Fungi?!
mo_name("C. elegans")
# [1] "Chroococcus limneticus elegans" # Because a microorganism was found
# [1] "Cladosporium elegans" # Because a microorganism was found
}
\seealso{
Data set \link{microorganisms} for the actual data. \cr

View File

@ -3,12 +3,11 @@
\docType{data}
\name{microorganisms}
\alias{microorganisms}
\title{Data set with 69,447 microorganisms}
\title{Data set with 67,107 microorganisms}
\format{
A \code{\link{data.frame}} with 69,447 observations and 17 variables:
A \code{\link{data.frame}} with 67,107 observations and 16 variables:
\itemize{
\item \code{mo}\cr ID of microorganism as used by this package
\item \code{col_id}\cr Catalogue of Life ID
\item \code{fullname}\cr Full name, like \code{"Escherichia coli"}
\item \code{kingdom}, \code{phylum}, \code{class}, \code{order}, \code{family}, \code{genus}, \code{species}, \code{subspecies}\cr Taxonomic rank of the microorganism
\item \code{rank}\cr Text of the taxonomic rank of the microorganism, like \code{"species"} or \code{"genus"}
@ -22,6 +21,8 @@ A \code{\link{data.frame}} with 69,447 observations and 17 variables:
\source{
Catalogue of Life: Annual Checklist (public online taxonomic database), \url{http://www.catalogueoflife.org} (check included annual version with \code{\link[=catalogue_of_life_version]{catalogue_of_life_version()}}).
Parte, A.C. (2018). LPSN — List of Prokaryotic names with Standing in Nomenclature (bacterio.net), 20 years on. International Journal of Systematic and Evolutionary Microbiology, 68, 1825-1829; doi: 10.1099/ijsem.0.002786
Leibniz Institute DSMZ-German Collection of Microorganisms and Cell Cultures, Germany, Prokaryotic Nomenclature Up-to-Date, \url{https://www.dsmz.de/services/online-tools/prokaryotic-nomenclature-up-to-date} (check included version with \code{\link[=catalogue_of_life_version]{catalogue_of_life_version()}}).
}
\usage{
@ -39,7 +40,7 @@ Manually added were:
\item 1 entry of \emph{Blastocystis} (\emph{Blastocystis hominis}), although it officially does not exist (Noel \emph{et al.} 2005, PMID 15634993)
\item 5 other 'undefined' entries (unknown, unknown Gram negatives, unknown Gram positives, unknown yeast and unknown fungus)
\item 6 families under the Enterobacterales order, according to Adeolu \emph{et al.} (2016, PMID 27620848), that are not (yet) in the Catalogue of Life
\item 12,600 species from the DSMZ (Deutsche Sammlung von Mikroorganismen und Zellkulturen) since the DSMZ contain the latest taxonomic information based on recent publications
\item 7,368 species from the DSMZ (Deutsche Sammlung von Mikroorganismen und Zellkulturen) since the DSMZ contains the latest taxonomic information based on recent publications
}
\subsection{Direct download}{

View File

@ -3,9 +3,9 @@
\docType{data}
\name{microorganisms.codes}
\alias{microorganisms.codes}
\title{Translation table for common microorganism codes}
\title{Translation table with 5,582 common microorganism codes}
\format{
A \code{\link{data.frame}} with 5,585 observations and 2 variables:
A \code{\link{data.frame}} with 5,582 observations and 2 variables:
\itemize{
\item \code{code}\cr Commonly used code of a microorganism
\item \code{mo}\cr ID of the microorganism in the \link{microorganisms} data set

View File

@ -5,17 +5,18 @@
\alias{microorganisms.old}
\title{Data set with previously accepted taxonomic names}
\format{
A \code{\link{data.frame}} with 24,253 observations and 5 variables:
A \code{\link{data.frame}} with 12,709 observations and 4 variables:
\itemize{
\item \code{col_id}\cr Catalogue of Life ID that was originally given
\item \code{col_id_new}\cr New Catalogue of Life ID that responds to an entry in the \link{microorganisms} data set
\item \code{fullname}\cr Old full taxonomic name of the microorganism
\item \code{fullname_new}\cr New full taxonomic name of the microorganism
\item \code{ref}\cr Author(s) and year of the related scientific publication
\item \code{prevalence}\cr Prevalence of the microorganism, see \code{\link[=as.mo]{as.mo()}}
}
}
\source{
Catalogue of Life: Annual Checklist (public online taxonomic database), \url{http://www.catalogueoflife.org} (check included annual version with \code{\link[=catalogue_of_life_version]{catalogue_of_life_version()}}).
Parte, A.C. (2018). LPSN — List of Prokaryotic names with Standing in Nomenclature (bacterio.net), 20 years on. International Journal of Systematic and Evolutionary Microbiology, 68, 1825-1829; doi: 10.1099/ijsem.0.002786
}
\usage{
microorganisms.old

View File

@ -56,11 +56,11 @@ test_that("creation of data sets is valid", {
expect_lt(nrow(df[which(df$prevalence == 2), ]), nrow(df[which(df$prevalence == 3), ]))
expect_true(all(c("mo", "fullname",
"kingdom", "phylum", "class", "order", "family", "genus", "species", "subspecies",
"rank", "col_id", "species_id", "source", "ref", "prevalence",
"rank", "ref", "species_id", "source", "prevalence", "snomed",
"kingdom_index", "fullname_lower", "g_species") %in% colnames(df)))
olddf <- create_MO.old_lookup()
expect_true(all(c("col_id", "col_id_new", "fullname", "ref", "prevalence",
expect_true(all(c("fullname", "fullname_new", "ref", "prevalence",
"fullname_lower", "g_species") %in% colnames(olddf)))
old <- make_trans_tbl()

View File

@ -34,7 +34,6 @@ test_that("as.mo works", {
expect_equal(as.character(as.mo("Escherichia coli")), "B_ESCHR_COLI")
expect_equal(as.character(as.mo("Escherichia coli")), "B_ESCHR_COLI")
expect_equal(as.character(as.mo(22242416)), "B_ESCHR_COLI")
expect_equal(as.character(as.mo(112283007)), "B_ESCHR_COLI")
expect_equal(as.character(as.mo("Escherichia species")), "B_ESCHR")
expect_equal(as.character(as.mo("Escherichia")), "B_ESCHR")
@ -45,7 +44,7 @@ test_that("as.mo works", {
expect_equal(as.character(as.mo("Klebsiella")), "B_KLBSL")
expect_equal(as.character(as.mo("K. pneu rhino")), "B_KLBSL_PNMN_RHNS") # K. pneumoniae subspp. rhinoscleromatis
expect_equal(as.character(as.mo("Bartonella")), "B_BRTNL")
expect_equal(as.character(as.mo("C. difficile")), "B_CTRDM_DFFC")
expect_equal(as.character(as.mo("C. difficile")), "B_CRDDS_DFFC")
expect_equal(as.character(as.mo("L. pneumophila")), "B_LGNLL_PNMP")
expect_equal(as.character(as.mo("Strepto")), "B_STRPT")
expect_equal(as.character(as.mo("Streptococcus")), "B_STRPT") # not Peptostreptoccus
@ -99,11 +98,11 @@ test_that("as.mo works", {
# unprevalent MO
expect_identical(
as.character(
as.mo(c("burnod",
"B. nodosa",
"B nodosa",
"Burkholderia nodosa"))),
rep("B_BRKHL_NODS", 4))
as.mo(c("parnod",
"P. nodosa",
"P nodosa",
"Paraburkholderia nodosa"))),
rep("B_PRBRK_NODS", 4))
# empty values
expect_identical(as.character(as.mo(c("", NA, NaN))), rep(NA_character_, 3))
@ -239,7 +238,7 @@ test_that("as.mo works", {
# Salmonella (City) are all actually Salmonella enterica spp (City)
expect_equal(suppressWarnings(mo_name(c("Salmonella Goettingen", "Salmonella Typhimurium", "Salmonella Group A"))),
c("Salmonella enterica", "Salmonella typhimurium", "Salmonella"))
c("Salmonella enterica", "Salmonella enterica", "Salmonella"))
# no viruses
expect_equal(as.character(as.mo("Virus")), NA_character_)

View File

@ -93,8 +93,7 @@ test_that("mo_property works", {
expect_identical(suppressWarnings(mo_ref("Chlamydia psittaci")), "Page, 1968")
expect_identical(mo_ref("Chlamydophila psittaci"), "Everett et al., 1999")
expect_equal(mo_snomed("Escherichia coli"),
c(112283007, 116395006, 116396007, 103429008, 83285000, 116394005, 407166006, 457914007))
expect_equal(mo_snomed("Escherichia coli"), 112283007)
# old codes must throw a warning in mo_* family
expect_warning(mo_name(c("B_ESCHR_COL", "B_STPHY_AUR")))