1
0
mirror of https://github.com/msberends/AMR.git synced 2024-12-24 19:26:12 +01:00

(v1.1.0.9020) updated taxonomy

This commit is contained in:
dr. M.S. (Matthijs) Berends 2020-05-27 16:37:49 +02:00
parent ae1969b941
commit 86d44054f0
55 changed files with 68063 additions and 70233 deletions

View File

@ -1,6 +1,6 @@
Package: AMR Package: AMR
Version: 1.1.0.9019 Version: 1.1.0.9020
Date: 2020-05-25 Date: 2020-05-27
Title: Antimicrobial Resistance Analysis Title: Antimicrobial Resistance Analysis
Authors@R: c( Authors@R: c(
person(role = c("aut", "cre"), person(role = c("aut", "cre"),

16
NEWS.md
View File

@ -1,5 +1,5 @@
# AMR 1.1.0.9019 # AMR 1.1.0.9020
## <small>Last updated: 25-May-2020</small> ## <small>Last updated: 27-May-2020</small>
### Breaking ### Breaking
* Removed code dependency on all other R packages, making this package fully independent of the development process of others. This is a major code change, but will probably not be noticeable by most users. * Removed code dependency on all other R packages, making this package fully independent of the development process of others. This is a major code change, but will probably not be noticeable by most users.
@ -13,8 +13,17 @@
* For developers: classes `mo` and `ab` now both also inherit class `character`, to support any data transformation. This change invalidates code that checks for class length == 1. * For developers: classes `mo` and `ab` now both also inherit class `character`, to support any data transformation. This change invalidates code that checks for class length == 1.
### Changed ### Changed
* Taxonomy:
* Updated the taxonomy of microorganisms to May 2020, using the Catalogue of Life (CoL), the Global Biodiversity Information Facility (GBIF) and the List of Prokaryotic names with Standing in Nomenclature (LPSN, hosted by DSMZ since February 2020)
* Removed the Catalogue of Life IDs (like 776351), since the Catalogue of Life now works with a species ID (hexadecimal string)
* EUCAST rules: * EUCAST rules:
* The `eucast_rules()` function no longer applies "other" rules at default that are made available by this package (like setting ampicillin = R when ampicillin + enzym inhibitor = R). The default input value for `rules` is now `c("breakpoints", "expert")` instead of `"all"`, but this can be changed by the user. To return to the old behaviour, set `options(AMR.eucast_rules = "all")`. * The `eucast_rules()` function no longer applies "other" rules at default that are made available by this package (like setting ampicillin = R when ampicillin + enzyme inhibitor = R). The default input value for `rules` is now `c("breakpoints", "expert")` instead of `"all"`, but this can be changed by the user. To return to the old behaviour, set `options(AMR.eucast_rules = "all")`.
* Fixed a bug where antimicrobial results in the original data were not regarded as valid R/SI values
* For all drug combinations in the `antibiotics` data set, the "other" rules now apply these two rules:
1. A drug **with** enzyme inhibitor will be set to S if the drug **without** enzyme inhibitor is S
2. A drug **without** enzyme inhibitor will be set to R if the drug **with** enzyme inhibitor is R
This works for all drug combinations, such as ampicillin/sulbactam, ceftazidime/avibactam, trimethoprim/sulfamethoxazole, etc.
* Added official drug names to verbose output of `eucast_rules()` * Added official drug names to verbose output of `eucast_rules()`
* Added function `ab_url()` to return the direct URL of an antimicrobial agent from the official WHO website * Added function `ab_url()` to return the direct URL of an antimicrobial agent from the official WHO website
* Improvements for algorithm in `as.ab()`, so that e.g. `as.ab("ampi sul")` and `ab_name("ampi sul")` work * Improvements for algorithm in `as.ab()`, so that e.g. `as.ab("ampi sul")` and `ab_name("ampi sul")` work
@ -22,6 +31,7 @@
* Small fix for some text input that could not be coerced as valid MIC values * Small fix for some text input that could not be coerced as valid MIC values
* Fix for interpretation of generic CLSI interpretation rules (thanks to Anthony Underwood) * Fix for interpretation of generic CLSI interpretation rules (thanks to Anthony Underwood)
* Fix for `set_mo_source()` to make sure that column `mo` will always be the second column * Fix for `set_mo_source()` to make sure that column `mo` will always be the second column
* Added abbreviation "cfsc" for Cefoxitin and "cfav" for Ceftazidime/avibactam
### Other ### Other
* Removed previously deprecated function `p.symbol()` - it was replaced with `p_symbol()` * Removed previously deprecated function `p.symbol()` - it was replaced with `p_symbol()`

View File

@ -78,8 +78,7 @@ check_dataset_integrity <- function() {
check_microorganisms <- all(c("mo", "fullname", "kingdom", "phylum", check_microorganisms <- all(c("mo", "fullname", "kingdom", "phylum",
"class", "order", "family", "genus", "class", "order", "family", "genus",
"species", "subspecies", "rank", "species", "subspecies", "rank",
"col_id", "species_id", "source", "species_id", "source", "ref", "prevalence") %in% colnames(microorganisms),
"ref", "prevalence", "snomed") %in% colnames(microorganisms),
na.rm = TRUE) & NROW(microorganisms) == NROW(MO_lookup) na.rm = TRUE) & NROW(microorganisms) == NROW(MO_lookup)
check_antibiotics <- all(c("ab", "atc", "cid", "name", "group", check_antibiotics <- all(c("ab", "atc", "cid", "name", "group",
"atc_group1", "atc_group2", "abbreviations", "atc_group1", "atc_group2", "abbreviations",

2
R/ab.R
View File

@ -347,7 +347,7 @@ is.ab <- function(x) {
#' @export #' @export
#' @noRd #' @noRd
print.ab <- function(x, ...) { print.ab <- function(x, ...) {
cat("Class 'ab'\n") cat("Class <ab>\n")
print(as.character(x), quote = FALSE) print(as.character(x), quote = FALSE)
} }

View File

@ -50,9 +50,9 @@
#' #'
#' #'
#' # Get a note when a species was renamed #' # Get a note when a species was renamed
#' mo_shortname("Chlamydia psittaci") #' mo_shortname("Chlamydophila psittaci")
#' # Note: 'Chlamydia psittaci' (Page, 1968) was renamed #' # Note: 'Chlamydophila psittaci' (Everett et al., 1999) was renamed back to
#' # 'Chlamydophila psittaci' (Everett et al., 1999) #' # 'Chlamydia psittaci' (Page, 1968)
#' # [1] "C. psittaci" #' # [1] "C. psittaci"
#' #'
#' # Get any property from the entire taxonomic tree for all included species #' # Get any property from the entire taxonomic tree for all included species
@ -70,9 +70,9 @@
#' #'
#' # Do not get mistaken - this package is about microorganisms #' # Do not get mistaken - this package is about microorganisms
#' mo_kingdom("C. elegans") #' mo_kingdom("C. elegans")
#' # [1] "Bacteria" # Bacteria?! #' # [1] "Fungi" # Fungi?!
#' mo_name("C. elegans") #' mo_name("C. elegans")
#' # [1] "Chroococcus limneticus elegans" # Because a microorganism was found #' # [1] "Cladosporium elegans" # Because a microorganism was found
NULL NULL
#' Version info of included Catalogue of Life #' Version info of included Catalogue of Life

View File

@ -82,7 +82,6 @@
#' @inheritSection catalogue_of_life Catalogue of Life #' @inheritSection catalogue_of_life Catalogue of Life
#' @format A [`data.frame`] with `r format(nrow(microorganisms), big.mark = ",")` observations and `r ncol(microorganisms)` variables: #' @format A [`data.frame`] with `r format(nrow(microorganisms), big.mark = ",")` observations and `r ncol(microorganisms)` variables:
#' - `mo`\cr ID of microorganism as used by this package #' - `mo`\cr ID of microorganism as used by this package
#' - `col_id`\cr Catalogue of Life ID
#' - `fullname`\cr Full name, like `"Escherichia coli"` #' - `fullname`\cr Full name, like `"Escherichia coli"`
#' - `kingdom`, `phylum`, `class`, `order`, `family`, `genus`, `species`, `subspecies`\cr Taxonomic rank of the microorganism #' - `kingdom`, `phylum`, `class`, `order`, `family`, `genus`, `species`, `subspecies`\cr Taxonomic rank of the microorganism
#' - `rank`\cr Text of the taxonomic rank of the microorganism, like `"species"` or `"genus"` #' - `rank`\cr Text of the taxonomic rank of the microorganism, like `"species"` or `"genus"`
@ -113,6 +112,8 @@
#' #'
#' From: <https://www.dsmz.de/services/online-tools/prokaryotic-nomenclature-up-to-date/complete-list-readme> #' From: <https://www.dsmz.de/services/online-tools/prokaryotic-nomenclature-up-to-date/complete-list-readme>
#' @source Catalogue of Life: Annual Checklist (public online taxonomic database), <http://www.catalogueoflife.org> (check included annual version with [catalogue_of_life_version()]). #' @source Catalogue of Life: Annual Checklist (public online taxonomic database), <http://www.catalogueoflife.org> (check included annual version with [catalogue_of_life_version()]).
#'
#' Parte, A.C. (2018). LPSN — List of Prokaryotic names with Standing in Nomenclature (bacterio.net), 20 years on. International Journal of Systematic and Evolutionary Microbiology, 68, 1825-1829; doi: 10.1099/ijsem.0.002786
#' #'
#' Leibniz Institute DSMZ-German Collection of Microorganisms and Cell Cultures, Germany, Prokaryotic Nomenclature Up-to-Date, <https://www.dsmz.de/services/online-tools/prokaryotic-nomenclature-up-to-date> (check included version with [catalogue_of_life_version()]). #' Leibniz Institute DSMZ-German Collection of Microorganisms and Cell Cultures, Germany, Prokaryotic Nomenclature Up-to-Date, <https://www.dsmz.de/services/online-tools/prokaryotic-nomenclature-up-to-date> (check included version with [catalogue_of_life_version()]).
#' @inheritSection AMR Read more on our website! #' @inheritSection AMR Read more on our website!
@ -120,11 +121,11 @@
"microorganisms" "microorganisms"
catalogue_of_life <- list( catalogue_of_life <- list(
year = 2018, year = 2019,
version = "Catalogue of Life: {year} Annual Checklist", version = "Catalogue of Life: {year} Annual Checklist",
url_CoL = "http://www.catalogueoflife.org/annual-checklist/{year}/", url_CoL = "http://www.catalogueoflife.org/col/",
url_DSMZ = "https://www.dsmz.de/services/online-tools/prokaryotic-nomenclature-up-to-date/prokaryotic-nomenclature-up-to-date/genus-search", url_DSMZ = "https://lpsn.dsmz.de",
yearmonth_DSMZ = "August 2019" yearmonth_DSMZ = "May 2020"
) )
#' Data set with previously accepted taxonomic names #' Data set with previously accepted taxonomic names
@ -132,17 +133,18 @@ catalogue_of_life <- list(
#' A data set containing old (previously valid or accepted) taxonomic names according to the Catalogue of Life. This data set is used internally by [as.mo()]. #' A data set containing old (previously valid or accepted) taxonomic names according to the Catalogue of Life. This data set is used internally by [as.mo()].
#' @inheritSection catalogue_of_life Catalogue of Life #' @inheritSection catalogue_of_life Catalogue of Life
#' @format A [`data.frame`] with `r format(nrow(microorganisms.old), big.mark = ",")` observations and `r ncol(microorganisms.old)` variables: #' @format A [`data.frame`] with `r format(nrow(microorganisms.old), big.mark = ",")` observations and `r ncol(microorganisms.old)` variables:
#' - `col_id`\cr Catalogue of Life ID that was originally given
#' - `col_id_new`\cr New Catalogue of Life ID that responds to an entry in the [microorganisms] data set
#' - `fullname`\cr Old full taxonomic name of the microorganism #' - `fullname`\cr Old full taxonomic name of the microorganism
#' - `fullname_new`\cr New full taxonomic name of the microorganism
#' - `ref`\cr Author(s) and year of concerning scientific publication #' - `ref`\cr Author(s) and year of concerning scientific publication
#' - `prevalence`\cr Prevalence of the microorganism, see [as.mo()] #' - `prevalence`\cr Prevalence of the microorganism, see [as.mo()]
#' @source Catalogue of Life: Annual Checklist (public online taxonomic database), <http://www.catalogueoflife.org> (check included annual version with [catalogue_of_life_version()]). #' @source Catalogue of Life: Annual Checklist (public online taxonomic database), <http://www.catalogueoflife.org> (check included annual version with [catalogue_of_life_version()]).
#'
#' Parte, A.C. (2018). LPSN — List of Prokaryotic names with Standing in Nomenclature (bacterio.net), 20 years on. International Journal of Systematic and Evolutionary Microbiology, 68, 1825-1829; doi: 10.1099/ijsem.0.002786
#' @inheritSection AMR Read more on our website! #' @inheritSection AMR Read more on our website!
#' @seealso [as.mo()] [mo_property()] [microorganisms] #' @seealso [as.mo()] [mo_property()] [microorganisms]
"microorganisms.old" "microorganisms.old"
#' Translation table for common microorganism codes #' Translation table with `r format(nrow(microorganisms.codes), big.mark = ",")` common microorganism codes
#' #'
#' A data set containing commonly used codes for microorganisms, from laboratory systems and WHONET. Define your own with [set_mo_source()]. They will all be searched when using [as.mo()] and consequently all the [`mo_*`][mo_property()] functions. #' A data set containing commonly used codes for microorganisms, from laboratory systems and WHONET. Define your own with [set_mo_source()]. They will all be searched when using [as.mo()] and consequently all the [`mo_*`][mo_property()] functions.
#' @format A [`data.frame`] with `r format(nrow(microorganisms.codes), big.mark = ",")` observations and `r ncol(microorganisms.codes)` variables: #' @format A [`data.frame`] with `r format(nrow(microorganisms.codes), big.mark = ",")` observations and `r ncol(microorganisms.codes)` variables:

View File

@ -99,7 +99,7 @@ is.disk <- function(x) {
#' @export #' @export
#' @noRd #' @noRd
print.disk <- function(x, ...) { print.disk <- function(x, ...) {
cat("Class 'disk'\n") cat("Class <disk>\n")
print(as.integer(x), quote = FALSE) print(as.integer(x), quote = FALSE)
} }

View File

@ -245,6 +245,7 @@ eucast_rules <- function(x,
} }
warned <- FALSE warned <- FALSE
warn_lacking_rsi_class <- FALSE
txt_error <- function() { txt_error <- function() {
if (info == TRUE) cat("", font_red_bg(font_white(" ERROR ")), "\n\n") if (info == TRUE) cat("", font_red_bg(font_white(" ERROR ")), "\n\n")
@ -410,6 +411,7 @@ eucast_rules <- function(x,
RID <- cols_ab["RID"] RID <- cols_ab["RID"]
RIF <- cols_ab["RIF"] RIF <- cols_ab["RIF"]
RXT <- cols_ab["RXT"] RXT <- cols_ab["RXT"]
SAM <- cols_ab["SAM"]
SIS <- cols_ab["SIS"] SIS <- cols_ab["SIS"]
SXT <- cols_ab["SXT"] SXT <- cols_ab["SXT"]
TCY <- cols_ab["TCY"] TCY <- cols_ab["TCY"]
@ -440,7 +442,9 @@ eucast_rules <- function(x,
cols <- unique(cols[!is.na(cols) & !is.null(cols)]) cols <- unique(cols[!is.na(cols) & !is.null(cols)])
if (length(rows) > 0 & length(cols) > 0) { if (length(rows) > 0 & length(cols) > 0) {
before_df <- x_original before_df <- x_original
if (any(!sapply(x[, cols, drop = FALSE], is.rsi), na.rm = TRUE)) {
warn_lacking_rsi_class <<- TRUE
}
tryCatch( tryCatch(
# insert into original table # insert into original table
x_original[rows, cols] <<- to, x_original[rows, cols] <<- to,
@ -599,14 +603,79 @@ eucast_rules <- function(x,
} }
} }
if (info == TRUE & !any(c("other", "all") %in% rules, na.rm = TRUE)) { as.rsi_no_warning <- function(x) suppressWarnings(as.rsi(x))
cat(font_red("\nSkipping inheritance rules defined by this package, such as setting trimethoprim (TMP) = R where trimethoprim/sulfamethoxazole (SXT) = R.\nUse eucast_rules(..., rules = \"all\") to also apply those rules.\n"))
}
eucast_notification_shown <- FALSE
eucast_rules_df <- eucast_rules_file # internal data file
no_added <- 0 no_added <- 0
no_changed <- 0 no_changed <- 0
# Other rules: enzyme inhibitors ------------------------------------------
if (any(c("all", "other") %in% rules)) {
if (info == TRUE) {
cat(font_bold(paste0("\nRules by this AMR package (",
font_red(paste0("v", utils::packageVersion("AMR"), ", ",
format(utils::packageDate("AMR"), "%Y"))), ")\n")))
}
ab_enzyme <- subset(antibiotics, name %like% "/")[, c("ab", "name")]
ab_enzyme$base_name <- gsub("^([a-zA-Z0-9]+).*", "\\1", ab_enzyme$name)
ab_enzyme$base_ab <- as.ab(ab_enzyme$base_name)
for (i in seq_len(nrow(ab_enzyme))) {
if (all(c(ab_enzyme[i, ]$ab, ab_enzyme[i, ]$base_ab) %in% names(cols_ab), na.rm = TRUE)) {
ab_name_base <- ab_name(cols_ab[ab_enzyme[i, ]$base_ab], language = NULL, tolower = TRUE)
ab_name_enzyme <- ab_name(cols_ab[ab_enzyme[i, ]$ab], language = NULL, tolower = TRUE)
# Set base to R where base + enzyme inhibitor is R
rule_current <- paste0("Set ", ab_name_base, " (", cols_ab[ab_enzyme[i, ]$base_ab], ") = R where ",
ab_name_enzyme, " (", cols_ab[ab_enzyme[i, ]$ab], ") = R")
if (info == TRUE) {
cat(rule_current)
}
run_changes <- edit_rsi(to = "R",
rule = c(rule_current, "Other rules", ""),
rows = which(as.rsi_no_warning(x[, cols_ab[ab_enzyme[i, ]$ab]]) == "R"),
cols = cols_ab[ab_enzyme[i, ]$base_ab])
no_added <- no_added + run_changes$added
no_changed <- no_changed + run_changes$changed
# Print number of new changes
if (info == TRUE) {
# print only on last one of rules in this group
txt_ok(no_added = no_added, no_changed = no_changed)
# and reset counters
no_added <- 0
no_changed <- 0
}
# Set base + enzyme inhibitor to S where base is S
rule_current <- paste0("Set ", ab_name_enzyme, " (", cols_ab[ab_enzyme[i, ]$ab], ") = S where ",
ab_name_base, " (", cols_ab[ab_enzyme[i, ]$base_ab], ") = S")
if (info == TRUE) {
cat(rule_current)
}
run_changes <- edit_rsi(to = "S",
rule = c(rule_current, "Other rules", ""),
rows = which(as.rsi_no_warning(x[, cols_ab[ab_enzyme[i, ]$base_ab]]) == "S"),
cols = cols_ab[ab_enzyme[i, ]$ab])
no_added <- no_added + run_changes$added
no_changed <- no_changed + run_changes$changed
# Print number of new changes
if (info == TRUE) {
# print only on last one of rules in this group
txt_ok(no_added = no_added, no_changed = no_changed)
# and reset counters
no_added <- 0
no_changed <- 0
}
}
}
} else {
if (info == TRUE) {
cat(font_red("\nSkipping inheritance rules defined by this package, such as setting trimethoprim (TMP) = R where trimethoprim/sulfamethoxazole (SXT) = R.\nUse eucast_rules(..., rules = \"all\") to also apply those rules.\n"))
}
}
# Official EUCAST rules ---------------------------------------------------
eucast_notification_shown <- FALSE
eucast_rules_df <- eucast_rules_file # internal data file
for (i in seq_len(nrow(eucast_rules_df))) { for (i in seq_len(nrow(eucast_rules_df))) {
rule_previous <- eucast_rules_df[max(1, i - 1), "reference.rule"] rule_previous <- eucast_rules_df[max(1, i - 1), "reference.rule"]
@ -637,18 +706,14 @@ eucast_rules <- function(x,
if (rule_group_current %like% "expert" & !any(c("all", "expert") %in% rules)) { if (rule_group_current %like% "expert" & !any(c("all", "expert") %in% rules)) {
next next
} }
if (rule_group_current %like% "other" & !any(c("all", "other") %in% rules)) {
next
}
if (info == TRUE & !rule_group_current %like% "other" & eucast_notification_shown == FALSE) { if (info == TRUE & !rule_group_current %like% "other" & eucast_notification_shown == FALSE) {
cat(paste0( cat(paste0("\n", font_grey(strrep("-", options()$width - 1)),
"\n----\nRules by the ", font_bold("European Committee on Antimicrobial Susceptibility Testing (EUCAST)"), "\nRules by the ", font_bold("European Committee on Antimicrobial Susceptibility Testing (EUCAST)"),
"\n", font_blue("http://eucast.org/"), "\n")) "\n", font_blue("http://eucast.org/"), "\n"))
eucast_notification_shown <- TRUE eucast_notification_shown <- TRUE
} }
if (info == TRUE) { if (info == TRUE) {
# Print rule (group) ------------------------------------------------------ # Print rule (group) ------------------------------------------------------
if (rule_group_current != rule_group_previous) { if (rule_group_current != rule_group_previous) {
@ -662,7 +727,7 @@ eucast_rules <- function(x,
rule_group_current %like% "expert", rule_group_current %like% "expert",
paste0("\nEUCAST Expert Rules, Intrinsic Resistance and Exceptional Phenotypes (", paste0("\nEUCAST Expert Rules, Intrinsic Resistance and Exceptional Phenotypes (",
font_red(paste0("v", EUCAST_VERSION_EXPERT_RULES)), ")\n"), font_red(paste0("v", EUCAST_VERSION_EXPERT_RULES)), ")\n"),
"\nOther rules by this AMR package\n")))) ""))))
} }
# Print rule ------------------------------------------------------------- # Print rule -------------------------------------------------------------
if (rule_current != rule_previous) { if (rule_current != rule_previous) {
@ -733,18 +798,18 @@ eucast_rules <- function(x,
rows <- integer(0) rows <- integer(0)
} else if (length(source_antibiotics) == 1) { } else if (length(source_antibiotics) == 1) {
rows <- tryCatch(which(x[, col_mo_property] %like% mo_value rows <- tryCatch(which(x[, col_mo_property] %like% mo_value
& x[, source_antibiotics[1L]] == source_value[1L]), & as.rsi_no_warning(x[, source_antibiotics[1L]]) == source_value[1L]),
error = function(e) integer(0)) error = function(e) integer(0))
} else if (length(source_antibiotics) == 2) { } else if (length(source_antibiotics) == 2) {
rows <- tryCatch(which(x[, col_mo_property] %like% mo_value rows <- tryCatch(which(x[, col_mo_property] %like% mo_value
& x[, source_antibiotics[1L]] == source_value[1L] & as.rsi_no_warning(x[, source_antibiotics[1L]]) == source_value[1L]
& x[, source_antibiotics[2L]] == source_value[2L]), & as.rsi_no_warning(x[, source_antibiotics[2L]]) == source_value[2L]),
error = function(e) integer(0)) error = function(e) integer(0))
} else if (length(source_antibiotics) == 3) { } else if (length(source_antibiotics) == 3) {
rows <- tryCatch(which(x[, col_mo_property] %like% mo_value rows <- tryCatch(which(x[, col_mo_property] %like% mo_value
& x[, source_antibiotics[1L]] == source_value[1L] & as.rsi_no_warning(x[, source_antibiotics[1L]]) == source_value[1L]
& x[, source_antibiotics[2L]] == source_value[2L] & as.rsi_no_warning(x[, source_antibiotics[2L]]) == source_value[2L]
& x[, source_antibiotics[3L]] == source_value[3L]), & as.rsi_no_warning(x[, source_antibiotics[3L]]) == source_value[3L]),
error = function(e) integer(0)) error = function(e) integer(0))
} else { } else {
stop("only 3 antibiotics supported for source_antibiotics ", call. = FALSE) stop("only 3 antibiotics supported for source_antibiotics ", call. = FALSE)
@ -784,7 +849,7 @@ eucast_rules <- function(x,
arrange(row, rule_group, rule_name, col) arrange(row, rule_group, rule_name, col)
cat(paste0("\n", font_grey(strrep("-", options()$width - 1)), "\n")) cat(paste0("\n", font_grey(strrep("-", options()$width - 1)), "\n"))
cat(font_bold(paste("EUCAST rules", paste0(wouldve, "affected"), cat(font_bold(paste("The rules", paste0(wouldve, "affected"),
formatnr(n_distinct(verbose_info$row)), formatnr(n_distinct(verbose_info$row)),
"out of", formatnr(nrow(x_original)), "out of", formatnr(nrow(x_original)),
"rows, making a total of", formatnr(nrow(verbose_info)), "edits\n"))) "rows, making a total of", formatnr(nrow(verbose_info)), "edits\n")))
@ -846,6 +911,12 @@ eucast_rules <- function(x,
} }
} }
if (isTRUE(warn_lacking_rsi_class)) {
warning("Not all columns with antimicrobial results are of class <rsi>.\n",
"Transform eligible columns to class <rsi> on beforehand: your_data %>% mutate_if(is.rsi.eligible, as.rsi)",
call. = FALSE)
}
# Return data set --------------------------------------------------------- # Return data set ---------------------------------------------------------
if (verbose == TRUE) { if (verbose == TRUE) {
rownames(verbose_info) <- NULL rownames(verbose_info) <- NULL

View File

@ -154,7 +154,7 @@ joins_check_df <- function(x, by) {
by <- "mo" by <- "mo"
x[, "mo"] <- as.mo(x[, "mo"]) x[, "mo"] <- as.mo(x[, "mo"])
} else { } else {
stop("Cannot join - no column found with name or class `mo`.", call. = FALSE) stop("Cannot join - no column found with name or class <mo>.", call. = FALSE)
} }
} }
message('Joining, by = "', by, '"') # message same as dplyr::join functions message('Joining, by = "', by, '"') # message same as dplyr::join functions

View File

@ -174,7 +174,7 @@ droplevels.mic <- function(x, exclude = ifelse(anyNA(levels(x)), NULL, NA), ...)
#' @export #' @export
#' @noRd #' @noRd
print.mic <- function(x, ...) { print.mic <- function(x, ...) {
cat("Class 'mic'\n") cat("Class <mic>\n")
print(as.character(x), quote = FALSE) print(as.character(x), quote = FALSE)
} }

96
R/mo.R
View File

@ -126,7 +126,6 @@
#' as.mo("MRSA") # Methicillin Resistant S. aureus #' as.mo("MRSA") # Methicillin Resistant S. aureus
#' as.mo("VISA") # Vancomycin Intermediate S. aureus #' as.mo("VISA") # Vancomycin Intermediate S. aureus
#' as.mo("VRSA") # Vancomycin Resistant S. aureus #' as.mo("VRSA") # Vancomycin Resistant S. aureus
#' as.mo(22242419) # Catalogue of Life ID
#' as.mo(115329001) # SNOMED CT code #' as.mo(115329001) # SNOMED CT code
#' #'
#' # Dyslexia is no problem - these all work: #' # Dyslexia is no problem - these all work:
@ -556,20 +555,44 @@ exec_as.mo <- function(x,
if (initial_search == TRUE) { if (initial_search == TRUE) {
progress$tick() progress$tick()
} }
if (x_backup[i] %like_case% "\\(unknown [a-z]+\\)" | tolower(x_backup_without_spp[i]) %in% c("other", "none", "unknown")) { # valid MO code ----
# empty and nonsense values, ignore without warning
x[i] <- lookup(mo == "UNKNOWN")
next
}
# valid MO code ---
found <- lookup(mo == toupper(x_backup[i])) found <- lookup(mo == toupper(x_backup[i]))
if (!is.na(found)) { if (!is.na(found)) {
x[i] <- found[1L] x[i] <- found[1L]
next next
} }
# valid fullname ----
found <- lookup(fullname_lower %in% gsub("[^a-zA-Z0-9_. -]", "", tolower(c(x_backup[i], x_backup_without_spp[i]))))
# added the gsub() for "(unknown fungus)", since fullname_lower does not contain brackets
if (!is.na(found)) {
x[i] <- found[1L]
next
}
# old fullname ----
found <- lookup(fullname_lower %in% tolower(c(x_backup[i], x_backup_without_spp[i])),
column = NULL, # all columns
haystack = MO.old_lookup)
if (!all(is.na(found))) {
# when property is "ref" (which is the case in mo_ref, mo_authors and mo_year), return the old value, so:
# mo_ref() of "Chlamydia psittaci" will be "Page, 1968" (with warning)
# mo_ref() of "Chlamydophila psittaci" will be "Everett et al., 1999"
if (property == "ref") {
x[i] <- found["ref"]
} else {
x[i] <- lookup(fullname == found["fullname_new"], haystack = MO_lookup)
}
options(mo_renamed_last_run = found["fullname"])
was_renamed(name_old = found["fullname"],
name_new = lookup(fullname == found["fullname_new"], "fullname", haystack = MO_lookup),
ref_old = found["ref"],
ref_new = lookup(fullname == found["fullname_new"], "ref", haystack = MO_lookup),
mo = lookup(fullname == found["fullname_new"], "mo", haystack = MO_lookup))
next
}
# old mo code, used in previous versions of this package ---- # old mo code, used in previous versions of this package ----
if (x_backup[i] %in% microorganisms.translation$mo_old) { if (x_backup[i] %in% microorganisms.translation$mo_old) {
old_mo_warning <- TRUE old_mo_warning <- TRUE
@ -582,10 +605,9 @@ exec_as.mo <- function(x,
} }
} }
found <- lookup(fullname_lower %in% tolower(c(x_backup[i], x_backup_without_spp[i]))) if (x_backup[i] %like_case% "\\(unknown [a-z]+\\)" | tolower(x_backup_without_spp[i]) %in% c("other", "none", "unknown")) {
# most probable: is exact match in fullname # empty and nonsense values, ignore without warning
if (!is.na(found)) { x[i] <- lookup(mo == "UNKNOWN")
x[i] <- found[1L]
next next
} }
@ -614,13 +636,6 @@ exec_as.mo <- function(x,
next next
} }
# valid Catalogue of Life ID ---
found <- lookup(col_id == x_backup[i])
if (!is.na(found)) {
x[i] <- found[1L]
next
}
# WHONET and other common LIS codes ---- # WHONET and other common LIS codes ----
found <- lookup(code %in% toupper(c(x_backup_untouched[i], x_backup[i], x_backup_without_spp[i])), found <- lookup(code %in% toupper(c(x_backup_untouched[i], x_backup[i], x_backup_without_spp[i])),
column = "mo", column = "mo",
@ -943,21 +958,20 @@ exec_as.mo <- function(x,
column = NULL, # all columns column = NULL, # all columns
haystack = data.old_to_check) haystack = data.old_to_check)
if (!all(is.na(found))) { if (!all(is.na(found))) {
col_id_new <- found["col_id_new"]
# when property is "ref" (which is the case in mo_ref, mo_authors and mo_year), return the old value, so: # when property is "ref" (which is the case in mo_ref, mo_authors and mo_year), return the old value, so:
# mo_ref() of "Chlamydia psittaci" will be "Page, 1968" (with warning) # mo_ref() of "Chlamydia psittaci" will be "Page, 1968" (with warning)
# mo_ref() of "Chlamydophila psittaci" will be "Everett et al., 1999" # mo_ref() of "Chlamydophila psittaci" will be "Everett et al., 1999"
if (property == "ref") { if (property == "ref") {
x[i] <- found["ref"] x[i] <- found["ref"]
} else { } else {
x[i] <- lookup(col_id == found["col_id_new"], haystack = MO_lookup) x[i] <- lookup(fullname == found["fullname_new"], haystack = MO_lookup)
} }
options(mo_renamed_last_run = found["fullname"]) options(mo_renamed_last_run = found["fullname"])
was_renamed(name_old = found["fullname"], was_renamed(name_old = found["fullname"],
name_new = lookup(col_id == found["col_id_new"], "fullname", haystack = MO_lookup), name_new = lookup(fullname == found["fullname_new"], "fullname", haystack = MO_lookup),
ref_old = found["ref"], ref_old = found["ref"],
ref_new = lookup(col_id == found["col_id_new"], "ref", haystack = MO_lookup), ref_new = lookup(fullname == found["fullname_new"], "ref", haystack = MO_lookup),
mo = lookup(col_id == found["col_id_new"], "mo", haystack = MO_lookup)) mo = lookup(fullname == found["fullname_new"], "mo", haystack = MO_lookup))
return(x[i]) return(x[i])
} }
@ -997,18 +1011,18 @@ exec_as.mo <- function(x,
# mo_ref("Chlamydophila psittaci) = "Everett et al., 1999" # mo_ref("Chlamydophila psittaci) = "Everett et al., 1999"
x <- found["ref"] x <- found["ref"]
} else { } else {
x <- lookup(col_id == found["col_id_new"], haystack = MO_lookup) x <- lookup(fullname == found["fullname_new"], haystack = MO_lookup)
} }
was_renamed(name_old = found["fullname"], was_renamed(name_old = found["fullname"],
name_new = lookup(col_id == found["col_id_new"], "fullname", haystack = MO_lookup), name_new = lookup(fullname == found["fullname_new"], "fullname", haystack = MO_lookup),
ref_old = found["ref"], ref_old = found["ref"],
ref_new = lookup(col_id == found["col_id_new"], "ref", haystack = MO_lookup), ref_new = lookup(fullname == found["fullname_new"], "ref", haystack = MO_lookup),
mo = lookup(col_id == found["col_id_new"], "mo", haystack = MO_lookup)) mo = lookup(fullname == found["fullname_new"], "mo", haystack = MO_lookup))
options(mo_renamed_last_run = found["fullname"]) options(mo_renamed_last_run = found["fullname"])
uncertainties <<- rbind(uncertainties, uncertainties <<- rbind(uncertainties,
format_uncertainty_as_df(uncertainty_level = now_checks_for_uncertainty_level, format_uncertainty_as_df(uncertainty_level = now_checks_for_uncertainty_level,
input = a.x_backup, input = a.x_backup,
result_mo = lookup(col_id == found["col_id_new"], "mo", haystack = MO_lookup))) result_mo = lookup(fullname == found["fullname_new"], "mo", haystack = MO_lookup)))
return(x) return(x)
} }
@ -1366,6 +1380,10 @@ exec_as.mo <- function(x,
failures <- c(failures, x_backup[i]) failures <- c(failures, x_backup[i])
} }
} }
if (initial_search == TRUE) {
close(progress)
}
} }
# handling failures ---- # handling failures ----
@ -1494,7 +1512,7 @@ exec_as.mo <- function(x,
if (property == "mo") { if (property == "mo") {
x <- to_class_mo(x) x <- to_class_mo(x)
} }
if (length(mo_renamed()) > 0) { if (length(mo_renamed()) > 0) {
print(mo_renamed()) print(mo_renamed())
} }
@ -1552,7 +1570,7 @@ format_uncertainty_as_df <- function(uncertainty_level,
#' @export #' @export
#' @noRd #' @noRd
print.mo <- function(x, ...) { print.mo <- function(x, ...) {
cat("Class 'mo'\n") cat("Class <mo>\n")
x_names <- names(x) x_names <- names(x)
x <- as.character(x) x <- as.character(x)
names(x) <- x_names names(x) <- x_names
@ -1711,6 +1729,9 @@ print.mo_renamed <- function(x, ...) {
font_italic(x$old_name[i]), ifelse(x$old_ref[i] %in% c("", NA), "", font_italic(x$old_name[i]), ifelse(x$old_ref[i] %in% c("", NA), "",
paste0(" (", gsub("et al.", font_italic("et al."), x$old_ref[i]), ")")), paste0(" (", gsub("et al.", font_italic("et al."), x$old_ref[i]), ")")),
" was renamed ", " was renamed ",
ifelse(as.integer(gsub("[^0-9]", "", x$new_ref[i])) < as.integer(gsub("[^0-9]", "", x$old_ref[i])),
font_bold("back to "),
""),
font_italic(x$new_name[i]), ifelse(x$new_ref[i] %in% c("", NA), "", font_italic(x$new_name[i]), ifelse(x$new_ref[i] %in% c("", NA), "",
paste0(" (", gsub("et al.", font_italic("et al."), x$new_ref[i]), ")")), paste0(" (", gsub("et al.", font_italic("et al."), x$new_ref[i]), ")")),
" [", x$mo[i], "]"))) " [", x$mo[i], "]")))
@ -1747,9 +1768,14 @@ translate_allow_uncertain <- function(allow_uncertain) {
} }
get_mo_failures_uncertainties_renamed <- function() { get_mo_failures_uncertainties_renamed <- function() {
list(failures = getOption("mo_failures"), remember <- list(failures = getOption("mo_failures"),
uncertainties = getOption("mo_uncertainties"), uncertainties = getOption("mo_uncertainties"),
renamed = getOption("mo_renamed")) renamed = getOption("mo_renamed"))
# empty them, otherwise mo_shortname("Chlamydophila psittaci") will give 3 notes
options("mo_failures" = NULL)
options("mo_uncertainties" = NULL)
options("mo_renamed" = NULL)
remember
} }
load_mo_failures_uncertainties_renamed <- function(metadata) { load_mo_failures_uncertainties_renamed <- function(metadata) {

View File

@ -149,6 +149,7 @@ mo_fullname <- mo_name
#' @export #' @export
mo_shortname <- function(x, language = get_locale(), ...) { mo_shortname <- function(x, language = get_locale(), ...) {
x.mo <- as.mo(x, ...) x.mo <- as.mo(x, ...)
metadata <- get_mo_failures_uncertainties_renamed() metadata <- get_mo_failures_uncertainties_renamed()
replace_empty <- function(x) { replace_empty <- function(x) {
@ -158,7 +159,7 @@ mo_shortname <- function(x, language = get_locale(), ...) {
# get first char of genus and complete species in English # get first char of genus and complete species in English
shortnames <- paste0(substr(mo_genus(x.mo, language = NULL), 1, 1), ". ", replace_empty(mo_species(x.mo, language = NULL))) shortnames <- paste0(substr(mo_genus(x.mo, language = NULL), 1, 1), ". ", replace_empty(mo_species(x.mo, language = NULL)))
# exceptions for Staphylococci # exceptions for Staphylococci
shortnames[shortnames == "S. coagulase-negative"] <- "CoNS" shortnames[shortnames == "S. coagulase-negative"] <- "CoNS"
shortnames[shortnames == "S. coagulase-positive"] <- "CoPS" shortnames[shortnames == "S. coagulase-positive"] <- "CoPS"
@ -315,9 +316,9 @@ mo_synonyms <- function(x, ...) {
x <- as.mo(x, ...) x <- as.mo(x, ...)
metadata <- get_mo_failures_uncertainties_renamed() metadata <- get_mo_failures_uncertainties_renamed()
IDs <- mo_property(x = x, property = "col_id", language = NULL) IDs <- mo_name(x = x, language = NULL)
syns <- lapply(IDs, function(col_id) { syns <- lapply(IDs, function(newname) {
res <- sort(microorganisms.old[which(microorganisms.old$col_id_new == col_id), "fullname"]) res <- sort(microorganisms.old[which(microorganisms.old$fullname_new == newname), "fullname"])
if (length(res) == 0) { if (length(res) == 0) {
NULL NULL
} else { } else {
@ -368,14 +369,9 @@ mo_url <- function(x, open = FALSE, ...) {
df <- data.frame(mo, stringsAsFactors = FALSE) %>% df <- data.frame(mo, stringsAsFactors = FALSE) %>%
left_join(select(microorganisms, mo, source, species_id), by = "mo") left_join(select(microorganisms, mo, source, species_id), by = "mo")
df$url <- ifelse(df$source == "CoL", df$url <- ifelse(df$source == "CoL",
paste0(gsub("{year}", paste0(catalogue_of_life$url_CoL, "details/species/id/", df$species_id, "/"),
catalogue_of_life$year,
catalogue_of_life$url_CoL,
fixed = TRUE),
"details/species/id/",
df$species_id),
ifelse(df$source == "DSMZ", ifelse(df$source == "DSMZ",
paste0(catalogue_of_life$url_DSMZ, "/", unlist(lapply(strsplit(mo_names, ""), function(x) x[1]))), paste0(catalogue_of_life$url_DSMZ, "/advanced_search?adv[taxon-name]=", gsub(" ", "+", mo_names), "/"),
NA_character_)) NA_character_))
u <- df$url u <- df$url
names(u) <- mo_names names(u) <- mo_names

View File

@ -533,7 +533,7 @@ is.rsi.eligible <- function(x, threshold = 0.05) {
#' @export #' @export
#' @noRd #' @noRd
print.rsi <- function(x, ...) { print.rsi <- function(x, ...) {
cat("Class 'rsi'\n") cat("Class <rsi>\n")
print(as.character(x), quote = FALSE) print(as.character(x), quote = FALSE)
} }

View File

@ -128,7 +128,7 @@ rsi_calc <- function(...,
} }
if (print_warning == TRUE) { if (print_warning == TRUE) {
warning("Increase speed by transforming to class `rsi` on beforehand: df %>% mutate_if(is.rsi.eligible, as.rsi)", warning("Increase speed by transforming to class <rsi> on beforehand: your_data %>% mutate_if(is.rsi.eligible, as.rsi)",
call. = FALSE) call. = FALSE)
} }
@ -177,7 +177,7 @@ rsi_calc_df <- function(type, # "proportion", "count" or "both"
} }
if (!any(sapply(data, is.rsi), na.rm = TRUE)) { if (!any(sapply(data, is.rsi), na.rm = TRUE)) {
stop("No columns with class 'rsi' found. See ?as.rsi.", call. = FALSE) stop("No columns with class <rsi> found. See ?as.rsi.", call. = FALSE)
} }
if (as.character(translate_ab) %in% c("TRUE", "official")) { if (as.character(translate_ab) %in% c("TRUE", "official")) {

Binary file not shown.

View File

@ -89,7 +89,7 @@
"CTF" "J01DC07" 43708 "Cefotiam" "Cephalosporins (2nd gen.)" "Other beta-lactam antibacterials" "Second-generation cephalosporins" "" "c(\"cefotiam\", \"cefotiam?\", \"cefotiamum\", \"ceradolan\", \"ceradon\", \"haloapor\")" 1.2 "g" 4 "g" "CTF" "J01DC07" 43708 "Cefotiam" "Cephalosporins (2nd gen.)" "Other beta-lactam antibacterials" "Second-generation cephalosporins" "" "c(\"cefotiam\", \"cefotiam?\", \"cefotiamum\", \"ceradolan\", \"ceradon\", \"haloapor\")" 1.2 "g" 4 "g"
"CHE" 125846 "Cefotiam hexetil" "Cephalosporins (3rd gen.)" "" "c(\"cefotiam cilexetil\", \"pansporin t\")" "CHE" 125846 "Cefotiam hexetil" "Cephalosporins (3rd gen.)" "" "c(\"cefotiam cilexetil\", \"pansporin t\")"
"FOV" 9578573 "Cefovecin" "Cephalosporins (3rd gen.)" "" "" "FOV" 9578573 "Cefovecin" "Cephalosporins (3rd gen.)" "" ""
"FOX" "J01DC01" 441199 "Cefoxitin" "Cephalosporins (2nd gen.)" "Other beta-lactam antibacterials" "Second-generation cephalosporins" "c(\"cfox\", \"cfx\", \"cfxt\", \"cx\", \"fox\", \"fx\")" "c(\"cefoxitin\", \"cefoxitina\", \"cefoxitine\", \"cefoxitinum\", \"cefoxotin\", \"cephoxitin\", \"mefoxin\", \"mefoxitin\", \"rephoxitin\")" 6 "g" "c(\"25240-3\", \"3448-8\")" "FOX" "J01DC01" 441199 "Cefoxitin" "Cephalosporins (2nd gen.)" "Other beta-lactam antibacterials" "Second-generation cephalosporins" "c(\"cfox\", \"cfsc\", \"cfx\", \"cfxt\", \"cx\", \"fox\", \"fx\")" "c(\"cefoxitin\", \"cefoxitina\", \"cefoxitine\", \"cefoxitinum\", \"cefoxotin\", \"cephoxitin\", \"mefoxin\", \"mefoxitin\", \"rephoxitin\")" 6 "g" "c(\"25240-3\", \"3448-8\")"
"ZOP" 9571080 "Cefozopran" "Cephalosporins (4th gen.)" "" "cefozopran" "ZOP" 9571080 "Cefozopran" "Cephalosporins (4th gen.)" "" "cefozopran"
"CFZ" 68597 "Cefpimizole" "Cephalosporins (3rd gen.)" "" "c(\"cefpimizol\", \"cefpimizole\", \"cefpimizole sodium\", \"cefpimizolum\")" "CFZ" 68597 "Cefpimizole" "Cephalosporins (3rd gen.)" "" "c(\"cefpimizol\", \"cefpimizole\", \"cefpimizole sodium\", \"cefpimizolum\")"
"CPM" "J01DD11" 636405 "Cefpiramide" "Cephalosporins (3rd gen.)" "Other beta-lactam antibacterials" "Third-generation cephalosporins" "" "c(\"cefpiramide\", \"cefpiramide acid\", \"cefpiramido\", \"cefpiramidum\")" 2 "g" "CPM" "J01DD11" 636405 "Cefpiramide" "Cephalosporins (3rd gen.)" "Other beta-lactam antibacterials" "Third-generation cephalosporins" "" "c(\"cefpiramide\", \"cefpiramide acid\", \"cefpiramido\", \"cefpiramidum\")" 2 "g"
@ -105,7 +105,7 @@
"CPT" "J01DI02" 56841980 "Ceftaroline" "Cephalosporins (5th gen.)" "c(\"\", \"cfro\")" "c(\"teflaro\", \"zinforo\")" "CPT" "J01DI02" 56841980 "Ceftaroline" "Cephalosporins (5th gen.)" "c(\"\", \"cfro\")" "c(\"teflaro\", \"zinforo\")"
"CPA" "Ceftaroline/avibactam" "Cephalosporins (5th gen.)" "" "" "CPA" "Ceftaroline/avibactam" "Cephalosporins (5th gen.)" "" ""
"CAZ" "J01DD02" 5481173 "Ceftazidime" "Cephalosporins (3rd gen.)" "Other beta-lactam antibacterials" "Third-generation cephalosporins" "c(\"caz\", \"cefta\", \"cfta\", \"cftz\", \"taz\", \"tz\", \"xtz\")" "c(\"ceftazidim\", \"ceftazidima\", \"ceftazidime\", \"ceftazidimum\", \"ceptaz\", \"fortaz\", \"fortum\", \"pentacef\", \"tazicef\", \"tazidime\")" 4 "g" "c(\"21151-6\", \"3449-6\", \"80960-8\")" "CAZ" "J01DD02" 5481173 "Ceftazidime" "Cephalosporins (3rd gen.)" "Other beta-lactam antibacterials" "Third-generation cephalosporins" "c(\"caz\", \"cefta\", \"cfta\", \"cftz\", \"taz\", \"tz\", \"xtz\")" "c(\"ceftazidim\", \"ceftazidima\", \"ceftazidime\", \"ceftazidimum\", \"ceptaz\", \"fortaz\", \"fortum\", \"pentacef\", \"tazicef\", \"tazidime\")" 4 "g" "c(\"21151-6\", \"3449-6\", \"80960-8\")"
"CZA" "Ceftazidime/avibactam" "Cephalosporins (3rd gen.)" "" "" "CZA" "Ceftazidime/avibactam" "Cephalosporins (3rd gen.)" "c(\"\", \"cfav\")" ""
"CCV" "J01DD52" 9575352 "Ceftazidime/clavulanic acid" "Cephalosporins (3rd gen.)" "Other beta-lactam antibacterials" "Third-generation cephalosporins" "c(\"czcl\", \"xtzl\")" "" "CCV" "J01DD52" 9575352 "Ceftazidime/clavulanic acid" "Cephalosporins (3rd gen.)" "Other beta-lactam antibacterials" "Third-generation cephalosporins" "c(\"czcl\", \"xtzl\")" ""
"CEM" 6537431 "Cefteram" "Cephalosporins (3rd gen.)" "" "c(\"cefteram\", \"cefterame\", \"cefteramum\", \"ceftetrame\")" "CEM" 6537431 "Cefteram" "Cephalosporins (3rd gen.)" "" "c(\"cefteram\", \"cefterame\", \"cefteramum\", \"ceftetrame\")"
"CPL" 5362114 "Cefteram pivoxil" "Cephalosporins (3rd gen.)" "" "c(\"cefteram pivoxil\", \"tomiron\")" "CPL" 5362114 "Cefteram pivoxil" "Cephalosporins (3rd gen.)" "" "c(\"cefteram pivoxil\", \"tomiron\")"

BIN
data-raw/data_dsmz.rds Normal file

Binary file not shown.

View File

@ -9,22 +9,6 @@
# >>>>> IF YOU WANT TO IMPORT THIS FILE INTO YOUR OWN SOFTWARE, HAVE THE FIRST 10 LINES SKIPPED <<<<< # >>>>> IF YOU WANT TO IMPORT THIS FILE INTO YOUR OWN SOFTWARE, HAVE THE FIRST 10 LINES SKIPPED <<<<<
# ------------------------------------------------------------------------------------------------------------------------------- # -------------------------------------------------------------------------------------------------------------------------------
if_mo_property like.is.one_of this_value and_these_antibiotics have_these_values then_change_these_antibiotics to_value reference.rule reference.rule_group if_mo_property like.is.one_of this_value and_these_antibiotics have_these_values then_change_these_antibiotics to_value reference.rule reference.rule_group
genus like .* AMP S AMX S Non-EUCAST: inherit ampicillin results for unavailable amoxicillin Other rules
genus like .* AMP I AMX I Non-EUCAST: inherit ampicillin results for unavailable amoxicillin Other rules
genus like .* AMP R AMX R Non-EUCAST: inherit ampicillin results for unavailable amoxicillin Other rules
genus like .* AMX S AMP S Non-EUCAST: inherit amoxicillin results for unavailable ampicillin Other rules
genus like .* AMX I AMP I Non-EUCAST: inherit amoxicillin results for unavailable ampicillin Other rules
genus like .* AMX R AMP R Non-EUCAST: inherit amoxicillin results for unavailable ampicillin Other rules
genus like .* AMC R AMP, AMX R Non-EUCAST: set ampicillin = R where amoxicillin/clav acid = R Other rules
genus like .* SAM R AMP, AMX R Non-EUCAST: set ampicillin = R where ampicillin/sulbactam = R Other rules
genus like .* TZP R PIP R Non-EUCAST: set piperacillin = R where piperacillin/tazobactam = R Other rules
genus like .* SXT R TMP R Non-EUCAST: set trimethoprim = R where trimethoprim/sulfa = R Other rules
genus like .* AMP S AMC S Non-EUCAST: set amoxicillin/clav acid = S where ampicillin = S Other rules
genus like .* AMX S AMC S Non-EUCAST: set amoxicillin/clav acid = S where ampicillin = S Other rules
genus like .* AMP S SAM S Non-EUCAST: set ampicillin/sulbactam = S where ampicillin = S Other rules
genus like .* AMX S SAM S Non-EUCAST: set ampicillin/sulbactam = S where ampicillin = S Other rules
genus like .* PIP S TZP S Non-EUCAST: set piperacillin/tazobactam = S where piperacillin = S Other rules
genus like .* TMP S SXT S Non-EUCAST: set trimethoprim/sulfa = S where trimethoprim = S Other rules
order is Enterobacterales AMP S AMX S Enterobacterales (Order) Breakpoints order is Enterobacterales AMP S AMX S Enterobacterales (Order) Breakpoints
order is Enterobacterales AMP I AMX I Enterobacterales (Order) Breakpoints order is Enterobacterales AMP I AMX I Enterobacterales (Order) Breakpoints
order is Enterobacterales AMP R AMX R Enterobacterales (Order) Breakpoints order is Enterobacterales AMP R AMX R Enterobacterales (Order) Breakpoints

Can't render this file because it contains an unexpected character in line 6 and column 96.

File diff suppressed because it is too large Load Diff

View File

@ -322,7 +322,7 @@ antibiotics[which(antibiotics$ab == as.ab("cefuroxim")), "abbreviations"][[1]] <
antibiotics[which(antibiotics$ab == as.ab("cefotaxim")), "abbreviations"][[1]] <- list(c(antibiotics[which(antibiotics$ab == as.ab("cefotaxim")), "abbreviations"][[1]], "cftx")) antibiotics[which(antibiotics$ab == as.ab("cefotaxim")), "abbreviations"][[1]] <- list(c(antibiotics[which(antibiotics$ab == as.ab("cefotaxim")), "abbreviations"][[1]], "cftx"))
antibiotics[which(antibiotics$ab == as.ab("ceftazidime")), "abbreviations"][[1]] <- list(c(antibiotics[which(antibiotics$ab == as.ab("ceftazidime")), "abbreviations"][[1]], "cftz")) antibiotics[which(antibiotics$ab == as.ab("ceftazidime")), "abbreviations"][[1]] <- list(c(antibiotics[which(antibiotics$ab == as.ab("ceftazidime")), "abbreviations"][[1]], "cftz"))
antibiotics[which(antibiotics$ab == as.ab("cefepime")), "abbreviations"][[1]] <- list(c(antibiotics[which(antibiotics$ab == as.ab("cefepime")), "abbreviations"][[1]], "cfpi")) antibiotics[which(antibiotics$ab == as.ab("cefepime")), "abbreviations"][[1]] <- list(c(antibiotics[which(antibiotics$ab == as.ab("cefepime")), "abbreviations"][[1]], "cfpi"))
antibiotics[which(antibiotics$ab == as.ab("cefoxitin")), "abbreviations"][[1]] <- list(c(antibiotics[which(antibiotics$ab == as.ab("cefoxitin")), "abbreviations"][[1]], "cfxt")) antibiotics[which(antibiotics$ab == as.ab("cefoxitin")), "abbreviations"][[1]] <- list(c(antibiotics[which(antibiotics$ab == as.ab("cefoxitin")), "abbreviations"][[1]], "cfxt", "cfsc"))
# More GLIMS codes # More GLIMS codes
antibiotics[which(antibiotics$ab == "CAZ"), "abbreviations"][[1]] <- list(c(antibiotics[which(antibiotics$ab == "CAZ"), "abbreviations"][[1]], "cftz")) antibiotics[which(antibiotics$ab == "CAZ"), "abbreviations"][[1]] <- list(c(antibiotics[which(antibiotics$ab == "CAZ"), "abbreviations"][[1]], "cftz"))
antibiotics[which(antibiotics$ab == "CRO"), "abbreviations"][[1]] <- list(c(antibiotics[which(antibiotics$ab == "CRO"), "abbreviations"][[1]], "cftr")) antibiotics[which(antibiotics$ab == "CRO"), "abbreviations"][[1]] <- list(c(antibiotics[which(antibiotics$ab == "CRO"), "abbreviations"][[1]], "cftr"))
@ -377,6 +377,7 @@ antibiotics[which(antibiotics$ab == "CTX"), "abbreviations"][[1]] <- list(c(anti
antibiotics[which(antibiotics$ab == "CAZ"), "abbreviations"][[1]] <- list(c(antibiotics[which(antibiotics$ab == "CAZ"), "abbreviations"][[1]], "cftz")) antibiotics[which(antibiotics$ab == "CAZ"), "abbreviations"][[1]] <- list(c(antibiotics[which(antibiotics$ab == "CAZ"), "abbreviations"][[1]], "cftz"))
antibiotics[which(antibiotics$ab == "CFM"), "abbreviations"][[1]] <- list(c(antibiotics[which(antibiotics$ab == "CFM"), "abbreviations"][[1]], "cfxm")) antibiotics[which(antibiotics$ab == "CFM"), "abbreviations"][[1]] <- list(c(antibiotics[which(antibiotics$ab == "CFM"), "abbreviations"][[1]], "cfxm"))
antibiotics[which(antibiotics$ab == "FOX"), "abbreviations"][[1]] <- list(c(antibiotics[which(antibiotics$ab == "FOX"), "abbreviations"][[1]], "cfxt")) antibiotics[which(antibiotics$ab == "FOX"), "abbreviations"][[1]] <- list(c(antibiotics[which(antibiotics$ab == "FOX"), "abbreviations"][[1]], "cfxt"))
antibiotics[which(antibiotics$ab == "CZA"), "abbreviations"][[1]] <- list(c(antibiotics[which(antibiotics$ab == "CZA"), "abbreviations"][[1]], "cfav"))
antibiotics[which(antibiotics$ab == "CZO"), "abbreviations"][[1]] <- list(c(antibiotics[which(antibiotics$ab == "CZO"), "abbreviations"][[1]], "cfzl")) antibiotics[which(antibiotics$ab == "CZO"), "abbreviations"][[1]] <- list(c(antibiotics[which(antibiotics$ab == "CZO"), "abbreviations"][[1]], "cfzl"))
antibiotics[which(antibiotics$ab == "CZX"), "abbreviations"][[1]] <- list(c(antibiotics[which(antibiotics$ab == "CZX"), "abbreviations"][[1]], "cfzx")) antibiotics[which(antibiotics$ab == "CZX"), "abbreviations"][[1]] <- list(c(antibiotics[which(antibiotics$ab == "CZX"), "abbreviations"][[1]], "cfzx"))
antibiotics[which(antibiotics$ab == "CHL"), "abbreviations"][[1]] <- list(c(antibiotics[which(antibiotics$ab == "CHL"), "abbreviations"][[1]], "chlo")) antibiotics[which(antibiotics$ab == "CHL"), "abbreviations"][[1]] <- list(c(antibiotics[which(antibiotics$ab == "CHL"), "abbreviations"][[1]], "chlo"))
@ -577,10 +578,10 @@ antibiotics <- antibiotics %>%
# set as data.frame again # set as data.frame again
antibiotics <- as.data.frame(antibiotics, stringsAsFactors = FALSE) antibiotics <- as.data.frame(antibiotics, stringsAsFactors = FALSE)
class(antibiotics$ab) <- "ab" class(antibiotics$ab) <- c("ab", "character")
antibiotics <- antibiotics %>% arrange(name) antibiotics <- antibiotics %>% arrange(name)
# make all abbreviations and synonyms lower case, unique and alphabetically sorted # make all abbreviations and synonyms lower case, unique and alphabetically sorted ----
for (i in 1:nrow(antibiotics)) { for (i in 1:nrow(antibiotics)) {
abb <- sort(unique(tolower(antibiotics[i, "abbreviations"][[1]]))) abb <- sort(unique(tolower(antibiotics[i, "abbreviations"][[1]])))
syn <- sort(unique(tolower(antibiotics[i, "synonyms"][[1]]))) syn <- sort(unique(tolower(antibiotics[i, "synonyms"][[1]])))

View File

@ -23,94 +23,134 @@
# Data retrieved from the Catalogue of Life (CoL) through the Encyclopaedia of Life: # Data retrieved from the Catalogue of Life (CoL) through the Encyclopaedia of Life:
# https://opendata.eol.org/dataset/catalogue-of-life/ # https://opendata.eol.org/dataset/catalogue-of-life/
# Data retrieved from the Global Biodiversity Information Facility (GBIF):
# https://doi.org/10.15468/rffz4x # https://doi.org/10.15468/rffz4x
# (download the resource file with a name like "Catalogue of Life yyyy-mm-dd") #
# and from the Leibniz Institute DSMZ-German Collection of Microorganisms and Cell Cultures # And from the Leibniz Institute: German Collection of Microorganisms and Cell Cultures (DSMZ)
# https://www.dsmz.de/support/bacterial-nomenclature-up-to-date-downloads.html # (register first at https://bacdive.dsmz.de/api/pnu/registration/register/ and use API as done below)
# (download the latest "Complete List" as xlsx file)
library(dplyr) library(dplyr)
library(AMR) library(AMR)
# also needed: data.table, httr, jsonlite, cleaner, stringr
# unzip and extract taxon.tab (around 1.5 GB) from the CoL archive, then: # unzip and extract taxa.txt (both around 1.5 GB, 3.7-3.9M rows) from CoL and GBIF, then:
# data_col <- data.table::fread("data-raw/taxon.tab") data_col_raw <- data.table::fread("data-raw/taxon.tab", quote = "")
data_col <- data.table::fread("data-raw/taxa.txt", quote = "") data_gbif <- data.table::fread("data-raw/taxa.txt", quote = "")
# read the xlsx file from DSMZ (only around 2.5 MB): # merge the two
data_dsmz <- readxl::read_xlsx("data-raw/DSMZ_bactnames.xlsx") data_col <- data_gbif %>%
rename(referenceID = identifier) %>%
bind_rows(data_col_raw) %>%
distinct(scientificName, kingdom, genus, specificEpithet, infraspecificEpithet, .keep_all = TRUE)
rm(data_col_raw)
rm(data_gbif)
# read the data from the DSMZ API (around 19000 rows)
dsmz_username <- ""
dsmz_password <- ""
# Send an authenticated GET request to `url` and return the JSON response parsed by jsonlite.
# Credentials are read from the script-level variables `dsmz_username` and `dsmz_password`.
# An HTTP error status is turned into an R error by httr::stop_for_status(), so a failed
# request stops the script instead of returning an error payload.
GET_df <- function(url) {
  response <- httr::GET(url, httr::authenticate(dsmz_username, dsmz_password))
  httr::stop_for_status(response)
  body <- httr::content(response, type = "text", encoding = "UTF-8")
  # flatten = TRUE: nested data frames in the JSON are flattened into a single data frame
  jsonlite::fromJSON(body, flatten = TRUE)
}
# fetch the first page; the code below reads its records from `$results` and the total record count from `$count`
dsmz_first <- GET_df("https://bacdive.dsmz.de/api/pnu/species?page=1&format=json")
# number of pages to retrieve, assuming 100 records per page (TODO confirm against the API).
# ceiling() replaces round(x + 0.5): round() rounds half to even, so an exact multiple of 100
# with an odd quotient (e.g. 19100 records) would request page 192 while only 191 exist
dsmz_pages <- max(1, ceiling(dsmz_first$count / 100))
# collect all pages in a preallocated list and combine them once at the end,
# instead of growing the data.frame with rbind() and `<<-` on every iteration
dsmz_results <- vector("list", dsmz_pages)
dsmz_results[[1]] <- dsmz_first$results
# this next process will take appr. `dsmz_first$count / 100 * 5 / 60` minutes
# seq_len()[-1] is empty when there is only one page, whereas 2:1 would count backwards
for (i in seq_len(dsmz_pages)[-1]) {
  dsmz_results[[i]] <- GET_df(paste0("https://bacdive.dsmz.de/api/pnu/species/?page=", i, "&format=json"))$results
  cat(i, "-", AMR:::percentage(i / dsmz_pages), "\n")
}
data_dsmz <- do.call(rbind, dsmz_results)
rm(dsmz_first, dsmz_pages, dsmz_results)
# the CoL data is over 3.7M rows: # the merged CoL and GBIF data is over 4.5M rows:
data_col %>% freq(kingdom) data_col %>% cleaner::freq(kingdom)
# Item Count Percent Cum. Count Cum. Percent # Item Count Percent Cum. Count Cum. Percent
# --- ---------- ---------- -------- ----------- ------------- # --- ---------- ---------- -------- ----------- -------------
# 1 Animalia 2,225,627 59.1% 2,225,627 59.1% # 1 Animalia 2,494,992 55.43% 2,494,992 55.43%
# 2 Plantae 1,177,412 31.3% 3,403,039 90.4% # 2 Plantae 1,379,674 30.65% 3,874,666 86.08%
# 3 Fungi 290,145 7.7% 3,693,184 98.1% # 3 Fungi 547,619 12.17% 4,422,285 98.24%
# 4 Chromista 47,126 1.3% 3,740,310 99.3% # 4 Chromista 51,475 1.14% 4,473,760 99.39%
# 5 Bacteria 14,478 0.4% 3,754,788 99.7% # 5 Bacteria 14,442 0.32% 4,488,202 99.71%
# 6 Protozoa 6,060 0.2% 3,760,848 99.9% # 6 Protozoa 8,750 0.19% 4,496,952 99.90%
# 7 Viruses 3,827 0.1% 3,764,675 100.0% # 7 Viruses 3,805 0.08% 4,500,757 99.99%
# 8 Archaea 610 0.0% 3,765,285 100.0% # 8 Archaea 609 0.01% 4,501,366 100.00%
# clean data_col # clean data_col
data_col <- data_col %>% data_col.bak <- data_col
data_col_old <- data_col %>%
# filter: has new accepted name
filter(!is.na(acceptedNameUsageID)) %>%
as_tibble() %>% as_tibble() %>%
select(col_id = taxonID, transmute(fullname = trimws(stringr::str_replace(scientificName,
col_id_new = acceptedNameUsageID, pattern = stringr::fixed(scientificNameAuthorship),
fullname = scientificName, replacement = "")),
kingdom, fullname_new = trimws(paste(ifelse(is.na(genus), "", genus),
phylum, ifelse(is.na(specificEpithet), "", specificEpithet),
class, ifelse(is.na(infraspecificEpithet), "", infraspecificEpithet))),
order, ref = scientificNameAuthorship,
family, prevalence = NA_integer_)
genus, data_col <- data_col %>%
species = specificEpithet, # filter: has no new accepted name
subspecies = infraspecificEpithet, filter(is.na(acceptedNameUsageID)) %>%
rank = taxonRank, as_tibble() %>%
ref = scientificNameAuthorship, transmute(fullname = "",
species_id = references) kingdom,
data_col$source <- "CoL" phylum,
class,
order,
family,
genus,
species = specificEpithet,
subspecies = infraspecificEpithet,
rank = taxonRank,
ref = scientificNameAuthorship,
species_id = referenceID,
source = "CoL")
# clean data_dsmz # clean data_dsmz
data_dsmz <- data_dsmz %>% data_dsmz.bak <- data_dsmz
data_dsmz_old <- data_dsmz %>%
# filter: correct name is not NULL
filter(!sapply(correct_name, is.null)) %>%
as_tibble() %>% as_tibble() %>%
transmute(col_id = NA_integer_, transmute(fullname = trimws(paste(ifelse(is.na(genus), "", genus),
col_id_new = NA_integer_, ifelse(is.na(species_epithet), "", species_epithet),
fullname = "", ifelse(is.na(subspecies_epithet), "", subspecies_epithet))),
# kingdom = "", fullname_new = sapply(correct_name, function(x) x[2L]),
# phylum = "", ref = authors,
# class = "", prevalence = NA_integer_)
# order = "",
# family = "", data_dsmz <- data_dsmz %>%
genus = ifelse(is.na(GENUS), "", GENUS), # filter: correct name is NULL
species = ifelse(is.na(SPECIES), "", SPECIES), filter(sapply(correct_name, is.null)) %>%
subspecies = ifelse(is.na(SUBSPECIES), "", SUBSPECIES), as_tibble() %>%
transmute(fullname = "",
kingdom = regio,
phylum,
class = classis,
# order = "", # does not contain order, will add later based on CoL
family = familia,
genus = ifelse(is.na(genus), "", genus),
species = ifelse(is.na(species_epithet), "", species_epithet),
subspecies = ifelse(is.na(subspecies_epithet), "", subspecies_epithet),
rank = ifelse(species == "", "genus", "species"), rank = ifelse(species == "", "genus", "species"),
ref = AUTHORS, ref = authors,
species_id = as.character(RECORD_NO), species_id = as.character(pnu_no),
source = "DSMZ") source = "DSMZ")
# DSMZ only contains genus/(sub)species, try to find taxonomic properties based on genus and data_col # DSMZ only contains genus/(sub)species, try to find taxonomic properties based on genus and data_col
ref_taxonomy <- data_col %>% ref_taxonomy <- data_col %>%
filter(genus %in% data_dsmz$genus, filter(family %in% data_dsmz$family & family != "") %>%
kingdom %in% c("Bacteria", "Chromista", "Archaea", "Protozoa", "Fungi"),
family != "") %>%
mutate(kingdom = factor(kingdom,
# in the left_join following, try Bacteria first, then Chromista, ...
levels = c("Bacteria", "Chromista", "Archaea", "Protozoa", "Fungi"),
ordered = TRUE)) %>%
arrange(kingdom) %>% arrange(kingdom) %>%
distinct(genus, .keep_all = TRUE) %>% distinct(family, .keep_all = TRUE) %>%
select(kingdom, phylum, class, order, family, genus) select(family, order)
data_dsmz <- data_dsmz %>% data_dsmz <- data_dsmz %>%
left_join(ref_taxonomy, by = "genus") %>% left_join(ref_taxonomy, by = "family") # NAs will later become "(unknown ...)"
mutate(kingdom = "Bacteria",
phylum = ifelse(is.na(phylum), "(unknown phylum)", phylum),
class = ifelse(is.na(class), "(unknown class)", class),
order = ifelse(is.na(order), "(unknown order)", order),
family = ifelse(is.na(family), "(unknown family)", family),
)
# combine everything # combine everything
data_total <- data_col %>% data_total <- data_col %>%
@ -119,6 +159,8 @@ data_total <- data_col %>%
rm(data_col) rm(data_col)
rm(data_dsmz) rm(data_dsmz)
rm(ref_taxonomy) rm(ref_taxonomy)
rm(data_col.bak)
rm(data_dsmz.bak)
mo_found_in_NL <- c("Absidia", "Acremonium", "Actinotignum", "Aedes", "Alternaria", "Anaerosalibacter", "Ancylostoma", mo_found_in_NL <- c("Absidia", "Acremonium", "Actinotignum", "Aedes", "Alternaria", "Anaerosalibacter", "Ancylostoma",
"Angiostrongylus", "Anisakis", "Anopheles", "Apophysomyces", "Arachnia", "Ascaris", "Aspergillus", "Angiostrongylus", "Anisakis", "Anopheles", "Apophysomyces", "Arachnia", "Ascaris", "Aspergillus",
@ -158,8 +200,6 @@ MOs <- data_total %>%
) )
# or the genus has to be one of the genera we found in our hospitals last decades (Northern Netherlands, 2002-2018) # or the genus has to be one of the genera we found in our hospitals last decades (Northern Netherlands, 2002-2018)
| genus %in% mo_found_in_NL | genus %in% mo_found_in_NL
# or the taxonomic entry is old - the species was renamed
| !is.na(col_id_new)
) %>% ) %>%
# really no Plantae (e.g. Dracunculus exist both as worm and as plant) # really no Plantae (e.g. Dracunculus exist both as worm and as plant)
filter(kingdom != "Plantae") %>% filter(kingdom != "Plantae") %>%
@ -174,59 +214,56 @@ MOs <- MOs %>% bind_rows(data_total %>%
| (family %in% MOs$family & rank == "family") | (family %in% MOs$family & rank == "family")
| (genus %in% MOs$genus & rank == "genus"))) | (genus %in% MOs$genus & rank == "genus")))
# filter old taxonomic names so only the ones with an existing reference will be kept get_author_year <- function(ref) {
MOs <- MOs %>% # Only keep first author, e.g. transform 'Smith, Jones, 2011' to 'Smith et al., 2011'
filter(is.na(col_id_new) | (!is.na(col_id_new) & col_id_new %in% MOs$col_id))
authors2 <- iconv(ref, from = "UTF-8", to = "ASCII//TRANSLIT")
# remove leading and trailing brackets
authors2 <- gsub("^[(](.*)[)]$", "\\1", authors2)
# only take part after brackets if there's a name
authors2 <- ifelse(grepl(".*[)] [a-zA-Z]+.*", authors2),
gsub(".*[)] (.*)", "\\1", authors2),
authors2)
# get year from last 4 digits
lastyear = as.integer(gsub(".*([0-9]{4})$", "\\1", authors2))
# can never be later than now
lastyear = ifelse(lastyear > as.integer(format(Sys.Date(), "%Y")),
NA,
lastyear)
# get authors without last year
authors <- gsub("(.*)[0-9]{4}$", "\\1", authors2)
# remove nonsense characters from names
authors <- gsub("[^a-zA-Z,'& -]", "", authors)
# remove trailing and leading spaces
authors <- trimws(authors)
# only keep first author and replace all others by 'et al'
authors <- gsub("(,| and| et| &| ex| emend\\.?) .*", " et al.", authors)
# et al. always with ending dot
authors <- gsub(" et al\\.?", " et al.", authors)
authors <- gsub(" ?,$", "", authors)
# don't start with 'sensu' or 'ehrenb'
authors <- gsub("^(sensu|Ehrenb.?) ", "", authors, ignore.case = TRUE)
# no initials, only surname
authors <- gsub("^([A-Z]+ )+", "", authors, ignore.case = FALSE)
# combine author and year if year is available
ref <- ifelse(!is.na(lastyear),
paste0(authors, ", ", lastyear),
authors)
# fix beginning and ending
ref <- gsub(", $", "", ref)
ref <- gsub("^, ", "", ref)
ref <- gsub("^(emend|et al.,?)", "", ref)
ref <- trimws(ref)
# a lot start with a lowercase character - fix that
ref[!grepl("^d[A-Z]", ref)] <- gsub("^([a-z])", "\\U\\1", ref[!grepl("^d[A-Z]", ref)], perl = TRUE)
# specific one for the French that are named dOrbigny
ref[grepl("^d[A-Z]", ref)] <- gsub("^d", "d'", ref[grepl("^d[A-Z]", ref)])
ref <- gsub(" +", " ", ref)
ref
}
MOs <- MOs %>% MOs <- MOs %>% mutate(ref = get_author_year(ref))
# remove text if it contains 'Not assigned' like phylum in viruses
mutate_all(~gsub("(Not assigned|\\[homonym\\]|\\[mistake\\])", "", ., ignore.case = TRUE))
MOs <- MOs %>%
# Only keep first author, e.g. transform 'Smith, Jones, 2011' to 'Smith et al., 2011':
mutate(authors2 = iconv(ref, from = "UTF-8", to = "ASCII//TRANSLIT"),
# remove leading and trailing brackets
authors2 = gsub("^[(](.*)[)]$", "\\1", authors2),
# only take part after brackets if there's a name
authors2 = ifelse(grepl(".*[)] [a-zA-Z]+.*", authors2),
gsub(".*[)] (.*)", "\\1", authors2),
authors2),
# get year from last 4 digits
lastyear = as.integer(gsub(".*([0-9]{4})$", "\\1", authors2)),
# can never be later than now
lastyear = ifelse(lastyear > as.integer(format(Sys.Date(), "%Y")),
NA,
lastyear),
# get authors without last year
authors = gsub("(.*)[0-9]{4}$", "\\1", authors2),
# remove nonsense characters from names
authors = gsub("[^a-zA-Z,'& -]", "", authors),
# remove trailing and leading spaces
authors = trimws(authors),
# only keep first author and replace all others by 'et al'
authors = gsub("(,| and| et| &| ex| emend\\.?) .*", " et al.", authors),
# et al. always with ending dot
authors = gsub(" et al\\.?", " et al.", authors),
authors = gsub(" ?,$", "", authors),
# don't start with 'sensu' or 'ehrenb'
authors = gsub("^(sensu|Ehrenb.?) ", "", authors, ignore.case = TRUE),
# no initials, only surname
authors = gsub("^([A-Z]+ )+", "", authors, ignore.case = FALSE),
# combine author and year if year is available
ref = ifelse(!is.na(lastyear),
paste0(authors, ", ", lastyear),
authors),
# fix beginning and ending
ref = gsub(", $", "", ref),
ref = gsub("^, ", "", ref),
ref = gsub("^(emend|et al.,?)", "", ref),
ref = trimws(ref)
)
# a lot start with a lowercase character - fix that
MOs$ref[!grepl("^d[A-Z]", MOs$ref)] <- gsub("^([a-z])", "\\U\\1", MOs$ref[!grepl("^d[A-Z]", MOs$ref)], perl = TRUE)
# specific one for the French that are named dOrbigny
MOs$ref[grepl("^d[A-Z]", MOs$ref)] <- gsub("^d", "d'", MOs$ref[grepl("^d[A-Z]", MOs$ref)])
MOs <- MOs %>% mutate(ref = gsub(" +", " ", ref))
# Remove non-ASCII characters (these are not allowed by CRAN) # Remove non-ASCII characters (these are not allowed by CRAN)
MOs <- MOs %>% MOs <- MOs %>%
@ -235,53 +272,58 @@ MOs <- MOs %>%
# remove invalid characters # remove invalid characters
mutate_all(~gsub("[\"'`]+", "", .)) mutate_all(~gsub("[\"'`]+", "", .))
# Split old taxonomic names - they refer in the original data to a new `taxonID` with `acceptedNameUsageID` # set new fullnames
MOs.old <- MOs %>% MOs <- MOs %>%
filter(!is.na(col_id_new), mutate(fullname = trimws(case_when(rank == "family" ~ family,
ref != "", rank == "order" ~ order,
source != "DSMZ") %>% rank == "class" ~ class,
transmute(col_id, rank == "phylum" ~ phylum,
col_id_new, rank == "kingdom" ~ kingdom,
fullname = TRUE ~ paste(genus, species, subspecies))),
trimws( fullname = gsub(" (var|f|subsp)[.]", "", fullname)) %>%
gsub("(.*)[(].*", "\\1", # remove text if it contains 'Not assigned', etc.
stringr::str_replace( mutate_all(function(x) ifelse(x %like% "(not assigned|homonym|mistake)", NA, x)) %>%
string = fullname, # clean taxonomy
pattern = stringr::fixed(authors2), mutate(kingdom = ifelse(is.na(kingdom) | trimws(kingdom) == "", "(unknown kingdom)", trimws(kingdom)),
replacement = "")) %>% phylum = ifelse(is.na(phylum) | trimws(phylum) == "", "(unknown phylum)", trimws(phylum)),
gsub(" (var|f|subsp)[.]", "", .)), class = ifelse(is.na(class) | trimws(class) == "", "(unknown class)", trimws(class)),
ref) %>% order = ifelse(is.na(order) | trimws(order) == "", "(unknown order)", trimws(order)),
filter(!is.na(fullname)) %>% family = ifelse(is.na(family) | trimws(family) == "", "(unknown family)", trimws(family)))
distinct(fullname, .keep_all = TRUE) %>%
arrange(col_id)
MO.bak <- MOs # Split old taxonomic names
MOs.old <- data_col_old %>%
filter(!gsub(" (var|f|subsp)[.]", "", fullname_new) %in% data_dsmz_old$fullname) %>%
bind_rows(data_dsmz_old) %>%
mutate(fullname_new = gsub(" (var|f|subsp)[.]", "", fullname_new),
fullname = gsub(" (var|f|subsp)[.]", "", fullname)) %>%
# for cases like Chlamydia pneumoniae -> Chlamydophila pneumoniae -> Chlamydia pneumoniae:
filter(!fullname %in% fullname_new &
fullname_new %in% MOs$fullname &
!is.na(fullname) &
fullname != fullname_new) %>%
distinct(fullname, .keep_all = TRUE) %>%
arrange(fullname) %>%
mutate(ref = get_author_year(ref))
MOs <- MOs %>% MOs <- MOs %>%
filter(is.na(col_id_new) | source == "DSMZ") %>% # remove entries that are old and in MOs.old
transmute(col_id, filter(!fullname %in% MOs.old$fullname) %>%
fullname = trimws(case_when(rank == "family" ~ family, # mark up
rank == "order" ~ order, transmute(fullname,
rank == "class" ~ class,
rank == "phylum" ~ phylum,
rank == "kingdom" ~ kingdom,
TRUE ~ paste(genus, species, subspecies))),
kingdom, kingdom,
phylum, phylum,
class, class,
order, order,
family, family,
genus = gsub(":", "", genus), genus,
species, species,
subspecies, subspecies,
rank, rank,
ref, ref,
species_id = gsub(".*/([a-f0-9]+)", "\\1", species_id), species_id = gsub("[^a-zA-Z0-9].*", "", species_id),
source) %>% source) %>%
#distinct(fullname, .keep_all = TRUE) %>% # prefer known taxonomy over unknown taxonomy, then DSMZ over CoL (= desc)
filter(!grepl("unassigned", fullname, ignore.case = TRUE)) %>% arrange(desc(kingdom, genus, species, source)) %>%
# prefer DSMZ over CoL, since that's more recent
arrange(desc(source)) %>%
distinct(kingdom, fullname, .keep_all = TRUE) distinct(kingdom, fullname, .keep_all = TRUE)
# remove all genera that have no species - they are irrelevant for microbiology and almost all from the kingdom of Animalia # remove all genera that have no species - they are irrelevant for microbiology and almost all from the kingdom of Animalia
@ -296,43 +338,45 @@ to_remove <- MOs %>%
MOs <- MOs %>% filter(!(paste(kingdom, genus) %in% to_remove)) MOs <- MOs %>% filter(!(paste(kingdom, genus) %in% to_remove))
rm(to_remove) rm(to_remove)
# add CoL's col_id, source and ref from MOs.bak, for the cases where DSMZ took preference # add all missing genera, families and orders
MOs <- MOs %>% MOs <- MOs %>%
mutate(kingdom_fullname = paste(kingdom, fullname)) %>% bind_rows(MOs %>%
left_join(MO.bak %>% arrange(genus, species) %>%
filter(is.na(col_id_new), !is.na(col_id)) %>% distinct(genus, .keep_all = TRUE) %>%
transmute(col_id, species_id, source, ref, kingdom_fullname = trimws(paste(kingdom, genus, species, subspecies))), filter(rank == "species") %>%
by = "kingdom_fullname", mutate(fullname = genus,
suffix = c("_dsmz", "_col")) %>% species = "",
mutate(col_id = col_id_col, rank = "genus",
species_id = ifelse(!is.na(species_id_col) & ref_col == ref_dsmz, species_id = "",
gsub(".*/(.*)$", "\\1", species_id_col), ref = NA_character_)) %>%
species_id_dsmz), bind_rows(MOs %>%
source = ifelse(!is.na(species_id_col) & ref_col == ref_dsmz, arrange(family, genus) %>%
source_col, distinct(family, .keep_all = TRUE) %>%
source_dsmz), filter(rank == "genus") %>%
ref = ifelse(!is.na(species_id_col) & ref_col == ref_dsmz, mutate(fullname = family,
ref_col, genus = "",
ref_dsmz)) %>% rank = "family",
select(-matches("(_col|_dsmz|kingdom_fullname)")) species_id = "",
ref = NA_character_)) %>%
bind_rows(MOs %>%
arrange(order, family) %>%
distinct(family, .keep_all = TRUE) %>%
filter(rank == "family") %>%
mutate(fullname = order,
family = "",
rank = "order",
species_id = "",
ref = NA_character_))
# remove the empty ones
MOs.old <- MOs.old %>% MOs <- MOs %>%
# remove the ones that are in the MOs data set mutate(fullname = gsub(",.*", "", fullname)) %>%
filter(col_id_new %in% MOs$col_id) %>% distinct(kingdom, fullname, .keep_all = TRUE) %>%
# and remove the ones that have the exact same fullname in the MOs data set, like Moraxella catarrhalis filter(fullname != "")
left_join(MOs, by = "fullname") %>%
filter(col_id_new != col_id.y | is.na(col_id.y)) %>%
select(col_id = col_id.x, col_id_new, fullname, ref = ref.x)
# remove the records that are in MOs.old
sum(MOs.old$fullname %in% MOs$fullname)
MOs <- MOs %>% filter(!fullname %in% MOs.old$fullname)
sum(MOs.old$fullname %in% MOs$fullname)
# what characters are in the fullnames? # what characters are in the fullnames?
table(sort(unlist(strsplit(x = paste(MOs$fullname, collapse = ""), split = "")))) table(sort(unlist(strsplit(x = paste(MOs$fullname, collapse = ""), split = ""))))
MOs %>% filter(!fullname %like% "^[a-z ]+$") %>% View() MOs %>% filter(!fullname %like% "^[a-z ]+$") %>% arrange(fullname) %>% View()
table(MOs$kingdom, MOs$rank) table(MOs$kingdom, MOs$rank)
table(AMR::microorganisms$kingdom, AMR::microorganisms$rank) table(AMR::microorganisms$kingdom, AMR::microorganisms$rank)
@ -436,7 +480,6 @@ MOs <- MOs %>%
bind_rows( bind_rows(
# Unknowns # Unknowns
data.frame(mo = "UNKNOWN", data.frame(mo = "UNKNOWN",
col_id = NA_integer_,
fullname = "(unknown name)", fullname = "(unknown name)",
kingdom = "(unknown kingdom)", kingdom = "(unknown kingdom)",
phylum = "(unknown phylum)", phylum = "(unknown phylum)",
@ -453,7 +496,6 @@ MOs <- MOs %>%
prevalence = 1, prevalence = 1,
stringsAsFactors = FALSE), stringsAsFactors = FALSE),
data.frame(mo = "B_GRAMN", data.frame(mo = "B_GRAMN",
col_id = NA_integer_,
fullname = "(unknown Gram-negatives)", fullname = "(unknown Gram-negatives)",
kingdom = "Bacteria", kingdom = "Bacteria",
phylum = "(unknown phylum)", phylum = "(unknown phylum)",
@ -470,7 +512,6 @@ MOs <- MOs %>%
prevalence = 1, prevalence = 1,
stringsAsFactors = FALSE), stringsAsFactors = FALSE),
data.frame(mo = "B_GRAMP", data.frame(mo = "B_GRAMP",
col_id = NA_integer_,
fullname = "(unknown Gram-positives)", fullname = "(unknown Gram-positives)",
kingdom = "Bacteria", kingdom = "Bacteria",
phylum = "(unknown phylum)", phylum = "(unknown phylum)",
@ -487,7 +528,6 @@ MOs <- MOs %>%
prevalence = 1, prevalence = 1,
stringsAsFactors = FALSE), stringsAsFactors = FALSE),
data.frame(mo = "F_YEAST", data.frame(mo = "F_YEAST",
col_id = NA_integer_,
fullname = "(unknown yeast)", fullname = "(unknown yeast)",
kingdom = "Fungi", kingdom = "Fungi",
phylum = "(unknown phylum)", phylum = "(unknown phylum)",
@ -504,7 +544,6 @@ MOs <- MOs %>%
prevalence = 2, prevalence = 2,
stringsAsFactors = FALSE), stringsAsFactors = FALSE),
data.frame(mo = "F_FUNGUS", data.frame(mo = "F_FUNGUS",
col_id = NA_integer_,
fullname = "(unknown fungus)", fullname = "(unknown fungus)",
kingdom = "Fungi", kingdom = "Fungi",
phylum = "(unknown phylum)", phylum = "(unknown phylum)",
@ -524,7 +563,6 @@ MOs <- MOs %>%
MOs %>% MOs %>%
filter(genus == "Staphylococcus", species == "epidermidis") %>% .[1,] %>% filter(genus == "Staphylococcus", species == "epidermidis") %>% .[1,] %>%
mutate(mo = gsub("(.*)_(.*)_.*", "\\1_\\2_CONS", mo), mutate(mo = gsub("(.*)_(.*)_.*", "\\1_\\2_CONS", mo),
col_id = NA_integer_,
species = "coagulase-negative", species = "coagulase-negative",
fullname = "Coagulase-negative Staphylococcus (CoNS)", fullname = "Coagulase-negative Staphylococcus (CoNS)",
ref = NA_character_, ref = NA_character_,
@ -534,7 +572,6 @@ MOs <- MOs %>%
MOs %>% MOs %>%
filter(genus == "Staphylococcus", species == "epidermidis") %>% .[1,] %>% filter(genus == "Staphylococcus", species == "epidermidis") %>% .[1,] %>%
mutate(mo = gsub("(.*)_(.*)_.*", "\\1_\\2_COPS", mo), mutate(mo = gsub("(.*)_(.*)_.*", "\\1_\\2_COPS", mo),
col_id = NA_integer_,
species = "coagulase-positive", species = "coagulase-positive",
fullname = "Coagulase-positive Staphylococcus (CoPS)", fullname = "Coagulase-positive Staphylococcus (CoPS)",
ref = NA_character_, ref = NA_character_,
@ -558,7 +595,6 @@ MOs <- MOs %>%
MOs %>% MOs %>%
filter(genus == "Streptococcus", species == "dysgalactiae") %>% .[1,] %>% filter(genus == "Streptococcus", species == "dysgalactiae") %>% .[1,] %>%
mutate(mo = gsub("(.*)_(.*)_.*", "\\1_\\2_GRPC", mo), mutate(mo = gsub("(.*)_(.*)_.*", "\\1_\\2_GRPC", mo),
col_id = NA_integer_,
species = "group C" , species = "group C" ,
fullname = "Streptococcus group C", fullname = "Streptococcus group C",
ref = NA_character_, ref = NA_character_,
@ -567,7 +603,6 @@ MOs <- MOs %>%
MOs %>% MOs %>%
filter(genus == "Streptococcus", species == "agalactiae") %>% .[1,] %>% filter(genus == "Streptococcus", species == "agalactiae") %>% .[1,] %>%
mutate(mo = gsub("(.*)_(.*)_.*", "\\1_\\2_GRPD", mo), mutate(mo = gsub("(.*)_(.*)_.*", "\\1_\\2_GRPD", mo),
col_id = NA_integer_,
species = "group D" , species = "group D" ,
fullname = "Streptococcus group D", fullname = "Streptococcus group D",
ref = NA_character_, ref = NA_character_,
@ -576,7 +611,6 @@ MOs <- MOs %>%
MOs %>% MOs %>%
filter(genus == "Streptococcus", species == "agalactiae") %>% .[1,] %>% filter(genus == "Streptococcus", species == "agalactiae") %>% .[1,] %>%
mutate(mo = gsub("(.*)_(.*)_.*", "\\1_\\2_GRPF", mo), mutate(mo = gsub("(.*)_(.*)_.*", "\\1_\\2_GRPF", mo),
col_id = NA_integer_,
species = "group F" , species = "group F" ,
fullname = "Streptococcus group F", fullname = "Streptococcus group F",
ref = NA_character_, ref = NA_character_,
@ -585,7 +619,6 @@ MOs <- MOs %>%
MOs %>% MOs %>%
filter(genus == "Streptococcus", species == "agalactiae") %>% .[1,] %>% filter(genus == "Streptococcus", species == "agalactiae") %>% .[1,] %>%
mutate(mo = gsub("(.*)_(.*)_.*", "\\1_\\2_GRPG", mo), mutate(mo = gsub("(.*)_(.*)_.*", "\\1_\\2_GRPG", mo),
col_id = NA_integer_,
species = "group G" , species = "group G" ,
fullname = "Streptococcus group G", fullname = "Streptococcus group G",
ref = NA_character_, ref = NA_character_,
@ -594,7 +627,6 @@ MOs <- MOs %>%
MOs %>% MOs %>%
filter(genus == "Streptococcus", species == "agalactiae") %>% .[1,] %>% filter(genus == "Streptococcus", species == "agalactiae") %>% .[1,] %>%
mutate(mo = gsub("(.*)_(.*)_.*", "\\1_\\2_GRPH", mo), mutate(mo = gsub("(.*)_(.*)_.*", "\\1_\\2_GRPH", mo),
col_id = NA_integer_,
species = "group H" , species = "group H" ,
fullname = "Streptococcus group H", fullname = "Streptococcus group H",
ref = NA_character_, ref = NA_character_,
@ -603,7 +635,6 @@ MOs <- MOs %>%
MOs %>% MOs %>%
filter(genus == "Streptococcus", species == "agalactiae") %>% .[1,] %>% filter(genus == "Streptococcus", species == "agalactiae") %>% .[1,] %>%
mutate(mo = gsub("(.*)_(.*)_.*", "\\1_\\2_GRPK", mo), mutate(mo = gsub("(.*)_(.*)_.*", "\\1_\\2_GRPK", mo),
col_id = NA_integer_,
species = "group K" , species = "group K" ,
fullname = "Streptococcus group K", fullname = "Streptococcus group K",
ref = NA_character_, ref = NA_character_,
@ -613,7 +644,6 @@ MOs <- MOs %>%
MOs %>% MOs %>%
filter(genus == "Streptococcus", species == "agalactiae") %>% .[1,] %>% filter(genus == "Streptococcus", species == "agalactiae") %>% .[1,] %>%
mutate(mo = gsub("(.*)_(.*)_.*", "\\1_\\2_HAEM", mo), mutate(mo = gsub("(.*)_(.*)_.*", "\\1_\\2_HAEM", mo),
col_id = NA_integer_,
species = "beta-haemolytic" , species = "beta-haemolytic" ,
fullname = "Beta-haemolytic Streptococcus", fullname = "Beta-haemolytic Streptococcus",
ref = NA_character_, ref = NA_character_,
@ -623,7 +653,6 @@ MOs <- MOs %>%
MOs %>% MOs %>%
filter(genus == "Streptococcus", species == "agalactiae") %>% .[1,] %>% filter(genus == "Streptococcus", species == "agalactiae") %>% .[1,] %>%
mutate(mo = gsub("(.*)_(.*)_.*", "\\1_\\2_VIRI", mo), mutate(mo = gsub("(.*)_(.*)_.*", "\\1_\\2_VIRI", mo),
col_id = NA_integer_,
species = "viridans" , species = "viridans" ,
fullname = "Viridans Group Streptococcus (VGS)", fullname = "Viridans Group Streptococcus (VGS)",
ref = NA_character_, ref = NA_character_,
@ -633,7 +662,6 @@ MOs <- MOs %>%
MOs %>% MOs %>%
filter(genus == "Streptococcus", species == "agalactiae") %>% .[1,] %>% filter(genus == "Streptococcus", species == "agalactiae") %>% .[1,] %>%
mutate(mo = gsub("(.*)_(.*)_.*", "\\1_\\2_MILL", mo), mutate(mo = gsub("(.*)_(.*)_.*", "\\1_\\2_MILL", mo),
col_id = NA_integer_,
species = "milleri" , species = "milleri" ,
fullname = "Milleri Group Streptococcus (MGS)", fullname = "Milleri Group Streptococcus (MGS)",
ref = NA_character_, ref = NA_character_,
@ -646,7 +674,6 @@ MOs <- MOs %>%
mutate(mo = paste0(mo, "_HMNS"), mutate(mo = paste0(mo, "_HMNS"),
fullname = paste(fullname, "hominis"), fullname = paste(fullname, "hominis"),
species = "hominis", species = "hominis",
col_id = NA,
source = "manually added", source = "manually added",
ref = NA_character_, ref = NA_character_,
species_id = ""), species_id = ""),
@ -654,7 +681,6 @@ MOs <- MOs %>%
MOs %>% MOs %>%
filter(fullname == "Dientamoeba") %>% filter(fullname == "Dientamoeba") %>%
mutate(mo = gsub("(.*?)_.*", "\\1_THMNS", mo), mutate(mo = gsub("(.*?)_.*", "\\1_THMNS", mo),
col_id = NA,
fullname = "Trichomonas", fullname = "Trichomonas",
family = "Trichomonadidae", family = "Trichomonadidae",
genus = "Trichomonas", genus = "Trichomonas",
@ -664,7 +690,6 @@ MOs <- MOs %>%
MOs %>% MOs %>%
filter(fullname == "Dientamoeba fragilis") %>% filter(fullname == "Dientamoeba fragilis") %>%
mutate(mo = gsub("(.*?)_.*", "\\1_THMNS_VAG", mo), mutate(mo = gsub("(.*?)_.*", "\\1_THMNS_VAG", mo),
col_id = NA,
fullname = "Trichomonas vaginalis", fullname = "Trichomonas vaginalis",
family = "Trichomonadidae", family = "Trichomonadidae",
genus = "Trichomonas", genus = "Trichomonas",
@ -675,7 +700,6 @@ MOs <- MOs %>%
MOs %>% # add family as such too MOs %>% # add family as such too
filter(fullname == "Monocercomonadidae") %>% filter(fullname == "Monocercomonadidae") %>%
mutate(mo = gsub("(.*)_(.*)_.*", "\\1_\\2_TRCHMNDD", mo), mutate(mo = gsub("(.*)_(.*)_.*", "\\1_\\2_TRCHMNDD", mo),
col_id = NA,
fullname = "Trichomonadidae", fullname = "Trichomonadidae",
family = "Trichomonadidae", family = "Trichomonadidae",
rank = "family", rank = "family",
@ -760,33 +784,37 @@ new_families <- MOs %>%
filter(order == "Enterobacterales") %>% filter(order == "Enterobacterales") %>%
pull(family) %>% pull(family) %>%
unique() unique()
class(MOs$mo) <- "character"
MOs <- rbind(MOs %>% filter(!(rank == "family" & fullname %in% new_families)), MOs <- MOs %>%
AMR::microorganisms %>% filter(!(rank == "family" & fullname %in% new_families)) %>%
select(-snomed) %>% bind_rows(tibble(mo = paste0("B_[FAM]_",
filter(family == "Enterobacteriaceae" & rank == "family") %>% toupper(abbreviate(new_families,
rbind(., ., ., ., ., ., .) %>% minlength = 8,
mutate(fullname = new_families, use.classes = TRUE,
source = "manually added", method = "both.sides",
ref = "Adeolu et al., 2016", strict = FALSE))),
family = fullname, mo = paste0("B_[FAM]_", fullname = new_families,
toupper(abbreviate(new_families, kingdom = "Bacteria",
minlength = 8, phylum = "Proteobacteria",
use.classes = TRUE, class = "Gammaproteobacteria",
method = "both.sides", order = "Enterobacterales",
strict = FALSE))))) family = new_families,
genus = "",
species = "",
subspecies = "",
rank = "family",
ref = "Adeolu et al., 2016",
species_id = NA_character_,
source = "manually added",
prevalence = 1))
MOs[which(MOs$order == "Enterobacteriales"), "order"] <- "Enterobacterales" MOs[which(MOs$order == "Enterobacteriales"), "order"] <- "Enterobacterales"
MOs[which(MOs$fullname == "Enterobacteriales"), "fullname"] <- "Enterobacterales" MOs[which(MOs$fullname == "Enterobacteriales"), "fullname"] <- "Enterobacterales"
MOs <- MOs %>%
group_by(kingdom) %>%
distinct(fullname, .keep_all = TRUE) %>%
ungroup() %>%
filter(fullname != "")
# add prevalence to old taxonomic names # add prevalence to old taxonomic names
MOs.old <- MOs.old %>% MOs.old <- MOs.old %>%
left_join(MOs %>% select(col_id, prevalence), by = c("col_id_new" = "col_id")) select(-prevalence) %>%
left_join(MOs %>% select(fullname, prevalence), by = c("fullname_new" = "fullname"))
# everything distinct? # everything distinct?
sum(duplicated(MOs$mo)) sum(duplicated(MOs$mo))
@ -797,18 +825,105 @@ colnames(MOs)
MOs %>% arrange(fullname) %>% filter(!fullname %in% AMR::microorganisms$fullname) %>% View() MOs %>% arrange(fullname) %>% filter(!fullname %in% AMR::microorganisms$fullname) %>% View()
MOs.old %>% arrange(fullname) %>% filter(!fullname %in% AMR::microorganisms.old$fullname) %>% View() MOs.old %>% arrange(fullname) %>% filter(!fullname %in% AMR::microorganisms.old$fullname) %>% View()
# and the ones we lost: # and the ones we lost:
AMR::microorganisms %>% filter(!fullname %in% MOs$fullname) %>% View() # based on fullname # AMR::microorganisms %>% filter(!fullname %in% MOs$fullname) %>% View() # based on fullname
AMR::microorganisms %>% filter(!mo %in% MOs$mo) %>% View() # based on mo AMR::microorganisms %>% filter(!fullname %in% c(MOs$fullname, MOs.old$fullname)) %>% View() # excluding renamed ones
AMR::microorganisms %>% filter(!mo %in% MOs$mo & !fullname %in% MOs$fullname) %>% View() # AMR::microorganisms %>% filter(!mo %in% MOs$mo) %>% View() # based on mo
# AMR::microorganisms %>% filter(!mo %in% MOs$mo & !fullname %in% MOs$fullname) %>% View()
# and these IDs have changed: # and these IDs have changed:
old_new <- MOs %>% old_new <- MOs %>%
mutate(kingdom_fullname = paste(kingdom, fullname)) %>% mutate(kingdom_fullname = paste(kingdom, fullname)) %>%
filter(kingdom_fullname %in% (AMR::microorganisms %>% mutate(kingdom_fullname = paste(kingdom, fullname)) %>% pull(kingdom_fullname))) %>% filter(kingdom_fullname %in% (AMR::microorganisms %>%
left_join(AMR::microorganisms %>% mutate(kingdom_fullname = paste(kingdom, fullname)) %>% select(mo, kingdom_fullname), by = "kingdom_fullname", suffix = c("_new", "_old")) %>% mutate(kingdom_fullname = paste(kingdom, fullname)) %>%
pull(kingdom_fullname))) %>%
left_join(AMR::microorganisms %>%
mutate(kingdom_fullname = paste(kingdom, fullname)) %>%
select(mo, kingdom_fullname), by = "kingdom_fullname", suffix = c("_new", "_old")) %>%
filter(mo_new != mo_old) %>% filter(mo_new != mo_old) %>%
select(mo_old, mo_new, everything()) select(mo_old, mo_new, everything())
View(old_new) View(old_new)
# set new MO codes as names to existing data sets
rsi_translation$mo <- mo_name(rsi_translation$mo, language = NULL)
microorganisms.codes$mo <- mo_name(microorganisms.codes$mo, language = NULL)
microorganisms.translation <- AMR:::microorganisms.translation %>%
bind_rows(tibble(mo_old = AMR:::microorganisms.translation$mo_new, mo_new = mo_old)) %>%
filter(!mo_old %in% MOs$mo) %>%
mutate(mo_new = mo_name(mo_new, language = NULL)) %>%
bind_rows(old_new %>% select(mo_old, mo_new)) %>%
distinct(mo_old, .keep_all = TRUE)
# arrange the data sets to save
MOs <- MOs %>% arrange(fullname)
MOs.old <- MOs.old %>% arrange(fullname)
# transform
MOs <- as.data.frame(MOs, stringsAsFactors = FALSE)
MOs.old <- as.data.frame(MOs.old, stringsAsFactors = FALSE)
microorganisms.codes <- as.data.frame(microorganisms.codes, stringsAsFactors = FALSE)
class(MOs$mo) <- c("mo", "character")
# SAVE
### for same server
microorganisms <- dataset_UTF8_to_ASCII(MOs)
microorganisms.old <- dataset_UTF8_to_ASCII(MOs.old)
### for other server
saveRDS(MOs, "microorganisms.rds")
saveRDS(MOs.old, "microorganisms.old.rds")
saveRDS(microorganisms.codes, "microorganisms.codes.rds")
# on the server, do:
usethis::use_data(microorganisms, overwrite = TRUE, version = 2)
usethis::use_data(microorganisms.old, overwrite = TRUE, version = 2)
rm(microorganisms)
rm(microorganisms.old)
# load new data sets
devtools::load_all(".")
# reset previously changed mo codes
rsi_translation$mo <- as.mo(rsi_translation$mo)
microorganisms.codes$mo <- as.mo(microorganisms.codes$mo)
class(microorganisms.codes$mo) <- c("mo", "character")
microorganisms.translation <- microorganisms.translation %>%
left_join(microorganisms.old[, c("fullname", "fullname_new")], # microorganisms.old is now new and loaded
by = c("mo_new" = "fullname")) %>%
mutate(name = ifelse(!is.na(fullname_new), fullname_new, mo_new)) %>%
left_join(microorganisms[, c("fullname", "mo")], # as is microorganisms
by = c("name" = "fullname")) %>%
select(mo_old, mo_new = mo) %>%
filter(!is.na(mo_old), !is.na(mo_new))
class(microorganisms.translation$mo_old) <- "character" # no class <mo> since those aren't valid MO codes
class(microorganisms.translation$mo_new) <- c("mo", "character")
# save those to the package
usethis::use_data(rsi_translation, overwrite = TRUE, version = 2)
usethis::use_data(microorganisms.codes, overwrite = TRUE, version = 2)
saveRDS(microorganisms.translation, file = "data-raw/microorganisms.translation.rds", version = 2)
# to save microorganisms.translation internally to the package
source("data-raw/internals.R")
# load new data sets again
devtools::load_all(".")
# and check: these codes should not be missing (will otherwise throw a unit test error):
AMR::microorganisms.codes %>% filter(!mo %in% MOs$mo)
AMR::rsi_translation %>% filter(!mo %in% MOs$mo)
AMR:::microorganisms.translation %>% filter(!mo_new %in% MOs$mo)
# update the example_isolates data set
example_isolates$mo <- as.mo(example_isolates$mo)
usethis::use_data(example_isolates, overwrite = TRUE)
# Don't forget to add SNOMED codes! (data-raw/snomed.R)
# run the unit tests
testthat::test_file("tests/testthat/test-data.R")
testthat::test_file("tests/testthat/test-mo.R")
testthat::test_file("tests/testthat/test-mo_property.R")
# OLD CODE ----------------------------------------------------------------
# to keep all the old IDs: # to keep all the old IDs:
# MOs <- MOs %>% filter(!mo %in% old_new$mo_new) %>% # MOs <- MOs %>% filter(!mo %in% old_new$mo_new) %>%
# rbind(microorganisms %>% # rbind(microorganisms %>%
@ -816,79 +931,32 @@ View(old_new)
# select(mo, fullname) %>% # select(mo, fullname) %>%
# left_join(MOs %>% # left_join(MOs %>%
# select(-mo), by = "fullname")) # select(-mo), by = "fullname"))
# and these codes are now missing (which will throw a unit test error):
AMR::microorganisms.codes %>% filter(!mo %in% MOs$mo)
AMR::rsi_translation %>% filter(!mo %in% MOs$mo)
AMR:::microorganisms.translation %>% filter(!mo_new %in% MOs$mo) %>% View()
# this is how to fix it # this is how to fix it
microorganisms.codes <- AMR::microorganisms.codes %>% # microorganisms.codes <- AMR::microorganisms.codes %>%
left_join(MOs %>% # left_join(MOs %>%
mutate(kingdom_fullname = paste(kingdom, fullname)) %>% # mutate(kingdom_fullname = paste(kingdom, fullname)) %>%
left_join(AMR::microorganisms %>% # left_join(AMR::microorganisms %>%
transmute(mo, kingdom_fullname = paste(kingdom, fullname)), # transmute(mo, kingdom_fullname = paste(kingdom, fullname)),
by = "kingdom_fullname", suffix = c("_new", "_old")) %>% # by = "kingdom_fullname", suffix = c("_new", "_old")) %>%
select(mo_old, mo_new), # select(mo_old, mo_new),
by = c("mo" = "mo_old")) %>% # by = c("mo" = "mo_old")) %>%
select(code, mo = mo_new) %>% # select(code, mo = mo_new) %>%
filter(!is.na(mo)) # filter(!is.na(mo))
microorganisms.codes %>% filter(!mo %in% MOs$mo) # microorganisms.codes %>% filter(!mo %in% MOs$mo)
# and for microorganisms.translation: # # and for microorganisms.translation:
microorganisms.translation <- AMR:::microorganisms.translation %>% # microorganisms.translation <- AMR:::microorganisms.translation %>%
select(mo = mo_new) %>% # select(mo = mo_new) %>%
left_join(AMR::microorganisms %>% # left_join(AMR::microorganisms %>%
transmute(mo, kingdom_fullname = paste(kingdom, fullname)), # transmute(mo, kingdom_fullname = paste(kingdom, fullname)),
by = "kingdom_fullname", suffix = c("_new", "_old")) %>% # by = "kingdom_fullname", suffix = c("_new", "_old")) %>%
select(mo_old, mo_new) # select(mo_old, mo_new)
left_join(MOs %>% # left_join(MOs %>%
mutate(kingdom_fullname = paste(kingdom, fullname)) %>% # mutate(kingdom_fullname = paste(kingdom, fullname)) %>%
left_join(AMR::microorganisms %>% # left_join(AMR::microorganisms %>%
transmute(mo, kingdom_fullname = paste(kingdom, fullname)), # transmute(mo, kingdom_fullname = paste(kingdom, fullname)),
by = "kingdom_fullname", suffix = c("_new", "_old")) %>% # by = "kingdom_fullname", suffix = c("_new", "_old")) %>%
select(mo_old, mo_new), # select(mo_old, mo_new),
by = c("mo" = "mo_old")) %>% # by = c("mo" = "mo_old")) %>%
select(code, mo = mo_new) %>% # select(code, mo = mo_new) %>%
filter(!is.na(mo)) # filter(!is.na(mo))
microorganisms.codes %>% filter(!mo %in% MOs$mo) # microorganisms.codes %>% filter(!mo %in% MOs$mo)
# arrange
MOs <- MOs %>% arrange(fullname)
MOs.old <- MOs.old %>% arrange(fullname)
microorganisms.codes <- microorganisms.codes %>% arrange(code)
# transform
MOs <- as.data.frame(MOs, stringsAsFactors = FALSE)
MOs.old <- as.data.frame(MOs.old, stringsAsFactors = FALSE)
microorganisms.codes <- as.data.frame(microorganisms.codes, stringsAsFactors = FALSE)
class(MOs$mo) <- "mo"
class(microorganisms.codes$mo) <- "mo"
MOs$col_id <- as.integer(MOs$col_id)
MOs.old$col_id <- as.integer(MOs.old$col_id)
MOs.old$col_id_new <- as.integer(MOs.old$col_id_new)
# SAVE
### for other server
saveRDS(MOs, "microorganisms.rds")
saveRDS(MOs.old, "microorganisms.old.rds")
saveRDS(microorganisms.codes, "microorganisms.codes.rds")
### for same server
microorganisms <- MOs
microorganisms.old <- MOs.old
microorganisms.translation <- old_new %>% select(mo_old, mo_new) %>% as.data.frame()
class(microorganisms.translation$mo_old) <- "mo"
class(microorganisms.translation$mo_new) <- "mo"
# on the server, do:
usethis::use_data(microorganisms, overwrite = TRUE, version = 2)
usethis::use_data(microorganisms.old, overwrite = TRUE, version = 2)
usethis::use_data(microorganisms.codes, overwrite = TRUE, version = 2)
saveRDS(microorganisms.translation, file = "data-raw/microorganisms.translation.rds", version = 2) # this one will be covered in data-raw/internals.R
rm(microorganisms)
rm(microorganisms.old)
rm(microorganisms.codes)
rm(microorganisms.translation)
devtools::load_all(".")
# TO DO AFTER THIS
# * Rerun data-raw/reproduction_of_rsi_translation.R
# * Run unit tests

File diff suppressed because it is too large Load Diff

View File

@ -22,16 +22,17 @@
library(AMR) library(AMR)
library(tidyverse) library(tidyverse)
# go to https://www.nictiz.nl/standaardisatie/terminologiecentrum/referentielijsten/micro-organismen/ # go to https://www.nictiz.nl/standaardisatie/terminologiecentrum/referentielijsten/micro-organismen/ (Ctrl/Cmd + A in table)
# read the table from clipboard # read the table from clipboard
snomed <- clipr::read_clip_tbl() snomed <- clipr::read_clip_tbl(skip = 2)
# snomed <- snomed %>%
# transmute(fullname = trimws(gsub("^genus", "", Omschrijving, ignore.case = TRUE)),
# snomed = as.integer(Id))
snomed <- snomed %>% snomed <- snomed %>%
transmute(fullname = mo_name(Omschrijving), dplyr::filter(gsub("(^genus |^familie |^stam |ss.? |subsp.? |subspecies )", "",
Omschrijving.,
ignore.case = TRUE) %in% c(microorganisms$fullname,
microorganisms.old$fullname)) %>%
dplyr::transmute(fullname = mo_name(Omschrijving.),
snomed = as.integer(Id)) %>% snomed = as.integer(Id)) %>%
filter(!fullname %like% "unknown") dplyr::filter(!fullname %like% "unknown")
snomed_trans <- snomed %>% snomed_trans <- snomed %>%
group_by(fullname) %>% group_by(fullname) %>%
mutate(snomed_list = list(snomed)) %>% mutate(snomed_list = list(snomed)) %>%
@ -51,59 +52,59 @@ rm(microorganisms)
# OLD --------------------------------------------------------------------- # OLD ---------------------------------------------------------------------
baseUrl <- 'https://browser.ihtsdotools.org/snowstorm/snomed-ct' # baseUrl <- 'https://browser.ihtsdotools.org/snowstorm/snomed-ct'
edition <- 'MAIN' # edition <- 'MAIN'
version <- '2019-07-31' # version <- '2019-07-31'
#
microorganisms.snomed <- data.frame(conceptid = character(0), # microorganisms.snomed <- data.frame(conceptid = character(0),
mo = character(0), # mo = character(0),
stringsAsFactors = FALSE) # stringsAsFactors = FALSE)
microorganisms$snomed <- "" # microorganisms$snomed <- ""
#
# for (i in 1:50) { # # for (i in 1:50) {
for (i in 1:1000) { # for (i in 1:1000) {
#
if (i %% 10 == 0) { # if (i %% 10 == 0) {
cat(paste0(i, " - ", cleaner::percentage(i / nrow(microorganisms)), "\n")) # cat(paste0(i, " - ", cleaner::percentage(i / nrow(microorganisms)), "\n"))
} # }
#
mo_data <- microorganisms %>% # mo_data <- microorganisms %>%
filter(mo == microorganisms$mo[i]) %>% # filter(mo == microorganisms$mo[i]) %>%
as.list() # as.list()
#
if (!mo_data$rank %in% c("genus", "species")) { # if (!mo_data$rank %in% c("genus", "species")) {
next # next
} # }
#
searchTerm <- paste0( # searchTerm <- paste0(
ifelse(mo_data$rank == "genus", "Genus ", ""), # ifelse(mo_data$rank == "genus", "Genus ", ""),
mo_data$fullname, # mo_data$fullname,
" (organism)") # " (organism)")
#
url <- paste0(baseUrl, '/browser/', # url <- paste0(baseUrl, '/browser/',
edition, '/', # edition, '/',
version, # version,
'/descriptions?term=', curl::curl_escape(searchTerm), # '/descriptions?term=', curl::curl_escape(searchTerm),
'&mode=fullText&activeFilter=true&limit=', 250) # '&mode=fullText&activeFilter=true&limit=', 250)
results <- url %>% # results <- url %>%
httr::GET() %>% # httr::GET() %>%
httr::content(type = "text", encoding = "UTF-8") %>% # httr::content(type = "text", encoding = "UTF-8") %>%
jsonlite::fromJSON(flatten = TRUE) %>% # jsonlite::fromJSON(flatten = TRUE) %>%
.$items # .$items
if (NROW(results) == 0) { # if (NROW(results) == 0) {
next # next
} else { # } else {
message("Adding ", crayon::italic(mo_data$fullname)) # message("Adding ", crayon::italic(mo_data$fullname))
} # }
#
tryCatch( # tryCatch(
microorganisms$snomed[i] <- results %>% filter(term == searchTerm) %>% pull(concept.conceptId), # microorganisms$snomed[i] <- results %>% filter(term == searchTerm) %>% pull(concept.conceptId),
error = function(e) invisible() # error = function(e) invisible()
) # )
#
if (nrow(results) > 1) { # if (nrow(results) > 1) {
microorganisms.snomed <- microorganisms.snomed %>% # microorganisms.snomed <- microorganisms.snomed %>%
bind_rows(tibble(conceptid = results %>% filter(term != searchTerm) %>% pull(concept.conceptId) %>% unique(), # bind_rows(tibble(conceptid = results %>% filter(term != searchTerm) %>% pull(concept.conceptId) %>% unique(),
mo = as.character(mo_data$mo))) # mo = as.character(mo_data$mo)))
} # }
} # }

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -81,7 +81,7 @@
</button> </button>
<span class="navbar-brand"> <span class="navbar-brand">
<a class="navbar-link" href="https://msberends.gitlab.io/AMR/index.html">AMR (for R)</a> <a class="navbar-link" href="https://msberends.gitlab.io/AMR/index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.1.0.9019</span> <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.1.0.9020</span>
</span> </span>
</div> </div>

View File

@ -81,7 +81,7 @@
</button> </button>
<span class="navbar-brand"> <span class="navbar-brand">
<a class="navbar-link" href="index.html">AMR (for R)</a> <a class="navbar-link" href="index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.1.0.9019</span> <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.1.0.9020</span>
</span> </span>
</div> </div>

View File

@ -81,7 +81,7 @@
</button> </button>
<span class="navbar-brand"> <span class="navbar-brand">
<a class="navbar-link" href="../index.html">AMR (for R)</a> <a class="navbar-link" href="../index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.1.0.9019</span> <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.1.0.9020</span>
</span> </span>
</div> </div>

View File

@ -81,7 +81,7 @@
</button> </button>
<span class="navbar-brand"> <span class="navbar-brand">
<a class="navbar-link" href="index.html">AMR (for R)</a> <a class="navbar-link" href="index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.1.0.9019</span> <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.1.0.9020</span>
</span> </span>
</div> </div>

View File

@ -43,7 +43,7 @@
</button> </button>
<span class="navbar-brand"> <span class="navbar-brand">
<a class="navbar-link" href="index.html">AMR (for R)</a> <a class="navbar-link" href="index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.1.0.9019</span> <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.1.0.9020</span>
</span> </span>
</div> </div>
@ -221,7 +221,7 @@ A methods paper about this package has been preprinted at bioRxiv (DOI: 10.1101/
<a href="#what-can-you-do-with-this-package" class="anchor"></a>What can you do with this package?</h3> <a href="#what-can-you-do-with-this-package" class="anchor"></a>What can you do with this package?</h3>
<p>This package can be used for:</p> <p>This package can be used for:</p>
<ul> <ul>
<li>Reference for the taxonomy of microorganisms, since the package contains all microbial (sub)species from the <a href="http://www.catalogueoflife.org">Catalogue of Life</a> (<a href="./reference/mo_property.html">manual</a>)</li> <li>Reference for the taxonomy of microorganisms, since the package contains all microbial (sub)species from the <a href="http://www.catalogueoflife.org">Catalogue of Life</a> and <a href="https://lpsn.dsmz.de">List of Prokaryotic names with Standing in Nomenclature</a> (<a href="./reference/mo_property.html">manual</a>)</li>
<li>Interpreting raw MIC and disk diffusion values, based on the latest CLSI or EUCAST guidelines (<a href="./reference/as.rsi.html">manual</a>)</li> <li>Interpreting raw MIC and disk diffusion values, based on the latest CLSI or EUCAST guidelines (<a href="./reference/as.rsi.html">manual</a>)</li>
<li>Determining first isolates to be used for AMR analysis (<a href="./reference/first_isolate.html">manual</a>)</li> <li>Determining first isolates to be used for AMR analysis (<a href="./reference/first_isolate.html">manual</a>)</li>
<li>Calculating antimicrobial resistance (<a href="./articles/AMR.html">tutorial</a>)</li> <li>Calculating antimicrobial resistance (<a href="./articles/AMR.html">tutorial</a>)</li>
@ -268,7 +268,7 @@ A methods paper about this package has been preprinted at bioRxiv (DOI: 10.1101/
<div id="microbial-taxonomic-reference-data" class="section level4"> <div id="microbial-taxonomic-reference-data" class="section level4">
<h4 class="hasAnchor"> <h4 class="hasAnchor">
<a href="#microbial-taxonomic-reference-data" class="anchor"></a>Microbial (taxonomic) reference data</h4> <a href="#microbial-taxonomic-reference-data" class="anchor"></a>Microbial (taxonomic) reference data</h4>
<p>This package contains the complete taxonomic tree of almost all ~70,000 microorganisms from the authoritative and comprehensive Catalogue of Life (CoL, <a href="http://www.catalogueoflife.org">www.catalogueoflife.org</a>), supplemented by data from the Deutsche Sammlung von Mikroorganismen und Zellkulturen (DSMZ, <a href="https://www.dsmz.de">www.dsmz.de</a>). This supplementation is needed until the <a href="https://github.com/Sp2000/colplus">CoL+ project</a> is finished, which we await. With <code><a href="reference/catalogue_of_life_version.html">catalogue_of_life_version()</a></code> can be checked which version of the CoL is included in this package.</p> <p>This package contains the complete taxonomic tree of almost all ~70,000 microorganisms from the authoritative and comprehensive Catalogue of Life (CoL, <a href="http://www.catalogueoflife.org">www.catalogueoflife.org</a>), supplemented by data from the List of Prokaryotic names with Standing in Nomenclature (LPSN, <a href="https://lpsn.dsmz.de">lpsn.dsmz.de</a>). This supplementation is needed until the <a href="https://github.com/Sp2000/colplus">CoL+ project</a> is finished, which we await. With <code><a href="reference/catalogue_of_life_version.html">catalogue_of_life_version()</a></code> can be checked which version of the CoL is included in this package.</p>
<p>Read more about which data from the Catalogue of Life <a href="./reference/catalogue_of_life.html">in our manual</a>.</p> <p>Read more about which data from the Catalogue of Life <a href="./reference/catalogue_of_life.html">in our manual</a>.</p>
</div> </div>
<div id="antimicrobial-reference-data" class="section level4"> <div id="antimicrobial-reference-data" class="section level4">

View File

@ -81,7 +81,7 @@
</button> </button>
<span class="navbar-brand"> <span class="navbar-brand">
<a class="navbar-link" href="../index.html">AMR (for R)</a> <a class="navbar-link" href="../index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.1.0.9019</span> <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.1.0.9020</span>
</span> </span>
</div> </div>
@ -229,13 +229,13 @@
<small>Source: <a href='https://gitlab.com/msberends/AMR/blob/master/NEWS.md'><code>NEWS.md</code></a></small> <small>Source: <a href='https://gitlab.com/msberends/AMR/blob/master/NEWS.md'><code>NEWS.md</code></a></small>
</div> </div>
<div id="amr-1-1-0-9019" class="section level1"> <div id="amr-1-1-0-9020" class="section level1">
<h1 class="page-header" data-toc-text="1.1.0.9019"> <h1 class="page-header" data-toc-text="1.1.0.9020">
<a href="#amr-1-1-0-9019" class="anchor"></a>AMR 1.1.0.9019<small> Unreleased </small> <a href="#amr-1-1-0-9020" class="anchor"></a>AMR 1.1.0.9020<small> Unreleased </small>
</h1> </h1>
<div id="last-updated-25-may-2020" class="section level2"> <div id="last-updated-27-may-2020" class="section level2">
<h2 class="hasAnchor"> <h2 class="hasAnchor">
<a href="#last-updated-25-may-2020" class="anchor"></a><small>Last updated: 25-May-2020</small> <a href="#last-updated-27-may-2020" class="anchor"></a><small>Last updated: 27-May-2020</small>
</h2> </h2>
<div id="breaking" class="section level3"> <div id="breaking" class="section level3">
<h3 class="hasAnchor"> <h3 class="hasAnchor">
@ -258,9 +258,22 @@ Negative effects of this change are:
<h3 class="hasAnchor"> <h3 class="hasAnchor">
<a href="#changed" class="anchor"></a>Changed</h3> <a href="#changed" class="anchor"></a>Changed</h3>
<ul> <ul>
<li>Taxonomy:
<ul>
<li>Updated the taxonomy of microorganisms to May 2020, using the Catalogue of Life (CoL), the Global Biodiversity Information Facility (GBIF) and the List of Prokaryotic names with Standing in Nomenclature (LPSN, hosted by DSMZ since February 2020)</li>
<li>Removed the Catalogue of Life IDs (like 776351), since they now work with a species ID (hexadecimal string)</li>
</ul>
</li>
<li>EUCAST rules: <li>EUCAST rules:
<ul> <ul>
<li>The <code><a href="../reference/eucast_rules.html">eucast_rules()</a></code> function no longer applies “other” rules at default that are made available by this package (like setting ampicillin = R when ampicillin + enzym inhibitor = R). The default input value for <code>rules</code> is now <code><a href="https://rdrr.io/r/base/c.html">c("breakpoints", "expert")</a></code> instead of <code>"all"</code>, but this can be changed by the user. To return to the old behaviour, set <code><a href="https://rdrr.io/r/base/options.html">options(AMR.eucast_rules = "all")</a></code>.</li> <li>The <code><a href="../reference/eucast_rules.html">eucast_rules()</a></code> function no longer applies “other” rules at default that are made available by this package (like setting ampicillin = R when ampicillin + enzyme inhibitor = R). The default input value for <code>rules</code> is now <code><a href="https://rdrr.io/r/base/c.html">c("breakpoints", "expert")</a></code> instead of <code>"all"</code>, but this can be changed by the user. To return to the old behaviour, set <code><a href="https://rdrr.io/r/base/options.html">options(AMR.eucast_rules = "all")</a></code>.</li>
<li>Fixed a bug where checking antimicrobial results in the original data were not regarded as valid R/SI values</li>
<li>The “other” rules now apply these two rules to all drug combinations in the <code>antibiotics</code> data set:
<ol>
<li>A drug <strong>with</strong> enzyme inhibitor will be set to S if the drug <strong>without</strong> enzyme inhibitor is S</li>
<li>A drug <strong>without</strong> enzyme inhibitor will be set to R if the drug <strong>with</strong> enzyme inhibitor is R</li>
</ol>
This works for all drug combinations, such as ampicillin/sulbactam, ceftazidime/avibactam, trimethoprim/sulfamethoxazole, etc.</li>
<li>Added official drug names to verbose output of <code><a href="../reference/eucast_rules.html">eucast_rules()</a></code> <li>Added official drug names to verbose output of <code><a href="../reference/eucast_rules.html">eucast_rules()</a></code>
</li> </li>
</ul> </ul>
@ -271,6 +284,7 @@ Negative effects of this change are:
<li>Small fix for some text input that could not be coerced as valid MIC values</li> <li>Small fix for some text input that could not be coerced as valid MIC values</li>
<li>Fix for interpretation of generic CLSI interpretation rules (thanks to Anthony Underwood)</li> <li>Fix for interpretation of generic CLSI interpretation rules (thanks to Anthony Underwood)</li>
<li>Fix for <code><a href="../reference/mo_source.html">set_mo_source()</a></code> to make sure that column <code>mo</code> will always be the second column</li> <li>Fix for <code><a href="../reference/mo_source.html">set_mo_source()</a></code> to make sure that column <code>mo</code> will always be the second column</li>
<li>Added abbreviation “cfsc” for Cefoxitin and “cfav” for Ceftazidime/avibactam</li>
</ul> </ul>
</div> </div>
<div id="other" class="section level3"> <div id="other" class="section level3">

View File

@ -10,7 +10,7 @@ articles:
WHONET: WHONET.html WHONET: WHONET.html
benchmarks: benchmarks.html benchmarks: benchmarks.html
resistance_predict: resistance_predict.html resistance_predict: resistance_predict.html
last_built: 2020-05-24T22:55Z last_built: 2020-05-27T14:37Z
urls: urls:
reference: https://msberends.gitlab.io/AMR/reference reference: https://msberends.gitlab.io/AMR/reference
article: https://msberends.gitlab.io/AMR/articles article: https://msberends.gitlab.io/AMR/articles

View File

@ -82,7 +82,7 @@
</button> </button>
<span class="navbar-brand"> <span class="navbar-brand">
<a class="navbar-link" href="../index.html">AMR (for R)</a> <a class="navbar-link" href="../index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.1.0.9019</span> <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.1.0.9020</span>
</span> </span>
</div> </div>

View File

@ -82,7 +82,7 @@
</button> </button>
<span class="navbar-brand"> <span class="navbar-brand">
<a class="navbar-link" href="../index.html">AMR (for R)</a> <a class="navbar-link" href="../index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.1.0.9019</span> <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.1.0.9020</span>
</span> </span>
</div> </div>

View File

@ -82,7 +82,7 @@
</button> </button>
<span class="navbar-brand"> <span class="navbar-brand">
<a class="navbar-link" href="../index.html">AMR (for R)</a> <a class="navbar-link" href="../index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.1.0.9019</span> <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.1.0.9020</span>
</span> </span>
</div> </div>
@ -399,7 +399,6 @@ This package contains the complete taxonomic tree of almost all microorganisms (
<span class='fu'>as.mo</span>(<span class='st'>"MRSA"</span>) <span class='co'># Methicillin Resistant S. aureus</span> <span class='fu'>as.mo</span>(<span class='st'>"MRSA"</span>) <span class='co'># Methicillin Resistant S. aureus</span>
<span class='fu'>as.mo</span>(<span class='st'>"VISA"</span>) <span class='co'># Vancomycin Intermediate S. aureus</span> <span class='fu'>as.mo</span>(<span class='st'>"VISA"</span>) <span class='co'># Vancomycin Intermediate S. aureus</span>
<span class='fu'>as.mo</span>(<span class='st'>"VRSA"</span>) <span class='co'># Vancomycin Resistant S. aureus</span> <span class='fu'>as.mo</span>(<span class='st'>"VRSA"</span>) <span class='co'># Vancomycin Resistant S. aureus</span>
<span class='fu'>as.mo</span>(<span class='fl'>22242419</span>) <span class='co'># Catalogue of Life ID</span>
<span class='fu'>as.mo</span>(<span class='fl'>115329001</span>) <span class='co'># SNOMED CT code</span> <span class='fu'>as.mo</span>(<span class='fl'>115329001</span>) <span class='co'># SNOMED CT code</span>
<span class='co'># Dyslexia is no problem - these all work:</span> <span class='co'># Dyslexia is no problem - these all work:</span>

View File

@ -82,7 +82,7 @@
</button> </button>
<span class="navbar-brand"> <span class="navbar-brand">
<a class="navbar-link" href="../index.html">AMR (for R)</a> <a class="navbar-link" href="../index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.1.0.9019</span> <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.1.0.9020</span>
</span> </span>
</div> </div>

View File

@ -82,7 +82,7 @@
</button> </button>
<span class="navbar-brand"> <span class="navbar-brand">
<a class="navbar-link" href="../index.html">AMR (for R)</a> <a class="navbar-link" href="../index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.1.0</span> <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.1.0.9020</span>
</span> </span>
</div> </div>
@ -275,9 +275,9 @@ Function <code><a href='as.mo.html'>as.mo()</a></code> to use the data for intel
<span class='co'># Get a note when a species was renamed</span> <span class='co'># Get a note when a species was renamed</span>
<span class='fu'><a href='mo_property.html'>mo_shortname</a></span>(<span class='st'>"Chlamydia psittaci"</span>) <span class='fu'><a href='mo_property.html'>mo_shortname</a></span>(<span class='st'>"Chlamydophila psittaci"</span>)
<span class='co'># Note: 'Chlamydia psittaci' (Page, 1968) was renamed</span> <span class='co'># Note: 'Chlamydophila psittaci' (Everett et al., 1999) was renamed back to</span>
<span class='co'># 'Chlamydophila psittaci' (Everett et al., 1999)</span> <span class='co'># 'Chlamydia psittaci' (Page, 1968)</span>
<span class='co'># [1] "C. psittaci"</span> <span class='co'># [1] "C. psittaci"</span>
<span class='co'># Get any property from the entire taxonomic tree for all included species</span> <span class='co'># Get any property from the entire taxonomic tree for all included species</span>
@ -295,9 +295,9 @@ Function <code><a href='as.mo.html'>as.mo()</a></code> to use the data for intel
<span class='co'># Do not get mistaken - this package is about microorganisms</span> <span class='co'># Do not get mistaken - this package is about microorganisms</span>
<span class='fu'><a href='mo_property.html'>mo_kingdom</a></span>(<span class='st'>"C. elegans"</span>) <span class='fu'><a href='mo_property.html'>mo_kingdom</a></span>(<span class='st'>"C. elegans"</span>)
<span class='co'># [1] "Bacteria" # Bacteria?!</span> <span class='co'># [1] "Fungi" # Fungi?!</span>
<span class='fu'><a href='mo_property.html'>mo_name</a></span>(<span class='st'>"C. elegans"</span>) <span class='fu'><a href='mo_property.html'>mo_name</a></span>(<span class='st'>"C. elegans"</span>)
<span class='co'># [1] "Chroococcus limneticus elegans" # Because a microorganism was found</span></pre> <span class='co'># [1] "Cladosporium elegans" # Because a microorganism was found</span></pre>
</div> </div>
<div class="col-md-3 hidden-xs hidden-sm" id="pkgdown-sidebar"> <div class="col-md-3 hidden-xs hidden-sm" id="pkgdown-sidebar">
<nav id="toc" data-toggle="toc" class="sticky-top"> <nav id="toc" data-toggle="toc" class="sticky-top">
@ -313,7 +313,7 @@ Function <code><a href='as.mo.html'>as.mo()</a></code> to use the data for intel
</div> </div>
<div class="pkgdown"> <div class="pkgdown">
<p>Site built with <a href="https://pkgdown.r-lib.org/">pkgdown</a> 1.5.0.</p> <p>Site built with <a href="https://pkgdown.r-lib.org/">pkgdown</a> 1.5.1.</p>
</div> </div>
</footer> </footer>

View File

@ -81,7 +81,7 @@
</button> </button>
<span class="navbar-brand"> <span class="navbar-brand">
<a class="navbar-link" href="../index.html">AMR (for R)</a> <a class="navbar-link" href="../index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.1.0.9019</span> <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.1.0.9020</span>
</span> </span>
</div> </div>
@ -472,7 +472,7 @@
<td> <td>
<p><code><a href="microorganisms.html">microorganisms</a></code> </p> <p><code><a href="microorganisms.html">microorganisms</a></code> </p>
</td> </td>
<td><p>Data set with 69,447 microorganisms</p></td> <td><p>Data set with 67,107 microorganisms</p></td>
</tr><tr> </tr><tr>
<td> <td>
@ -502,7 +502,7 @@
<td> <td>
<p><code><a href="microorganisms.codes.html">microorganisms.codes</a></code> </p> <p><code><a href="microorganisms.codes.html">microorganisms.codes</a></code> </p>
</td> </td>
<td><p>Translation table for common microorganism codes</p></td> <td><p>Translation table with 5,582 common microorganism codes</p></td>
</tr><tr> </tr><tr>
<td> <td>

View File

@ -6,7 +6,7 @@
<meta http-equiv="X-UA-Compatible" content="IE=edge"> <meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1.0"> <meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Translation table for common microorganism codes — microorganisms.codes • AMR (for R)</title> <title>Translation table with 5,582 common microorganism codes — microorganisms.codes • AMR (for R)</title>
<!-- favicons --> <!-- favicons -->
<link rel="icon" type="image/png" sizes="16x16" href="../favicon-16x16.png"> <link rel="icon" type="image/png" sizes="16x16" href="../favicon-16x16.png">
@ -48,7 +48,7 @@
<link href="../extra.css" rel="stylesheet"> <link href="../extra.css" rel="stylesheet">
<script src="../extra.js"></script> <script src="../extra.js"></script>
<meta property="og:title" content="Translation table for common microorganism codes — microorganisms.codes" /> <meta property="og:title" content="Translation table with 5,582 common microorganism codes — microorganisms.codes" />
<meta property="og:description" content="A data set containing commonly used codes for microorganisms, from laboratory systems and WHONET. Define your own with set_mo_source(). They will all be searched when using as.mo() and consequently all the mo_* functions." /> <meta property="og:description" content="A data set containing commonly used codes for microorganisms, from laboratory systems and WHONET. Define your own with set_mo_source(). They will all be searched when using as.mo() and consequently all the mo_* functions." />
<meta property="og:image" content="https://msberends.gitlab.io/AMR/logo.svg" /> <meta property="og:image" content="https://msberends.gitlab.io/AMR/logo.svg" />
@ -82,7 +82,7 @@
</button> </button>
<span class="navbar-brand"> <span class="navbar-brand">
<a class="navbar-link" href="../index.html">AMR (for R)</a> <a class="navbar-link" href="../index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.1.0</span> <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.1.0.9020</span>
</span> </span>
</div> </div>
@ -226,7 +226,7 @@
<div class="row"> <div class="row">
<div class="col-md-9 contents"> <div class="col-md-9 contents">
<div class="page-header"> <div class="page-header">
<h1>Translation table for common microorganism codes</h1> <h1>Translation table with 5,582 common microorganism codes</h1>
<small class="dont-index">Source: <a href='https://gitlab.com/msberends/AMR/blob/master/R/data.R'><code>R/data.R</code></a></small> <small class="dont-index">Source: <a href='https://gitlab.com/msberends/AMR/blob/master/R/data.R'><code>R/data.R</code></a></small>
<div class="hidden name"><code>microorganisms.codes.Rd</code></div> <div class="hidden name"><code>microorganisms.codes.Rd</code></div>
</div> </div>
@ -240,7 +240,7 @@
<h2 class="hasAnchor" id="format"><a class="anchor" href="#format"></a>Format</h2> <h2 class="hasAnchor" id="format"><a class="anchor" href="#format"></a>Format</h2>
<p>A <code><a href='https://rdrr.io/r/base/data.frame.html'>data.frame</a></code> with 5,585 observations and 2 variables:</p><ul> <p>A <code><a href='https://rdrr.io/r/base/data.frame.html'>data.frame</a></code> with 5,582 observations and 2 variables:</p><ul>
<li><p><code>code</code><br /> Commonly used code of a microorganism</p></li> <li><p><code>code</code><br /> Commonly used code of a microorganism</p></li>
<li><p><code>mo</code><br /> ID of the microorganism in the <a href='microorganisms.html'>microorganisms</a> data set</p></li> <li><p><code>mo</code><br /> ID of the microorganism in the <a href='microorganisms.html'>microorganisms</a> data set</p></li>
</ul> </ul>
@ -276,7 +276,7 @@ This package contains the complete taxonomic tree of almost all microorganisms (
</div> </div>
<div class="pkgdown"> <div class="pkgdown">
<p>Site built with <a href="https://pkgdown.r-lib.org/">pkgdown</a> 1.5.0.</p> <p>Site built with <a href="https://pkgdown.r-lib.org/">pkgdown</a> 1.5.1.</p>
</div> </div>
</footer> </footer>

View File

@ -6,7 +6,7 @@
<meta http-equiv="X-UA-Compatible" content="IE=edge"> <meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1.0"> <meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Data set with 69,447 microorganisms — microorganisms • AMR (for R)</title> <title>Data set with 67,107 microorganisms — microorganisms • AMR (for R)</title>
<!-- favicons --> <!-- favicons -->
<link rel="icon" type="image/png" sizes="16x16" href="../favicon-16x16.png"> <link rel="icon" type="image/png" sizes="16x16" href="../favicon-16x16.png">
@ -48,7 +48,7 @@
<link href="../extra.css" rel="stylesheet"> <link href="../extra.css" rel="stylesheet">
<script src="../extra.js"></script> <script src="../extra.js"></script>
<meta property="og:title" content="Data set with 69,447 microorganisms — microorganisms" /> <meta property="og:title" content="Data set with 67,107 microorganisms — microorganisms" />
<meta property="og:description" content="A data set containing the microbial taxonomy of six kingdoms from the Catalogue of Life. MO codes can be looked up using as.mo()." /> <meta property="og:description" content="A data set containing the microbial taxonomy of six kingdoms from the Catalogue of Life. MO codes can be looked up using as.mo()." />
<meta property="og:image" content="https://msberends.gitlab.io/AMR/logo.svg" /> <meta property="og:image" content="https://msberends.gitlab.io/AMR/logo.svg" />
@ -82,7 +82,7 @@
</button> </button>
<span class="navbar-brand"> <span class="navbar-brand">
<a class="navbar-link" href="../index.html">AMR (for R)</a> <a class="navbar-link" href="../index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.1.0.9004</span> <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.1.0.9020</span>
</span> </span>
</div> </div>
@ -226,7 +226,7 @@
<div class="row"> <div class="row">
<div class="col-md-9 contents"> <div class="col-md-9 contents">
<div class="page-header"> <div class="page-header">
<h1>Data set with 69,447 microorganisms</h1> <h1>Data set with 67,107 microorganisms</h1>
<small class="dont-index">Source: <a href='https://gitlab.com/msberends/AMR/blob/master/R/data.R'><code>R/data.R</code></a></small> <small class="dont-index">Source: <a href='https://gitlab.com/msberends/AMR/blob/master/R/data.R'><code>R/data.R</code></a></small>
<div class="hidden name"><code>microorganisms.Rd</code></div> <div class="hidden name"><code>microorganisms.Rd</code></div>
</div> </div>
@ -240,9 +240,8 @@
<h2 class="hasAnchor" id="format"><a class="anchor" href="#format"></a>Format</h2> <h2 class="hasAnchor" id="format"><a class="anchor" href="#format"></a>Format</h2>
<p>A <code><a href='https://rdrr.io/r/base/data.frame.html'>data.frame</a></code> with 69,447 observations and 17 variables:</p><ul> <p>A <code><a href='https://rdrr.io/r/base/data.frame.html'>data.frame</a></code> with 67,107 observations and 16 variables:</p><ul>
<li><p><code>mo</code><br /> ID of microorganism as used by this package</p></li> <li><p><code>mo</code><br /> ID of microorganism as used by this package</p></li>
<li><p><code>col_id</code><br /> Catalogue of Life ID</p></li>
<li><p><code>fullname</code><br /> Full name, like <code>"Escherichia coli"</code></p></li> <li><p><code>fullname</code><br /> Full name, like <code>"Escherichia coli"</code></p></li>
<li><p><code>kingdom</code>, <code>phylum</code>, <code>class</code>, <code>order</code>, <code>family</code>, <code>genus</code>, <code>species</code>, <code>subspecies</code><br /> Taxonomic rank of the microorganism</p></li> <li><p><code>kingdom</code>, <code>phylum</code>, <code>class</code>, <code>order</code>, <code>family</code>, <code>genus</code>, <code>species</code>, <code>subspecies</code><br /> Taxonomic rank of the microorganism</p></li>
<li><p><code>rank</code><br /> Text of the taxonomic rank of the microorganism, like <code>"species"</code> or <code>"genus"</code></p></li> <li><p><code>rank</code><br /> Text of the taxonomic rank of the microorganism, like <code>"species"</code> or <code>"genus"</code></p></li>
@ -256,6 +255,7 @@
<h2 class="hasAnchor" id="source"><a class="anchor" href="#source"></a>Source</h2> <h2 class="hasAnchor" id="source"><a class="anchor" href="#source"></a>Source</h2>
<p>Catalogue of Life: Annual Checklist (public online taxonomic database), <a href='http://www.catalogueoflife.org'>http://www.catalogueoflife.org</a> (check included annual version with <code><a href='catalogue_of_life_version.html'>catalogue_of_life_version()</a></code>).</p> <p>Catalogue of Life: Annual Checklist (public online taxonomic database), <a href='http://www.catalogueoflife.org'>http://www.catalogueoflife.org</a> (check included annual version with <code><a href='catalogue_of_life_version.html'>catalogue_of_life_version()</a></code>).</p>
<p>Parte, A.C. (2018). LPSN — List of Prokaryotic names with Standing in Nomenclature (bacterio.net), 20 years on. International Journal of Systematic and Evolutionary Microbiology, 68, 1825-1829; doi: 10.1099/ijsem.0.002786</p>
<p>Leibniz Institute DSMZ-German Collection of Microorganisms and Cell Cultures, Germany, Prokaryotic Nomenclature Up-to-Date, <a href='https://www.dsmz.de/services/online-tools/prokaryotic-nomenclature-up-to-date'>https://www.dsmz.de/services/online-tools/prokaryotic-nomenclature-up-to-date</a> (check included version with <code><a href='catalogue_of_life_version.html'>catalogue_of_life_version()</a></code>).</p> <p>Leibniz Institute DSMZ-German Collection of Microorganisms and Cell Cultures, Germany, Prokaryotic Nomenclature Up-to-Date, <a href='https://www.dsmz.de/services/online-tools/prokaryotic-nomenclature-up-to-date'>https://www.dsmz.de/services/online-tools/prokaryotic-nomenclature-up-to-date</a> (check included version with <code><a href='catalogue_of_life_version.html'>catalogue_of_life_version()</a></code>).</p>
<h2 class="hasAnchor" id="details"><a class="anchor" href="#details"></a>Details</h2> <h2 class="hasAnchor" id="details"><a class="anchor" href="#details"></a>Details</h2>
@ -266,7 +266,7 @@
<li><p>1 entry of <em>Blastocystis</em> (<em>Blastocystis hominis</em>), although it officially does not exist (Noel <em>et al.</em> 2005, PMID 15634993)</p></li> <li><p>1 entry of <em>Blastocystis</em> (<em>Blastocystis hominis</em>), although it officially does not exist (Noel <em>et al.</em> 2005, PMID 15634993)</p></li>
<li><p>5 other 'undefined' entries (unknown, unknown Gram negatives, unknown Gram positives, unknown yeast and unknown fungus)</p></li> <li><p>5 other 'undefined' entries (unknown, unknown Gram negatives, unknown Gram positives, unknown yeast and unknown fungus)</p></li>
<li><p>6 families under the Enterobacterales order, according to Adeolu <em>et al.</em> (2016, PMID 27620848), that are not (yet) in the Catalogue of Life</p></li> <li><p>6 families under the Enterobacterales order, according to Adeolu <em>et al.</em> (2016, PMID 27620848), that are not (yet) in the Catalogue of Life</p></li>
<li><p>12,600 species from the DSMZ (Deutsche Sammlung von Mikroorganismen und Zellkulturen) since the DSMZ contains the latest taxonomic information based on recent publications</p></li> <li><p>7,368 species from the DSMZ (Deutsche Sammlung von Mikroorganismen und Zellkulturen) since the DSMZ contains the latest taxonomic information based on recent publications</p></li>
</ul> </ul>
<h3>Direct download</h3> <h3>Direct download</h3>

View File

@ -82,7 +82,7 @@
</button> </button>
<span class="navbar-brand"> <span class="navbar-brand">
<a class="navbar-link" href="../index.html">AMR (for R)</a> <a class="navbar-link" href="../index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.1.0</span> <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.1.0.9020</span>
</span> </span>
</div> </div>
@ -240,10 +240,9 @@
<h2 class="hasAnchor" id="format"><a class="anchor" href="#format"></a>Format</h2> <h2 class="hasAnchor" id="format"><a class="anchor" href="#format"></a>Format</h2>
<p>A <code><a href='https://rdrr.io/r/base/data.frame.html'>data.frame</a></code> with 24,253 observations and 5 variables:</p><ul> <p>A <code><a href='https://rdrr.io/r/base/data.frame.html'>data.frame</a></code> with 12,709 observations and 4 variables:</p><ul>
<li><p><code>col_id</code><br /> Catalogue of Life ID that was originally given</p></li>
<li><p><code>col_id_new</code><br /> New Catalogue of Life ID that responds to an entry in the <a href='microorganisms.html'>microorganisms</a> data set</p></li>
<li><p><code>fullname</code><br /> Old full taxonomic name of the microorganism</p></li> <li><p><code>fullname</code><br /> Old full taxonomic name of the microorganism</p></li>
<li><p><code>fullname_new</code><br /> New full taxonomic name of the microorganism</p></li>
<li><p><code>ref</code><br /> Author(s) and year of concerning scientific publication</p></li> <li><p><code>ref</code><br /> Author(s) and year of concerning scientific publication</p></li>
<li><p><code>prevalence</code><br /> Prevalence of the microorganism, see <code><a href='as.mo.html'>as.mo()</a></code></p></li> <li><p><code>prevalence</code><br /> Prevalence of the microorganism, see <code><a href='as.mo.html'>as.mo()</a></code></p></li>
</ul> </ul>
@ -251,6 +250,7 @@
<h2 class="hasAnchor" id="source"><a class="anchor" href="#source"></a>Source</h2> <h2 class="hasAnchor" id="source"><a class="anchor" href="#source"></a>Source</h2>
<p>Catalogue of Life: Annual Checklist (public online taxonomic database), <a href='http://www.catalogueoflife.org'>http://www.catalogueoflife.org</a> (check included annual version with <code><a href='catalogue_of_life_version.html'>catalogue_of_life_version()</a></code>).</p> <p>Catalogue of Life: Annual Checklist (public online taxonomic database), <a href='http://www.catalogueoflife.org'>http://www.catalogueoflife.org</a> (check included annual version with <code><a href='catalogue_of_life_version.html'>catalogue_of_life_version()</a></code>).</p>
<p>Parte, A.C. (2018). LPSN — List of Prokaryotic names with Standing in Nomenclature (bacterio.net), 20 years on. International Journal of Systematic and Evolutionary Microbiology, 68, 1825-1829; doi: 10.1099/ijsem.0.002786</p>
<h2 class="hasAnchor" id="catalogue-of-life"><a class="anchor" href="#catalogue-of-life"></a>Catalogue of Life</h2> <h2 class="hasAnchor" id="catalogue-of-life"><a class="anchor" href="#catalogue-of-life"></a>Catalogue of Life</h2>
@ -282,7 +282,7 @@ This package contains the complete taxonomic tree of almost all microorganisms (
</div> </div>
<div class="pkgdown"> <div class="pkgdown">
<p>Site built with <a href="https://pkgdown.r-lib.org/">pkgdown</a> 1.5.0.</p> <p>Site built with <a href="https://pkgdown.r-lib.org/">pkgdown</a> 1.5.1.</p>
</div> </div>
</footer> </footer>

View File

@ -36,7 +36,7 @@ The development of this package is part of, related to, or made possible by:
This package can be used for: This package can be used for:
* Reference for the taxonomy of microorganisms, since the package contains all microbial (sub)species from the [Catalogue of Life](http://www.catalogueoflife.org) ([manual](./reference/mo_property.html)) * Reference for the taxonomy of microorganisms, since the package contains all microbial (sub)species from the [Catalogue of Life](http://www.catalogueoflife.org) and [List of Prokaryotic names with Standing in Nomenclature](https://lpsn.dsmz.de) ([manual](./reference/mo_property.html))
* Interpreting raw MIC and disk diffusion values, based on the latest CLSI or EUCAST guidelines ([manual](./reference/as.rsi.html)) * Interpreting raw MIC and disk diffusion values, based on the latest CLSI or EUCAST guidelines ([manual](./reference/as.rsi.html))
* Determining first isolates to be used for AMR analysis ([manual](./reference/first_isolate.html)) * Determining first isolates to be used for AMR analysis ([manual](./reference/first_isolate.html))
* Calculating antimicrobial resistance ([tutorial](./articles/AMR.html)) * Calculating antimicrobial resistance ([tutorial](./articles/AMR.html))
@ -82,7 +82,7 @@ To find out how to conduct AMR analysis, please [continue reading here to get st
#### Microbial (taxonomic) reference data #### Microbial (taxonomic) reference data
This package contains the complete taxonomic tree of almost all ~70,000 microorganisms from the authoritative and comprehensive Catalogue of Life (CoL, [www.catalogueoflife.org](http://www.catalogueoflife.org)), supplemented by data from the Deutsche Sammlung von Mikroorganismen und Zellkulturen (DSMZ, [www.dsmz.de](https://www.dsmz.de)). This supplementation is needed until the [CoL+ project](https://github.com/Sp2000/colplus) is finished, which we await. Use `catalogue_of_life_version()` to check which version of the CoL is included in this package. This package contains the complete taxonomic tree of almost all ~70,000 microorganisms from the authoritative and comprehensive Catalogue of Life (CoL, [www.catalogueoflife.org](http://www.catalogueoflife.org)), supplemented by data from the List of Prokaryotic names with Standing in Nomenclature (LPSN, [lpsn.dsmz.de](https://lpsn.dsmz.de)). This supplementation is needed until the [CoL+ project](https://github.com/Sp2000/colplus) is finished, which we await. Use `catalogue_of_life_version()` to check which version of the CoL is included in this package.
Read more about which data from the Catalogue of Life is included [in our manual](./reference/catalogue_of_life.html). Read more about which data from the Catalogue of Life is included [in our manual](./reference/catalogue_of_life.html).

View File

@ -166,7 +166,6 @@ as.mo("Zthafilokkoockus oureuz") # handles incorrect spelling
as.mo("MRSA") # Methicillin Resistant S. aureus as.mo("MRSA") # Methicillin Resistant S. aureus
as.mo("VISA") # Vancomycin Intermediate S. aureus as.mo("VISA") # Vancomycin Intermediate S. aureus
as.mo("VRSA") # Vancomycin Resistant S. aureus as.mo("VRSA") # Vancomycin Resistant S. aureus
as.mo(22242419) # Catalogue of Life ID
as.mo(115329001) # SNOMED CT code as.mo(115329001) # SNOMED CT code
# Dyslexia is no problem - these all work: # Dyslexia is no problem - these all work:

View File

@ -42,9 +42,9 @@ catalogue_of_life_version()
# Get a note when a species was renamed # Get a note when a species was renamed
mo_shortname("Chlamydia psittaci") mo_shortname("Chlamydophila psittaci")
# Note: 'Chlamydia psittaci' (Page, 1968) was renamed # Note: 'Chlamydophila psittaci' (Everett et al., 1999) was renamed back to
# 'Chlamydophila psittaci' (Everett et al., 1999) # 'Chlamydia psittaci' (Page, 1968)
# [1] "C. psittaci" # [1] "C. psittaci"
# Get any property from the entire taxonomic tree for all included species # Get any property from the entire taxonomic tree for all included species
@ -62,9 +62,9 @@ mo_ref("E. coli")
# Do not get mistaken - this package is about microorganisms # Do not get mistaken - this package is about microorganisms
mo_kingdom("C. elegans") mo_kingdom("C. elegans")
# [1] "Bacteria" # Bacteria?! # [1] "Fungi" # Fungi?!
mo_name("C. elegans") mo_name("C. elegans")
# [1] "Chroococcus limneticus elegans" # Because a microorganism was found # [1] "Cladosporium elegans" # Because a microorganism was found
} }
\seealso{ \seealso{
Data set \link{microorganisms} for the actual data. \cr Data set \link{microorganisms} for the actual data. \cr

View File

@ -3,12 +3,11 @@
\docType{data} \docType{data}
\name{microorganisms} \name{microorganisms}
\alias{microorganisms} \alias{microorganisms}
\title{Data set with 69,447 microorganisms} \title{Data set with 67,107 microorganisms}
\format{ \format{
A \code{\link{data.frame}} with 69,447 observations and 17 variables: A \code{\link{data.frame}} with 67,107 observations and 16 variables:
\itemize{ \itemize{
\item \code{mo}\cr ID of microorganism as used by this package \item \code{mo}\cr ID of microorganism as used by this package
\item \code{col_id}\cr Catalogue of Life ID
\item \code{fullname}\cr Full name, like \code{"Escherichia coli"} \item \code{fullname}\cr Full name, like \code{"Escherichia coli"}
\item \code{kingdom}, \code{phylum}, \code{class}, \code{order}, \code{family}, \code{genus}, \code{species}, \code{subspecies}\cr Taxonomic rank of the microorganism \item \code{kingdom}, \code{phylum}, \code{class}, \code{order}, \code{family}, \code{genus}, \code{species}, \code{subspecies}\cr Taxonomic rank of the microorganism
\item \code{rank}\cr Text of the taxonomic rank of the microorganism, like \code{"species"} or \code{"genus"} \item \code{rank}\cr Text of the taxonomic rank of the microorganism, like \code{"species"} or \code{"genus"}
@ -22,6 +21,8 @@ A \code{\link{data.frame}} with 69,447 observations and 17 variables:
\source{ \source{
Catalogue of Life: Annual Checklist (public online taxonomic database), \url{http://www.catalogueoflife.org} (check included annual version with \code{\link[=catalogue_of_life_version]{catalogue_of_life_version()}}). Catalogue of Life: Annual Checklist (public online taxonomic database), \url{http://www.catalogueoflife.org} (check included annual version with \code{\link[=catalogue_of_life_version]{catalogue_of_life_version()}}).
Parte, A.C. (2018). LPSN — List of Prokaryotic names with Standing in Nomenclature (bacterio.net), 20 years on. International Journal of Systematic and Evolutionary Microbiology, 68, 1825-1829; doi: 10.1099/ijsem.0.002786
Leibniz Institute DSMZ-German Collection of Microorganisms and Cell Cultures, Germany, Prokaryotic Nomenclature Up-to-Date, \url{https://www.dsmz.de/services/online-tools/prokaryotic-nomenclature-up-to-date} (check included version with \code{\link[=catalogue_of_life_version]{catalogue_of_life_version()}}). Leibniz Institute DSMZ-German Collection of Microorganisms and Cell Cultures, Germany, Prokaryotic Nomenclature Up-to-Date, \url{https://www.dsmz.de/services/online-tools/prokaryotic-nomenclature-up-to-date} (check included version with \code{\link[=catalogue_of_life_version]{catalogue_of_life_version()}}).
} }
\usage{ \usage{
@ -39,7 +40,7 @@ Manually added were:
\item 1 entry of \emph{Blastocystis} (\emph{Blastocystis hominis}), although it officially does not exist (Noel \emph{et al.} 2005, PMID 15634993) \item 1 entry of \emph{Blastocystis} (\emph{Blastocystis hominis}), although it officially does not exist (Noel \emph{et al.} 2005, PMID 15634993)
\item 5 other 'undefined' entries (unknown, unknown Gram negatives, unknown Gram positives, unknown yeast and unknown fungus) \item 5 other 'undefined' entries (unknown, unknown Gram negatives, unknown Gram positives, unknown yeast and unknown fungus)
\item 6 families under the Enterobacterales order, according to Adeolu \emph{et al.} (2016, PMID 27620848), that are not (yet) in the Catalogue of Life \item 6 families under the Enterobacterales order, according to Adeolu \emph{et al.} (2016, PMID 27620848), that are not (yet) in the Catalogue of Life
\item 12,600 species from the DSMZ (Deutsche Sammlung von Mikroorganismen und Zellkulturen) since the DSMZ contains the latest taxonomic information based on recent publications \item 7,368 species from the DSMZ (Deutsche Sammlung von Mikroorganismen und Zellkulturen) since the DSMZ contains the latest taxonomic information based on recent publications
} }
\subsection{Direct download}{ \subsection{Direct download}{

View File

@ -3,9 +3,9 @@
\docType{data} \docType{data}
\name{microorganisms.codes} \name{microorganisms.codes}
\alias{microorganisms.codes} \alias{microorganisms.codes}
\title{Translation table for common microorganism codes} \title{Translation table with 5,582 common microorganism codes}
\format{ \format{
A \code{\link{data.frame}} with 5,585 observations and 2 variables: A \code{\link{data.frame}} with 5,582 observations and 2 variables:
\itemize{ \itemize{
\item \code{code}\cr Commonly used code of a microorganism \item \code{code}\cr Commonly used code of a microorganism
\item \code{mo}\cr ID of the microorganism in the \link{microorganisms} data set \item \code{mo}\cr ID of the microorganism in the \link{microorganisms} data set

View File

@ -5,17 +5,18 @@
\alias{microorganisms.old} \alias{microorganisms.old}
\title{Data set with previously accepted taxonomic names} \title{Data set with previously accepted taxonomic names}
\format{ \format{
A \code{\link{data.frame}} with 24,253 observations and 5 variables: A \code{\link{data.frame}} with 12,709 observations and 4 variables:
\itemize{ \itemize{
\item \code{col_id}\cr Catalogue of Life ID that was originally given
\item \code{col_id_new}\cr New Catalogue of Life ID that responds to an entry in the \link{microorganisms} data set
\item \code{fullname}\cr Old full taxonomic name of the microorganism \item \code{fullname}\cr Old full taxonomic name of the microorganism
\item \code{fullname_new}\cr New full taxonomic name of the microorganism
\item \code{ref}\cr Author(s) and year of concerning scientific publication \item \code{ref}\cr Author(s) and year of concerning scientific publication
\item \code{prevalence}\cr Prevalence of the microorganism, see \code{\link[=as.mo]{as.mo()}} \item \code{prevalence}\cr Prevalence of the microorganism, see \code{\link[=as.mo]{as.mo()}}
} }
} }
\source{ \source{
Catalogue of Life: Annual Checklist (public online taxonomic database), \url{http://www.catalogueoflife.org} (check included annual version with \code{\link[=catalogue_of_life_version]{catalogue_of_life_version()}}). Catalogue of Life: Annual Checklist (public online taxonomic database), \url{http://www.catalogueoflife.org} (check included annual version with \code{\link[=catalogue_of_life_version]{catalogue_of_life_version()}}).
Parte, A.C. (2018). LPSN — List of Prokaryotic names with Standing in Nomenclature (bacterio.net), 20 years on. International Journal of Systematic and Evolutionary Microbiology, 68, 1825-1829; doi: 10.1099/ijsem.0.002786
} }
\usage{ \usage{
microorganisms.old microorganisms.old

View File

@ -56,11 +56,11 @@ test_that("creation of data sets is valid", {
expect_lt(nrow(df[which(df$prevalence == 2), ]), nrow(df[which(df$prevalence == 3), ])) expect_lt(nrow(df[which(df$prevalence == 2), ]), nrow(df[which(df$prevalence == 3), ]))
expect_true(all(c("mo", "fullname", expect_true(all(c("mo", "fullname",
"kingdom", "phylum", "class", "order", "family", "genus", "species", "subspecies", "kingdom", "phylum", "class", "order", "family", "genus", "species", "subspecies",
"rank", "col_id", "species_id", "source", "ref", "prevalence", "rank", "ref", "species_id", "source", "prevalence", "snomed",
"kingdom_index", "fullname_lower", "g_species") %in% colnames(df))) "kingdom_index", "fullname_lower", "g_species") %in% colnames(df)))
olddf <- create_MO.old_lookup() olddf <- create_MO.old_lookup()
expect_true(all(c("col_id", "col_id_new", "fullname", "ref", "prevalence", expect_true(all(c("fullname", "fullname_new", "ref", "prevalence",
"fullname_lower", "g_species") %in% colnames(olddf))) "fullname_lower", "g_species") %in% colnames(olddf)))
old <- make_trans_tbl() old <- make_trans_tbl()

View File

@ -34,7 +34,6 @@ test_that("as.mo works", {
expect_equal(as.character(as.mo("Escherichia coli")), "B_ESCHR_COLI") expect_equal(as.character(as.mo("Escherichia coli")), "B_ESCHR_COLI")
expect_equal(as.character(as.mo("Escherichia coli")), "B_ESCHR_COLI") expect_equal(as.character(as.mo("Escherichia coli")), "B_ESCHR_COLI")
expect_equal(as.character(as.mo(22242416)), "B_ESCHR_COLI")
expect_equal(as.character(as.mo(112283007)), "B_ESCHR_COLI") expect_equal(as.character(as.mo(112283007)), "B_ESCHR_COLI")
expect_equal(as.character(as.mo("Escherichia species")), "B_ESCHR") expect_equal(as.character(as.mo("Escherichia species")), "B_ESCHR")
expect_equal(as.character(as.mo("Escherichia")), "B_ESCHR") expect_equal(as.character(as.mo("Escherichia")), "B_ESCHR")
@ -45,7 +44,7 @@ test_that("as.mo works", {
expect_equal(as.character(as.mo("Klebsiella")), "B_KLBSL") expect_equal(as.character(as.mo("Klebsiella")), "B_KLBSL")
expect_equal(as.character(as.mo("K. pneu rhino")), "B_KLBSL_PNMN_RHNS") # K. pneumoniae subspp. rhinoscleromatis expect_equal(as.character(as.mo("K. pneu rhino")), "B_KLBSL_PNMN_RHNS") # K. pneumoniae subspp. rhinoscleromatis
expect_equal(as.character(as.mo("Bartonella")), "B_BRTNL") expect_equal(as.character(as.mo("Bartonella")), "B_BRTNL")
expect_equal(as.character(as.mo("C. difficile")), "B_CTRDM_DFFC") expect_equal(as.character(as.mo("C. difficile")), "B_CRDDS_DFFC")
expect_equal(as.character(as.mo("L. pneumophila")), "B_LGNLL_PNMP") expect_equal(as.character(as.mo("L. pneumophila")), "B_LGNLL_PNMP")
expect_equal(as.character(as.mo("Strepto")), "B_STRPT") expect_equal(as.character(as.mo("Strepto")), "B_STRPT")
expect_equal(as.character(as.mo("Streptococcus")), "B_STRPT") # not Peptostreptoccus expect_equal(as.character(as.mo("Streptococcus")), "B_STRPT") # not Peptostreptoccus
@ -99,11 +98,11 @@ test_that("as.mo works", {
# unprevalent MO # unprevalent MO
expect_identical( expect_identical(
as.character( as.character(
as.mo(c("burnod", as.mo(c("parnod",
"B. nodosa", "P. nodosa",
"B nodosa", "P nodosa",
"Burkholderia nodosa"))), "Paraburkholderia nodosa"))),
rep("B_BRKHL_NODS", 4)) rep("B_PRBRK_NODS", 4))
# empty values # empty values
expect_identical(as.character(as.mo(c("", NA, NaN))), rep(NA_character_, 3)) expect_identical(as.character(as.mo(c("", NA, NaN))), rep(NA_character_, 3))
@ -239,7 +238,7 @@ test_that("as.mo works", {
# Salmonella (City) are all actually Salmonella enterica spp (City) # Salmonella (City) are all actually Salmonella enterica spp (City)
expect_equal(suppressWarnings(mo_name(c("Salmonella Goettingen", "Salmonella Typhimurium", "Salmonella Group A"))), expect_equal(suppressWarnings(mo_name(c("Salmonella Goettingen", "Salmonella Typhimurium", "Salmonella Group A"))),
c("Salmonella enterica", "Salmonella typhimurium", "Salmonella")) c("Salmonella enterica", "Salmonella enterica", "Salmonella"))
# no viruses # no viruses
expect_equal(as.character(as.mo("Virus")), NA_character_) expect_equal(as.character(as.mo("Virus")), NA_character_)

View File

@ -93,8 +93,7 @@ test_that("mo_property works", {
expect_identical(suppressWarnings(mo_ref("Chlamydia psittaci")), "Page, 1968") expect_identical(suppressWarnings(mo_ref("Chlamydia psittaci")), "Page, 1968")
expect_identical(mo_ref("Chlamydophila psittaci"), "Everett et al., 1999") expect_identical(mo_ref("Chlamydophila psittaci"), "Everett et al., 1999")
expect_equal(mo_snomed("Escherichia coli"), expect_equal(mo_snomed("Escherichia coli"), 112283007)
c(112283007, 116395006, 116396007, 103429008, 83285000, 116394005, 407166006, 457914007))
# old codes must throw a warning in mo_* family # old codes must throw a warning in mo_* family
expect_warning(mo_name(c("B_ESCHR_COL", "B_STPHY_AUR"))) expect_warning(mo_name(c("B_ESCHR_COL", "B_STPHY_AUR")))