1
0
mirror of https://github.com/msberends/AMR.git synced 2025-07-08 07:51:57 +02:00

(v0.9.0.9016) Support SNOMED codes

This commit is contained in:
2020-01-27 19:14:23 +01:00
parent 42b079cdb7
commit a13c62e6e8
31 changed files with 456 additions and 342 deletions

View File

@ -69,7 +69,7 @@
#'
#' A data set containing the microbial taxonomy of six kingdoms from the Catalogue of Life. MO codes can be looked up using [as.mo()].
#' @inheritSection catalogue_of_life Catalogue of Life
#' @format A [`data.frame`] with 69,447 observations and 16 variables:
#' @format A [`data.frame`] with 69,447 observations and 17 variables:
#' - `mo`\cr ID of microorganism as used by this package
#' - `col_id`\cr Catalogue of Life ID
#' - `fullname`\cr Full name, like `"Escherichia coli"`
@ -79,6 +79,7 @@
#' - `species_id`\cr ID of the species as used by the Catalogue of Life
#' - `source`\cr Either "CoL", "DSMZ" (see Source) or "manually added"
#' - `prevalence`\cr Prevalence of the microorganism, see [as.mo()]
#' - `snomed`\cr SNOMED code of the microorganism. Use [mo_snomed()] to retrieve it quickly, see [mo_property()].
#' @details Manually added were:
#' - 11 entries of *Streptococcus* (beta-haemolytic: groups A, B, C, D, F, G, H, K and unspecified; other: viridans, milleri)
#' - 2 entries of *Staphylococcus* (coagulase-negative (CoNS) and coagulase-positive (CoPS))
@ -145,7 +146,7 @@ catalogue_of_life <- list(
#' - `gender`\cr gender of the patient
#' - `patient_id`\cr ID of the patient
#' - `mo`\cr ID of microorganism created with [as.mo()], see also [microorganisms]
#' - `PEN:RIF`\cr 40 different antibiotics with class [`rsi`] (see [as.rsi()]); these column names occur in [antibiotics] data set and can be translated with [ab_name()]
#' - `PEN:RIF`\cr 40 different antibiotics with class [`rsi`] (see [as.rsi()]); these column names occur in the [antibiotics] data set and can be translated with [ab_name()]
#' @inheritSection AMR Read more on our website!
"example_isolates"
@ -182,9 +183,9 @@ catalogue_of_life <- list(
#' @inheritSection AMR Read more on our website!
"WHONET"
#' Data set for RSI interpretation
#' Data set for R/SI interpretation
#'
#' Data set to interpret MIC and disk diffusion to RSI values. Included guidelines are CLSI (2011-2019) and EUCAST (2011-2019). Use [as.rsi()] to transform MICs or disks measurements to RSI values.
#' Data set to interpret MIC and disk diffusion to R/SI values. Included guidelines are CLSI (2011-2019) and EUCAST (2011-2019). Use [as.rsi()] to transform MICs or disks measurements to R/SI values.
#' @format A [`data.frame`] with 13,975 observations and 9 variables:
#' - `guideline`\cr Name of the guideline
#' - `method`\cr Either "MIC" or "DISK"
@ -195,32 +196,7 @@ catalogue_of_life <- list(
#' - `disk_dose`\cr Dose of the used disk diffusion method
#' - `breakpoint_S`\cr Lowest MIC value or highest number of millimeters that leads to "S"
#' - `breakpoint_R`\cr Highest MIC value or lowest number of millimeters that leads to "R"
#' @details The repository of this `AMR` package contains a file comprising this exact data set: [https://gitlab.com/msberends/AMR/blob/master/data-raw/rsi_translation.txt]. This file **allows for machine reading EUCAST and CLSI guidelines**, which is almost impossible with the Excel and PDF files distributed by EUCAST and CLSI. This file is updated automatically.
#' @inheritSection AMR Read more on our website!
"rsi_translation"
# transforms data set to data.frame with only ASCII values, to comply with CRAN policies
dataset_UTF8_to_ASCII <- function(df) {
trans <- function(vect) {
iconv(vect, from = "UTF-8", to = "ASCII//TRANSLIT")
}
df <- as.data.frame(df, stringsAsFactors = FALSE)
for (i in seq_len(NCOL(df))) {
col <- df[, i]
if (is.list(col)) {
for (j in seq_len(length(col))) {
col[[j]] <- trans(col[[j]])
}
df[, i] <- list(col)
} else {
if (is.factor(col)) {
levels(col) <- trans(levels(col))
} else if (is.character(col)) {
col <- trans(col)
} else {
col
}
df[, i] <- col
}
}
df
}

View File

@ -144,3 +144,31 @@ class_integrity_check <- function(value, type, check_vector) {
}
value
}
# transforms data set to data.frame with only ASCII values, to comply with CRAN policies
dataset_UTF8_to_ASCII <- function(df) {
trans <- function(vect) {
iconv(vect, from = "UTF-8", to = "ASCII//TRANSLIT")
}
df <- as.data.frame(df, stringsAsFactors = FALSE)
for (i in seq_len(NCOL(df))) {
col <- df[, i]
if (is.list(col)) {
col <- lapply(col, function(j) trans(j))
# for (j in seq_len(length(col))) {
# col[[j]] <- trans(col[[j]])
# }
df[, i] <- list(col)
} else {
if (is.factor(col)) {
levels(col) <- trans(levels(col))
} else if (is.character(col)) {
col <- trans(col)
} else {
col
}
df[, i] <- col
}
}
df
}

66
R/mo.R
View File

@ -132,11 +132,12 @@
#' as.mo("Staphylococcus aureus")
#' as.mo("Staphylococcus aureus (MRSA)")
#' as.mo("Zthafilokkoockus oureuz") # handles incorrect spelling
#' as.mo("MRSA") # Methicillin Resistant S. aureus
#' as.mo("VISA") # Vancomycin Intermediate S. aureus
#' as.mo("VRSA") # Vancomycin Resistant S. aureus
#' as.mo(22242419) # Catalogue of Life ID
#'
#' as.mo("MRSA") # Methicillin Resistant S. aureus
#' as.mo("VISA") # Vancomycin Intermediate S. aureus
#' as.mo("VRSA") # Vancomycin Resistant S. aureus
#' as.mo(22242419) # Catalogue of Life ID
#' as.mo(115329001) # SNOMED CT code
#'
#' # Dyslexia is no problem - these all work:
#' as.mo("Ureaplasma urealyticum")
#' as.mo("Ureaplasma urealyticus")
@ -567,11 +568,29 @@ exec_as.mo <- function(x,
cat(paste0(blue("x_trimmed_without_group"), ' "', x_trimmed_without_group, '"\n'))
}
progress <- progress_estimated(n = length(x), min_time = 3)
if (initial_search == TRUE) {
progress <- progress_estimated(n = length(x), min_time = 3)
# before we start, omit the ones that are obvious - MO codes and full names
skip_vect <- rep(FALSE, length(x))
skip_vect[toupper(x_backup) %in% reference_data_to_use$mo] <- TRUE
skip_vect[tolower(x_backup) %in% reference_data_to_use$fullname_lower] <- TRUE
x[toupper(x_backup) %in% reference_data_to_use$mo] <- reference_data_to_use[data.table(mo = toupper(x_backup[toupper(x_backup) %in% reference_data_to_use$mo])),
on = "mo",
..property][[1]]
x[tolower(x_backup) %in% reference_data_to_use$fullname_lower] <- reference_data_to_use[data.table(fullname_lower = tolower(x_backup[tolower(x_backup) %in% reference_data_to_use$fullname_lower])),
on = "fullname_lower",
..property][[1]]
}
for (i in seq_len(length(x))) {
progress$tick()$print()
if (initial_search == TRUE) {
progress$tick()$print()
if (isTRUE(skip_vect[i])) {
next
}
}
mo_hist <- get_mo_history(x_backup[i], uncertainty_level, force = force_mo_history, disable = disable_mo_history)
if (initial_search == TRUE & !any(is.na(mo_hist))) {
@ -633,10 +652,27 @@ exec_as.mo <- function(x,
next
}
# exact SNOMED code
if (x_backup[i] %like% "^[0-9]+$") {
snomed_found <- unlist(lapply(reference_data_to_use$snomed,
function(s) if (x_backup[i] %in% s) {
TRUE
} else {
FALSE
}))
found <- reference_data_to_use[snomed_found == TRUE,
..property][[1]]
if (length(found) > 0) {
x[i] <- found[1L]
# don't save to history, as all items are already in microorganisms
next
}
}
# very probable: is G. species
found <- reference_data_to_use[g_species %in% gsub("[^a-z0-9/ \\-]+", "",
tolower(c(x_backup[i], x_backup_without_spp[i]))),
..property][[1]]
# very probable: is G. species
if (length(found) > 0) {
x[i] <- found[1L]
if (initial_search == TRUE) {
@ -1634,6 +1670,7 @@ exec_as.mo <- function(x,
msg <- paste0(msg, ": ", paste('"', unique(failures), '"', sep = "", collapse = ", "))
}
msg <- paste0(msg, ". Use mo_failures() to review ", plural[2], ". Edit the `allow_uncertain` parameter if needed (see ?as.mo).")
cat("\n")
warning(red(msg),
call. = FALSE,
immediate. = TRUE) # thus will always be shown, even if >= warnings
@ -1646,8 +1683,9 @@ exec_as.mo <- function(x,
if (NROW(uncertainties) > 1) {
plural <- c("s", "them", "were")
}
msg <- paste0("\nResult", plural[1], " of ", nr2char(NROW(uncertainties)), " value", plural[1],
msg <- paste0("Result", plural[1], " of ", nr2char(NROW(uncertainties)), " value", plural[1],
" ", plural[3], " guessed with uncertainty. Use mo_uncertainties() to review ", plural[2], ".")
cat("\n")
warning(red(msg),
call. = FALSE,
immediate. = TRUE) # thus will always be shown, even if >= warnings
@ -2036,6 +2074,14 @@ unregex <- function(x) {
get_mo_code <- function(x, property) {
if (property == "mo") {
unique(x)
} else if (property == "snomed") {
found <- unlist(lapply(microorganismsDT$snomed,
function(s) if (any(x %in% s, na.rm = TRUE)) {
TRUE
} else {
FALSE
}))
microorganismsDT$mo[found == TRUE]
} else {
microorganismsDT[get(property) == x, "mo"][[1]]
}

View File

@ -45,6 +45,7 @@
#' - An [`integer`] in case of [mo_year()]
#' - A [`list`] in case of [mo_taxonomy()] and [mo_info()]
#' - A named [`character`] in case of [mo_url()]
#' - A [`double`] in case of [mo_snomed()]
#' - A [`character`] in all other cases
#' @export
#' @seealso [microorganisms]
@ -62,11 +63,12 @@
#'
#' # colloquial properties ----------------------------------------------------
#' mo_name("E. coli") # "Escherichia coli"
#' mo_fullname("E. coli") # "Escherichia coli", same as mo_name()
#' mo_fullname("E. coli") # "Escherichia coli" - same as mo_name()
#' mo_shortname("E. coli") # "E. coli"
#'
#' # other properties ---------------------------------------------------------
#' mo_gramstain("E. coli") # "Gram-negative"
#' mo_snomed("E. coli") # 112283007, 116395006, ... (SNOMED codes)
#' mo_type("E. coli") # "Bacteria" (equal to kingdom, but may be translated)
#' mo_rank("E. coli") # "species"
#' mo_url("E. coli") # get the direct url to the online database entry
@ -249,6 +251,12 @@ mo_gramstain <- function(x, language = get_locale(), ...) {
translate_AMR(x, language = language, only_unknown = FALSE)
}
#' @rdname mo_property
#' @export
mo_snomed <- function(x, ...) {
mo_validate(x = x, property = "snomed", ...)
}
#' @rdname mo_property
#' @export
mo_ref <- function(x, ...) {
@ -428,6 +436,8 @@ mo_validate <- function(x, property, ...) {
return(to_class_mo(x))
} else if (property == "col_id") {
return(as.integer(x))
} else if (property == "snomed") {
return(as.double(eval(parse(text = x))))
} else {
return(x)
}

View File

@ -21,8 +21,6 @@
#' @importFrom data.table as.data.table setkey
.onLoad <- function(libname, pkgname) {
# packageStartupMessage("Loading taxonomic reference data")
# get new functions not available in older versions of R
backports::import(pkgname)