mirror of
https://github.com/msberends/AMR.git
synced 2025-07-08 07:51:57 +02:00
(v0.9.0.9016) Support SNOMED codes
This commit is contained in:
38
R/data.R
38
R/data.R
@ -69,7 +69,7 @@
|
||||
#'
|
||||
#' A data set containing the microbial taxonomy of six kingdoms from the Catalogue of Life. MO codes can be looked up using [as.mo()].
|
||||
#' @inheritSection catalogue_of_life Catalogue of Life
|
||||
#' @format A [`data.frame`] with 69,447 observations and 16 variables:
|
||||
#' @format A [`data.frame`] with 69,447 observations and 17 variables:
|
||||
#' - `mo`\cr ID of microorganism as used by this package
|
||||
#' - `col_id`\cr Catalogue of Life ID
|
||||
#' - `fullname`\cr Full name, like `"Escherichia coli"`
|
||||
@ -79,6 +79,7 @@
|
||||
#' - `species_id`\cr ID of the species as used by the Catalogue of Life
|
||||
#' - `source`\cr Either "CoL", "DSMZ" (see Source) or "manually added"
|
||||
#' - `prevalence`\cr Prevalence of the microorganism, see [as.mo()]
|
||||
#' - `snomed`\cr SNOMED code of the microorganism. Use [mo_snomed()] to retrieve it quickly, see [mo_property()].
|
||||
#' @details Manually added were:
|
||||
#' - 11 entries of *Streptococcus* (beta-haemolytic: groups A, B, C, D, F, G, H, K and unspecified; other: viridans, milleri)
|
||||
#' - 2 entries of *Staphylococcus* (coagulase-negative (CoNS) and coagulase-positive (CoPS))
|
||||
@ -145,7 +146,7 @@ catalogue_of_life <- list(
|
||||
#' - `gender`\cr gender of the patient
|
||||
#' - `patient_id`\cr ID of the patient
|
||||
#' - `mo`\cr ID of microorganism created with [as.mo()], see also [microorganisms]
|
||||
#' - `PEN:RIF`\cr 40 different antibiotics with class [`rsi`] (see [as.rsi()]); these column names occur in [antibiotics] data set and can be translated with [ab_name()]
|
||||
#' - `PEN:RIF`\cr 40 different antibiotics with class [`rsi`] (see [as.rsi()]); these column names occur in the [antibiotics] data set and can be translated with [ab_name()]
|
||||
#' @inheritSection AMR Read more on our website!
|
||||
"example_isolates"
|
||||
|
||||
@ -182,9 +183,9 @@ catalogue_of_life <- list(
|
||||
#' @inheritSection AMR Read more on our website!
|
||||
"WHONET"
|
||||
|
||||
#' Data set for RSI interpretation
|
||||
#' Data set for R/SI interpretation
|
||||
#'
|
||||
#' Data set to interpret MIC and disk diffusion to RSI values. Included guidelines are CLSI (2011-2019) and EUCAST (2011-2019). Use [as.rsi()] to transform MICs or disks measurements to RSI values.
|
||||
#' Data set to interpret MIC and disk diffusion to R/SI values. Included guidelines are CLSI (2011-2019) and EUCAST (2011-2019). Use [as.rsi()] to transform MICs or disks measurements to R/SI values.
|
||||
#' @format A [`data.frame`] with 13,975 observations and 9 variables:
|
||||
#' - `guideline`\cr Name of the guideline
|
||||
#' - `method`\cr Either "MIC" or "DISK"
|
||||
@ -195,32 +196,7 @@ catalogue_of_life <- list(
|
||||
#' - `disk_dose`\cr Dose of the used disk diffusion method
|
||||
#' - `breakpoint_S`\cr Lowest MIC value or highest number of millimeters that leads to "S"
|
||||
#' - `breakpoint_R`\cr Highest MIC value or lowest number of millimeters that leads to "R"
|
||||
#' @details The repository of this `AMR` package contains a file comprising this exact data set: [https://gitlab.com/msberends/AMR/blob/master/data-raw/rsi_translation.txt]. This file **allows for machine reading EUCAST and CLSI guidelines**, which is almost impossible with the Excel and PDF files distributed by EUCAST and CLSI. This file is updated automatically.
|
||||
|
||||
#' @inheritSection AMR Read more on our website!
|
||||
"rsi_translation"
|
||||
|
||||
# transforms data set to data.frame with only ASCII values, to comply with CRAN policies
|
||||
dataset_UTF8_to_ASCII <- function(df) {
|
||||
trans <- function(vect) {
|
||||
iconv(vect, from = "UTF-8", to = "ASCII//TRANSLIT")
|
||||
}
|
||||
df <- as.data.frame(df, stringsAsFactors = FALSE)
|
||||
for (i in seq_len(NCOL(df))) {
|
||||
col <- df[, i]
|
||||
if (is.list(col)) {
|
||||
for (j in seq_len(length(col))) {
|
||||
col[[j]] <- trans(col[[j]])
|
||||
}
|
||||
df[, i] <- list(col)
|
||||
} else {
|
||||
if (is.factor(col)) {
|
||||
levels(col) <- trans(levels(col))
|
||||
} else if (is.character(col)) {
|
||||
col <- trans(col)
|
||||
} else {
|
||||
col
|
||||
}
|
||||
df[, i] <- col
|
||||
}
|
||||
}
|
||||
df
|
||||
}
|
||||
|
28
R/misc.R
28
R/misc.R
@ -144,3 +144,31 @@ class_integrity_check <- function(value, type, check_vector) {
|
||||
}
|
||||
value
|
||||
}
|
||||
|
||||
# transforms data set to data.frame with only ASCII values, to comply with CRAN policies
|
||||
dataset_UTF8_to_ASCII <- function(df) {
|
||||
trans <- function(vect) {
|
||||
iconv(vect, from = "UTF-8", to = "ASCII//TRANSLIT")
|
||||
}
|
||||
df <- as.data.frame(df, stringsAsFactors = FALSE)
|
||||
for (i in seq_len(NCOL(df))) {
|
||||
col <- df[, i]
|
||||
if (is.list(col)) {
|
||||
col <- lapply(col, function(j) trans(j))
|
||||
# for (j in seq_len(length(col))) {
|
||||
# col[[j]] <- trans(col[[j]])
|
||||
# }
|
||||
df[, i] <- list(col)
|
||||
} else {
|
||||
if (is.factor(col)) {
|
||||
levels(col) <- trans(levels(col))
|
||||
} else if (is.character(col)) {
|
||||
col <- trans(col)
|
||||
} else {
|
||||
col
|
||||
}
|
||||
df[, i] <- col
|
||||
}
|
||||
}
|
||||
df
|
||||
}
|
||||
|
66
R/mo.R
66
R/mo.R
@ -132,11 +132,12 @@
|
||||
#' as.mo("Staphylococcus aureus")
|
||||
#' as.mo("Staphylococcus aureus (MRSA)")
|
||||
#' as.mo("Zthafilokkoockus oureuz") # handles incorrect spelling
|
||||
#' as.mo("MRSA") # Methicillin Resistant S. aureus
|
||||
#' as.mo("VISA") # Vancomycin Intermediate S. aureus
|
||||
#' as.mo("VRSA") # Vancomycin Resistant S. aureus
|
||||
#' as.mo(22242419) # Catalogue of Life ID
|
||||
#'
|
||||
#' as.mo("MRSA") # Methicillin Resistant S. aureus
|
||||
#' as.mo("VISA") # Vancomycin Intermediate S. aureus
|
||||
#' as.mo("VRSA") # Vancomycin Resistant S. aureus
|
||||
#' as.mo(22242419) # Catalogue of Life ID
|
||||
#' as.mo(115329001) # SNOMED CT code
|
||||
#'
|
||||
#' # Dyslexia is no problem - these all work:
|
||||
#' as.mo("Ureaplasma urealyticum")
|
||||
#' as.mo("Ureaplasma urealyticus")
|
||||
@ -567,11 +568,29 @@ exec_as.mo <- function(x,
|
||||
cat(paste0(blue("x_trimmed_without_group"), ' "', x_trimmed_without_group, '"\n'))
|
||||
}
|
||||
|
||||
progress <- progress_estimated(n = length(x), min_time = 3)
|
||||
|
||||
if (initial_search == TRUE) {
|
||||
progress <- progress_estimated(n = length(x), min_time = 3)
|
||||
|
||||
# before we start, omit the ones that are obvious - MO codes and full names
|
||||
skip_vect <- rep(FALSE, length(x))
|
||||
skip_vect[toupper(x_backup) %in% reference_data_to_use$mo] <- TRUE
|
||||
skip_vect[tolower(x_backup) %in% reference_data_to_use$fullname_lower] <- TRUE
|
||||
x[toupper(x_backup) %in% reference_data_to_use$mo] <- reference_data_to_use[data.table(mo = toupper(x_backup[toupper(x_backup) %in% reference_data_to_use$mo])),
|
||||
on = "mo",
|
||||
..property][[1]]
|
||||
x[tolower(x_backup) %in% reference_data_to_use$fullname_lower] <- reference_data_to_use[data.table(fullname_lower = tolower(x_backup[tolower(x_backup) %in% reference_data_to_use$fullname_lower])),
|
||||
on = "fullname_lower",
|
||||
..property][[1]]
|
||||
}
|
||||
|
||||
for (i in seq_len(length(x))) {
|
||||
|
||||
progress$tick()$print()
|
||||
if (initial_search == TRUE) {
|
||||
progress$tick()$print()
|
||||
if (isTRUE(skip_vect[i])) {
|
||||
next
|
||||
}
|
||||
}
|
||||
|
||||
mo_hist <- get_mo_history(x_backup[i], uncertainty_level, force = force_mo_history, disable = disable_mo_history)
|
||||
if (initial_search == TRUE & !any(is.na(mo_hist))) {
|
||||
@ -633,10 +652,27 @@ exec_as.mo <- function(x,
|
||||
next
|
||||
}
|
||||
|
||||
# exact SNOMED code
|
||||
if (x_backup[i] %like% "^[0-9]+$") {
|
||||
snomed_found <- unlist(lapply(reference_data_to_use$snomed,
|
||||
function(s) if (x_backup[i] %in% s) {
|
||||
TRUE
|
||||
} else {
|
||||
FALSE
|
||||
}))
|
||||
found <- reference_data_to_use[snomed_found == TRUE,
|
||||
..property][[1]]
|
||||
if (length(found) > 0) {
|
||||
x[i] <- found[1L]
|
||||
# don't save to history, as all items are already in microorganisms
|
||||
next
|
||||
}
|
||||
}
|
||||
|
||||
# very probable: is G. species
|
||||
found <- reference_data_to_use[g_species %in% gsub("[^a-z0-9/ \\-]+", "",
|
||||
tolower(c(x_backup[i], x_backup_without_spp[i]))),
|
||||
..property][[1]]
|
||||
# very probable: is G. species
|
||||
if (length(found) > 0) {
|
||||
x[i] <- found[1L]
|
||||
if (initial_search == TRUE) {
|
||||
@ -1634,6 +1670,7 @@ exec_as.mo <- function(x,
|
||||
msg <- paste0(msg, ": ", paste('"', unique(failures), '"', sep = "", collapse = ", "))
|
||||
}
|
||||
msg <- paste0(msg, ". Use mo_failures() to review ", plural[2], ". Edit the `allow_uncertain` parameter if needed (see ?as.mo).")
|
||||
cat("\n")
|
||||
warning(red(msg),
|
||||
call. = FALSE,
|
||||
immediate. = TRUE) # thus will always be shown, even if >= warnings
|
||||
@ -1646,8 +1683,9 @@ exec_as.mo <- function(x,
|
||||
if (NROW(uncertainties) > 1) {
|
||||
plural <- c("s", "them", "were")
|
||||
}
|
||||
msg <- paste0("\nResult", plural[1], " of ", nr2char(NROW(uncertainties)), " value", plural[1],
|
||||
msg <- paste0("Result", plural[1], " of ", nr2char(NROW(uncertainties)), " value", plural[1],
|
||||
" ", plural[3], " guessed with uncertainty. Use mo_uncertainties() to review ", plural[2], ".")
|
||||
cat("\n")
|
||||
warning(red(msg),
|
||||
call. = FALSE,
|
||||
immediate. = TRUE) # thus will always be shown, even if >= warnings
|
||||
@ -2036,6 +2074,14 @@ unregex <- function(x) {
|
||||
get_mo_code <- function(x, property) {
|
||||
if (property == "mo") {
|
||||
unique(x)
|
||||
} else if (property == "snomed") {
|
||||
found <- unlist(lapply(microorganismsDT$snomed,
|
||||
function(s) if (any(x %in% s, na.rm = TRUE)) {
|
||||
TRUE
|
||||
} else {
|
||||
FALSE
|
||||
}))
|
||||
microorganismsDT$mo[found == TRUE]
|
||||
} else {
|
||||
microorganismsDT[get(property) == x, "mo"][[1]]
|
||||
}
|
||||
|
@ -45,6 +45,7 @@
|
||||
#' - An [`integer`] in case of [mo_year()]
|
||||
#' - A [`list`] in case of [mo_taxonomy()] and [mo_info()]
|
||||
#' - A named [`character`] in case of [mo_url()]
|
||||
#' - A [`double`] in case of [mo_snomed()]
|
||||
#' - A [`character`] in all other cases
|
||||
#' @export
|
||||
#' @seealso [microorganisms]
|
||||
@ -62,11 +63,12 @@
|
||||
#'
|
||||
#' # colloquial properties ----------------------------------------------------
|
||||
#' mo_name("E. coli") # "Escherichia coli"
|
||||
#' mo_fullname("E. coli") # "Escherichia coli", same as mo_name()
|
||||
#' mo_fullname("E. coli") # "Escherichia coli" - same as mo_name()
|
||||
#' mo_shortname("E. coli") # "E. coli"
|
||||
#'
|
||||
#' # other properties ---------------------------------------------------------
|
||||
#' mo_gramstain("E. coli") # "Gram-negative"
|
||||
#' mo_snomed("E. coli") # 112283007, 116395006, ... (SNOMED codes)
|
||||
#' mo_type("E. coli") # "Bacteria" (equal to kingdom, but may be translated)
|
||||
#' mo_rank("E. coli") # "species"
|
||||
#' mo_url("E. coli") # get the direct url to the online database entry
|
||||
@ -249,6 +251,12 @@ mo_gramstain <- function(x, language = get_locale(), ...) {
|
||||
translate_AMR(x, language = language, only_unknown = FALSE)
|
||||
}
|
||||
|
||||
#' @rdname mo_property
|
||||
#' @export
|
||||
mo_snomed <- function(x, ...) {
|
||||
mo_validate(x = x, property = "snomed", ...)
|
||||
}
|
||||
|
||||
#' @rdname mo_property
|
||||
#' @export
|
||||
mo_ref <- function(x, ...) {
|
||||
@ -428,6 +436,8 @@ mo_validate <- function(x, property, ...) {
|
||||
return(to_class_mo(x))
|
||||
} else if (property == "col_id") {
|
||||
return(as.integer(x))
|
||||
} else if (property == "snomed") {
|
||||
return(as.double(eval(parse(text = x))))
|
||||
} else {
|
||||
return(x)
|
||||
}
|
||||
|
Reference in New Issue
Block a user