1
0
mirror of https://github.com/msberends/AMR.git synced 2025-07-08 16:42:10 +02:00
fixes #54
fixes #51
This commit is contained in:
2022-09-23 12:55:52 +02:00
parent 96a9fd0382
commit 1724e6d3f3
36 changed files with 233 additions and 252 deletions

View File

@ -562,15 +562,22 @@ ab_select_exec <- function(function_name,
message_("No antimicrobial agents found in the data.")
return(NULL)
}
if (is.null(ab_class_args)) {
# their upper case equivalent are vectors with class <ab>, created in data-raw/_pre_commit_hook.R
# carbapenems() gets its codes from AMR:::AB_CARBAPENEMS
abx <- get(paste0("AB_", toupper(function_name)), envir = asNamespace("AMR"))
ab_group <- function_name
if (is.null(ab_class_args) || function_name %in% c("antifungals", "antimycobacterials")) {
ab_group <- NULL
if (function_name == "antifungals") {
abx <- antibiotics$ab[which(antibiotics$group == "Antifungals")]
} else if (function_name == "antimycobacterials") {
abx <- antibiotics$ab[which(antibiotics$group == "Antimycobacterials")]
} else {
# their upper case equivalent are vectors with class <ab>, created in data-raw/_pre_commit_hook.R
# carbapenems() gets its codes from AMR:::AB_CARBAPENEMS
abx <- get(paste0("AB_", toupper(function_name)), envir = asNamespace("AMR"))
ab_group <- function_name
}
examples <- paste0(" (such as ", vector_or(ab_name(sample(abx, size = min(2, length(abx)), replace = FALSE),
tolower = TRUE,
language = NULL
tolower = TRUE,
language = NULL
),
quotes = FALSE
), ")")

View File

@ -98,14 +98,14 @@
#'
#' @section Included Taxa:
#' Included taxonomic data are:
#' - All `r format_included_data_number(microorganisms[which(microorganisms$kingdom %in% c("Archeae", "Bacteria", "Protozoa")), , drop = FALSE])` (sub)species from the kingdoms of Archaea, Bacteria and Protozoa
#' - All `r format_included_data_number(microorganisms[which(microorganisms$kingdom == "Fungi"), , drop = FALSE])` (sub)species from `r format_included_data_number(microorganisms[which(microorganisms$kingdom == "Fungi"), "order", drop = TRUE])` relevant orders of the kingdom of Fungi. The kingdom of Fungi is a very large taxon with almost 300,000 different (sub)species, of which most are not microbial (but rather macroscopic, like mushrooms). Because of this, not all fungi fit the scope of this package and including everything would tremendously slow down our algorithms too. By only including relevant taxonomic orders, the most relevant fungi are covered (such as all species of *Aspergillus*, *Candida*, *Cryptococcus*, *Histplasma*, *Pneumocystis*, *Saccharomyces* and *Trichophyton*).
#' - All `r format_included_data_number(microorganisms[which(microorganisms$kingdom == "Animalia"), , drop = FALSE])` (sub)species from `r format_included_data_number(microorganisms[which(microorganisms$kingdom == "Animalia"), "genus", drop = TRUE])` other relevant genera from the kingdom of Animalia (such as *Strongyloides* and *Taenia*)
#' - All `r format_included_data_number(microorganisms[which(microorganisms$kingdom == "Plantae"), , drop = FALSE])` (sub)species from `r format_included_data_number(microorganisms[which(microorganisms$kingdom == "Animalia"), "genus", drop = TRUE])` other relevant genera from the kingdom of Animalia (such as *Strongyloides* and *Taenia*)
#' - All `r format_included_data_number(microorganisms[which(microorganisms$kingdom %in% c("Archeae", "Bacteria")), , drop = FALSE])` (sub)species from the kingdoms of Archaea and Bacteria
#' - `r format_included_data_number(microorganisms[which(microorganisms$kingdom == "Fungi"), , drop = FALSE])` (sub)species from the kingdom of Fungi. The kingdom of Fungi is a very large taxon with almost 300,000 different (sub)species, of which most are not microbial (but rather macroscopic, like mushrooms). Because of this, not all fungi fit the scope of this package. Only relevant fungi are covered (such as all species of *Aspergillus*, *Candida*, *Cryptococcus*, *Histoplasma*, *Pneumocystis*, *Saccharomyces* and *Trichophyton*).
#' - `r format_included_data_number(microorganisms[which(microorganisms$kingdom == "Protozoa"), , drop = FALSE])` (sub)species from the kingdom of Protozoa
#' - `r format_included_data_number(microorganisms[which(microorganisms$kingdom == "Animalia"), , drop = FALSE])` (sub)species from `r format_included_data_number(microorganisms[which(microorganisms$kingdom == "Animalia"), "genus", drop = TRUE])` other relevant genera from the kingdom of Animalia (such as *Strongyloides* and *Taenia*)
#' - All `r format_included_data_number(microorganisms[which(microorganisms$status != "accepted"), , drop = FALSE])` previously accepted names of all included (sub)species (these were taxonomically renamed)
#' - The complete taxonomic tree of all included (sub)species: from kingdom to subspecies
#' - The identifier of the parent taxons
#' - The responsible author(s) and year of scientific publication
#' - The year and first author of the related scientific publication
#'
#' ## Manual additions
#' For convenience, some entries were added manually:

View File

@ -175,7 +175,7 @@ mdro <- function(x = NULL,
...) {
if (is_null_or_grouped_tbl(x)) {
# when `x` is left blank, auto determine it (get_current_data() also contains dplyr::cur_data_all())
# is also fix for using a grouped df as input (a dot as first argument)
# is also a fix for using a grouped df as input (i.e., a dot as first argument)
x <- tryCatch(get_current_data(arg_name = "x", call = -2), error = function(e) x)
}
meet_criteria(x, allow_class = "data.frame") # also checks dimensions to be >0

30
R/mic.R
View File

@ -24,23 +24,27 @@
# ==================================================================== #
# these are allowed MIC values and will become [factor] levels
ops <- c("<", "<=", "", ">=", ">")
operators <- c("<", "<=", "", ">=", ">")
valid_mic_levels <- c(
c(t(vapply(
FUN.VALUE = character(9), ops,
function(x) paste0(x, "0.00", 1:9)
FUN.VALUE = character(6), operators,
function(x) paste0(x, "0.000", c(1:4, 6, 8))
))),
c(t(vapply(
FUN.VALUE = character(90), operators,
function(x) paste0(x, "0.00", c(1:9, 11:19, 21:29, 31:39, 41:49, 51:59, 61:69, 71:79, 81:89, 91:99))
))),
unique(c(t(vapply(
FUN.VALUE = character(104), ops,
FUN.VALUE = character(106), operators,
function(x) {
paste0(x, sort(as.double(paste0(
"0.0",
sort(c(1:99, 125, 128, 256, 512, 625))
sort(c(1:99, 125, 128, 156, 165, 256, 512, 625))
))))
}
)))),
unique(c(t(vapply(
FUN.VALUE = character(103), ops,
FUN.VALUE = character(103), operators,
function(x) {
paste0(x, sort(as.double(paste0(
"0.",
@ -49,15 +53,15 @@ valid_mic_levels <- c(
}
)))),
c(t(vapply(
FUN.VALUE = character(10), ops,
FUN.VALUE = character(10), operators,
function(x) paste0(x, sort(c(1:9, 1.5)))
))),
c(t(vapply(
FUN.VALUE = character(45), ops,
FUN.VALUE = character(45), operators,
function(x) paste0(x, c(10:98)[9:98 %% 2 == TRUE])
))),
c(t(vapply(
FUN.VALUE = character(17), ops,
FUN.VALUE = character(17), operators,
function(x) paste0(x, sort(c(2^c(7:11), 192, 80 * c(2:12))))
)))
)
@ -159,11 +163,15 @@ valid_mic_levels <- c(
as.mic <- function(x, na.rm = FALSE) {
meet_criteria(x, allow_class = c("mic", "character", "numeric", "integer", "factor"), allow_NA = TRUE)
meet_criteria(na.rm, allow_class = "logical", has_length = 1)
if (is.mic(x)) {
x
} else {
x <- as.character(unlist(x))
if (is.numeric(x)) {
x <- format(x, scientific = FALSE)
} else {
x <- as.character(unlist(x))
}
if (na.rm == TRUE) {
x <- x[!is.na(x)]
}

169
R/mo.R
View File

@ -88,9 +88,9 @@
#' The level of uncertainty can be set using the argument `allow_uncertain`. The default is `allow_uncertain = TRUE`, which is equal to uncertainty level 2. Using `allow_uncertain = FALSE` is equal to uncertainty level 0 and will skip all rules. You can also use e.g. `as.mo(..., allow_uncertain = 1)` to only allow up to level 1 uncertainty.
#'
#' With the default setting (`allow_uncertain = TRUE`, level 2), below examples will lead to valid results:
#' - `"Streptococcus group B (known as S. agalactiae)"`. The text between brackets will be removed and a warning will be thrown that the result *Streptococcus group B* (``r as.mo("Streptococcus group B")``) needs review.
#' - `"S. aureus - please mind: MRSA"`. The last word will be stripped, after which the function will try to find a match. If it does not, the second last word will be stripped, etc. Again, a warning will be thrown that the result *Staphylococcus aureus* (``r as.mo("Staphylococcus aureus")``) needs review.
#' - `"Fluoroquinolone-resistant Neisseria gonorrhoeae"`. The first word will be stripped, after which the function will try to find a match. A warning will be thrown that the result *Neisseria gonorrhoeae* (``r as.mo("Neisseria gonorrhoeae")``) needs review.
#' - `"Streptococcus group B (known as S. agalactiae)"`. The text between brackets will be removed and a warning will be thrown that the result *Streptococcus group B* (`B_STRPT_GRPB`) needs review.
#' - `"S. aureus - please mind: MRSA"`. The last word will be stripped, after which the function will try to find a match. If it does not, the second last word will be stripped, etc. Again, a warning will be thrown that the result *Staphylococcus aureus* (`B_STPHY_AURS`) needs review.
#' - `"Fluoroquinolone-resistant Neisseria gonorrhoeae"`. The first word will be stripped, after which the function will try to find a match. A warning will be thrown that the result *Neisseria gonorrhoeae* (`B_NESSR_GNRR`) needs review.
#'
#' There are three helper functions that can be run after using the [as.mo()] function:
#' - Use [mo_uncertainties()] to get a [data.frame] that prints in a pretty format with all taxonomic names that were guessed. The output contains the matching score for all matches (see *Matching Score for Microorganisms* below).
@ -197,22 +197,6 @@ as.mo <- function(x,
# WHONET: xxx = no growth
x[tolower(x) %in% c("", "xxx", "na", "nan")] <- NA_character_
if (tryCatch(all(x == "" | gsub(".*(unknown ).*", "unknown name", tolower(x), perl = TRUE) %in% MO_lookup$fullname_lower, na.rm = TRUE) &&
isFALSE(Becker) &&
isTRUE(keep_synonyms) &&
isFALSE(Lancefield), error = function(e) FALSE)) {
# to improve speed, special case for taxonomically correct full names (case-insensitive)
return(set_clean_class(MO_lookup[match(
gsub(".*(unknown ).*", "unknown name",
tolower(x),
perl = TRUE
),
MO_lookup$fullname_lower
), "mo", drop = TRUE],
new_class = c("mo", "character")
))
}
out <- rep(NA_character_, length(x))
# below we use base R's match(), known for powering '%in%', and incredibly fast!
@ -233,6 +217,9 @@ as.mo <- function(x,
# found this extremely fast gem here: https://stackoverflow.com/a/11002456/4575331
out[is.na(out) & x %in% unlist(microorganisms$snomed)] <- microorganisms$mo[rep(seq_along(microorganisms$snomed), vapply(FUN.VALUE = double(1), microorganisms$snomed, length))[match(x[is.na(out) & x %in% unlist(microorganisms$snomed)], unlist(microorganisms$snomed))]]
}
# From other familiar output ----
# such as Salmonella groups, colloquial names, etc.
out[is.na(out)] <- convert_colloquial_input(x[is.na(out)])
# From previous hits in this session ----
old <- out
out[is.na(out) & x %in% pkg_env$mo_previously_coerced$x] <- pkg_env$mo_previously_coerced$mo[match(x[is.na(out) & x %in% pkg_env$mo_previously_coerced$x], pkg_env$mo_previously_coerced$x)]
@ -310,7 +297,7 @@ as.mo <- function(x,
m[m < minimum_matching_score_current] <- NA_real_
top_hits <- mo_to_search[order(m, decreasing = TRUE, na.last = NA)] # na.last = NA will remove the NAs
if (length(top_hits) == 0) {
warning_("No hits found for \"", x_search, "\" with minimum_matching_score = ", ifelse(is.null(minimum_matching_score), "NULL", minimum_matching_score), ". Try setting this value higher.")
warning_("No hits found for \"", x_search, "\" with minimum_matching_score = ", ifelse(is.null(minimum_matching_score), "NULL", minimum_matching_score), ". Try setting this value lower or even to 0.")
result_mo <- NA_character_
} else {
result_mo <- MO_lookup$mo[match(top_hits[1], MO_lookup$fullname)]
@ -409,13 +396,11 @@ as.mo <- function(x,
)
)
}
} else {
} else if (is.null(getOption("AMR_keep_synonyms")) && any(!is.na(c(gbif_matches, lpsn_matches))) && message_not_thrown_before("as.mo", "keep_synonyms_warning", entire_session = TRUE)) {
# keep synonyms is TRUE, so check if any do have synonyms
if (any(!is.na(c(gbif_matches, lpsn_matches))) && message_not_thrown_before("as.mo", unique(c(gbif_matches, lpsn_matches)))) {
warning_("Function `as.mo()` returned some old taxonomic names. Use `as.mo(..., keep_synonyms = FALSE)` to clean the input to currently accepted taxonomic names, or set the R option `AMR_keep_synonyms` to `FALSE`.")
}
warning_("Function `as.mo()` returned some old taxonomic names. Use `as.mo(..., keep_synonyms = FALSE)` to clean the input to currently accepted taxonomic names, or set the R option `AMR_keep_synonyms` to `FALSE`. This warning will be shown once per session.")
}
# Apply Becker ----
if (isTRUE(Becker) || Becker == "all") {
# warn when species found that are not in:
@ -615,19 +600,18 @@ print.mo <- function(x, print.shortnames = FALSE, ...) {
#' @noRd
summary.mo <- function(object, ...) {
# unique and top 1-3
x <- as.mo(object) # force again, could be mo from older pkg version
top <- as.data.frame(table(x), responseName = "n", stringsAsFactors = FALSE)
top_3 <- top[order(-top$n), 1, drop = TRUE][1:3]
value <- c(
x <- object
top_3 <- names(sort(-table(x[!is.na(x)])))[1:3]
out <- c(
"Class" = "mo",
"<NA>" = length(x[is.na(x)]),
"Unique" = pm_n_distinct(x[!is.na(x)]),
"Unique" = length(unique(x[!is.na(x)])),
"#1" = top_3[1],
"#2" = top_3[2],
"#3" = top_3[3]
)
class(value) <- c("summaryDefault", "table")
value
class(out) <- c("summaryDefault", "table")
out
}
#' @method as.data.frame mo
@ -710,12 +694,6 @@ rep.mo <- function(x, ...) {
y
}
#' @rdname as.mo
#' @export
mo_failures <- function() {
pkg_env$mo_failures
}
#' @rdname as.mo
#' @export
mo_uncertainties <- function() {
@ -833,51 +811,6 @@ mo_reset_session <- function() {
}
}
#' @rdname as.mo
#' @export
mo_renamed <- function() {
items <- pkg_env$mo_renamed
if (is.null(items)) {
items <- data.frame(stringsAsFactors = FALSE)
} else {
items <- pm_distinct(items, old_name, .keep_all = TRUE)
}
set_clean_class(as.data.frame(items,
stringsAsFactors = FALSE
),
new_class = c("mo_renamed", "data.frame")
)
}
#' @method print mo_renamed
#' @export
#' @noRd
print.mo_renamed <- function(x, ...) {
if (NROW(x) == 0) {
return(invisible())
}
for (i in seq_len(nrow(x))) {
message_(
font_italic(x$old_name[i]),
ifelse(x$old_ref[i] %in% c("", NA),
"",
paste0(" (", gsub("et al.", font_italic("et al."), x$old_ref[i]), ")")
),
" was renamed ",
ifelse(!x$new_ref[i] %in% c("", NA) && as.integer(gsub("[^0-9]", "", x$new_ref[i])) < as.integer(gsub("[^0-9]", "", x$old_ref[i])),
font_bold("back to "),
""
),
font_italic(x$new_name[i]),
ifelse(x$new_ref[i] %in% c("", NA),
"",
paste0(" (", gsub("et al.", font_italic("et al."), x$new_ref[i]), ")")
),
" [", x$mo[i], "]"
)
}
}
nr2char <- function(x) {
if (x %in% c(1:10)) {
v <- c(
@ -890,10 +823,6 @@ nr2char <- function(x) {
}
}
unregex <- function(x) {
gsub("[^a-zA-Z0-9 -]", "", x)
}
translate_allow_uncertain <- function(allow_uncertain) {
if (isTRUE(allow_uncertain)) {
# default to uncertainty level 2
@ -911,22 +840,14 @@ translate_allow_uncertain <- function(allow_uncertain) {
}
get_mo_failures_uncertainties_renamed <- function() {
remember <- list(
failures = pkg_env$mo_failures,
uncertainties = pkg_env$mo_uncertainties,
renamed = pkg_env$mo_renamed
)
# empty them, otherwise mo_shortname("Chlamydophila psittaci") will give 3 notes
pkg_env$mo_failures <- NULL
remember <- list(uncertainties = pkg_env$mo_uncertainties)
# empty them, otherwise e.g. mo_shortname("Chlamydophila psittaci") will give 3 notes
pkg_env$mo_uncertainties <- NULL
pkg_env$mo_renamed <- NULL
remember
}
load_mo_failures_uncertainties_renamed <- function(metadata) {
pkg_env$mo_failures <- metadata$failures
pkg_env$mo_uncertainties <- metadata$uncertainties
pkg_env$mo_renamed <- metadata$renamed
}
trimws2 <- function(x) {
@ -934,6 +855,9 @@ trimws2 <- function(x) {
}
parse_and_convert <- function(x) {
if (tryCatch(is.character(x) && Encoding(x) == "unknown", error = function(e) FALSE)) {
return(x)
}
tryCatch(
{
if (!is.null(dim(x))) {
@ -966,7 +890,7 @@ replace_old_mo_codes <- function(x, property) {
# this function transform old MO codes to current codes, such as:
# B_ESCH_COL (AMR v0.5.0) -> B_ESCHR_COLI
ind <- x %like_case% "^[A-Z]_[A-Z_]+$" & !x %in% AMR::microorganisms$mo
if (any(ind)) {
if (any(ind, na.rm = TRUE)) {
# get the ones that match
affected <- x[ind]
affected_unique <- unique(affected)
@ -1067,13 +991,44 @@ repair_reference_df <- function(reference_df) {
reference_df
}
strip_words <- function(text, n, side = "right") {
out <- lapply(strsplit(text, " "), function(x) {
if (side %like% "^r" & length(x) > n) {
x[seq_len(length(x) - n)]
} else if (side %like% "^l" & length(x) > n) {
x[2:length(x)]
}
})
vapply(FUN.VALUE = character(1), out, paste, collapse = " ")
convert_colloquial_input <- function(x) {
x.bak <- trimws(x)
x <- trimws(tolower(x))
out <- rep(NA_character_, length(x))
# Streptococci, like GBS = Group B Streptococci (B_STRPT_GRPB)
out[x %like_case% "^g[abcdfghkl]s$"] <- gsub("g([abcdfghkl])s",
"B_STRPT_GRP\\U\\1",
x[x %like_case% "^g[abcdfghkl]s$"],
perl = TRUE)
# Streptococci in different languages, like "estreptococos grupo B"
out[x %like_case% "strepto[ck]o[ck].* [abcdfghkl]$"] <- gsub(".*e?strepto[ck]o[ck].* ([abcdfghkl])$",
"B_STRPT_GRP\\U\\1",
x[x %like_case% "strepto[ck]o[ck].* [abcdfghkl]$"],
perl = TRUE)
out[x %like_case% "group [abcdfghkl] strepto[ck]o[ck]"] <- gsub(".*group ([abcdfghkl]) strepto[ck]o[ck].*",
"B_STRPT_GRP\\U\\1",
x[x %like_case% "group [abcdfghkl] strepto[ck]o[ck]"],
perl = TRUE)
out[x %like_case% "ha?emoly.*strep"] <- "B_STRPT_HAEM"
out[x %like_case% "(strepto.* mil+er+i|^mgs[^a-z]*$)"] <- "B_STRPT_MILL"
out[x %like_case% "((strepto|^s).* viridans|^vgs[^a-z]*$)"] <- "B_STRPT_VIRI"
# CoNS/CoPS in different languages (support for German, Dutch, Spanish, Portuguese)
out[x %like_case% "([ck]oagulas[ea].negatie?[vf]|^[ck]o?ns[^a-z]*$)"] <- "B_STPHY_CONS"
out[x %like_case% "([ck]oagulas[ea].positie?[vf]|^[ck]o?ps[^a-z]*$)"] <- "B_STPHY_COPS"
# Gram stains
out[x %like_case% "gram[ -]?neg.*|negatie?[vf]"] <- "B_GRAMN"
out[x %like_case% "gram[ -]?pos.*|positie?[vf]"] <- "B_GRAMP"
# Salmonella city names, starting with capital species name - they are all S. enterica
out[x.bak %like_case% "[sS]almonella [A-Z][a-z]+ ?.*" & x %unlike% "typhi"] <- "B_SLMNL_ENTR"
# trivial names known to the field
out[x %like_case% "meningo[ck]o[ck]"] <- "B_NESSR_MNNG"
out[x %like_case% "gono[ck]o[ck]"] <- "B_NESSR_GNRR"
out[x %like_case% "pneumo[ck]o[ck]"] <- "B_STRPT_PNMN"
out
}

24
R/rsi.R
View File

@ -327,31 +327,35 @@ as.rsi.default <- function(x, ...) {
# remove other invalid characters
# set to capitals
x <- toupper(x)
x <- gsub("[^RSIHDU]+", "", x, perl = TRUE)
x <- gsub("[^A-Z]+", "", x, perl = TRUE)
# some labs now report "H" instead of "I" to not interfere with EUCAST prior to 2019
x <- gsub("^H$", "I", x, perl = TRUE)
x <- gsub("H", "I", x, fixed = TRUE)
# and MIPS uses D for Dose-dependent (which is I, but it will throw a note)
x <- gsub("^D$", "I", x, perl = TRUE)
x <- gsub("D", "I", x, fixed = TRUE)
# and MIPS uses U for "susceptible urine"
x <- gsub("^U$", "S", x, perl = TRUE)
x <- gsub("U", "S", x, fixed = TRUE)
# in cases of "S;S" keep S, but in case of "S;I" make it NA
x <- gsub("^S+$", "S", x)
x <- gsub("^I+$", "I", x)
x <- gsub("^R+$", "R", x)
x[!x %in% c("S", "I", "R")] <- NA_character_
na_after <- length(x[is.na(x) | x == ""])
if (!isFALSE(list(...)$warn)) { # so as.rsi(..., warn = FALSE) will never throw a warning
if (na_before != na_after) {
list_missing <- x.bak[is.na(x) & !is.na(x.bak) & x.bak != ""] %pm>%
unique() %pm>%
sort() %pm>%
vector_and(quotes = TRUE)
warning_("in `as.rsi()`: ", na_after - na_before, " results truncated (",
round(((na_after - na_before) / length(x)) * 100),
"%) that were invalid antimicrobial interpretations: ",
list_missing,
call = FALSE
cur_col <- get_current_column()
warning_("in `as.rsi()`: ", na_after - na_before, " result",
ifelse(na_after - na_before > 1, "s", ""),
ifelse(is.null(cur_col), "", paste0(" in column '", cur_col, "'")),
" truncated (",
round(((na_after - na_before) / length(x)) * 100),
"%) that were invalid antimicrobial interpretations: ",
list_missing,
call = FALSE
)
}
if (any(toupper(x.bak[!is.na(x.bak)]) == "U") && message_not_thrown_before("as.rsi", "U")) {