1
0
mirror of https://github.com/msberends/AMR.git synced 2025-07-10 23:01:58 +02:00

Update clinical breakpoints and fix some as.mo() bugs (#117)

* Updates clinical breakpoints EUCAST/CLSI 2023, fixes #102, fixes #112, fixes #113, fixes #114, fixes #115
* docs
* implement ecoffs
* unit tests
This commit is contained in:
Dr. Matthijs Berends
2023-06-22 15:10:59 +02:00
committed by GitHub
parent 9591688811
commit f065945d7b
55 changed files with 43056 additions and 18400 deletions

50
R/mo.R
View File

@ -171,10 +171,11 @@ as.mo <- function(x,
meet_criteria(info, allow_class = "logical", has_length = 1)
add_MO_lookup_to_AMR_env()
if (tryCatch(all(x %in% c(AMR_env$MO_lookup$mo, NA)) &&
isFALSE(Becker) &&
isFALSE(Lancefield), error = function(e) FALSE)) {
if (tryCatch(all(x %in% c(AMR_env$MO_lookup$mo, NA)), error = function(e) FALSE) &&
isFALSE(Becker) &&
isFALSE(Lancefield) &&
isTRUE(keep_synonyms)) {
# don't look into valid MO codes, just return them
# is.mo() won't work - MO codes might change between package versions
return(set_clean_class(x, new_class = c("mo", "character")))
@ -266,6 +267,8 @@ as.mo <- function(x,
x_out <- trimws2(gsub(" +", " ", x_out, perl = TRUE))
x_search_cleaned <- x_out
x_out <- tolower(x_out)
# when x_search_cleaned are only capitals (such as in codes), make them lowercase to increase matching score
x_search_cleaned[x_search_cleaned == toupper(x_search_cleaned)] <- x_out[x_search_cleaned == toupper(x_search_cleaned)]
# first check if cleaning led to an exact result, case-insensitive
if (x_out %in% AMR_env$MO_lookup$fullname_lower) {
@ -297,15 +300,19 @@ as.mo <- function(x,
} else if (length(x_parts) > 3) {
first_chars <- paste0("(^| )[", paste(substr(x_parts, 1, 1), collapse = ""), "]")
filtr <- which(AMR_env$MO_lookup$full_first %like_case% first_chars)
} else if (nchar(x_out) == 3) {
# no space and 3 characters - probably a code such as SAU or ECO
msg <<- c(msg, paste0("Input \"", x_search, "\" was assumed to be a microorganism code - tried to match on \"", substr(x_out, 1, 1), AMR_env$dots, " ", substr(x_out, 2, 3), AMR_env$dots, "\""))
filtr <- which(AMR_env$MO_lookup$fullname_lower %like_case% paste0("(^| )", substr(x_out, 1, 1), ".* ", substr(x_out, 2, 3)))
} else if (nchar(x_out) == 4) {
# no space and 4 characters - probably a code such as STAU or ESCO
msg <- c(msg, paste0("Input \"", x_search, "\" was assumed to be a microorganism code - tried to match on ", vector_and(c(substr(x_out, 1, 2), substr(x_out, 3, 4)), sort = FALSE)))
msg <<- c(msg, paste0("Input \"", x_search, "\" was assumed to be a microorganism code - tried to match on \"", substr(x_out, 1, 2), AMR_env$dots, " ", substr(x_out, 3, 4), AMR_env$dots, "\""))
filtr <- which(AMR_env$MO_lookup$fullname_lower %like_case% paste0("(^| )", substr(x_out, 1, 2), ".* ", substr(x_out, 3, 4)))
} else if (nchar(x_out) <= 6) {
# no space and 5-6 characters - probably a code such as STAAUR or ESCCOL
first_part <- paste0(substr(x_out, 1, 2), "[a-z]*", substr(x_out, 3, 3))
second_part <- substr(x_out, 4, nchar(x_out))
msg <- c(msg, paste0("Input \"", x_search, "\" was assumed to be a microorganism code - tried to match on ", vector_and(c(gsub("[a-z]*", "(...)", first_part, fixed = TRUE), second_part), sort = FALSE)))
msg <<- c(msg, paste0("Input \"", x_search, "\" was assumed to be a microorganism code - tried to match on \"", gsub("[a-z]*", AMR_env$dots, first_part, fixed = TRUE), " ", second_part, AMR_env$dots, "\""))
filtr <- which(AMR_env$MO_lookup$fullname_lower %like_case% paste0("(^| )", first_part, ".* ", second_part))
} else {
# for genus or species or subspecies
@ -328,15 +335,18 @@ as.mo <- function(x,
minimum_matching_score_current <- minimum_matching_score_current / AMR_env$MO_lookup$prevalence[match(mo_to_search, AMR_env$MO_lookup$fullname)]
# correct back for kingdom
minimum_matching_score_current <- minimum_matching_score_current / AMR_env$MO_lookup$kingdom_index[match(mo_to_search, AMR_env$MO_lookup$fullname)]
minimum_matching_score_current <- pmax(minimum_matching_score_current, m)
if (length(m) > 1 && all(m <= 0.55, na.rm = TRUE)) {
# if the highest score is 0.5, we have nothing serious - 0.5 is the lowest for pathogenic group 1
# make everything NA so the results will get removed below
m[seq_len(length(m))] <- NA_real_
}
} else {
# minimum_matching_score was set, so remove everything below it
m[m < minimum_matching_score] <- NA_real_
minimum_matching_score_current <- minimum_matching_score
}
if (sum(m >= minimum_matching_score_current) > 10) {
# at least 10 are left over, make the ones under `m` NA
m[m < minimum_matching_score_current] <- NA_real_
}
top_hits <- mo_to_search[order(m, decreasing = TRUE, na.last = NA)] # na.last = NA will remove the NAs
if (length(top_hits) == 0) {
warning_("No hits found for \"", x_search, "\" with minimum_matching_score = ", ifelse(is.null(minimum_matching_score), paste0("NULL (=", round(min(minimum_matching_score_current, na.rm = TRUE), 3), ")"), minimum_matching_score), ". Try setting this value lower or even to 0.", call = FALSE)
@ -815,7 +825,7 @@ rep.mo <- function(x, ...) {
print.mo_uncertainties <- function(x, n = 10, ...) {
more_than_50 <- FALSE
if (NROW(x) == 0) {
cat(word_wrap("No uncertainties to show. Only uncertainties of the last call of `as.mo()` or any `mo_*()` function are stored.\n\n", add_fn = font_blue))
cat(word_wrap("No uncertainties to show. Only uncertainties of the last call to `as.mo()` or any `mo_*()` function are stored.\n\n", add_fn = font_blue))
return(invisible(NULL))
} else if (NROW(x) > 50) {
more_than_50 <- TRUE
@ -833,20 +843,20 @@ print.mo_uncertainties <- function(x, n = 10, ...) {
if (has_colour()) {
cat(word_wrap("Colour keys: ",
col_red(" 0.000-0.499 "),
col_orange(" 0.500-0.599 "),
col_yellow(" 0.600-0.699 "),
col_green(" 0.700-1.000"),
col_red(" 0.000-0.549 "),
col_orange(" 0.550-0.649 "),
col_yellow(" 0.650-0.749 "),
col_green(" 0.750-1.000"),
add_fn = font_blue
), font_green_bg(" "), "\n", sep = "")
}
score_set_colour <- function(text, scores) {
# set colours to scores
text[scores >= 0.7] <- col_green(text[scores >= 0.7])
text[scores >= 0.6 & scores < 0.7] <- col_yellow(text[scores >= 0.6 & scores < 0.7])
text[scores >= 0.5 & scores < 0.6] <- col_orange(text[scores >= 0.5 & scores < 0.6])
text[scores < 0.5] <- col_red(text[scores < 0.5])
text[scores >= 0.75] <- col_green(text[scores >= 0.75])
text[scores >= 0.65 & scores < 0.75] <- col_yellow(text[scores >= 0.65 & scores < 0.75])
text[scores >= 0.55 & scores < 0.65] <- col_orange(text[scores >= 0.55 & scores < 0.65])
text[scores < 0.55] <- col_red(text[scores < 0.55])
text
}