From 1d9ee39cc788008a7d9152887dd21df963590c58 Mon Sep 17 00:00:00 2001 From: "Matthijs S. Berends" Date: Thu, 22 Jun 2023 15:24:18 +0200 Subject: [PATCH] fix for mo codes --- DESCRIPTION | 2 +- NEWS.md | 2 +- R/aa_helper_functions.R | 4 ++++ R/mo.R | 9 +++++---- 4 files changed, 11 insertions(+), 6 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 063592b8..6b8c73ab 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: AMR -Version: 2.0.0.9024 +Version: 2.0.0.9025 Date: 2023-06-22 Title: Antimicrobial Resistance Data Analysis Description: Functions to simplify and standardise antimicrobial resistance (AMR) diff --git a/NEWS.md b/NEWS.md index bb030f00..5dcab5ea 100755 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,4 @@ -# AMR 2.0.0.9024 +# AMR 2.0.0.9025 ## New * Clinical breakpoints and intrinsic resistance of EUCAST 2023 and CLSI 2023 have been added for `as.sir()`. EUCAST 2023 (v13.0) is now the new default guideline for all MIC and disks diffusion interpretations, and for `eucast_rules()` to apply EUCAST Expert Rules. diff --git a/R/aa_helper_functions.R b/R/aa_helper_functions.R index 2d457dde..2f84e1f8 100755 --- a/R/aa_helper_functions.R +++ b/R/aa_helper_functions.R @@ -1517,6 +1517,10 @@ trimws2 <- function(..., whitespace = "[\u0009\u000A\u000B\u000C\u000D\u0020\u00 trimws(..., whitespace = whitespace) } +totitle <- function(x) { + gsub("^(.)", "\\U\\1", x, perl = TRUE) +} + readRDS_AMR <- function(file, refhook = NULL) { # this is readRDS with remote file support con <- file(file) diff --git a/R/mo.R b/R/mo.R index 0ad16c9e..36288daa 100755 --- a/R/mo.R +++ b/R/mo.R @@ -302,17 +302,17 @@ as.mo <- function(x, filtr <- which(AMR_env$MO_lookup$full_first %like_case% first_chars) } else if (nchar(x_out) == 3) { # no space and 3 characters - probably a code such as SAU or ECO - msg <<- c(msg, paste0("Input \"", x_search, "\" was assumed to be a microorganism code - tried to match on \"", substr(x_out, 1, 1), AMR_env$dots, " ", substr(x_out, 2, 3), AMR_env$dots, "\"")) + msg <<- c(msg, paste0("Input \"", x_search, "\" was assumed to be a microorganism code - tried to match on \"", totitle(substr(x_out, 1, 1)), AMR_env$dots, " ", substr(x_out, 2, 3), AMR_env$dots, "\"")) filtr <- which(AMR_env$MO_lookup$fullname_lower %like_case% paste0("(^| )", substr(x_out, 1, 1), ".* ", substr(x_out, 2, 3))) } else if (nchar(x_out) == 4) { # no space and 4 characters - probably a code such as STAU or ESCO - msg <<- c(msg, paste0("Input \"", x_search, "\" was assumed to be a microorganism code - tried to match on \"", substr(x_out, 1, 2), AMR_env$dots, " ", substr(x_out, 3, 4), AMR_env$dots, "\"")) + msg <<- c(msg, paste0("Input \"", x_search, "\" was assumed to be a microorganism code - tried to match on \"", totitle(substr(x_out, 1, 2)), AMR_env$dots, " ", substr(x_out, 3, 4), AMR_env$dots, "\"")) filtr <- which(AMR_env$MO_lookup$fullname_lower %like_case% paste0("(^| )", substr(x_out, 1, 2), ".* ", substr(x_out, 3, 4))) } else if (nchar(x_out) <= 6) { # no space and 5-6 characters - probably a code such as STAAUR or ESCCOL first_part <- paste0(substr(x_out, 1, 2), "[a-z]*", substr(x_out, 3, 3)) second_part <- substr(x_out, 4, nchar(x_out)) - msg <<- c(msg, paste0("Input \"", x_search, "\" was assumed to be a microorganism code - tried to match on \"", gsub("[a-z]*", AMR_env$dots, first_part, fixed = TRUE), " ", second_part, AMR_env$dots, "\"")) + msg <<- c(msg, paste0("Input \"", x_search, "\" was assumed to be a microorganism code - tried to match on \"", gsub("[a-z]*", AMR_env$dots, totitle(first_part), fixed = TRUE), " ", second_part, AMR_env$dots, "\"")) filtr <- which(AMR_env$MO_lookup$fullname_lower %like_case% paste0("(^| )", first_part, ".* ", second_part)) } else { # for genus or species or subspecies @@ -336,9 +336,10 @@ as.mo <- function(x, # correct back for kingdom minimum_matching_score_current <- minimum_matching_score_current / AMR_env$MO_lookup$kingdom_index[match(mo_to_search, AMR_env$MO_lookup$fullname)] minimum_matching_score_current <- pmax(minimum_matching_score_current, m) - if (length(m) > 1 && all(m <= 0.55, na.rm = TRUE)) { + if (length(x_parts) > 1 && all(m <= 0.55, na.rm = TRUE)) { # if the highest score is 0.5, we have nothing serious - 0.5 is the lowest for pathogenic group 1 # make everything NA so the results will get removed below + # (we added length(x_parts) > 1 to exclude microbial codes from this rule, such as "STAU") m[seq_len(length(m))] <- NA_real_ } } else {