diff --git a/DESCRIPTION b/DESCRIPTION index 5f97fa71..9d2d9dbe 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: AMR -Version: 1.4.0.9029 -Date: 2020-12-01 +Version: 1.4.0.9030 +Date: 2020-12-03 Title: Antimicrobial Resistance Analysis Authors@R: c( person(role = c("aut", "cre"), diff --git a/NEWS.md b/NEWS.md index b51ee582..3f014348 100755 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,5 @@ -# AMR 1.4.0.9029 -## Last updated: 1 December 2020 +# AMR 1.4.0.9030 +## Last updated: 3 December 2020 ### New * Function `is_new_episode()` to determine patient episodes which are not necessarily based on microorganisms. It also supports grouped variables with e.g. `mutate()`, `filter()` and `summarise()` of the `dplyr` package: @@ -30,6 +30,8 @@ * Fix for using parameter `reference_df` in `as.mo()` and `mo_*()` functions that contain old microbial codes (from previous package versions) * Fix for using `as.rsi()` on a data.frame in older R versions * `as.rsi()` on a data.frame will not print a message anymore if the values are already clean R/SI values +* Fixed a bug where `mo_uncertainties()` would not return the results based on the MO matching score +* Fixed a bug where `as.mo()` would not return results for known laboratory codes for microorganisms ### Other * All messages and warnings thrown by this package now break sentences on whole words diff --git a/R/aa_helper_functions.R b/R/aa_helper_functions.R index 453d6d17..a819526b 100755 --- a/R/aa_helper_functions.R +++ b/R/aa_helper_functions.R @@ -146,7 +146,7 @@ search_type_in_df <- function(x, type, info = TRUE) { # WHONET support found <- sort(colnames(x)[colnames(x) %like% "^(specimen date|specimen_date|spec_date)"])[1] if (!any(class(pm_pull(x, found)) %in% c("Date", "POSIXct"))) { - stop(font_red(paste0("ERROR: Found column `", font_bold(found), "` to be used as input for `col_", type, + stop(font_red(paste0("Found column '", font_bold(found), "' to be used as input for `col_", type, "`, but this column 
contains no valid dates. Transform its values to valid dates first.")), call. = FALSE) } @@ -178,7 +178,7 @@ search_type_in_df <- function(x, type, info = TRUE) { if (!is.null(found)) { # this column should contain logicals if (!is.logical(x[, found, drop = TRUE])) { - message_("Column `", font_bold(found), "` found as input for `col_", type, + message_("Column '", font_bold(found), "' found as input for `col_", type, "`, but this column does not contain 'logical' values (TRUE/FALSE) and was ignored.", add_fn = font_red) found <- NULL @@ -187,7 +187,7 @@ search_type_in_df <- function(x, type, info = TRUE) { } if (!is.null(found) & info == TRUE) { - msg <- paste0("Using column `", font_bold(found), "` as input for `col_", type, "`.") + msg <- paste0("Using column '", found, "' as input for `col_", type, "`.") if (type %in% c("keyantibiotics", "specimen")) { msg <- paste(msg, "Use", font_bold(paste0("col_", type), "= FALSE"), "to prevent this.") } diff --git a/R/eucast_rules.R b/R/eucast_rules.R index 97d9e79e..5a9fa5dc 100755 --- a/R/eucast_rules.R +++ b/R/eucast_rules.R @@ -454,7 +454,7 @@ eucast_rules <- function(x, if (ab_missing(AMP) & !ab_missing(AMX)) { # ampicillin column is missing, but amoxicillin is available if (info == TRUE) { - message_("Using column `", font_bold(AMX), "` as input for ampicillin since many EUCAST rules depend on it.") + message_("Using column '", font_bold(AMX), "' as input for ampicillin since many EUCAST rules depend on it.") } AMP <- AMX } diff --git a/R/first_isolate.R b/R/first_isolate.R index cb73145a..58705639 100755 --- a/R/first_isolate.R +++ b/R/first_isolate.R @@ -209,7 +209,7 @@ first_isolate <- function(x, # WHONET support x$patient_id <- paste(x$`First name`, x$`Last name`, x$Sex) col_patient_id <- "patient_id" - message_("Using combined columns `", font_bold("First name"), "`, `", font_bold("Last name"), "` and `", font_bold("Sex"), "` as input for `col_patient_id`") + message_("Using combined columns '", 
font_bold("First name"), "', '", font_bold("Last name"), "' and '", font_bold("Sex"), "' as input for `col_patient_id`") } else { col_patient_id <- search_type_in_df(x = x, type = "patient_id") } diff --git a/R/guess_ab_col.R b/R/guess_ab_col.R index 30380c6c..22c39fb6 100755 --- a/R/guess_ab_col.R +++ b/R/guess_ab_col.R @@ -44,7 +44,7 @@ #' # [1] "tetr" #' #' guess_ab_col(df, "J01AA07", verbose = TRUE) -#' # NOTE: Using column `tetr` as input for `J01AA07` (tetracycline). +#' # NOTE: Using column 'tetr' as input for J01AA07 (tetracycline). #' # [1] "tetr" #' #' # WHONET codes @@ -94,16 +94,16 @@ guess_ab_col <- function(x = NULL, search_string = NULL, verbose = FALSE) { if (length(ab_result) == 0) { if (verbose == TRUE) { - message_("No column found as input for `", search_string, - "` (", ab_name(search_string, language = NULL, tolower = TRUE), ").", + message_("No column found as input for ", search_string, + " (", ab_name(search_string, language = NULL, tolower = TRUE), ").", add_fn = font_black, as_note = FALSE) } return(NULL) } else { if (verbose == TRUE) { - message_("Using column `", font_bold(ab_result), "` as input for `", search_string, - "` (", ab_name(search_string, language = NULL, tolower = TRUE), ").") + message_("Using column '", font_bold(ab_result), "' as input for ", search_string, + " (", ab_name(search_string, language = NULL, tolower = TRUE), ").") } return(ab_result) } @@ -204,12 +204,12 @@ get_column_abx <- function(x, for (i in seq_len(length(x))) { if (info == TRUE & verbose == TRUE & !names(x[i]) %in% names(duplicates)) { - message_("Using column `", font_bold(x[i]), "` as input for `", names(x)[i], - "` (", ab_name(names(x)[i], tolower = TRUE, language = NULL), ").") + message_("Using column '", font_bold(x[i]), "' as input for ", names(x)[i], + " (", ab_name(names(x)[i], tolower = TRUE, language = NULL), ").") } if (info == TRUE & names(x[i]) %in% names(duplicates)) { - warning_(paste0("Using column `", font_bold(x[i]), "` as input for 
`", names(x)[i], - "` (", ab_name(names(x)[i], tolower = TRUE, language = NULL), + warning_(paste0("Using column '", font_bold(x[i]), "' as input for ", names(x)[i], + " (", ab_name(names(x)[i], tolower = TRUE, language = NULL), "), although it was matched for multiple antibiotics or columns."), add_fn = font_red, call = FALSE, diff --git a/R/mo.R b/R/mo.R index 62096c23..08be2ee5 100755 --- a/R/mo.R +++ b/R/mo.R @@ -636,9 +636,7 @@ exec_as.mo <- function(x, } # WHONET and other common LIS codes ---- - found <- lookup(code %in% toupper(c(x_backup_untouched[i], x_backup[i], x_backup_without_spp[i])), - column = "mo", - haystack = microorganisms.codes) + found <- microorganisms.codes[which(microorganisms.codes$code %in% toupper(c(x_backup_untouched[i], x_backup[i], x_backup_without_spp[i]))), "mo", drop = TRUE][1L] if (!is.na(found)) { x[i] <- lookup(mo == found) next @@ -893,10 +891,12 @@ exec_as.mo <- function(x, } # try any match keeping spaces ---- - found <- lookup(fullname_lower %like_case% d.x_withspaces_start_end, - haystack = data_to_check) - if (!is.na(found) & nchar(g.x_backup_without_spp) >= 6) { - return(found[1L]) + if (nchar(g.x_backup_without_spp) >= 6) { + found <- lookup(fullname_lower %like_case% d.x_withspaces_start_end, + haystack = data_to_check) + if (!is.na(found)) { + return(found[1L]) + } } # try any match keeping spaces, not ending with $ ---- @@ -905,10 +905,12 @@ exec_as.mo <- function(x, if (!is.na(found)) { return(found[1L]) } - found <- lookup(fullname_lower %like_case% e.x_withspaces_start_only, - haystack = data_to_check) - if (!is.na(found) & nchar(g.x_backup_without_spp) >= 6) { - return(found[1L]) + if (nchar(g.x_backup_without_spp) >= 6) { + found <- lookup(fullname_lower %like_case% e.x_withspaces_start_only, + haystack = data_to_check) + if (!is.na(found)) { + return(found[1L]) + } } # try any match keeping spaces, not start with ^ ---- @@ -919,14 +921,16 @@ exec_as.mo <- function(x, } # try a trimmed version - found <- 
lookup(fullname_lower %like_case% b.x_trimmed | - fullname_lower %like_case% c.x_trimmed_without_group, - haystack = data_to_check) - if (!is.na(found) & nchar(g.x_backup_without_spp) >= 6) { - return(found[1L]) + if (nchar(g.x_backup_without_spp) >= 6) { + found <- lookup(fullname_lower %like_case% b.x_trimmed | + fullname_lower %like_case% c.x_trimmed_without_group, + haystack = data_to_check) + if (!is.na(found)) { + return(found[1L]) + } } - - + + # try splitting of characters in the middle and then find ID ---- # only when text length is 6 or lower # like esco = E. coli, klpn = K. pneumoniae, stau = S. aureus, staaur = S. aureus @@ -1313,14 +1317,16 @@ exec_as.mo <- function(x, if (isTRUE(debug)) { message("Running '", f.x_withspaces_end_only, "'") } - found <- lookup(fullname_lower %like_case% f.x_withspaces_end_only, column = "mo") - if (!is.na(found) & nchar(g.x_backup_without_spp) >= 6) { - found_result <- lookup(mo == found) - uncertainties <<- rbind(uncertainties, - attr(found, which = "uncertainties", exact = TRUE), - stringsAsFactors = FALSE) - found <- lookup(mo == found) - return(found) + if (nchar(g.x_backup_without_spp) >= 6) { + found <- lookup(fullname_lower %like_case% f.x_withspaces_end_only, column = "mo") + if (!is.na(found)) { + found_result <- lookup(mo == found) + uncertainties <<- rbind(uncertainties, + attr(found, which = "uncertainties", exact = TRUE), + stringsAsFactors = FALSE) + found <- lookup(mo == found) + return(found) + } } } diff --git a/R/mo_property.R b/R/mo_property.R index d375ff73..e88e8a82 100755 --- a/R/mo_property.R +++ b/R/mo_property.R @@ -383,9 +383,10 @@ mo_is_intrinsic_resistant <- function(x, ab, language = get_locale(), ...) 
{ ab <- rep(ab, length(x)) } if (length(x) != length(ab)) { - stop_("length of 'x' and 'ab' must be equal, or one of them must be of length 1.") + stop_("length of `x` and `ab` must be equal, or one of them must be of length 1.") } + # this saves about 50% in calculation time intrinsic_to_check <- intrinsic_resistant[which(intrinsic_resistant$microorganism %in% x | intrinsic_resistant$antibiotic %in% ab), , drop = FALSE] paste(x, ab) %in% paste(intrinsic_to_check$microorganism, intrinsic_to_check$antibiotic) @@ -618,12 +619,12 @@ find_mo_col <- function(fn) { mo <- suppressMessages(search_type_in_df(df, "mo")) }, silent = TRUE) if (!is.null(df) && !is.null(mo) && is.data.frame(df)) { - message_("Using column `", font_bold(mo), "` as input for ", fn, "()") + message_("Using column '", font_bold(mo), "' as input for ", fn, "()") return(df[, mo, drop = TRUE]) } else { - stop_("Argument 'x' is missing and no column with info about microorganisms could be found.", call = -2) + stop_("Argument `x` is missing and no column with info about microorganisms could be found.", call = -2) } } else { - stop_("Argument 'x' is missing.", call = -2) + stop_("Argument `x` is missing.", call = -2) } } diff --git a/R/rsi.R b/R/rsi.R index 05a5afd9..87b47526 100755 --- a/R/rsi.R +++ b/R/rsi.R @@ -1087,11 +1087,11 @@ check_reference_data <- function(reference_data) { class_rsi <- sapply(rsi_translation, function(x) paste0("<", class(x), ">", collapse = " and ")) class_ref <- sapply(reference_data, function(x) paste0("<", class(x), ">", collapse = " and ")) if (!all(names(class_rsi) == names(class_ref))) { - stop_("'reference_data' must have the same column names as the 'rsi_translation' data set.", call = -2) + stop_("`reference_data` must have the same column names as the 'rsi_translation' data set.", call = -2) } if (!all(class_rsi == class_ref)) { class_rsi[class_rsi != class_ref][1] - stop_("'reference_data' must be the same structure as the 'rsi_translation' data set. 
Column '", names(class_ref[class_rsi != class_ref][1]), "' is of class ", class_ref[class_rsi != class_ref][1], ", but should be of class ", class_rsi[class_rsi != class_ref][1], ".", call = -2) + stop_("`reference_data` must be the same structure as the 'rsi_translation' data set. Column '", names(class_ref[class_rsi != class_ref][1]), "' is of class ", class_ref[class_rsi != class_ref][1], ", but should be of class ", class_rsi[class_rsi != class_ref][1], ".", call = -2) } } } diff --git a/docs/404.html b/docs/404.html index b032174b..37052d8d 100644 --- a/docs/404.html +++ b/docs/404.html @@ -81,7 +81,7 @@
diff --git a/docs/LICENSE-text.html b/docs/LICENSE-text.html index ce530dae..fd0428c5 100644 --- a/docs/LICENSE-text.html +++ b/docs/LICENSE-text.html @@ -81,7 +81,7 @@ diff --git a/docs/articles/benchmarks.html b/docs/articles/benchmarks.html index b8c3637f..2cf032be 100644 --- a/docs/articles/benchmarks.html +++ b/docs/articles/benchmarks.html @@ -39,7 +39,7 @@ @@ -187,8 +187,7 @@ - -So getting official taxonomic names of 2,000,000 (!!) items consisting of 90 unique values only takes 0.331 seconds. You only lose time on your unique input values.
+# mo_name(x) 165 176 223 202 274 318 10
So getting official taxonomic names of 2,000,000 (!!) items consisting of 90 unique values only takes 0.202 seconds. You only lose time on your unique input values.
So going from mo_name("Staphylococcus aureus")
to "Staphylococcus aureus"
takes 0.0041 seconds - it doesn’t even start calculating if the result would be the same as the expected resulting value. That goes for all helper functions:
So going from mo_name("Staphylococcus aureus")
to "Staphylococcus aureus"
takes 0.0024 seconds - it doesn’t even start calculating if the result would be the same as the expected resulting value. That goes for all helper functions:
run_it <- microbenchmark(A = mo_species("aureus"), B = mo_genus("Staphylococcus"), @@ -317,14 +316,14 @@ print(run_it, unit = "ms", signif = 3) # Unit: milliseconds # expr min lq mean median uq max neval -# A 3.62 3.62 3.66 3.64 3.67 3.80 10 -# B 3.57 3.59 3.68 3.64 3.72 4.01 10 -# C 3.63 3.64 3.65 3.64 3.67 3.71 10 -# D 3.57 3.58 3.62 3.61 3.67 3.69 10 -# E 3.56 3.57 3.60 3.59 3.60 3.68 10 -# F 3.55 3.58 3.73 3.65 3.85 4.01 10 -# G 3.54 3.56 3.62 3.58 3.61 3.93 10 -# H 3.53 3.57 3.59 3.58 3.63 3.66 10
Of course, when running mo_phylum("Firmicutes")
the function has zero knowledge about the actual microorganism, namely S. aureus. But since the result would be "Firmicutes"
anyway, there is no point in calculating the result. And because this package ‘knows’ all phyla of all known bacteria (according to the Catalogue of Life), it can just return the initial value immediately.
Currently supported are German, Dutch, Spanish, Italian, French and Portuguese.
diff --git a/docs/articles/benchmarks_files/figure-html/unnamed-chunk-4-1.png b/docs/articles/benchmarks_files/figure-html/unnamed-chunk-4-1.png index 4492f129..707e0abc 100644 Binary files a/docs/articles/benchmarks_files/figure-html/unnamed-chunk-4-1.png and b/docs/articles/benchmarks_files/figure-html/unnamed-chunk-4-1.png differ diff --git a/docs/articles/index.html b/docs/articles/index.html index c51e4bbe..8b3e7c78 100644 --- a/docs/articles/index.html +++ b/docs/articles/index.html @@ -81,7 +81,7 @@ diff --git a/docs/authors.html b/docs/authors.html index 4be138d9..ec320917 100644 --- a/docs/authors.html +++ b/docs/authors.html @@ -81,7 +81,7 @@ diff --git a/docs/index.html b/docs/index.html index 3a9abf89..b86ab004 100644 --- a/docs/index.html +++ b/docs/index.html @@ -43,7 +43,7 @@ diff --git a/docs/news/index.html b/docs/news/index.html index 423bad78..df543dd8 100644 --- a/docs/news/index.html +++ b/docs/news/index.html @@ -81,7 +81,7 @@ @@ -236,13 +236,13 @@ Source:NEWS.md
- as.rsi()
on a data.frame in older R versionsas.rsi()
on a data.frame will not print a message anymore if the values are already clean R/SI valuesmo_uncertainties()
would not return the results based on the MO matching scoreas.mo()
would not return results for known laboratory codes for microorganisms