diff --git a/DESCRIPTION b/DESCRIPTION index 6d2ed32f..081d7363 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: AMR -Version: 1.7.1.9008 -Date: 2021-06-23 +Version: 1.7.1.9009 +Date: 2021-07-03 Title: Antimicrobial Resistance Data Analysis Authors@R: c( person(role = c("aut", "cre"), diff --git a/NEWS.md b/NEWS.md index b2814543..622048ba 100755 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,5 @@ -# `AMR` 1.7.1.9008 -## Last updated: 23 June 2021 +# `AMR` 1.7.1.9009 +## Last updated: 3 July 2021 ### Changed * Antibiotic class selectors (see `ab_class()`) @@ -13,6 +13,8 @@ * When printing a tibble with any old MO code, a warning will be thrown that old codes should be updated using `as.mo()` * Improved automatic column selector when `col_*` arguments are left blank, e.g. in `first_isolate()` * The right input types for `random_mic()`, `random_disk()` and `random_rsi()` are now enforced +* `as.rsi()` can now correct for textual input (such as "Susceptible", "Resistant") in Dutch, English, French, German, Italian, Portuguese and Spanish +* More informative warnings for all `count_*()`, `proportion_*()` functions (and `resistant()` and `susceptible()`) when they return NA because of too few test results. The warnings now include the official drug name and if used, the `dplyr` group name. # `AMR` 1.7.1 diff --git a/R/aa_helper_functions.R b/R/aa_helper_functions.R index b6f2436d..f326e956 100755 --- a/R/aa_helper_functions.R +++ b/R/aa_helper_functions.R @@ -736,22 +736,22 @@ get_current_data <- function(arg_name, call) { } # try a manual (base R) method, by going over all underlying environments with sys.frames() - for (el in sys.frames()) { - if (!is.null(el$`.Generic`)) { - # don't check `".Generic" %in% names(el)`, because in R < 3.2, `names(el)` is always NULL + for (env in sys.frames()) { + if (!is.null(env$`.Generic`)) { + # don't check `".Generic" %in% names(env)`, because in R < 3.2, `names(env)` is always NULL - if (!is.null(el$`.data`) && is.data.frame(el$`.data`)) { + if (!is.null(env$`.data`) && is.data.frame(env$`.data`)) { # an element `.data` will be in the environment when using `dplyr::select()` # (but not when using `dplyr::filter()`, `dplyr::mutate()` or `dplyr::summarise()`) - return(structure(el$`.data`, type = "dplyr_selector")) + return(structure(env$`.data`, type = "dplyr_selector")) - } else if (!is.null(el$xx) && is.data.frame(el$xx)) { + } else if (!is.null(env$xx) && is.data.frame(env$xx)) { # an element `xx` will be in the environment for rows + cols, e.g. `example_isolates[c(1:3), carbapenems()]` - return(structure(el$xx, type = "base_R")) + return(structure(env$xx, type = "base_R")) - } else if (!is.null(el$x) && is.data.frame(el$x)) { + } else if (!is.null(env$x) && is.data.frame(env$x)) { # an element `x` will be in the environment for only cols, e.g. `example_isolates[, carbapenems()]` - return(structure(el$x, type = "base_R")) + return(structure(env$x, type = "base_R")) } } } @@ -786,19 +786,19 @@ get_current_column <- function() { } } - # cur_column() doesn't always work (only allowed for conditions set by dplyr), but it's probably still possible: - frms <- lapply(sys.frames(), function(el) { - if ("i" %in% names(el)) { - if ("tibble_vars" %in% names(el)) { + # cur_column() doesn't always work (only allowed for certain conditions set by dplyr), but it's probably still possible: + frms <- lapply(sys.frames(), function(env) { + if (!is.null(env$i)) { + if (!is.null(env$tibble_vars)) { # for mutate_if() - el$tibble_vars[el$i] + env$tibble_vars[env$i] } else { # for mutate(across()) df <- tryCatch(get_current_data(NA, 0), error = function(e) NULL) if (is.data.frame(df)) { - colnames(df)[el$i] + colnames(df)[env$i] } else { - el$i + env$i } } } else { @@ -816,7 +816,7 @@ get_current_column <- function() { } is_null_or_grouped_tbl <- function(x) { - # attribute "grouped_df" might change at one point, so only set in one place; here. + # class "grouped_df" might change at one point, so only set in one place; here. is.null(x) || inherits(x, "grouped_df") } @@ -825,7 +825,7 @@ unique_call_id <- function(entire_session = FALSE) { c(envir = "session", call = "session") } else { - # combination of environment ID (like "0x7fed4ee8c848") + # combination of environment ID (such as "0x7fed4ee8c848") # and highest system call call <- paste0(deparse(sys.calls()[[1]]), collapse = "") if (!interactive() || call %like% "run_test_dir|test_all|tinytest|test_package|testthat") { diff --git a/R/ab_class_selectors.R b/R/ab_class_selectors.R index 5b40a715..5cb26415 100644 --- a/R/ab_class_selectors.R +++ b/R/ab_class_selectors.R @@ -299,7 +299,7 @@ ab_selector <- function(function_name, # get the columns with a group names in the chosen ab class agents <- ab_in_data[names(ab_in_data) %in% abx] - if (message_not_thrown_before(function_name)) { + if (message_not_thrown_before(paste0(function_name, ".", paste(pkg_env$get_column_abx.out, collapse = "|")))) { if (length(agents) == 0) { message_("No antimicrobial agents of class '", ab_group, "' found", examples, ".") } else { @@ -315,7 +315,7 @@ ab_selector <- function(function_name, ifelse(length(agents) == 1, "column: ", "columns: "), vector_and(agents_formatted, quotes = FALSE, sort = FALSE)) } - remember_thrown_message(function_name) + remember_thrown_message(paste0(function_name, ".", paste(pkg_env$get_column_abx.out, collapse = "|"))) } if (!is.null(attributes(vars_df)$type) && diff --git a/R/guess_ab_col.R b/R/guess_ab_col.R index e8ddeec7..cc6fc27b 100755 --- a/R/guess_ab_col.R +++ b/R/guess_ab_col.R @@ -97,16 +97,39 @@ guess_ab_col <- function(x = NULL, search_string = NULL, verbose = FALSE, only_r } get_column_abx <- function(x, + ..., soft_dependencies = NULL, hard_dependencies = NULL, verbose = FALSE, info = TRUE, only_rsi_columns = FALSE, sort = TRUE, - ...) { + reuse_previous_result = TRUE) { # check if retrieved before, then get it from package environment - if (identical(unique_call_id(entire_session = FALSE), pkg_env$get_column_abx.call)) { + if (isTRUE(reuse_previous_result) && identical(unique_call_id(entire_session = FALSE), pkg_env$get_column_abx.call)) { + # so within the same call, within the same environment, we got here again. + # but we could've come from another function within the same call, so now only check the columns that changed + + # first remove the columns that are not existing anymore + previous <- pkg_env$get_column_abx.out + current <- previous[previous %in% colnames(x)] + + # then compare columns in current call with columns in original call + new_cols <- colnames(x)[!colnames(x) %in% pkg_env$get_column_abx.checked_cols] + if (length(new_cols) > 0) { + # these columns did not exist in the last call, so add them + new_cols_rsi <- get_column_abx(x[, new_cols, drop = FALSE], reuse_previous_result = FALSE, info = FALSE, sort = FALSE) + current <- c(current, new_cols_rsi) + # order according to data in current call + current <- current[match(colnames(x)[colnames(x) %in% current], current)] + } + + # update pkg environment to improve speed on next run + pkg_env$get_column_abx.out <- current + pkg_env$get_column_abx.checked_cols <- colnames(x) + + # and return right values return(pkg_env$get_column_abx.out) } @@ -123,6 +146,7 @@ get_column_abx <- function(x, } x <- as.data.frame(x, stringsAsFactors = FALSE) + x.bak <- x if (only_rsi_columns == TRUE) { x <- x[, which(is.rsi(x)), drop = FALSE] } @@ -163,8 +187,8 @@ get_column_abx <- function(x, abcode = suppressWarnings(as.ab(colnames(x), info = FALSE)), stringsAsFactors = FALSE) df_trans <- df_trans[!is.na(df_trans$abcode), , drop = FALSE] - x <- as.character(df_trans$colnames) - names(x) <- df_trans$abcode + out <- as.character(df_trans$colnames) + names(out) <- df_trans$abcode # add from self-defined dots (...): # such as get_column_abx(example_isolates %>% rename(thisone = AMX), amox = "thisone") @@ -177,33 +201,34 @@ get_column_abx <- function(x, immediate = TRUE) } # turn all NULLs to NAs - dots <- unlist(lapply(dots, function(x) if (is.null(x)) NA else x)) + dots <- unlist(lapply(dots, function(dot) if (is.null(dot)) NA else dot)) names(dots) <- newnames dots <- dots[!is.na(names(dots))] # merge, but overwrite automatically determined ones by 'dots' - x <- c(x[!x %in% dots & !names(x) %in% names(dots)], dots) + out <- c(out[!out %in% dots & !names(out) %in% names(dots)], dots) # delete NAs, this will make e.g. eucast_rules(... TMP = NULL) work to prevent TMP from being used - x <- x[!is.na(x)] + out <- out[!is.na(out)] } - if (length(x) == 0) { + if (length(out) == 0) { if (info == TRUE) { message_("No columns found.") } pkg_env$get_column_abx.call <- unique_call_id(entire_session = FALSE) - pkg_env$get_column_abx.out <- x - return(x) + pkg_env$get_column_abx.checked_cols <- colnames(x.bak) + pkg_env$get_column_abx.out <- out + return(out) } # sort on name if (sort == TRUE) { - x <- x[order(names(x), x)] + out <- out[order(names(out), out)] } - duplicates <- c(x[duplicated(x)], x[duplicated(names(x))]) + duplicates <- c(out[duplicated(out)], out[duplicated(names(out))]) duplicates <- duplicates[unique(names(duplicates))] - x <- c(x[!names(x) %in% names(duplicates)], duplicates) + out <- c(out[!names(out) %in% names(duplicates)], duplicates) if (sort == TRUE) { - x <- x[order(names(x), x)] + out <- out[order(names(out), out)] } # succeeded with auto-guessing @@ -211,14 +236,14 @@ get_column_abx <- function(x, message_(" OK.", add_fn = list(font_green, font_bold), as_note = FALSE) } - for (i in seq_len(length(x))) { - if (info == TRUE & verbose == TRUE & !names(x[i]) %in% names(duplicates)) { - message_("Using column '", font_bold(x[i]), "' as input for ", names(x)[i], - " (", ab_name(names(x)[i], tolower = TRUE, language = NULL), ").") + for (i in seq_len(length(out))) { + if (info == TRUE & verbose == TRUE & !names(out[i]) %in% names(duplicates)) { + message_("Using column '", font_bold(out[i]), "' as input for ", names(out)[i], + " (", ab_name(names(out)[i], tolower = TRUE, language = NULL), ").") } - if (info == TRUE & names(x[i]) %in% names(duplicates)) { - warning_(paste0("Using column '", font_bold(x[i]), "' as input for ", names(x)[i], - " (", ab_name(names(x)[i], tolower = TRUE, language = NULL), + if (info == TRUE & names(out[i]) %in% names(duplicates)) { + warning_(paste0("Using column '", font_bold(out[i]), "' as input for ", names(out)[i], + " (", ab_name(names(out)[i], tolower = TRUE, language = NULL), "), although it was matched for multiple antibiotics or columns."), add_fn = font_red, call = FALSE, @@ -228,18 +253,18 @@ get_column_abx <- function(x, if (!is.null(hard_dependencies)) { hard_dependencies <- unique(hard_dependencies) - if (!all(hard_dependencies %in% names(x))) { + if (!all(hard_dependencies %in% names(out))) { # missing a hard dependency will return NA and consequently the data will not be analysed - missing <- hard_dependencies[!hard_dependencies %in% names(x)] + missing <- hard_dependencies[!hard_dependencies %in% names(out)] generate_warning_abs_missing(missing, any = FALSE) return(NA) } } if (!is.null(soft_dependencies)) { soft_dependencies <- unique(soft_dependencies) - if (info == TRUE & !all(soft_dependencies %in% names(x))) { + if (info == TRUE & !all(soft_dependencies %in% names(out))) { # missing a soft dependency may lower the reliability - missing <- soft_dependencies[!soft_dependencies %in% names(x)] + missing <- soft_dependencies[!soft_dependencies %in% names(out)] missing_msg <- vector_and(paste0(ab_name(missing, tolower = TRUE, language = NULL), " (", font_bold(missing, collapse = NULL), ")"), quotes = FALSE) @@ -249,8 +274,9 @@ get_column_abx <- function(x, } pkg_env$get_column_abx.call <- unique_call_id(entire_session = FALSE) - pkg_env$get_column_abx.out <- x - x + pkg_env$get_column_abx.checked_cols <- colnames(x.bak) + pkg_env$get_column_abx.out <- out + out } generate_warning_abs_missing <- function(missing, any = FALSE) { diff --git a/R/rsi.R b/R/rsi.R index 308c4274..ba20c141 100755 --- a/R/rsi.R +++ b/R/rsi.R @@ -284,10 +284,26 @@ as.rsi.default <- function(x, ...) { } } - x <- as.character(unlist(x)) + # trim leading and trailing spaces, new lines, etc. + x <- trimws2(as.character(unlist(x))) x.bak <- x - na_before <- length(x[is.na(x) | x == ""]) + + # correct for translations + trans_R <- unlist(TRANSLATIONS[which(TRANSLATIONS$pattern == "Resistant"), + LANGUAGES_SUPPORTED[LANGUAGES_SUPPORTED %in% colnames(TRANSLATIONS)]]) + trans_S <- unlist(TRANSLATIONS[which(TRANSLATIONS$pattern == "Susceptible"), + LANGUAGES_SUPPORTED[LANGUAGES_SUPPORTED %in% colnames(TRANSLATIONS)]]) + trans_I <- unlist(TRANSLATIONS[which(TRANSLATIONS$pattern %in% c("Incr. exposure", "Intermediate")), + LANGUAGES_SUPPORTED[LANGUAGES_SUPPORTED %in% colnames(TRANSLATIONS)]]) + x <- gsub(paste0(unique(trans_R[!is.na(trans_R)]), collapse = "|"), "R", x, ignore.case = TRUE) + x <- gsub(paste0(unique(trans_S[!is.na(trans_S)]), collapse = "|"), "S", x, ignore.case = TRUE) + x <- gsub(paste0(unique(trans_I[!is.na(trans_I)]), collapse = "|"), "I", x, ignore.case = TRUE) + # replace all English textual input + x <- gsub("res(is(tant)?)?", "R", x, ignore.case = TRUE) + x <- gsub("sus(cep(tible)?)?", "S", x, ignore.case = TRUE) + x <- gsub("int(er(mediate)?)?", "I", x, ignore.case = TRUE) + x <- gsub("inc(r(eased)?)? exp[a-z]*", "I", x, ignore.case = TRUE) # remove all spaces x <- gsub(" +", "", x) # remove all MIC-like values: numbers, operators and periods diff --git a/R/rsi_calc.R b/R/rsi_calc.R index 30203020..fdcddde8 100755 --- a/R/rsi_calc.R +++ b/R/rsi_calc.R @@ -27,7 +27,12 @@ dots2vars <- function(...) { # this function is to give more informative output about # variable names in count_* and proportion_* functions dots <- substitute(list(...)) - vector_and(as.character(dots)[2:length(dots)], quotes = FALSE) + agents <- as.character(dots)[2:length(dots)] + agents_formatted <- paste0("'", font_bold(agents, collapse = NULL), "'") + agents_names <- ab_name(agents, tolower = TRUE, language = NULL) + need_name <- generalise_antibiotic_name(agents) != agents_names + agents_formatted[need_name] <- paste0(agents_formatted[need_name], " (", agents_names[need_name], ")") + vector_and(agents_formatted, quotes = FALSE) } rsi_calc <- function(..., @@ -163,8 +168,30 @@ rsi_calc <- function(..., if (denominator < minimum) { if (data_vars != "") { data_vars <- paste(" for", data_vars) + # also add group name if used in dplyr::group_by() + cur_group <- import_fn("cur_group", "dplyr", error_on_fail = FALSE) + if (!is.null(cur_group)) { + group_df <- tryCatch(cur_group(), error = function(e) data.frame()) + if (NCOL(group_df) > 0) { + # transform factors to characters + group <- vapply(FUN.VALUE = character(1), group_df, function(x) { + if (is.numeric(x)) { + format(x) + } else if (is.logical(x)) { + as.character(x) + } else { + paste0('"', x, '"') + } + }) + data_vars <- paste0(data_vars, " in group: ", paste0(names(group), " = ", group, collapse = ", ")) + } + } } - warning_("Introducing NA: only ", denominator, " results available", data_vars, " (`minimum` = ", minimum, ").", call = FALSE) + warning_("Introducing NA: ", + ifelse(denominator == 0, "no", paste("only", denominator)), + " results available", + data_vars, + " (`minimum` = ", minimum, ").", call = FALSE) fraction <- NA_real_ } else { fraction <- numerator / denominator diff --git a/R/sysdata.rda b/R/sysdata.rda index f833060e..8ab09c8f 100644 Binary files a/R/sysdata.rda and b/R/sysdata.rda differ diff --git a/data-raw/AMR_latest.tar.gz b/data-raw/AMR_latest.tar.gz index d00bd402..905a430a 100644 Binary files a/data-raw/AMR_latest.tar.gz and b/data-raw/AMR_latest.tar.gz differ diff --git a/docs/404.html b/docs/404.html index 0290928a..d716e6cb 100644 --- a/docs/404.html +++ b/docs/404.html @@ -81,7 +81,7 @@ AMR (for R) - 1.7.1.9008 + 1.7.1.9009 @@ -89,14 +89,14 @@