diff --git a/.github/workflows/check-current.yaml b/.github/workflows/check-current.yaml index 261cca9c..3f1b3715 100644 --- a/.github/workflows/check-current.yaml +++ b/.github/workflows/check-current.yaml @@ -58,6 +58,9 @@ jobs: - {os: ubuntu-latest, r: 'release', allowfail: false} - {os: windows-latest, r: 'devel', allowfail: false} - {os: windows-latest, r: 'release', allowfail: false} + - {os: macOS-latest, r: '3.6', allowfail: false} + - {os: ubuntu-latest, r: '3.6', allowfail: false} + - {os: windows-latest, r: '3.6', allowfail: false} env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/check-old.yaml b/.github/workflows/check-old.yaml index 58bfe1a9..58548c46 100644 --- a/.github/workflows/check-old.yaml +++ b/.github/workflows/check-old.yaml @@ -59,6 +59,7 @@ jobs: - {os: ubuntu-22.04, r: '4.0', allowfail: false, rspm: "https://packagemanager.rstudio.com/cran/__linux__/jammy/latest"} - {os: ubuntu-22.04, r: '3.6', allowfail: false, rspm: "https://packagemanager.rstudio.com/cran/__linux__/jammy/latest"} # R 3.5 returns a strange GC error when running examples, omit the checks for that + # - {os: ubuntu-22.04, r: '3.5', allowfail: false, rspm: "https://packagemanager.rstudio.com/cran/__linux__/jammy/latest"} - {os: ubuntu-22.04, r: '3.4', allowfail: false, rspm: "https://packagemanager.rstudio.com/cran/__linux__/jammy/latest"} - {os: ubuntu-22.04, r: '3.3', allowfail: false, rspm: "https://packagemanager.rstudio.com/cran/__linux__/jammy/latest"} - {os: ubuntu-22.04, r: '3.2', allowfail: false, rspm: "https://packagemanager.rstudio.com/cran/__linux__/jammy/latest"} diff --git a/DESCRIPTION b/DESCRIPTION index d159ca81..a909b844 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: AMR -Version: 1.8.2.9076 -Date: 2022-12-30 +Version: 1.8.2.9077 +Date: 2023-01-05 Title: Antimicrobial Resistance Data Analysis Description: Functions to simplify and standardise antimicrobial resistance (AMR) data analysis and to work with microbial and antimicrobial properties by diff --git a/NEWS.md b/NEWS.md index d204913d..58d290bc 100755 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,4 @@ -# 1.8.2.9076 +# 1.8.2.9077 *(this beta version will eventually become v2.0! We're happy to reach a new major milestone soon!)* diff --git a/R/aa_helper_functions.R b/R/aa_helper_functions.R index 9155b6bf..469d7ee7 100755 --- a/R/aa_helper_functions.R +++ b/R/aa_helper_functions.R @@ -966,7 +966,7 @@ unique_call_id <- function(entire_session = FALSE, match_fn = NULL) { # and relevant system call (where 'match_fn' is being called in) calls <- sys.calls() in_test <- any(as.character(calls[[1]]) %like_case% "run_test_dir|run_test_file|test_all|tinytest|test_package|testthat", na.rm = TRUE) - if (!isTRUE(in_test)) { + if (!isTRUE(in_test) && !is.null(match_fn)) { for (i in seq_len(length(calls))) { call_clean <- gsub("[^a-zA-Z0-9_().-]", "", as.character(calls[[i]]), perl = TRUE) if (match_fn %in% call_clean || any(call_clean %like% paste0(match_fn, "\\("), na.rm = TRUE)) { @@ -1262,6 +1262,7 @@ create_pillar_column <- function(x, ...) { as_original_data_class <- function(df, old_class = NULL) { if ("tbl_df" %in% old_class && pkg_is_available("tibble", also_load = FALSE)) { + # this will then also remove groups fn <- import_fn("as_tibble", "tibble") } else if ("tbl_ts" %in% old_class && pkg_is_available("tsibble", also_load = FALSE)) { fn <- import_fn("as_tsibble", "tsibble") @@ -1270,7 +1271,7 @@ as_original_data_class <- function(df, old_class = NULL) { } else if ("tabyl" %in% old_class && pkg_is_available("janitor", also_load = FALSE)) { fn <- import_fn("as_tabyl", "janitor") } else { - fn <- base::as.data.frame + fn <- function(x) base::as.data.frame(df, stringsAsFactors = FALSE) } fn(df) } diff --git a/R/ab_property.R b/R/ab_property.R index 57a98b5d..6ed83dbe 100644 --- a/R/ab_property.R +++ b/R/ab_property.R @@ -36,7 +36,7 @@ #' @param language language of the returned text, defaults to system language (see [get_AMR_locale()]) and can also be set with `getOption("AMR_locale")`. Use `language = NULL` or `language = ""` to prevent translation. #' @param administration way of administration, either `"oral"` or `"iv"` #' @param open browse the URL using [utils::browseURL()] -#' @param ... in case of [set_ab_names()] and `data` is a [data.frame]: variables to select (supports tidy selection such as `column1:column4`), otherwise other arguments passed on to [as.ab()] +#' @param ... in case of [set_ab_names()] and `data` is a [data.frame]: columns to select (supports tidy selection such as `column1:column4`), otherwise other arguments passed on to [as.ab()] #' @param data a [data.frame] of which the columns need to be renamed, or a [character] vector of column names #' @param snake_case a [logical] to indicate whether the names should be in so-called [snake case](https://en.wikipedia.org/wiki/Snake_case): in lower case and all spaces/slashes replaced with an underscore (`_`) #' @param only_first a [logical] to indicate whether only the first ATC code must be returned, with giving preference to J0-codes (i.e., the antimicrobial drug group) diff --git a/R/bug_drug_combinations.R b/R/bug_drug_combinations.R index a896b921..01574bdc 100644 --- a/R/bug_drug_combinations.R +++ b/R/bug_drug_combinations.R @@ -161,7 +161,7 @@ bug_drug_combinations <- function(x, out <- run_it(x) } rownames(out) <- NULL - out <- as_original_data_class(out, class(x.bak)) + out <- as_original_data_class(out, class(x.bak)) # will remove tibble groups structure(out, class = c("bug_drug_combinations", ifelse(data_has_groups, "grouped", character(0)), class(out))) } @@ -322,7 +322,7 @@ format.bug_drug_combinations <- function(x, } rownames(y) <- NULL - as_original_data_class(y, class(x.bak)) + as_original_data_class(y, class(x.bak)) # will remove tibble groups } #' @method print bug_drug_combinations diff --git a/R/custom_eucast_rules.R b/R/custom_eucast_rules.R index ce96d115..8f3e8267 100644 --- a/R/custom_eucast_rules.R +++ b/R/custom_eucast_rules.R @@ -77,7 +77,7 @@ #' #' ### Using taxonomic properties in rules #' -#' There is one exception in variables used for the rules: all column names of the [microorganisms] data set can also be used, but do not have to exist in the data set. These column names are: `r vector_and(colnames(microorganisms), sort = FALSE)`. Thus, this next example will work as well, despite the fact that the `df` data set does not contain a column `genus`: +#' There is one exception in columns used for the rules: all column names of the [microorganisms] data set can also be used, but do not have to exist in the data set. These column names are: `r vector_and(colnames(microorganisms), sort = FALSE)`. Thus, this next example will work as well, despite the fact that the `df` data set does not contain a column `genus`: #' #' ```r #' y <- custom_eucast_rules(TZP == "S" & genus == "Klebsiella" ~ aminopenicillins == "S", diff --git a/R/eucast_rules.R b/R/eucast_rules.R index 3bba72f5..14045934 100755 --- a/R/eucast_rules.R +++ b/R/eucast_rules.R @@ -1035,7 +1035,7 @@ eucast_rules <- function(x, # Return data set --------------------------------------------------------- if (isTRUE(verbose)) { - as_original_data_class(verbose_info, old_attributes$class) + as_original_data_class(verbose_info, old_attributes$class) # will remove tibble groups } else { # x was analysed with only unique rows, so join everything together again x <- x[, c(cols_ab, ".rowid"), drop = FALSE] @@ -1043,8 +1043,9 @@ eucast_rules <- function(x, x.bak <- x.bak %pm>% pm_left_join(x, by = ".rowid") x.bak <- x.bak[, old_cols, drop = FALSE] - # reset original attributes, no need for as_original_data_class() here + # reset original attributes attributes(x.bak) <- old_attributes + x.bak <- as_original_data_class(x.bak, old_class = class(x.bak)) # will remove tibble groups x.bak } } diff --git a/R/join_microorganisms.R b/R/join_microorganisms.R index 7194c160..d6b75fd0 100755 --- a/R/join_microorganisms.R +++ b/R/join_microorganisms.R @@ -185,5 +185,5 @@ join_microorganisms <- function(type, x, by, suffix, ...) { warning_("in `", type, "_microorganisms()`: the newly joined data set contains ", nrow(joined) - nrow(x), " rows more than the number of rows of `x`.") } - as_original_data_class(joined, class(x.bak)) + as_original_data_class(joined, class(x.bak)) # will remove tibble groups } diff --git a/R/mean_amr_distance.R b/R/mean_amr_distance.R index e2493a83..591c6bfc 100644 --- a/R/mean_amr_distance.R +++ b/R/mean_amr_distance.R @@ -30,30 +30,40 @@ #' Calculate the Mean AMR Distance #' #' Calculates a normalised mean for antimicrobial resistance between multiple observations, to help to identify similar isolates without comparing antibiograms by hand. -#' @param x a vector of class [rsi][as.rsi()], [rsi][as.rsi()] or [rsi][as.rsi()], or a [data.frame] containing columns of any of these classes +#' @param x a vector of class [rsi][as.rsi()], [mic][as.mic()] or [disk][as.disk()], or a [data.frame] containing columns of any of these classes #' @param ... variables to select (supports [tidyselect language][tidyselect::language] such as `column1:column4` and `where(is.mic)`, and can thus also be [antibiotic selectors][ab_selector()] #' @param combine_SI a [logical] to indicate whether all values of S and I must be merged into one, so the input only consists of S+I vs. R (susceptible vs. resistant), defaults to `TRUE` -#' @details The mean AMR distance is a normalised numeric value to compare AMR test results and can help to identify similar isolates, without comparing antibiograms by hand. For common numeric data this distance is equal to [Z scores](https://en.wikipedia.org/wiki/Standard_score) (the number of standard deviations from the mean). +#' @details The mean AMR distance is effectively [the Z-score](https://en.wikipedia.org/wiki/Standard_score); a normalised numeric value to compare AMR test results which can help to identify similar isolates, without comparing antibiograms by hand. #' -#' MIC values (see [as.mic()]) are transformed with [log2()] first; their distance is calculated as `(log2(x) - mean(log2(x))) / sd(log2(x))`. +#' MIC values (see [as.mic()]) are transformed with [log2()] first; their distance is thus calculated as `(log2(x) - mean(log2(x))) / sd(log2(x))`. #' #' R/SI values (see [as.rsi()]) are transformed using `"S"` = 1, `"I"` = 2, and `"R"` = 3. If `combine_SI` is `TRUE` (default), the `"I"` will be considered to be 1. #' -#' For data sets, the mean AMR distance will be calculated per variable, after which the mean of all columns will returned per row (using [rowMeans()]), see *Examples*. +#' For data sets, the mean AMR distance will be calculated per column, after which the mean per row will be returned, see *Examples*. #' #' Use [amr_distance_from_row()] to subtract distances from the distance of one row, see *Examples*. #' @section Interpretation: #' Isolates with distances less than 0.01 difference from each other should be considered similar. Differences lower than 0.025 should be considered suspicious. #' @export #' @examples -#' x <- random_mic(10) -#' x -#' mean_amr_distance(x) +#' rsi <- random_rsi(10) +#' rsi +#' mean_amr_distance(rsi) +#' +#' mic <- random_mic(10) +#' mic +#' mean_amr_distance(mic) +#' # equal to the Z-score of their log2: +#' (log2(mic) - mean(log2(mic))) / sd(log2(mic)) +#' +#' disk <- random_disk(10) +#' disk +#' mean_amr_distance(disk) #' #' y <- data.frame( #' id = LETTERS[1:10], -#' amox = random_mic(10, ab = "amox", mo = "Escherichia coli"), -#' cipr = random_mic(10, ab = "cipr", mo = "Escherichia coli"), +#' amox = random_rsi(10, ab = "amox", mo = "Escherichia coli"), +#' cipr = random_disk(10, ab = "cipr", mo = "Escherichia coli"), #' gent = random_mic(10, ab = "gent", mo = "Escherichia coli"), #' tobr = random_mic(10, ab = "tobr", mo = "Escherichia coli") #' ) @@ -65,7 +75,7 @@ #' if (require("dplyr")) { #' y %>% #' mutate( -#' amr_distance = mean_amr_distance(., where(is.mic)), +#' amr_distance = mean_amr_distance(y), #' check_id_C = amr_distance_from_row(amr_distance, id == "C") #' ) %>% #' arrange(check_id_C) @@ -76,8 +86,8 @@ #' filter(mo_genus() == "Enterococcus" & mo_species() != "") %>% #' select(mo, TCY, carbapenems()) %>% #' group_by(mo) %>% -#' mutate(d = mean_amr_distance(., where(is.rsi))) %>% -#' arrange(mo, d) +#' mutate(dist = mean_amr_distance(.)) %>% +#' arrange(mo, dist) #' } mean_amr_distance <- function(x, ...) { UseMethod("mean_amr_distance") @@ -87,6 +97,7 @@ mean_amr_distance <- function(x, ...) { #' @export mean_amr_distance.default <- function(x, ...) { x <- as.double(x) + # calculate z-score (x - mean(x, na.rm = TRUE)) / stats::sd(x, na.rm = TRUE) } @@ -120,6 +131,7 @@ mean_amr_distance.data.frame <- function(x, ..., combine_SI = TRUE) { if (is_null_or_grouped_tbl(df)) { df <- get_current_data("x", -2) } + df <- as.data.frame(df, stringsAsFactors = FALSE) if (tryCatch(length(list(...)) > 0, error = function(e) TRUE)) { out <- tryCatch(suppressWarnings(c(...)), error = function(e) NULL) if (!is.null(out)) { @@ -128,13 +140,18 @@ mean_amr_distance.data.frame <- function(x, ..., combine_SI = TRUE) { df <- pm_select(df, ...) } } + df_classes <- colnames(df)[vapply(FUN.VALUE = logical(1), df, function(x) is.disk(x) | is.mic(x) | is.disk(x), USE.NAMES = FALSE)] + df_antibiotics <- unname(get_column_abx(df, info = FALSE)) + df <- df[, colnames(df)[colnames(df) %in% union(df_classes, df_antibiotics)], drop = FALSE] + stop_if(ncol(df) < 2, "data set must contain at least two variables", call = -2 ) if (message_not_thrown_before("mean_amr_distance", "groups")) { - message_("Calculating mean AMR distance based on columns ", vector_and(colnames(df))) + message_("Calculating mean AMR distance based on columns ", vector_and(colnames(df), sort = FALSE)) } + res <- vapply( FUN.VALUE = double(nrow(df)), df, @@ -149,7 +166,7 @@ mean_amr_distance.data.frame <- function(x, ..., combine_SI = TRUE) { } } res <- rowMeans(res, na.rm = TRUE) - res[is.infinite(res)] <- 0 + res[is.infinite(res) | is.nan(res)] <- 0 res } diff --git a/R/resistance_predict.R b/R/resistance_predict.R index 71be3380..da0ce7de 100755 --- a/R/resistance_predict.R +++ b/R/resistance_predict.R @@ -274,7 +274,7 @@ resistance_predict <- function(x, df_prediction$value <- ifelse(df_prediction$value > 1, 1, pmax(df_prediction$value, 0)) df_prediction <- df_prediction[order(df_prediction$year), , drop = FALSE] - out <- as_original_data_class(df_prediction, class(x.bak)) + out <- as_original_data_class(df_prediction, class(x.bak)) # will remove tibble groups structure(out, class = c("resistance_predict", class(out)), I_as_S = I_as_S, diff --git a/R/rsi_calc.R b/R/rsi_calc.R index 5efbc462..cc8a6510 100755 --- a/R/rsi_calc.R +++ b/R/rsi_calc.R @@ -371,6 +371,6 @@ rsi_calc_df <- function(type, # "proportion", "count" or "both" } rownames(out) <- NULL - out <- as_original_data_class(out, class(data.bak)) + out <- as_original_data_class(out, class(data.bak)) # will remove tibble groups structure(out, class = c("rsi_df", class(out))) }