diff --git a/.github/workflows/check.yaml b/.github/workflows/check.yaml index 1fda8806..9566ed24 100644 --- a/.github/workflows/check.yaml +++ b/.github/workflows/check.yaml @@ -156,4 +156,4 @@ jobs: uses: actions/upload-artifact@v2 with: name: artifacts-${{ matrix.config.os }}-r${{ matrix.config.r }} - path: ${{ github.workspace }}/AMR.Rcheck + path: /home/runner/work/AMR.Rcheck diff --git a/DESCRIPTION b/DESCRIPTION index 7d4bdd5c..d301033e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: AMR -Version: 1.8.1.9046 -Date: 2022-08-29 +Version: 1.8.1.9047 +Date: 2022-08-30 Title: Antimicrobial Resistance Data Analysis Description: Functions to simplify and standardise antimicrobial resistance (AMR) data analysis and to work with microbial and antimicrobial properties by @@ -13,7 +13,7 @@ Authors@R: c( person(family = "Hassing", c("Erwin", "E.", "A."), role = c("aut", "ctb")), person(family = "Albers", c("Casper", "J."), role = "ths", comment = c(ORCID = "0000-0002-9213-6743")), person(family = "Dutey-Magni", c("Peter"), role = "ctb", comment = c(ORCID = "0000-0002-8942-9836")), - person(family = "Fonville", c("Judith", "M"), role = "ctb"), + person(family = "Fonville", c("Judith", "M."), role = "ctb"), person(family = "Friedrich", c("Alex", "W."), role = "ths", comment = c(ORCID = "0000-0003-4881-038X")), person(family = "Glasner", c("Corinna"), role = "ths", comment = c(ORCID = "0000-0003-1241-1328")), person(family = "Hazenberg", c("Eric", "H.", "L.", "C.", "M."), role = "ctb"), diff --git a/NAMESPACE b/NAMESPACE index 71da678e..5044c894 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -101,6 +101,11 @@ S3method(log1p,mic) S3method(log2,mic) S3method(max,mic) S3method(mean,mic) +S3method(mean_amr_distance,data.frame) +S3method(mean_amr_distance,default) +S3method(mean_amr_distance,disk) +S3method(mean_amr_distance,mic) +S3method(mean_amr_distance,rsi) S3method(median,mic) S3method(min,mic) S3method(plot,disk) @@ -220,6 +225,7 @@ export(count_resistant) export(count_susceptible) export(custom_eucast_rules) export(custom_mdro_guideline) +export(distance_from_row) export(eucast_dosage) export(eucast_exceptional_phenotypes) export(eucast_rules) @@ -259,6 +265,7 @@ export(macrolides) export(mdr_cmi2012) export(mdr_tb) export(mdro) +export(mean_amr_distance) export(mo_authors) export(mo_class) export(mo_domain) diff --git a/NEWS.md b/NEWS.md index 49406de8..2cc040e6 100755 --- a/NEWS.md +++ b/NEWS.md @@ -1,7 +1,8 @@ -# AMR 1.8.1.9046 +# AMR 1.8.1.9047 ### New * EUCAST 2022 and CLSI 2022 guidelines have been added for `as.rsi()`. EUCAST 2022 is now the new default guideline for all MIC and disks diffusion interpretations. +* Function to calculate the mean AMR distance: `mean_amr_distance()`. The mean AMR distance is a normalised numeric value to compare AMR test results and can help to identify similar isolates, without comparing antibiograms by hand. * Support for `data.frame`-enhancing R packages, more specifically: `data.table`, `tibble`, and `tsibble`. AMR package functions that have a data set as output (such as `rsi_df()` and `bug_drug_combinations()`), will now return the same data type as the input. Furthermore, all our data sets are now in `tibble` format. * Our data sets are now also continually exported to Apache Feather and Apache Parquet formats. You can find more info [in this article on our website](https://msberends.github.io/AMR/articles/datasets.html). * Support for the following languages: Chinese, Greek, Japanese, Polish, Turkish and Ukrainian. We are very grateful for the valuable input by our colleagues from other countries. The `AMR` package is now available in 16 languages. diff --git a/R/ab_property.R b/R/ab_property.R index 67cd9e81..91028128 100644 --- a/R/ab_property.R +++ b/R/ab_property.R @@ -374,7 +374,12 @@ set_ab_names <- function(data, ..., property = "name", language = get_AMR_locale if (is.data.frame(data)) { if (tryCatch(length(list(...)) > 0, error = function(e) TRUE)) { - df <- pm_select(data, ...) + out <- tryCatch(suppressWarnings(c(...)), error = function(e) NULL) + if (!is.null(out)) { + df <- data[, out, drop = FALSE] + } else { + df <- pm_select(data, ...) + } } else { df <- data } diff --git a/R/mean_amr_distance.R b/R/mean_amr_distance.R new file mode 100644 index 00000000..f8a7c592 --- /dev/null +++ b/R/mean_amr_distance.R @@ -0,0 +1,163 @@ +# ==================================================================== # +# TITLE # +# Antimicrobial Resistance (AMR) Data Analysis for R # +# # +# SOURCE # +# https://github.com/msberends/AMR # +# # +# LICENCE # +# (c) 2018-2022 Berends MS, Luz CF et al. # +# Developed at the University of Groningen, the Netherlands, in # +# collaboration with non-profit organisations Certe Medical # +# Diagnostics & Advice, and University Medical Center Groningen. # +# # +# This R package is free software; you can freely use and distribute # +# it for both personal and commercial purposes under the terms of the # +# GNU General Public License version 2.0 (GNU GPL-2), as published by # +# the Free Software Foundation. # +# We created this package for both routine data analysis and academic # +# research and it was publicly released in the hope that it will be # +# useful, but it comes WITHOUT ANY WARRANTY OR LIABILITY. # +# # +# Visit our website for the full manual and a complete tutorial about # +# how to conduct AMR data analysis: https://msberends.github.io/AMR/ # +# ==================================================================== # + +#' Mean AMR Distance +#' +#' This function calculates a normalised mean for antimicrobial resistance between multiple observations. +#' @param x a vector of class [rsi][as.rsi()], [rsi][as.rsi()] or [rsi][as.rsi()], or a [data.frame] containing columns of any of these classes +#' @param ... variables to select (supports tidy selection such as `column1:column4` and [`where(is.mic)`][tidyselect::language]), and can thus also be [antibiotic selectors][ab_selector()] +#' @param combine_SI a [logical] to indicate whether all values of S and I must be merged into one, so the input only consists of S+I vs. R (susceptible vs. resistant), defaults to `TRUE` +#' @details The mean AMR distance is a normalised numeric value to compare AMR test results and can help to identify similar isolates, without comparing antibiograms by hand. For common numeric data this distance is equal to [Z scores](https://en.wikipedia.org/wiki/Standard_score) (the number of standard deviations from the mean). +#' +#' MIC values (see [as.mic()]) are transformed with [log2()] first; their distance is calculated as `(log2(x) - mean(log2(x))) / sd(log2(x))`. +#' +#' R/SI values (see [as.rsi()]) are transformed using `"S"` = 1, `"I"` = 2, and `"R"` = 3. If `combine_SI` is `TRUE` (default), the `"I"` will be considered to be 1. +#' +#' For data sets, the mean AMR distance will be calculated per variable, after which the mean of all columns will returned per row (using [rowMeans()]), see *Examples*. +#' +#' Use [distance_from_row()] to subtract distances from the distance of one row, see *Examples*. +#' @section Interpretation: +#' Isolates with distances less than 0.01 difference from each other should be considered similar. Differences lower than 0.025 should be considered suspicious. +#' @export +#' @examples +#' x <- random_mic(10) +#' x +#' mean_amr_distance(x) +#' +#' y <- data.frame( +#' id = LETTERS[1:10], +#' amox = random_mic(10, ab = "amox", mo = "Escherichia coli"), +#' cipr = random_mic(10, ab = "cipr", mo = "Escherichia coli"), +#' gent = random_mic(10, ab = "gent", mo = "Escherichia coli"), +#' tobr = random_mic(10, ab = "tobr", mo = "Escherichia coli") +#' ) +#' y +#' mean_amr_distance(y) +#' y$amr_distance <- mean_amr_distance(y, where(is.mic)) +#' y[order(y$amr_distance), ] +#' +#' if (require("dplyr")) { +#' y %>% +#' mutate( +#' amr_distance = mean_amr_distance(., where(is.mic)), +#' check_id_C = distance_from_row(amr_distance, id == "C") +#' ) %>% +#' arrange(check_id_C) +#' } +#' if (require("dplyr")) { +#' # support for groups +#' example_isolates %>% +#' filter(mo_genus() == "Enterococcus" & mo_species() != "") %>% +#' select(mo, TCY, carbapenems()) %>% +#' group_by(mo) %>% +#' mutate(d = mean_amr_distance(., where(is.rsi))) %>% +#' arrange(mo, d) +#' } +mean_amr_distance <- function(x, ...) { + UseMethod("mean_amr_distance") +} + +#' @rdname mean_amr_distance +#' @export +mean_amr_distance.default <- function(x, ...) { + x <- as.double(x) + (x - mean(x, na.rm = TRUE)) / stats::sd(x, na.rm = TRUE) +} + +#' @rdname mean_amr_distance +#' @export +mean_amr_distance.mic <- function(x, ...) { + mean_amr_distance(log2(x)) +} + +#' @rdname mean_amr_distance +#' @export +mean_amr_distance.disk <- function(x, ...) { + mean_amr_distance(as.double(x)) +} + +#' @rdname mean_amr_distance +#' @export +mean_amr_distance.rsi <- function(x, combine_SI = TRUE, ...) { + meet_criteria(combine_SI, allow_class = "logical", has_length = 1, .call_depth = -1) + if (isTRUE(combine_SI)) { + x[x == "I"] <- "S" + } + mean_amr_distance(as.double(x)) +} + +#' @rdname mean_amr_distance +#' @export +mean_amr_distance.data.frame <- function(x, ..., combine_SI = TRUE) { + meet_criteria(combine_SI, allow_class = "logical", has_length = 1, .call_depth = -1) + df <- x + if (is_null_or_grouped_tbl(df)) { + df <- get_current_data("x", -2) + } + if (tryCatch(length(list(...)) > 0, error = function(e) TRUE)) { + out <- tryCatch(suppressWarnings(c(...)), error = function(e) NULL) + if (!is.null(out)) { + df <- df[, out, drop = FALSE] + } else { + df <- pm_select(df, ...) + } + } + stop_if(ncol(df) < 2, + "data set must contain at least two variables", + call = -2 + ) + if (message_not_thrown_before("mean_amr_distance", "groups")) { + message_("Calculating mean AMR distance based on columns ", vector_and(colnames(df))) + } + res <- vapply( + FUN.VALUE = double(nrow(df)), + df, + mean_amr_distance, + combine_SI = combine_SI + ) + if (is.null(dim(res))) { + if (all(is.na(res))) { + return(NA_real_) + } else { + return(mean(res, na.rm = TRUE)) + } + } + res <- rowMeans(res, na.rm = TRUE) + res[is.infinite(res)] <- 0 + res +} + +#' @rdname mean_amr_distance +#' @param mean_distance the outcome of [mean_amr_distance()] +#' @param row an index, such as a row number +#' @export +distance_from_row <- function(mean_distance, row) { + meet_criteria(mean_distance, allow_class = c("double", "numeric"), is_finite = TRUE) + meet_criteria(row, allow_class = c("logical", "double", "numeric")) + if (is.logical(row)) { + row <- which(row) + } + abs(mean_distance[row] - mean_distance) +} diff --git a/R/vctrs.R b/R/vctrs.R index 07079208..4e4e5aef 100644 --- a/R/vctrs.R +++ b/R/vctrs.R @@ -84,3 +84,15 @@ vec_ptype2.disk.integer <- function(x, y, ...) { vec_cast.integer.disk <- function(x, to, ...) { unclass(x) } + +# S3: mic +vec_cast.character.mic <- function(x, to, ...) { + as.character(x) +} +vec_cast.double.mic <- function(x, to, ...) { + # this calls as.double.mic() + as.double(x) +} +vec_math.mic <- function(.fn, x, ...) { + .fn(as.double(x), ...) +} diff --git a/R/zzz.R b/R/zzz.R index 8dadb750..d7a5adc8 100755 --- a/R/zzz.R +++ b/R/zzz.R @@ -96,6 +96,9 @@ if (utf8_supported && !is_latex) { s3_register("vctrs::vec_ptype2", "disk.integer") s3_register("vctrs::vec_ptype2", "integer.disk") s3_register("vctrs::vec_cast", "integer.disk") + s3_register("vctrs::vec_cast", "character.mic") + s3_register("vctrs::vec_cast", "double.mic") + s3_register("vctrs::vec_math", "mic") # if mo source exists, fire it up (see mo_source()) try( diff --git a/inst/tinytest/test-mean_amr_distance.R b/inst/tinytest/test-mean_amr_distance.R new file mode 100644 index 00000000..c9ad6fa6 --- /dev/null +++ b/inst/tinytest/test-mean_amr_distance.R @@ -0,0 +1,59 @@ +# ==================================================================== # +# TITLE # +# Antimicrobial Resistance (AMR) Data Analysis for R # +# # +# SOURCE # +# https://github.com/msberends/AMR # +# # +# LICENCE # +# (c) 2018-2022 Berends MS, Luz CF et al. # +# Developed at the University of Groningen, the Netherlands, in # +# collaboration with non-profit organisations Certe Medical # +# Diagnostics & Advice, and University Medical Center Groningen. # +# # +# This R package is free software; you can freely use and distribute # +# it for both personal and commercial purposes under the terms of the # +# GNU General Public License version 2.0 (GNU GPL-2), as published by # +# the Free Software Foundation. # +# We created this package for both routine data analysis and academic # +# research and it was publicly released in the hope that it will be # +# useful, but it comes WITHOUT ANY WARRANTY OR LIABILITY. # +# # +# Visit our website for the full manual and a complete tutorial about # +# how to conduct AMR data analysis: https://msberends.github.io/AMR/ # +# ==================================================================== # + +vctr_disk <- as.disk(c(20:25)) +vctr_mic <- as.mic(2^c(0:5)) +vctr_rsi <- as.rsi(c("S", "S", "I", "I", "R", "R")) + +expect_identical( + mean_amr_distance(vctr_disk), + (as.double(vctr_disk) - mean(as.double(vctr_disk))) / sd(as.double(vctr_disk)) +) + +expect_identical( + mean_amr_distance(vctr_mic), + (log2(vctr_mic) - mean(log2(vctr_mic))) / sd(log2(vctr_mic)) +) + +expect_identical( + mean_amr_distance(vctr_rsi, combine_SI = FALSE), + (c(1, 1, 2, 2, 3, 3) - mean(c(1, 1, 2, 2, 3, 3))) / sd(c(1, 1, 2, 2, 3, 3)) +) +expect_identical( + mean_amr_distance(vctr_rsi, combine_SI = TRUE), + (c(1, 1, 1, 1, 3, 3) - mean(c(1, 1, 1, 1, 3, 3))) / sd(c(1, 1, 1, 1, 3, 3)) +) + +expect_equal( + mean_amr_distance(data.frame(vctr_mic, vctr_rsi, vctr_disk)), + c(-1.10603655, -0.74968823, -0.39333990, -0.03699158, 0.96485397, 1.32120229), + tolerance = 0.00001 +) + +expect_equal( + mean_amr_distance(data.frame(vctr_mic, vctr_rsi, vctr_disk), 2:3), + c(-0.9909017, -0.7236405, -0.4563792, -0.1891180, 1.0463891, 1.3136503), + tolerance = 0.00001 +) diff --git a/man/mean_amr_distance.Rd b/man/mean_amr_distance.Rd new file mode 100644 index 00000000..b7f7c196 --- /dev/null +++ b/man/mean_amr_distance.Rd @@ -0,0 +1,87 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/mean_amr_distance.R +\name{mean_amr_distance} +\alias{mean_amr_distance} +\alias{mean_amr_distance.default} +\alias{mean_amr_distance.mic} +\alias{mean_amr_distance.disk} +\alias{mean_amr_distance.rsi} +\alias{mean_amr_distance.data.frame} +\alias{distance_from_row} +\title{Mean AMR Distance} +\usage{ +mean_amr_distance(x, ...) + +\method{mean_amr_distance}{default}(x, ...) + +\method{mean_amr_distance}{mic}(x, ...) + +\method{mean_amr_distance}{disk}(x, ...) + +\method{mean_amr_distance}{rsi}(x, combine_SI = TRUE, ...) + +\method{mean_amr_distance}{data.frame}(x, ..., combine_SI = TRUE) + +distance_from_row(mean_distance, row) +} +\arguments{ +\item{x}{a vector of class \link[=as.rsi]{rsi}, \link[=as.rsi]{rsi} or \link[=as.rsi]{rsi}, or a \link{data.frame} containing columns of any of these classes} + +\item{...}{variables to select (supports tidy selection such as \code{column1:column4} and \code{\link[tidyselect:language]{where(is.mic)}}), and can thus also be \link[=ab_selector]{antibiotic selectors}} + +\item{combine_SI}{a \link{logical} to indicate whether all values of S and I must be merged into one, so the input only consists of S+I vs. R (susceptible vs. resistant), defaults to \code{TRUE}} + +\item{mean_distance}{the outcome of \code{\link[=mean_amr_distance]{mean_amr_distance()}}} + +\item{row}{an index, such as a row number} +} +\description{ +This function calculates a normalised mean for antimicrobial resistance between multiple observations. +} +\details{ +The mean AMR distance is a normalised numeric value to compare AMR test results and can help to identify similar isolates, without comparing antibiograms by hand. For common numeric data this distance is equal to \href{https://en.wikipedia.org/wiki/Standard_score}{Z scores} (the number of standard deviations from the mean). + +MIC values (see \code{\link[=as.mic]{as.mic()}}) are transformed with \code{\link[=log2]{log2()}} first; their distance is calculated as \code{(log2(x) - mean(log2(x))) / sd(log2(x))}. + +R/SI values (see \code{\link[=as.rsi]{as.rsi()}}) are transformed using \code{"S"} = 1, \code{"I"} = 2, and \code{"R"} = 3. If \code{combine_SI} is \code{TRUE} (default), the \code{"I"} will be considered to be 1. + +For data sets, the mean AMR distance will be calculated per variable, after which the mean of all columns will returned per row (using \code{\link[=rowMeans]{rowMeans()}}), see \emph{Examples}. + +Use \code{\link[=distance_from_row]{distance_from_row()}} to subtract distances from the distance of one row, see \emph{Examples}. +} +\section{Interpretation}{ + +Isolates with distances less than 0.01 difference from each other should be considered similar. Differences lower than 0.025 should be considered suspicious. +} + +\examples{ +x <- random_mic(10) +x +mean_amr_distance(x) + +y <- data.frame(id = LETTERS[1:10], + amox = random_mic(10, ab = "amox", mo = "Escherichia coli"), + cipr = random_mic(10, ab = "cipr", mo = "Escherichia coli"), + gent = random_mic(10, ab = "gent", mo = "Escherichia coli"), + tobr = random_mic(10, ab = "tobr", mo = "Escherichia coli")) +y +mean_amr_distance(y) +y$amr_distance <- mean_amr_distance(y, where(is.mic)) +y[order(y$amr_distance), ] + +if (require("dplyr")) { + y \%>\% + mutate(amr_distance = mean_amr_distance(., where(is.mic)), + check_id_C = distance_from_row(amr_distance, id == "C")) \%>\% + arrange(check_id_C) +} +if (require("dplyr")) { + # support for groups + example_isolates \%>\% + filter(mo_genus() == "Enterococcus" & mo_species() != "") \%>\% + select(mo, TCY, carbapenems()) \%>\% + group_by(mo) \%>\% + mutate(d = mean_amr_distance(., where(is.rsi))) \%>\% + arrange(mo, d) +} +}