new AMR distance function

2026-02-06 23:52:52 +01:00 · 2022-08-30 21:48:02 +02:00
parent fbd5d32541
commit 3fca703fbf
10 changed files with 343 additions and 6 deletions
--- a/.github/workflows/check.yaml
+++ b/.github/workflows/check.yaml
@@ -156,4 +156,4 @@ jobs:
        uses: actions/upload-artifact@v2
        with:
          name: artifacts-${{ matrix.config.os }}-r${{ matrix.config.r }}
-          path: ${{ github.workspace }}/AMR.Rcheck
+          path: /home/runner/work/AMR.Rcheck
--- a/6
+++ b/6
@@ -1,6 +1,6 @@
 Package: AMR
-Version: 1.8.1.9046
-Date: 2022-08-29
+Version: 1.8.1.9047
+Date: 2022-08-30
 Title: Antimicrobial Resistance Data Analysis
 Description: Functions to simplify and standardise antimicrobial resistance (AMR)
    data analysis and to work with microbial and antimicrobial properties by
@@ -13,7 +13,7 @@ Authors@R: c(
 person(family = "Hassing",     c("Erwin", "E.", "A."), role = c("aut", "ctb")),
 person(family = "Albers",      c("Casper", "J."), role = "ths", comment = c(ORCID = "0000-0002-9213-6743")),
 person(family = "Dutey-Magni", c("Peter"), role = "ctb", comment = c(ORCID = "0000-0002-8942-9836")),
- person(family = "Fonville",    c("Judith", "M"), role = "ctb"),
+ person(family = "Fonville",    c("Judith", "M."), role = "ctb"),
 person(family = "Friedrich",   c("Alex", "W."), role = "ths", comment = c(ORCID = "0000-0003-4881-038X")),
 person(family = "Glasner",     c("Corinna"), role = "ths", comment = c(ORCID = "0000-0003-1241-1328")),
 person(family = "Hazenberg",   c("Eric", "H.", "L.", "C.", "M."), role = "ctb"),
--- a/7
+++ b/7
@@ -101,6 +101,11 @@ S3method(log1p,mic)
 S3method(log2,mic)
 S3method(max,mic)
 S3method(mean,mic)
+S3method(mean_amr_distance,data.frame)
+S3method(mean_amr_distance,default)
+S3method(mean_amr_distance,disk)
+S3method(mean_amr_distance,mic)
+S3method(mean_amr_distance,rsi)
 S3method(median,mic)
 S3method(min,mic)
 S3method(plot,disk)
@@ -220,6 +225,7 @@ export(count_resistant)
 export(count_susceptible)
 export(custom_eucast_rules)
 export(custom_mdro_guideline)
+export(distance_from_row)
 export(eucast_dosage)
 export(eucast_exceptional_phenotypes)
 export(eucast_rules)
@@ -259,6 +265,7 @@ export(macrolides)
 export(mdr_cmi2012)
 export(mdr_tb)
 export(mdro)
+export(mean_amr_distance)
 export(mo_authors)
 export(mo_class)
 export(mo_domain)
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,7 +1,8 @@
-# AMR 1.8.1.9046
+# AMR 1.8.1.9047

 ### New
 * EUCAST 2022 and CLSI 2022 guidelines have been added for `as.rsi()`. EUCAST 2022 is now the new default guideline for all MIC and disks diffusion interpretations.
+* Function to calculate the mean AMR distance: `mean_amr_distance()`. The mean AMR distance is a normalised numeric value to compare AMR test results and can help to identify similar isolates, without comparing antibiograms by hand.
 * Support for `data.frame`-enhancing R packages, more specifically: `data.table`, `tibble`, and `tsibble`. AMR package functions that have a data set as output (such as `rsi_df()` and `bug_drug_combinations()`), will now return the same data type as the input. Furthermore, all our data sets are now in `tibble` format.
 * Our data sets are now also continually exported to Apache Feather and Apache Parquet formats. You can find more info [in this article on our website](https://msberends.github.io/AMR/articles/datasets.html).
 * Support for the following languages: Chinese, Greek, Japanese, Polish, Turkish and Ukrainian. We are very grateful for the valuable input by our colleagues from other countries. The `AMR` package is now available in 16 languages.
--- a/R/ab_property.R
+++ b/R/ab_property.R
@@ -374,7 +374,12 @@ set_ab_names <- function(data, ..., property = "name", language = get_AMR_locale

  if (is.data.frame(data)) {
    if (tryCatch(length(list(...)) > 0, error = function(e) TRUE)) {
-      df <- pm_select(data, ...)
+      out <- tryCatch(suppressWarnings(c(...)), error = function(e) NULL)
+      if (!is.null(out)) {
+        df <- data[, out, drop = FALSE]
+      } else {
+        df <- pm_select(data, ...)
+      }
    } else {
      df <- data
    }
--- a/R/mean_amr_distance.R
+++ b/R/mean_amr_distance.R
@@ -0,0 +1,163 @@
+# ==================================================================== #
+# TITLE                                                                #
+# Antimicrobial Resistance (AMR) Data Analysis for R                   #
+#                                                                      #
+# SOURCE                                                               #
+# https://github.com/msberends/AMR                                     #
+#                                                                      #
+# LICENCE                                                              #
+# (c) 2018-2022 Berends MS, Luz CF et al.                              #
+# Developed at the University of Groningen, the Netherlands, in        #
+# collaboration with non-profit organisations Certe Medical            #
+# Diagnostics & Advice, and University Medical Center Groningen.       #
+#                                                                      #
+# This R package is free software; you can freely use and distribute   #
+# it for both personal and commercial purposes under the terms of the  #
+# GNU General Public License version 2.0 (GNU GPL-2), as published by  #
+# the Free Software Foundation.                                        #
+# We created this package for both routine data analysis and academic  #
+# research and it was publicly released in the hope that it will be    #
+# useful, but it comes WITHOUT ANY WARRANTY OR LIABILITY.              #
+#                                                                      #
+# Visit our website for the full manual and a complete tutorial about  #
+# how to conduct AMR data analysis: https://msberends.github.io/AMR/   #
+# ==================================================================== #
+
+#' Mean AMR Distance
+#'
+#' This function calculates a normalised mean for antimicrobial resistance between multiple observations.
+#' @param x a vector of class [rsi][as.rsi()], [rsi][as.rsi()] or [rsi][as.rsi()], or a [data.frame] containing columns of any of these classes
+#' @param ... variables to select (supports tidy selection such as `column1:column4` and [`where(is.mic)`][tidyselect::language]), and can thus also be [antibiotic selectors][ab_selector()]
+#' @param combine_SI 	a [logical] to indicate whether all values of S and I must be merged into one, so the input only consists of S+I vs. R (susceptible vs. resistant), defaults to `TRUE`
+#' @details The mean AMR distance is a normalised numeric value to compare AMR test results and can help to identify similar isolates, without comparing antibiograms by hand. For common numeric data this distance is equal to [Z scores](https://en.wikipedia.org/wiki/Standard_score) (the number of standard deviations from the mean).
+#'
+#' MIC values (see [as.mic()]) are transformed with [log2()] first; their distance is calculated as `(log2(x) - mean(log2(x))) / sd(log2(x))`.
+#'
+#' R/SI values (see [as.rsi()]) are transformed using `"S"` = 1, `"I"` = 2, and `"R"` = 3. If `combine_SI` is `TRUE` (default), the `"I"` will be considered to be 1.
+#'
+#' For data sets, the mean AMR distance will be calculated per variable, after which the mean of all columns will returned per row (using [rowMeans()]), see *Examples*.
+#'
+#' Use [distance_from_row()] to subtract distances from the distance of one row, see *Examples*.
+#' @section Interpretation:
+#' Isolates with distances less than 0.01 difference from each other should be considered similar. Differences lower than 0.025 should be considered suspicious.
+#' @export
+#' @examples
+#' x <- random_mic(10)
+#' x
+#' mean_amr_distance(x)
+#'
+#' y <- data.frame(
+#'   id = LETTERS[1:10],
+#'   amox = random_mic(10, ab = "amox", mo = "Escherichia coli"),
+#'   cipr = random_mic(10, ab = "cipr", mo = "Escherichia coli"),
+#'   gent = random_mic(10, ab = "gent", mo = "Escherichia coli"),
+#'   tobr = random_mic(10, ab = "tobr", mo = "Escherichia coli")
+#' )
+#' y
+#' mean_amr_distance(y)
+#' y$amr_distance <- mean_amr_distance(y, where(is.mic))
+#' y[order(y$amr_distance), ]
+#'
+#' if (require("dplyr")) {
+#'   y %>%
+#'     mutate(
+#'       amr_distance = mean_amr_distance(., where(is.mic)),
+#'       check_id_C = distance_from_row(amr_distance, id == "C")
+#'     ) %>%
+#'     arrange(check_id_C)
+#' }
+#' if (require("dplyr")) {
+#'   # support for groups
+#'   example_isolates %>%
+#'     filter(mo_genus() == "Enterococcus" & mo_species() != "") %>%
+#'     select(mo, TCY, carbapenems()) %>%
+#'     group_by(mo) %>%
+#'     mutate(d = mean_amr_distance(., where(is.rsi))) %>%
+#'     arrange(mo, d)
+#' }
+mean_amr_distance <- function(x, ...) {
+  UseMethod("mean_amr_distance")
+}
+
+#' @rdname mean_amr_distance
+#' @export
+mean_amr_distance.default <- function(x, ...) {
+  x <- as.double(x)
+  (x - mean(x, na.rm = TRUE)) / stats::sd(x, na.rm = TRUE)
+}
+
+#' @rdname mean_amr_distance
+#' @export
+mean_amr_distance.mic <- function(x, ...) {
+  mean_amr_distance(log2(x))
+}
+
+#' @rdname mean_amr_distance
+#' @export
+mean_amr_distance.disk <- function(x, ...) {
+  mean_amr_distance(as.double(x))
+}
+
+#' @rdname mean_amr_distance
+#' @export
+mean_amr_distance.rsi <- function(x, combine_SI = TRUE, ...) {
+  meet_criteria(combine_SI, allow_class = "logical", has_length = 1, .call_depth = -1)
+  if (isTRUE(combine_SI)) {
+    x[x == "I"] <- "S"
+  }
+  mean_amr_distance(as.double(x))
+}
+
+#' @rdname mean_amr_distance
+#' @export
+mean_amr_distance.data.frame <- function(x, ..., combine_SI = TRUE) {
+  meet_criteria(combine_SI, allow_class = "logical", has_length = 1, .call_depth = -1)
+  df <- x
+  if (is_null_or_grouped_tbl(df)) {
+    df <- get_current_data("x", -2)
+  }
+  if (tryCatch(length(list(...)) > 0, error = function(e) TRUE)) {
+    out <- tryCatch(suppressWarnings(c(...)), error = function(e) NULL)
+    if (!is.null(out)) {
+      df <- df[, out, drop = FALSE]
+    } else {
+      df <- pm_select(df, ...)
+    }
+  }
+  stop_if(ncol(df) < 2,
+    "data set must contain at least two variables",
+    call = -2
+  )
+  if (message_not_thrown_before("mean_amr_distance", "groups")) {
+    message_("Calculating mean AMR distance based on columns ", vector_and(colnames(df)))
+  }
+  res <- vapply(
+    FUN.VALUE = double(nrow(df)),
+    df,
+    mean_amr_distance,
+    combine_SI = combine_SI
+  )
+  if (is.null(dim(res))) {
+    if (all(is.na(res))) {
+      return(NA_real_)
+    } else {
+      return(mean(res, na.rm = TRUE))
+    }
+  }
+  res <- rowMeans(res, na.rm = TRUE)
+  res[is.infinite(res)] <- 0
+  res
+}
+
+#' @rdname mean_amr_distance
+#' @param mean_distance the outcome of [mean_amr_distance()]
+#' @param row an index, such as a row number
+#' @export
+distance_from_row <- function(mean_distance, row) {
+  meet_criteria(mean_distance, allow_class = c("double", "numeric"), is_finite = TRUE)
+  meet_criteria(row, allow_class = c("logical", "double", "numeric"))
+  if (is.logical(row)) {
+    row <- which(row)
+  }
+  abs(mean_distance[row] - mean_distance)
+}
--- a/R/vctrs.R
+++ b/R/vctrs.R
@@ -84,3 +84,15 @@ vec_ptype2.disk.integer <- function(x, y, ...) {
 vec_cast.integer.disk <- function(x, to, ...) {
  unclass(x)
 }
+
+# S3: mic
+vec_cast.character.mic <- function(x, to, ...) {
+  as.character(x)
+}
+vec_cast.double.mic <- function(x, to, ...) {
+  # this calls as.double.mic()
+  as.double(x)
+}
+vec_math.mic <- function(.fn, x, ...) {
+  .fn(as.double(x), ...)
+}
--- a/R/zzz.R
+++ b/R/zzz.R
@@ -96,6 +96,9 @@ if (utf8_supported && !is_latex) {
  s3_register("vctrs::vec_ptype2", "disk.integer")
  s3_register("vctrs::vec_ptype2", "integer.disk")
  s3_register("vctrs::vec_cast", "integer.disk")
+  s3_register("vctrs::vec_cast", "character.mic")
+  s3_register("vctrs::vec_cast", "double.mic")
+  s3_register("vctrs::vec_math", "mic")

  # if mo source exists, fire it up (see mo_source())
  try(
--- a/inst/tinytest/test-mean_amr_distance.R
+++ b/inst/tinytest/test-mean_amr_distance.R
@@ -0,0 +1,59 @@
+# ==================================================================== #
+# TITLE                                                                #
+# Antimicrobial Resistance (AMR) Data Analysis for R                   #
+#                                                                      #
+# SOURCE                                                               #
+# https://github.com/msberends/AMR                                     #
+#                                                                      #
+# LICENCE                                                              #
+# (c) 2018-2022 Berends MS, Luz CF et al.                              #
+# Developed at the University of Groningen, the Netherlands, in        #
+# collaboration with non-profit organisations Certe Medical            #
+# Diagnostics & Advice, and University Medical Center Groningen.       #
+#                                                                      #
+# This R package is free software; you can freely use and distribute   #
+# it for both personal and commercial purposes under the terms of the  #
+# GNU General Public License version 2.0 (GNU GPL-2), as published by  #
+# the Free Software Foundation.                                        #
+# We created this package for both routine data analysis and academic  #
+# research and it was publicly released in the hope that it will be    #
+# useful, but it comes WITHOUT ANY WARRANTY OR LIABILITY.              #
+#                                                                      #
+# Visit our website for the full manual and a complete tutorial about  #
+# how to conduct AMR data analysis: https://msberends.github.io/AMR/   #
+# ==================================================================== #
+
+vctr_disk <- as.disk(c(20:25))
+vctr_mic <- as.mic(2^c(0:5))
+vctr_rsi <- as.rsi(c("S", "S", "I", "I", "R", "R"))
+
+expect_identical(
+  mean_amr_distance(vctr_disk),
+  (as.double(vctr_disk) - mean(as.double(vctr_disk))) / sd(as.double(vctr_disk))
+)
+
+expect_identical(
+  mean_amr_distance(vctr_mic),
+  (log2(vctr_mic) - mean(log2(vctr_mic))) / sd(log2(vctr_mic))
+)
+
+expect_identical(
+  mean_amr_distance(vctr_rsi, combine_SI = FALSE),
+  (c(1, 1, 2, 2, 3, 3) - mean(c(1, 1, 2, 2, 3, 3))) / sd(c(1, 1, 2, 2, 3, 3))
+)
+expect_identical(
+  mean_amr_distance(vctr_rsi, combine_SI = TRUE),
+  (c(1, 1, 1, 1, 3, 3) - mean(c(1, 1, 1, 1, 3, 3))) / sd(c(1, 1, 1, 1, 3, 3))
+)
+
+expect_equal(
+  mean_amr_distance(data.frame(vctr_mic, vctr_rsi, vctr_disk)),
+  c(-1.10603655, -0.74968823, -0.39333990, -0.03699158, 0.96485397, 1.32120229),
+  tolerance = 0.00001
+)
+
+expect_equal(
+  mean_amr_distance(data.frame(vctr_mic, vctr_rsi, vctr_disk), 2:3),
+  c(-0.9909017, -0.7236405, -0.4563792, -0.1891180, 1.0463891, 1.3136503),
+  tolerance = 0.00001
+)
--- a/man/mean_amr_distance.Rd
+++ b/man/mean_amr_distance.Rd
@@ -0,0 +1,87 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/mean_amr_distance.R
+\name{mean_amr_distance}
+\alias{mean_amr_distance}
+\alias{mean_amr_distance.default}
+\alias{mean_amr_distance.mic}
+\alias{mean_amr_distance.disk}
+\alias{mean_amr_distance.rsi}
+\alias{mean_amr_distance.data.frame}
+\alias{distance_from_row}
+\title{Mean AMR Distance}
+\usage{
+mean_amr_distance(x, ...)
+
+\method{mean_amr_distance}{default}(x, ...)
+
+\method{mean_amr_distance}{mic}(x, ...)
+
+\method{mean_amr_distance}{disk}(x, ...)
+
+\method{mean_amr_distance}{rsi}(x, combine_SI = TRUE, ...)
+
+\method{mean_amr_distance}{data.frame}(x, ..., combine_SI = TRUE)
+
+distance_from_row(mean_distance, row)
+}
+\arguments{
+\item{x}{a vector of class \link[=as.rsi]{rsi}, \link[=as.rsi]{rsi} or \link[=as.rsi]{rsi}, or a \link{data.frame} containing columns of any of these classes}
+
+\item{...}{variables to select (supports tidy selection such as \code{column1:column4} and \code{\link[tidyselect:language]{where(is.mic)}}), and can thus also be \link[=ab_selector]{antibiotic selectors}}
+
+\item{combine_SI}{a \link{logical} to indicate whether all values of S and I must be merged into one, so the input only consists of S+I vs. R (susceptible vs. resistant), defaults to \code{TRUE}}
+
+\item{mean_distance}{the outcome of \code{\link[=mean_amr_distance]{mean_amr_distance()}}}
+
+\item{row}{an index, such as a row number}
+}
+\description{
+This function calculates a normalised mean for antimicrobial resistance between multiple observations.
+}
+\details{
+The mean AMR distance is a normalised numeric value to compare AMR test results and can help to identify similar isolates, without comparing antibiograms by hand. For common numeric data this distance is equal to \href{https://en.wikipedia.org/wiki/Standard_score}{Z scores} (the number of standard deviations from the mean).
+
+MIC values (see \code{\link[=as.mic]{as.mic()}}) are transformed with \code{\link[=log2]{log2()}} first; their distance is calculated as \code{(log2(x) - mean(log2(x))) / sd(log2(x))}.
+
+R/SI values (see \code{\link[=as.rsi]{as.rsi()}}) are transformed using \code{"S"} = 1, \code{"I"} = 2, and \code{"R"} = 3. If \code{combine_SI} is \code{TRUE} (default), the \code{"I"} will be considered to be 1.
+
+For data sets, the mean AMR distance will be calculated per variable, after which the mean of all columns will returned per row (using \code{\link[=rowMeans]{rowMeans()}}), see \emph{Examples}.
+
+Use \code{\link[=distance_from_row]{distance_from_row()}} to subtract distances from the distance of one row, see \emph{Examples}.
+}
+\section{Interpretation}{
+
+Isolates with distances less than 0.01 difference from each other should be considered similar. Differences lower than 0.025 should be considered suspicious.
+}
+
+\examples{
+x <- random_mic(10)
+x
+mean_amr_distance(x)
+
+y <- data.frame(id = LETTERS[1:10],
+                amox = random_mic(10, ab = "amox", mo = "Escherichia coli"),
+                cipr = random_mic(10, ab = "cipr", mo = "Escherichia coli"),
+                gent = random_mic(10, ab = "gent", mo = "Escherichia coli"),
+                tobr = random_mic(10, ab = "tobr", mo = "Escherichia coli"))
+y
+mean_amr_distance(y)
+y$amr_distance <- mean_amr_distance(y, where(is.mic))
+y[order(y$amr_distance), ]
+
+if (require("dplyr")) {
+  y \%>\% 
+    mutate(amr_distance = mean_amr_distance(., where(is.mic)),
+           check_id_C = distance_from_row(amr_distance, id == "C")) \%>\% 
+    arrange(check_id_C)
+}
+if (require("dplyr")) {
+  # support for groups
+  example_isolates \%>\%
+    filter(mo_genus() == "Enterococcus" & mo_species() != "") \%>\%
+    select(mo, TCY, carbapenems()) \%>\%
+    group_by(mo) \%>\%
+    mutate(d = mean_amr_distance(., where(is.rsi))) \%>\%
+    arrange(mo, d)
+}
+}