mirror of
https://github.com/msberends/AMR.git
synced 2024-12-24 18:46:14 +01:00
new AMR distance function
This commit is contained in:
parent
fbd5d32541
commit
3fca703fbf
2
.github/workflows/check.yaml
vendored
2
.github/workflows/check.yaml
vendored
@ -156,4 +156,4 @@ jobs:
|
||||
uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: artifacts-${{ matrix.config.os }}-r${{ matrix.config.r }}
|
||||
path: ${{ github.workspace }}/AMR.Rcheck
|
||||
path: /home/runner/work/AMR.Rcheck
|
||||
|
@ -1,6 +1,6 @@
|
||||
Package: AMR
|
||||
Version: 1.8.1.9046
|
||||
Date: 2022-08-29
|
||||
Version: 1.8.1.9047
|
||||
Date: 2022-08-30
|
||||
Title: Antimicrobial Resistance Data Analysis
|
||||
Description: Functions to simplify and standardise antimicrobial resistance (AMR)
|
||||
data analysis and to work with microbial and antimicrobial properties by
|
||||
@ -13,7 +13,7 @@ Authors@R: c(
|
||||
person(family = "Hassing", c("Erwin", "E.", "A."), role = c("aut", "ctb")),
|
||||
person(family = "Albers", c("Casper", "J."), role = "ths", comment = c(ORCID = "0000-0002-9213-6743")),
|
||||
person(family = "Dutey-Magni", c("Peter"), role = "ctb", comment = c(ORCID = "0000-0002-8942-9836")),
|
||||
person(family = "Fonville", c("Judith", "M"), role = "ctb"),
|
||||
person(family = "Fonville", c("Judith", "M."), role = "ctb"),
|
||||
person(family = "Friedrich", c("Alex", "W."), role = "ths", comment = c(ORCID = "0000-0003-4881-038X")),
|
||||
person(family = "Glasner", c("Corinna"), role = "ths", comment = c(ORCID = "0000-0003-1241-1328")),
|
||||
person(family = "Hazenberg", c("Eric", "H.", "L.", "C.", "M."), role = "ctb"),
|
||||
|
@ -101,6 +101,11 @@ S3method(log1p,mic)
|
||||
S3method(log2,mic)
|
||||
S3method(max,mic)
|
||||
S3method(mean,mic)
|
||||
S3method(mean_amr_distance,data.frame)
|
||||
S3method(mean_amr_distance,default)
|
||||
S3method(mean_amr_distance,disk)
|
||||
S3method(mean_amr_distance,mic)
|
||||
S3method(mean_amr_distance,rsi)
|
||||
S3method(median,mic)
|
||||
S3method(min,mic)
|
||||
S3method(plot,disk)
|
||||
@ -220,6 +225,7 @@ export(count_resistant)
|
||||
export(count_susceptible)
|
||||
export(custom_eucast_rules)
|
||||
export(custom_mdro_guideline)
|
||||
export(distance_from_row)
|
||||
export(eucast_dosage)
|
||||
export(eucast_exceptional_phenotypes)
|
||||
export(eucast_rules)
|
||||
@ -259,6 +265,7 @@ export(macrolides)
|
||||
export(mdr_cmi2012)
|
||||
export(mdr_tb)
|
||||
export(mdro)
|
||||
export(mean_amr_distance)
|
||||
export(mo_authors)
|
||||
export(mo_class)
|
||||
export(mo_domain)
|
||||
|
3
NEWS.md
3
NEWS.md
@ -1,7 +1,8 @@
|
||||
# AMR 1.8.1.9046
|
||||
# AMR 1.8.1.9047
|
||||
|
||||
### New
|
||||
* EUCAST 2022 and CLSI 2022 guidelines have been added for `as.rsi()`. EUCAST 2022 is now the new default guideline for all MIC and disks diffusion interpretations.
|
||||
* Function to calculate the mean AMR distance: `mean_amr_distance()`. The mean AMR distance is a normalised numeric value to compare AMR test results and can help to identify similar isolates, without comparing antibiograms by hand.
|
||||
* Support for `data.frame`-enhancing R packages, more specifically: `data.table`, `tibble`, and `tsibble`. AMR package functions that have a data set as output (such as `rsi_df()` and `bug_drug_combinations()`), will now return the same data type as the input. Furthermore, all our data sets are now in `tibble` format.
|
||||
* Our data sets are now also continually exported to Apache Feather and Apache Parquet formats. You can find more info [in this article on our website](https://msberends.github.io/AMR/articles/datasets.html).
|
||||
* Support for the following languages: Chinese, Greek, Japanese, Polish, Turkish and Ukrainian. We are very grateful for the valuable input by our colleagues from other countries. The `AMR` package is now available in 16 languages.
|
||||
|
@ -374,7 +374,12 @@ set_ab_names <- function(data, ..., property = "name", language = get_AMR_locale
|
||||
|
||||
if (is.data.frame(data)) {
|
||||
if (tryCatch(length(list(...)) > 0, error = function(e) TRUE)) {
|
||||
df <- pm_select(data, ...)
|
||||
out <- tryCatch(suppressWarnings(c(...)), error = function(e) NULL)
|
||||
if (!is.null(out)) {
|
||||
df <- data[, out, drop = FALSE]
|
||||
} else {
|
||||
df <- pm_select(data, ...)
|
||||
}
|
||||
} else {
|
||||
df <- data
|
||||
}
|
||||
|
163
R/mean_amr_distance.R
Normal file
163
R/mean_amr_distance.R
Normal file
@ -0,0 +1,163 @@
|
||||
# ==================================================================== #
|
||||
# TITLE #
|
||||
# Antimicrobial Resistance (AMR) Data Analysis for R #
|
||||
# #
|
||||
# SOURCE #
|
||||
# https://github.com/msberends/AMR #
|
||||
# #
|
||||
# LICENCE #
|
||||
# (c) 2018-2022 Berends MS, Luz CF et al. #
|
||||
# Developed at the University of Groningen, the Netherlands, in #
|
||||
# collaboration with non-profit organisations Certe Medical #
|
||||
# Diagnostics & Advice, and University Medical Center Groningen. #
|
||||
# #
|
||||
# This R package is free software; you can freely use and distribute #
|
||||
# it for both personal and commercial purposes under the terms of the #
|
||||
# GNU General Public License version 2.0 (GNU GPL-2), as published by #
|
||||
# the Free Software Foundation. #
|
||||
# We created this package for both routine data analysis and academic #
|
||||
# research and it was publicly released in the hope that it will be #
|
||||
# useful, but it comes WITHOUT ANY WARRANTY OR LIABILITY. #
|
||||
# #
|
||||
# Visit our website for the full manual and a complete tutorial about #
|
||||
# how to conduct AMR data analysis: https://msberends.github.io/AMR/ #
|
||||
# ==================================================================== #
|
||||
|
||||
#' Mean AMR Distance
|
||||
#'
|
||||
#' This function calculates a normalised mean for antimicrobial resistance between multiple observations.
|
||||
#' @param x a vector of class [rsi][as.rsi()], [rsi][as.rsi()] or [rsi][as.rsi()], or a [data.frame] containing columns of any of these classes
|
||||
#' @param ... variables to select (supports tidy selection such as `column1:column4` and [`where(is.mic)`][tidyselect::language]), and can thus also be [antibiotic selectors][ab_selector()]
|
||||
#' @param combine_SI a [logical] to indicate whether all values of S and I must be merged into one, so the input only consists of S+I vs. R (susceptible vs. resistant), defaults to `TRUE`
|
||||
#' @details The mean AMR distance is a normalised numeric value to compare AMR test results and can help to identify similar isolates, without comparing antibiograms by hand. For common numeric data this distance is equal to [Z scores](https://en.wikipedia.org/wiki/Standard_score) (the number of standard deviations from the mean).
|
||||
#'
|
||||
#' MIC values (see [as.mic()]) are transformed with [log2()] first; their distance is calculated as `(log2(x) - mean(log2(x))) / sd(log2(x))`.
|
||||
#'
|
||||
#' R/SI values (see [as.rsi()]) are transformed using `"S"` = 1, `"I"` = 2, and `"R"` = 3. If `combine_SI` is `TRUE` (default), the `"I"` will be considered to be 1.
|
||||
#'
|
||||
#' For data sets, the mean AMR distance will be calculated per variable, after which the mean of all columns will returned per row (using [rowMeans()]), see *Examples*.
|
||||
#'
|
||||
#' Use [distance_from_row()] to subtract distances from the distance of one row, see *Examples*.
|
||||
#' @section Interpretation:
|
||||
#' Isolates with distances less than 0.01 difference from each other should be considered similar. Differences lower than 0.025 should be considered suspicious.
|
||||
#' @export
|
||||
#' @examples
|
||||
#' x <- random_mic(10)
|
||||
#' x
|
||||
#' mean_amr_distance(x)
|
||||
#'
|
||||
#' y <- data.frame(
|
||||
#' id = LETTERS[1:10],
|
||||
#' amox = random_mic(10, ab = "amox", mo = "Escherichia coli"),
|
||||
#' cipr = random_mic(10, ab = "cipr", mo = "Escherichia coli"),
|
||||
#' gent = random_mic(10, ab = "gent", mo = "Escherichia coli"),
|
||||
#' tobr = random_mic(10, ab = "tobr", mo = "Escherichia coli")
|
||||
#' )
|
||||
#' y
|
||||
#' mean_amr_distance(y)
|
||||
#' y$amr_distance <- mean_amr_distance(y, where(is.mic))
|
||||
#' y[order(y$amr_distance), ]
|
||||
#'
|
||||
#' if (require("dplyr")) {
|
||||
#' y %>%
|
||||
#' mutate(
|
||||
#' amr_distance = mean_amr_distance(., where(is.mic)),
|
||||
#' check_id_C = distance_from_row(amr_distance, id == "C")
|
||||
#' ) %>%
|
||||
#' arrange(check_id_C)
|
||||
#' }
|
||||
#' if (require("dplyr")) {
|
||||
#' # support for groups
|
||||
#' example_isolates %>%
|
||||
#' filter(mo_genus() == "Enterococcus" & mo_species() != "") %>%
|
||||
#' select(mo, TCY, carbapenems()) %>%
|
||||
#' group_by(mo) %>%
|
||||
#' mutate(d = mean_amr_distance(., where(is.rsi))) %>%
|
||||
#' arrange(mo, d)
|
||||
#' }
|
||||
mean_amr_distance <- function(x, ...) {
|
||||
UseMethod("mean_amr_distance")
|
||||
}
|
||||
|
||||
#' @rdname mean_amr_distance
|
||||
#' @export
|
||||
mean_amr_distance.default <- function(x, ...) {
|
||||
x <- as.double(x)
|
||||
(x - mean(x, na.rm = TRUE)) / stats::sd(x, na.rm = TRUE)
|
||||
}
|
||||
|
||||
#' @rdname mean_amr_distance
|
||||
#' @export
|
||||
mean_amr_distance.mic <- function(x, ...) {
|
||||
mean_amr_distance(log2(x))
|
||||
}
|
||||
|
||||
#' @rdname mean_amr_distance
|
||||
#' @export
|
||||
mean_amr_distance.disk <- function(x, ...) {
|
||||
mean_amr_distance(as.double(x))
|
||||
}
|
||||
|
||||
#' @rdname mean_amr_distance
|
||||
#' @export
|
||||
mean_amr_distance.rsi <- function(x, combine_SI = TRUE, ...) {
|
||||
meet_criteria(combine_SI, allow_class = "logical", has_length = 1, .call_depth = -1)
|
||||
if (isTRUE(combine_SI)) {
|
||||
x[x == "I"] <- "S"
|
||||
}
|
||||
mean_amr_distance(as.double(x))
|
||||
}
|
||||
|
||||
#' @rdname mean_amr_distance
|
||||
#' @export
|
||||
mean_amr_distance.data.frame <- function(x, ..., combine_SI = TRUE) {
|
||||
meet_criteria(combine_SI, allow_class = "logical", has_length = 1, .call_depth = -1)
|
||||
df <- x
|
||||
if (is_null_or_grouped_tbl(df)) {
|
||||
df <- get_current_data("x", -2)
|
||||
}
|
||||
if (tryCatch(length(list(...)) > 0, error = function(e) TRUE)) {
|
||||
out <- tryCatch(suppressWarnings(c(...)), error = function(e) NULL)
|
||||
if (!is.null(out)) {
|
||||
df <- df[, out, drop = FALSE]
|
||||
} else {
|
||||
df <- pm_select(df, ...)
|
||||
}
|
||||
}
|
||||
stop_if(ncol(df) < 2,
|
||||
"data set must contain at least two variables",
|
||||
call = -2
|
||||
)
|
||||
if (message_not_thrown_before("mean_amr_distance", "groups")) {
|
||||
message_("Calculating mean AMR distance based on columns ", vector_and(colnames(df)))
|
||||
}
|
||||
res <- vapply(
|
||||
FUN.VALUE = double(nrow(df)),
|
||||
df,
|
||||
mean_amr_distance,
|
||||
combine_SI = combine_SI
|
||||
)
|
||||
if (is.null(dim(res))) {
|
||||
if (all(is.na(res))) {
|
||||
return(NA_real_)
|
||||
} else {
|
||||
return(mean(res, na.rm = TRUE))
|
||||
}
|
||||
}
|
||||
res <- rowMeans(res, na.rm = TRUE)
|
||||
res[is.infinite(res)] <- 0
|
||||
res
|
||||
}
|
||||
|
||||
#' @rdname mean_amr_distance
|
||||
#' @param mean_distance the outcome of [mean_amr_distance()]
|
||||
#' @param row an index, such as a row number
|
||||
#' @export
|
||||
distance_from_row <- function(mean_distance, row) {
|
||||
meet_criteria(mean_distance, allow_class = c("double", "numeric"), is_finite = TRUE)
|
||||
meet_criteria(row, allow_class = c("logical", "double", "numeric"))
|
||||
if (is.logical(row)) {
|
||||
row <- which(row)
|
||||
}
|
||||
abs(mean_distance[row] - mean_distance)
|
||||
}
|
12
R/vctrs.R
12
R/vctrs.R
@ -84,3 +84,15 @@ vec_ptype2.disk.integer <- function(x, y, ...) {
|
||||
vec_cast.integer.disk <- function(x, to, ...) {
|
||||
unclass(x)
|
||||
}
|
||||
|
||||
# S3: mic
|
||||
vec_cast.character.mic <- function(x, to, ...) {
|
||||
as.character(x)
|
||||
}
|
||||
vec_cast.double.mic <- function(x, to, ...) {
|
||||
# this calls as.double.mic()
|
||||
as.double(x)
|
||||
}
|
||||
vec_math.mic <- function(.fn, x, ...) {
|
||||
.fn(as.double(x), ...)
|
||||
}
|
||||
|
3
R/zzz.R
3
R/zzz.R
@ -96,6 +96,9 @@ if (utf8_supported && !is_latex) {
|
||||
s3_register("vctrs::vec_ptype2", "disk.integer")
|
||||
s3_register("vctrs::vec_ptype2", "integer.disk")
|
||||
s3_register("vctrs::vec_cast", "integer.disk")
|
||||
s3_register("vctrs::vec_cast", "character.mic")
|
||||
s3_register("vctrs::vec_cast", "double.mic")
|
||||
s3_register("vctrs::vec_math", "mic")
|
||||
|
||||
# if mo source exists, fire it up (see mo_source())
|
||||
try(
|
||||
|
59
inst/tinytest/test-mean_amr_distance.R
Normal file
59
inst/tinytest/test-mean_amr_distance.R
Normal file
@ -0,0 +1,59 @@
|
||||
# ==================================================================== #
|
||||
# TITLE #
|
||||
# Antimicrobial Resistance (AMR) Data Analysis for R #
|
||||
# #
|
||||
# SOURCE #
|
||||
# https://github.com/msberends/AMR #
|
||||
# #
|
||||
# LICENCE #
|
||||
# (c) 2018-2022 Berends MS, Luz CF et al. #
|
||||
# Developed at the University of Groningen, the Netherlands, in #
|
||||
# collaboration with non-profit organisations Certe Medical #
|
||||
# Diagnostics & Advice, and University Medical Center Groningen. #
|
||||
# #
|
||||
# This R package is free software; you can freely use and distribute #
|
||||
# it for both personal and commercial purposes under the terms of the #
|
||||
# GNU General Public License version 2.0 (GNU GPL-2), as published by #
|
||||
# the Free Software Foundation. #
|
||||
# We created this package for both routine data analysis and academic #
|
||||
# research and it was publicly released in the hope that it will be #
|
||||
# useful, but it comes WITHOUT ANY WARRANTY OR LIABILITY. #
|
||||
# #
|
||||
# Visit our website for the full manual and a complete tutorial about #
|
||||
# how to conduct AMR data analysis: https://msberends.github.io/AMR/ #
|
||||
# ==================================================================== #
|
||||
|
||||
vctr_disk <- as.disk(c(20:25))
|
||||
vctr_mic <- as.mic(2^c(0:5))
|
||||
vctr_rsi <- as.rsi(c("S", "S", "I", "I", "R", "R"))
|
||||
|
||||
expect_identical(
|
||||
mean_amr_distance(vctr_disk),
|
||||
(as.double(vctr_disk) - mean(as.double(vctr_disk))) / sd(as.double(vctr_disk))
|
||||
)
|
||||
|
||||
expect_identical(
|
||||
mean_amr_distance(vctr_mic),
|
||||
(log2(vctr_mic) - mean(log2(vctr_mic))) / sd(log2(vctr_mic))
|
||||
)
|
||||
|
||||
expect_identical(
|
||||
mean_amr_distance(vctr_rsi, combine_SI = FALSE),
|
||||
(c(1, 1, 2, 2, 3, 3) - mean(c(1, 1, 2, 2, 3, 3))) / sd(c(1, 1, 2, 2, 3, 3))
|
||||
)
|
||||
expect_identical(
|
||||
mean_amr_distance(vctr_rsi, combine_SI = TRUE),
|
||||
(c(1, 1, 1, 1, 3, 3) - mean(c(1, 1, 1, 1, 3, 3))) / sd(c(1, 1, 1, 1, 3, 3))
|
||||
)
|
||||
|
||||
expect_equal(
|
||||
mean_amr_distance(data.frame(vctr_mic, vctr_rsi, vctr_disk)),
|
||||
c(-1.10603655, -0.74968823, -0.39333990, -0.03699158, 0.96485397, 1.32120229),
|
||||
tolerance = 0.00001
|
||||
)
|
||||
|
||||
expect_equal(
|
||||
mean_amr_distance(data.frame(vctr_mic, vctr_rsi, vctr_disk), 2:3),
|
||||
c(-0.9909017, -0.7236405, -0.4563792, -0.1891180, 1.0463891, 1.3136503),
|
||||
tolerance = 0.00001
|
||||
)
|
87
man/mean_amr_distance.Rd
Normal file
87
man/mean_amr_distance.Rd
Normal file
@ -0,0 +1,87 @@
|
||||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/mean_amr_distance.R
|
||||
\name{mean_amr_distance}
|
||||
\alias{mean_amr_distance}
|
||||
\alias{mean_amr_distance.default}
|
||||
\alias{mean_amr_distance.mic}
|
||||
\alias{mean_amr_distance.disk}
|
||||
\alias{mean_amr_distance.rsi}
|
||||
\alias{mean_amr_distance.data.frame}
|
||||
\alias{distance_from_row}
|
||||
\title{Mean AMR Distance}
|
||||
\usage{
|
||||
mean_amr_distance(x, ...)
|
||||
|
||||
\method{mean_amr_distance}{default}(x, ...)
|
||||
|
||||
\method{mean_amr_distance}{mic}(x, ...)
|
||||
|
||||
\method{mean_amr_distance}{disk}(x, ...)
|
||||
|
||||
\method{mean_amr_distance}{rsi}(x, combine_SI = TRUE, ...)
|
||||
|
||||
\method{mean_amr_distance}{data.frame}(x, ..., combine_SI = TRUE)
|
||||
|
||||
distance_from_row(mean_distance, row)
|
||||
}
|
||||
\arguments{
|
||||
\item{x}{a vector of class \link[=as.rsi]{rsi}, \link[=as.rsi]{rsi} or \link[=as.rsi]{rsi}, or a \link{data.frame} containing columns of any of these classes}
|
||||
|
||||
\item{...}{variables to select (supports tidy selection such as \code{column1:column4} and \code{\link[tidyselect:language]{where(is.mic)}}), and can thus also be \link[=ab_selector]{antibiotic selectors}}
|
||||
|
||||
\item{combine_SI}{a \link{logical} to indicate whether all values of S and I must be merged into one, so the input only consists of S+I vs. R (susceptible vs. resistant), defaults to \code{TRUE}}
|
||||
|
||||
\item{mean_distance}{the outcome of \code{\link[=mean_amr_distance]{mean_amr_distance()}}}
|
||||
|
||||
\item{row}{an index, such as a row number}
|
||||
}
|
||||
\description{
|
||||
This function calculates a normalised mean for antimicrobial resistance between multiple observations.
|
||||
}
|
||||
\details{
|
||||
The mean AMR distance is a normalised numeric value to compare AMR test results and can help to identify similar isolates, without comparing antibiograms by hand. For common numeric data this distance is equal to \href{https://en.wikipedia.org/wiki/Standard_score}{Z scores} (the number of standard deviations from the mean).
|
||||
|
||||
MIC values (see \code{\link[=as.mic]{as.mic()}}) are transformed with \code{\link[=log2]{log2()}} first; their distance is calculated as \code{(log2(x) - mean(log2(x))) / sd(log2(x))}.
|
||||
|
||||
R/SI values (see \code{\link[=as.rsi]{as.rsi()}}) are transformed using \code{"S"} = 1, \code{"I"} = 2, and \code{"R"} = 3. If \code{combine_SI} is \code{TRUE} (default), the \code{"I"} will be considered to be 1.
|
||||
|
||||
For data sets, the mean AMR distance will be calculated per variable, after which the mean of all columns will returned per row (using \code{\link[=rowMeans]{rowMeans()}}), see \emph{Examples}.
|
||||
|
||||
Use \code{\link[=distance_from_row]{distance_from_row()}} to subtract distances from the distance of one row, see \emph{Examples}.
|
||||
}
|
||||
\section{Interpretation}{
|
||||
|
||||
Isolates with distances less than 0.01 difference from each other should be considered similar. Differences lower than 0.025 should be considered suspicious.
|
||||
}
|
||||
|
||||
\examples{
|
||||
x <- random_mic(10)
|
||||
x
|
||||
mean_amr_distance(x)
|
||||
|
||||
y <- data.frame(id = LETTERS[1:10],
|
||||
amox = random_mic(10, ab = "amox", mo = "Escherichia coli"),
|
||||
cipr = random_mic(10, ab = "cipr", mo = "Escherichia coli"),
|
||||
gent = random_mic(10, ab = "gent", mo = "Escherichia coli"),
|
||||
tobr = random_mic(10, ab = "tobr", mo = "Escherichia coli"))
|
||||
y
|
||||
mean_amr_distance(y)
|
||||
y$amr_distance <- mean_amr_distance(y, where(is.mic))
|
||||
y[order(y$amr_distance), ]
|
||||
|
||||
if (require("dplyr")) {
|
||||
y \%>\%
|
||||
mutate(amr_distance = mean_amr_distance(., where(is.mic)),
|
||||
check_id_C = distance_from_row(amr_distance, id == "C")) \%>\%
|
||||
arrange(check_id_C)
|
||||
}
|
||||
if (require("dplyr")) {
|
||||
# support for groups
|
||||
example_isolates \%>\%
|
||||
filter(mo_genus() == "Enterococcus" & mo_species() != "") \%>\%
|
||||
select(mo, TCY, carbapenems()) \%>\%
|
||||
group_by(mo) \%>\%
|
||||
mutate(d = mean_amr_distance(., where(is.rsi))) \%>\%
|
||||
arrange(mo, d)
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user