Compare commits

...

2 Commits

Author SHA1 Message Date
dr. M.S. (Matthijs) Berends a98d0d75ea styler dep 2023-01-05 14:46:44 +01:00
dr. M.S. (Matthijs) Berends a0ee86536a unit tests 2023-01-05 14:43:18 +01:00
19 changed files with 86 additions and 51 deletions

View File

@ -33,7 +33,7 @@ echo "Running pre-commit hook..."
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
if command -v Rscript > /dev/null; then
if [ "$(Rscript -e 'cat(all(c('"'pkgload'"', '"'devtools'"', '"'dplyr'"', '"'styler'"') %in% rownames(installed.packages())))')" = "TRUE" ]; then
if [ "$(Rscript -e 'cat(all(c('"'pkgload'"', '"'devtools'"', '"'dplyr'"') %in% rownames(installed.packages())))')" = "TRUE" ]; then
Rscript -e "source('data-raw/_pre_commit_hook.R')"
currentpkg=`Rscript -e "cat(pkgload::pkg_name())"`
echo "-> Adding all files in 'data-raw' to this commit"

View File

@ -58,6 +58,9 @@ jobs:
- {os: ubuntu-latest, r: 'release', allowfail: false}
- {os: windows-latest, r: 'devel', allowfail: false}
- {os: windows-latest, r: 'release', allowfail: false}
- {os: macOS-latest, r: '3.6', allowfail: false}
- {os: ubuntu-latest, r: '3.6', allowfail: false}
- {os: windows-latest, r: '3.6', allowfail: false}
env:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}

View File

@ -59,6 +59,7 @@ jobs:
- {os: ubuntu-22.04, r: '4.0', allowfail: false, rspm: "https://packagemanager.rstudio.com/cran/__linux__/jammy/latest"}
- {os: ubuntu-22.04, r: '3.6', allowfail: false, rspm: "https://packagemanager.rstudio.com/cran/__linux__/jammy/latest"}
# R 3.5 returns a strange GC error when running examples, omit the checks for that
# - {os: ubuntu-22.04, r: '3.5', allowfail: false, rspm: "https://packagemanager.rstudio.com/cran/__linux__/jammy/latest"}
- {os: ubuntu-22.04, r: '3.4', allowfail: false, rspm: "https://packagemanager.rstudio.com/cran/__linux__/jammy/latest"}
- {os: ubuntu-22.04, r: '3.3', allowfail: false, rspm: "https://packagemanager.rstudio.com/cran/__linux__/jammy/latest"}
- {os: ubuntu-22.04, r: '3.2', allowfail: false, rspm: "https://packagemanager.rstudio.com/cran/__linux__/jammy/latest"}

View File

@ -1,6 +1,6 @@
Package: AMR
Version: 1.8.2.9076
Date: 2022-12-30
Version: 1.8.2.9078
Date: 2023-01-05
Title: Antimicrobial Resistance Data Analysis
Description: Functions to simplify and standardise antimicrobial resistance (AMR)
data analysis and to work with microbial and antimicrobial properties by

View File

@ -1,4 +1,4 @@
# 1.8.2.9076
# AMR 1.8.2.9078
*(this beta version will eventually become v2.0! We're happy to reach a new major milestone soon!)*

View File

@ -966,7 +966,7 @@ unique_call_id <- function(entire_session = FALSE, match_fn = NULL) {
# and relevant system call (where 'match_fn' is being called in)
calls <- sys.calls()
in_test <- any(as.character(calls[[1]]) %like_case% "run_test_dir|run_test_file|test_all|tinytest|test_package|testthat", na.rm = TRUE)
if (!isTRUE(in_test)) {
if (!isTRUE(in_test) && !is.null(match_fn)) {
for (i in seq_len(length(calls))) {
call_clean <- gsub("[^a-zA-Z0-9_().-]", "", as.character(calls[[i]]), perl = TRUE)
if (match_fn %in% call_clean || any(call_clean %like% paste0(match_fn, "\\("), na.rm = TRUE)) {
@ -1262,6 +1262,7 @@ create_pillar_column <- function(x, ...) {
as_original_data_class <- function(df, old_class = NULL) {
if ("tbl_df" %in% old_class && pkg_is_available("tibble", also_load = FALSE)) {
# this will then also remove groups
fn <- import_fn("as_tibble", "tibble")
} else if ("tbl_ts" %in% old_class && pkg_is_available("tsibble", also_load = FALSE)) {
fn <- import_fn("as_tsibble", "tsibble")
@ -1270,7 +1271,7 @@ as_original_data_class <- function(df, old_class = NULL) {
} else if ("tabyl" %in% old_class && pkg_is_available("janitor", also_load = FALSE)) {
fn <- import_fn("as_tabyl", "janitor")
} else {
fn <- base::as.data.frame
fn <- function(x) base::as.data.frame(df, stringsAsFactors = FALSE)
}
fn(df)
}

View File

@ -36,7 +36,7 @@
#' @param language language of the returned text, defaults to system language (see [get_AMR_locale()]) and can also be set with `getOption("AMR_locale")`. Use `language = NULL` or `language = ""` to prevent translation.
#' @param administration way of administration, either `"oral"` or `"iv"`
#' @param open browse the URL using [utils::browseURL()]
#' @param ... in case of [set_ab_names()] and `data` is a [data.frame]: variables to select (supports tidy selection such as `column1:column4`), otherwise other arguments passed on to [as.ab()]
#' @param ... in case of [set_ab_names()] and `data` is a [data.frame]: columns to select (supports tidy selection such as `column1:column4`), otherwise other arguments passed on to [as.ab()]
#' @param data a [data.frame] of which the columns need to be renamed, or a [character] vector of column names
#' @param snake_case a [logical] to indicate whether the names should be in so-called [snake case](https://en.wikipedia.org/wiki/Snake_case): in lower case and all spaces/slashes replaced with an underscore (`_`)
#' @param only_first a [logical] to indicate whether only the first ATC code must be returned, with giving preference to J0-codes (i.e., the antimicrobial drug group)

View File

@ -161,7 +161,7 @@ bug_drug_combinations <- function(x,
out <- run_it(x)
}
rownames(out) <- NULL
out <- as_original_data_class(out, class(x.bak))
out <- as_original_data_class(out, class(x.bak)) # will remove tibble groups
structure(out, class = c("bug_drug_combinations", ifelse(data_has_groups, "grouped", character(0)), class(out)))
}
@ -322,7 +322,7 @@ format.bug_drug_combinations <- function(x,
}
rownames(y) <- NULL
as_original_data_class(y, class(x.bak))
as_original_data_class(y, class(x.bak)) # will remove tibble groups
}
#' @method print bug_drug_combinations

View File

@ -77,7 +77,7 @@
#'
#' ### Using taxonomic properties in rules
#'
#' There is one exception in variables used for the rules: all column names of the [microorganisms] data set can also be used, but do not have to exist in the data set. These column names are: `r vector_and(colnames(microorganisms), sort = FALSE)`. Thus, this next example will work as well, despite the fact that the `df` data set does not contain a column `genus`:
#' There is one exception in columns used for the rules: all column names of the [microorganisms] data set can also be used, but do not have to exist in the data set. These column names are: `r vector_and(colnames(microorganisms), sort = FALSE)`. Thus, this next example will work as well, despite the fact that the `df` data set does not contain a column `genus`:
#'
#' ```r
#' y <- custom_eucast_rules(TZP == "S" & genus == "Klebsiella" ~ aminopenicillins == "S",

View File

@ -1035,7 +1035,7 @@ eucast_rules <- function(x,
# Return data set ---------------------------------------------------------
if (isTRUE(verbose)) {
as_original_data_class(verbose_info, old_attributes$class)
as_original_data_class(verbose_info, old_attributes$class) # will remove tibble groups
} else {
# x was analysed with only unique rows, so join everything together again
x <- x[, c(cols_ab, ".rowid"), drop = FALSE]
@ -1043,8 +1043,9 @@ eucast_rules <- function(x,
x.bak <- x.bak %pm>%
pm_left_join(x, by = ".rowid")
x.bak <- x.bak[, old_cols, drop = FALSE]
# reset original attributes, no need for as_original_data_class() here
# reset original attributes
attributes(x.bak) <- old_attributes
x.bak <- as_original_data_class(x.bak, old_class = class(x.bak)) # will remove tibble groups
x.bak
}
}

View File

@ -185,5 +185,5 @@ join_microorganisms <- function(type, x, by, suffix, ...) {
warning_("in `", type, "_microorganisms()`: the newly joined data set contains ", nrow(joined) - nrow(x), " rows more than the number of rows of `x`.")
}
as_original_data_class(joined, class(x.bak))
as_original_data_class(joined, class(x.bak)) # will remove tibble groups
}

View File

@ -30,30 +30,40 @@
#' Calculate the Mean AMR Distance
#'
#' Calculates a normalised mean for antimicrobial resistance between multiple observations, to help to identify similar isolates without comparing antibiograms by hand.
#' @param x a vector of class [rsi][as.rsi()], [rsi][as.rsi()] or [rsi][as.rsi()], or a [data.frame] containing columns of any of these classes
#' @param x a vector of class [rsi][as.rsi()], [mic][as.mic()] or [disk][as.disk()], or a [data.frame] containing columns of any of these classes
#' @param ... variables to select (supports [tidyselect language][tidyselect::language] such as `column1:column4` and `where(is.mic)`, and can thus also be [antibiotic selectors][ab_selector()]
#' @param combine_SI a [logical] to indicate whether all values of S and I must be merged into one, so the input only consists of S+I vs. R (susceptible vs. resistant), defaults to `TRUE`
#' @details The mean AMR distance is a normalised numeric value to compare AMR test results and can help to identify similar isolates, without comparing antibiograms by hand. For common numeric data this distance is equal to [Z scores](https://en.wikipedia.org/wiki/Standard_score) (the number of standard deviations from the mean).
#' @details The mean AMR distance is effectively [the Z-score](https://en.wikipedia.org/wiki/Standard_score); a normalised numeric value to compare AMR test results which can help to identify similar isolates, without comparing antibiograms by hand.
#'
#' MIC values (see [as.mic()]) are transformed with [log2()] first; their distance is calculated as `(log2(x) - mean(log2(x))) / sd(log2(x))`.
#' MIC values (see [as.mic()]) are transformed with [log2()] first; their distance is thus calculated as `(log2(x) - mean(log2(x))) / sd(log2(x))`.
#'
#' R/SI values (see [as.rsi()]) are transformed using `"S"` = 1, `"I"` = 2, and `"R"` = 3. If `combine_SI` is `TRUE` (default), the `"I"` will be considered to be 1.
#'
#' For data sets, the mean AMR distance will be calculated per variable, after which the mean of all columns will returned per row (using [rowMeans()]), see *Examples*.
#' For data sets, the mean AMR distance will be calculated per column, after which the mean per row will be returned, see *Examples*.
#'
#' Use [amr_distance_from_row()] to subtract distances from the distance of one row, see *Examples*.
#' @section Interpretation:
#' Isolates with distances less than 0.01 difference from each other should be considered similar. Differences lower than 0.025 should be considered suspicious.
#' @export
#' @examples
#' x <- random_mic(10)
#' x
#' mean_amr_distance(x)
#' rsi <- random_rsi(10)
#' rsi
#' mean_amr_distance(rsi)
#'
#' mic <- random_mic(10)
#' mic
#' mean_amr_distance(mic)
#' # equal to the Z-score of their log2:
#' (log2(mic) - mean(log2(mic))) / sd(log2(mic))
#'
#' disk <- random_disk(10)
#' disk
#' mean_amr_distance(disk)
#'
#' y <- data.frame(
#' id = LETTERS[1:10],
#' amox = random_mic(10, ab = "amox", mo = "Escherichia coli"),
#' cipr = random_mic(10, ab = "cipr", mo = "Escherichia coli"),
#' amox = random_rsi(10, ab = "amox", mo = "Escherichia coli"),
#' cipr = random_disk(10, ab = "cipr", mo = "Escherichia coli"),
#' gent = random_mic(10, ab = "gent", mo = "Escherichia coli"),
#' tobr = random_mic(10, ab = "tobr", mo = "Escherichia coli")
#' )
@ -65,7 +75,7 @@
#' if (require("dplyr")) {
#' y %>%
#' mutate(
#' amr_distance = mean_amr_distance(., where(is.mic)),
#' amr_distance = mean_amr_distance(y),
#' check_id_C = amr_distance_from_row(amr_distance, id == "C")
#' ) %>%
#' arrange(check_id_C)
@ -76,8 +86,8 @@
#' filter(mo_genus() == "Enterococcus" & mo_species() != "") %>%
#' select(mo, TCY, carbapenems()) %>%
#' group_by(mo) %>%
#' mutate(d = mean_amr_distance(., where(is.rsi))) %>%
#' arrange(mo, d)
#' mutate(dist = mean_amr_distance(.)) %>%
#' arrange(mo, dist)
#' }
mean_amr_distance <- function(x, ...) {
UseMethod("mean_amr_distance")
@ -87,6 +97,7 @@ mean_amr_distance <- function(x, ...) {
#' @export
mean_amr_distance.default <- function(x, ...) {
x <- as.double(x)
# calculate z-score
(x - mean(x, na.rm = TRUE)) / stats::sd(x, na.rm = TRUE)
}
@ -120,6 +131,7 @@ mean_amr_distance.data.frame <- function(x, ..., combine_SI = TRUE) {
if (is_null_or_grouped_tbl(df)) {
df <- get_current_data("x", -2)
}
df <- as.data.frame(df, stringsAsFactors = FALSE)
if (tryCatch(length(list(...)) > 0, error = function(e) TRUE)) {
out <- tryCatch(suppressWarnings(c(...)), error = function(e) NULL)
if (!is.null(out)) {
@ -128,13 +140,18 @@ mean_amr_distance.data.frame <- function(x, ..., combine_SI = TRUE) {
df <- pm_select(df, ...)
}
}
df_classes <- colnames(df)[vapply(FUN.VALUE = logical(1), df, function(x) is.disk(x) | is.mic(x) | is.disk(x), USE.NAMES = FALSE)]
df_antibiotics <- unname(get_column_abx(df, info = FALSE))
df <- df[, colnames(df)[colnames(df) %in% union(df_classes, df_antibiotics)], drop = FALSE]
stop_if(ncol(df) < 2,
"data set must contain at least two variables",
call = -2
)
if (message_not_thrown_before("mean_amr_distance", "groups")) {
message_("Calculating mean AMR distance based on columns ", vector_and(colnames(df)))
message_("Calculating mean AMR distance based on columns ", vector_and(colnames(df), sort = FALSE))
}
res <- vapply(
FUN.VALUE = double(nrow(df)),
df,
@ -149,7 +166,7 @@ mean_amr_distance.data.frame <- function(x, ..., combine_SI = TRUE) {
}
}
res <- rowMeans(res, na.rm = TRUE)
res[is.infinite(res)] <- 0
res[is.infinite(res) | is.nan(res)] <- 0
res
}

View File

@ -274,7 +274,7 @@ resistance_predict <- function(x,
df_prediction$value <- ifelse(df_prediction$value > 1, 1, pmax(df_prediction$value, 0))
df_prediction <- df_prediction[order(df_prediction$year), , drop = FALSE]
out <- as_original_data_class(df_prediction, class(x.bak))
out <- as_original_data_class(df_prediction, class(x.bak)) # will remove tibble groups
structure(out,
class = c("resistance_predict", class(out)),
I_as_S = I_as_S,

View File

@ -371,6 +371,6 @@ rsi_calc_df <- function(type, # "proportion", "count" or "both"
}
rownames(out) <- NULL
out <- as_original_data_class(out, class(data.bak))
out <- as_original_data_class(out, class(data.bak)) # will remove tibble groups
structure(out, class = c("rsi_df", class(out)))
}

Binary file not shown.

View File

@ -486,14 +486,16 @@ suppressMessages(devtools::document(quiet = TRUE))
# Style pkg ---------------------------------------------------------------
# if (interactive()) {
# # only when sourcing this file ourselves
# usethis::ui_info("Styling package")
# styler::style_pkg(
# style = styler::tidyverse_style,
# filetype = c("R", "Rmd")
# )
# }
if (!"styler" %in% rownames(utils::installed.packages())) {
message("Package 'styler' not installed!")
} else if (interactive()) {
# # only when sourcing this file ourselves
# usethis::ui_info("Styling package")
# styler::style_pkg(
# style = styler::tidyverse_style,
# filetype = c("R", "Rmd")
# )
}
# Finished ----------------------------------------------------------------

View File

@ -62,7 +62,7 @@ set_ab_names(
\item{tolower}{a \link{logical} to indicate whether the first \link{character} of every output should be transformed to a lower case \link{character}. This will lead to e.g. "polymyxin B" and not "polymyxin b".}
\item{...}{in case of \code{\link[=set_ab_names]{set_ab_names()}} and \code{data} is a \link{data.frame}: variables to select (supports tidy selection such as \code{column1:column4}), otherwise other arguments passed on to \code{\link[=as.ab]{as.ab()}}}
\item{...}{in case of \code{\link[=set_ab_names]{set_ab_names()}} and \code{data} is a \link{data.frame}: columns to select (supports tidy selection such as \code{column1:column4}), otherwise other arguments passed on to \code{\link[=as.ab]{as.ab()}}}
\item{only_first}{a \link{logical} to indicate whether only the first ATC code must be returned, with giving preference to J0-codes (i.e., the antimicrobial drug group)}

View File

@ -60,7 +60,7 @@ eucast_rules(df, rules = "custom", custom_rules = x, info = FALSE)
\subsection{Using taxonomic properties in rules}{
There is one exception in variables used for the rules: all column names of the \link{microorganisms} data set can also be used, but do not have to exist in the data set. These column names are: "mo", "fullname", "status", "kingdom", "phylum", "class", "order", "family", "genus", "species", "subspecies", "rank", "ref", "source", "lpsn", "lpsn_parent", "lpsn_renamed_to", "gbif", "gbif_parent", "gbif_renamed_to", "prevalence" and "snomed". Thus, this next example will work as well, despite the fact that the \code{df} data set does not contain a column \code{genus}:
There is one exception in columns used for the rules: all column names of the \link{microorganisms} data set can also be used, but do not have to exist in the data set. These column names are: "mo", "fullname", "status", "kingdom", "phylum", "class", "order", "family", "genus", "species", "subspecies", "rank", "ref", "source", "lpsn", "lpsn_parent", "lpsn_renamed_to", "gbif", "gbif_parent", "gbif_renamed_to", "prevalence" and "snomed". Thus, this next example will work as well, despite the fact that the \code{df} data set does not contain a column \code{genus}:
\if{html}{\out{<div class="sourceCode r">}}\preformatted{y <- custom_eucast_rules(TZP == "S" & genus == "Klebsiella" ~ aminopenicillins == "S",
TZP == "R" & genus == "Klebsiella" ~ aminopenicillins == "R")

View File

@ -16,7 +16,7 @@ mean_amr_distance(x, ...)
amr_distance_from_row(amr_distance, row)
}
\arguments{
\item{x}{a vector of class \link[=as.rsi]{rsi}, \link[=as.rsi]{rsi} or \link[=as.rsi]{rsi}, or a \link{data.frame} containing columns of any of these classes}
\item{x}{a vector of class \link[=as.rsi]{rsi}, \link[=as.mic]{mic} or \link[=as.disk]{disk}, or a \link{data.frame} containing columns of any of these classes}
\item{...}{variables to select (supports \link[tidyselect:language]{tidyselect language} such as \code{column1:column4} and \code{where(is.mic)}, and can thus also be \link[=ab_selector]{antibiotic selectors}}
@ -30,13 +30,13 @@ amr_distance_from_row(amr_distance, row)
Calculates a normalised mean for antimicrobial resistance between multiple observations, to help to identify similar isolates without comparing antibiograms by hand.
}
\details{
The mean AMR distance is a normalised numeric value to compare AMR test results and can help to identify similar isolates, without comparing antibiograms by hand. For common numeric data this distance is equal to \href{https://en.wikipedia.org/wiki/Standard_score}{Z scores} (the number of standard deviations from the mean).
The mean AMR distance is effectively \href{https://en.wikipedia.org/wiki/Standard_score}{the Z-score}; a normalised numeric value to compare AMR test results which can help to identify similar isolates, without comparing antibiograms by hand.
MIC values (see \code{\link[=as.mic]{as.mic()}}) are transformed with \code{\link[=log2]{log2()}} first; their distance is calculated as \code{(log2(x) - mean(log2(x))) / sd(log2(x))}.
MIC values (see \code{\link[=as.mic]{as.mic()}}) are transformed with \code{\link[=log2]{log2()}} first; their distance is thus calculated as \code{(log2(x) - mean(log2(x))) / sd(log2(x))}.
R/SI values (see \code{\link[=as.rsi]{as.rsi()}}) are transformed using \code{"S"} = 1, \code{"I"} = 2, and \code{"R"} = 3. If \code{combine_SI} is \code{TRUE} (default), the \code{"I"} will be considered to be 1.
For data sets, the mean AMR distance will be calculated per variable, after which the mean of all columns will returned per row (using \code{\link[=rowMeans]{rowMeans()}}), see \emph{Examples}.
For data sets, the mean AMR distance will be calculated per column, after which the mean per row will be returned, see \emph{Examples}.
Use \code{\link[=amr_distance_from_row]{amr_distance_from_row()}} to subtract distances from the distance of one row, see \emph{Examples}.
}
@ -46,14 +46,24 @@ Isolates with distances less than 0.01 difference from each other should be cons
}
\examples{
x <- random_mic(10)
x
mean_amr_distance(x)
rsi <- random_rsi(10)
rsi
mean_amr_distance(rsi)
mic <- random_mic(10)
mic
mean_amr_distance(mic)
# equal to the Z-score of their log2:
(log2(mic) - mean(log2(mic))) / sd(log2(mic))
disk <- random_disk(10)
disk
mean_amr_distance(disk)
y <- data.frame(
id = LETTERS[1:10],
amox = random_mic(10, ab = "amox", mo = "Escherichia coli"),
cipr = random_mic(10, ab = "cipr", mo = "Escherichia coli"),
amox = random_rsi(10, ab = "amox", mo = "Escherichia coli"),
cipr = random_disk(10, ab = "cipr", mo = "Escherichia coli"),
gent = random_mic(10, ab = "gent", mo = "Escherichia coli"),
tobr = random_mic(10, ab = "tobr", mo = "Escherichia coli")
)
@ -65,7 +75,7 @@ y[order(y$amr_distance), ]
if (require("dplyr")) {
y \%>\%
mutate(
amr_distance = mean_amr_distance(., where(is.mic)),
amr_distance = mean_amr_distance(y),
check_id_C = amr_distance_from_row(amr_distance, id == "C")
) \%>\%
arrange(check_id_C)
@ -76,7 +86,7 @@ if (require("dplyr")) {
filter(mo_genus() == "Enterococcus" & mo_species() != "") \%>\%
select(mo, TCY, carbapenems()) \%>\%
group_by(mo) \%>\%
mutate(d = mean_amr_distance(., where(is.rsi))) \%>\%
arrange(mo, d)
mutate(dist = mean_amr_distance(.)) \%>\%
arrange(mo, dist)
}
}