fix for as.mo, added also_single_tested

This commit is contained in:
dr. M.S. (Matthijs) Berends 2018-10-19 13:53:31 +02:00
parent 032c0d51ef
commit 86b03577a7
14 changed files with 128 additions and 66 deletions

View File

@ -1,6 +1,6 @@
Package: AMR
Version: 0.4.0.9004
Date: 2018-10-17
Version: 0.4.0.9005
Date: 2018-10-19
Title: Antimicrobial Resistance Analysis
Authors@R: c(
person(

View File

@ -175,6 +175,8 @@ importFrom(data.table,as.data.table)
importFrom(data.table,data.table)
importFrom(data.table,setkey)
importFrom(dplyr,"%>%")
importFrom(dplyr,all_vars)
importFrom(dplyr,any_vars)
importFrom(dplyr,arrange)
importFrom(dplyr,arrange_at)
importFrom(dplyr,as_tibble)
@ -184,7 +186,9 @@ importFrom(dplyr,case_when)
importFrom(dplyr,desc)
importFrom(dplyr,everything)
importFrom(dplyr,filter)
importFrom(dplyr,filter_all)
importFrom(dplyr,full_join)
importFrom(dplyr,funs)
importFrom(dplyr,group_by)
importFrom(dplyr,group_by_at)
importFrom(dplyr,group_vars)
@ -192,6 +196,7 @@ importFrom(dplyr,if_else)
importFrom(dplyr,lag)
importFrom(dplyr,left_join)
importFrom(dplyr,mutate)
importFrom(dplyr,mutate_all)
importFrom(dplyr,mutate_at)
importFrom(dplyr,n_distinct)
importFrom(dplyr,progress_estimated)

View File

@ -11,10 +11,11 @@
* Better error handling when rules cannot be applied (i.e. new values could not be inserted)
* The amount of affected values will now only be measured once per row/column combination
* Data set `septic_patients` now reflects these changes
* Empty values as input for `as.mo` will be processed faster
* Tremendous speed improvement for `as.mo` (and consequently all `mo_*` functions), as empty values wil be ignored a priori
* Fewer than 3 characters as input for `as.mo` will return NA
* Added parameter `combine_IR` (TRUE/FALSE) to functions `portion_df` and `count_df`, to indicate that all values of I and R must be merged into one, so the output only consists of S vs. IR (susceptible vs. non-susceptible)
* Fix for `portion_*(..., as_percent = TRUE)` when minimal amount of isolates would not be met
* Added parameter `also_single_tested` for `portion_*` and `count_*` functions to also include cases where not all antibiotics were tested but at least one of the tested antibiotics includes the target antimicribial interpretation, see `?portion`
* Using `portion_*` functions now throws a warning when total available isolate is below parameter `minimum`
* Functions `as.mo`, `as.rsi` and `as.mic` will not set package name as attribute anymore
* Data set `septic_patients` is now a `data.frame`, not a tibble anymore

View File

@ -92,56 +92,61 @@
#' group_by(hospital_id) %>%
#' count_df(translate = FALSE)
#'
count_R <- function(...) {
count_R <- function(..., also_single_tested = FALSE) {
rsi_calc(...,
type = "R",
include_I = FALSE,
minimum = 0,
as_percent = FALSE,
also_single_tested = FALSE,
only_count = TRUE)
}
#' @rdname count
#' @export
count_IR <- function(...) {
count_IR <- function(..., also_single_tested = FALSE) {
rsi_calc(...,
type = "R",
include_I = TRUE,
minimum = 0,
as_percent = FALSE,
also_single_tested = FALSE,
only_count = TRUE)
}
#' @rdname count
#' @export
count_I <- function(...) {
count_I <- function(..., also_single_tested = FALSE) {
rsi_calc(...,
type = "I",
include_I = FALSE,
minimum = 0,
as_percent = FALSE,
also_single_tested = FALSE,
only_count = TRUE)
}
#' @rdname count
#' @export
count_SI <- function(...) {
count_SI <- function(..., also_single_tested = FALSE) {
rsi_calc(...,
type = "S",
include_I = TRUE,
minimum = 0,
as_percent = FALSE,
also_single_tested = FALSE,
only_count = TRUE)
}
#' @rdname count
#' @export
count_S <- function(...) {
count_S <- function(..., also_single_tested = FALSE) {
rsi_calc(...,
type = "S",
include_I = FALSE,
minimum = 0,
as_percent = FALSE,
also_single_tested = FALSE,
only_count = TRUE)
}

View File

@ -175,7 +175,7 @@
#' Translation table for Certe
#'
#' A data set containing all bacteria codes of Certe MMB. These codes can be joined to data with an ID from \code{\link{microorganisms}$mo} (using \code{\link{left_join_microorganisms}}). GLIMS codes can also be translated to valid \code{MO}s with \code{\link{guess_mo}}.
#' @format A \code{\link{tibble}} with 2,664 observations and 2 variables:
#' @format A \code{\link{tibble}} with 2,665 observations and 2 variables:
#' \describe{
#' \item{\code{certe}}{Code of microorganism according to Certe MMB}
#' \item{\code{mo}}{Code of microorganism in \code{\link{microorganisms}}}

View File

@ -361,15 +361,20 @@ EUCAST_rules <- function(tbl,
changed_results <<- changed_results + sum(before != after, na.rm = TRUE) # will be reset at start of every rule
if (verbose == TRUE) {
verbose_new <- data.frame(rule_type = rule[1],
rule_set = rule[2],
force_to = to,
found = length(before),
changed = sum(before != after, na.rm = TRUE),
stringsAsFactors = FALSE)
verbose_new$target_columns <- list(unname(cols))
verbose_new$target_rows <- list(unname(rows))
verbose_info <<- rbind(verbose_info, verbose_new)
for (i in 1:length(cols)) {
# add new row for every affected column
verbose_new <- data.frame(rule_type = rule[1],
rule_set = rule[2],
force_to = to,
found = length(before),
changed = sum(before != after, na.rm = TRUE),
target_column = cols[i],
stringsAsFactors = FALSE)
verbose_new$target_rows <- list(unname(rows))
rownames(verbose_new) <- NULL
verbose_info <<- rbind(verbose_info, verbose_new)
}
}
}
}
@ -410,15 +415,15 @@ EUCAST_rules <- function(tbl,
# since ampicillin ^= amoxicillin, get the first from the latter (not in original table)
if (!is.na(ampi) & !is.na(amox)) {
if (verbose == TRUE) {
cat("\n VERBOSE: transforming",
length(which(tbl[, amox] == "S" & !tbl[, ampi] %in% c("S", "I", "R"))),
"empty ampicillin fields to 'S' based on amoxicillin.")
cat("\n VERBOSE: transforming",
length(which(tbl[, amox] == "I" & !tbl[, ampi] %in% c("S", "I", "R"))),
"empty ampicillin fields to 'I' based on amoxicillin.")
cat("\n VERBOSE: transforming",
length(which(tbl[, amox] == "R" & !tbl[, ampi] %in% c("S", "I", "R"))),
"empty ampicillin fields to 'R' based on amoxicillin.\n")
cat(bgGreen("\n VERBOSE: transforming",
length(which(tbl[, amox] == "S" & !tbl[, ampi] %in% c("S", "I", "R"))),
"empty ampicillin fields to 'S' based on amoxicillin. "))
cat(bgGreen("\n VERBOSE: transforming",
length(which(tbl[, amox] == "I" & !tbl[, ampi] %in% c("S", "I", "R"))),
"empty ampicillin fields to 'I' based on amoxicillin. "))
cat(bgGreen("\n VERBOSE: transforming",
length(which(tbl[, amox] == "R" & !tbl[, ampi] %in% c("S", "I", "R"))),
"empty ampicillin fields to 'R' based on amoxicillin. \n"))
}
tbl[which(tbl[, amox] == "S" & !tbl[, ampi] %in% c("S", "I", "R")), ampi] <- "S"
tbl[which(tbl[, amox] == "I" & !tbl[, ampi] %in% c("S", "I", "R")), ampi] <- "I"

11
R/mo.R
View File

@ -195,7 +195,7 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
if (all(x %in% AMR::microorganisms[, property])) {
# already existing mo
} else if (all(x %in% AMR::microorganisms[, "mo"])) {
# existing mo codes
# existing mo codes when not looking for property "mo"
suppressWarnings(
x <- data.frame(mo = x, stringsAsFactors = FALSE) %>%
left_join(AMR::microorganisms, by = "mo") %>%
@ -661,13 +661,12 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
x[x == MOs[mo == 'B_STRPTC_SAL', ..property][[1]][1L]] <- MOs[mo == 'B_STRPTC_GRK', ..property][[1]][1L]
}
x_input_unique <- unique(x_input)
# fill in empty values again
x[is.na(x_input_unique) | is.null(x_input_unique) | identical(x_input_unique, "")] <- NA
# comply to x, which is also unique and without empty values
x_input_unique_nonempty <- unique(x_input[!is.na(x_input) & !is.null(x_input) & !identical(x_input, "")])
# left join the found results to the original input values (x_input)
df_found <- data.frame(input = as.character(x_input_unique),
found = x,
df_found <- data.frame(input = as.character(x_input_unique_nonempty),
found = as.character(x),
stringsAsFactors = FALSE)
df_input <- data.frame(input = as.character(x_input),
stringsAsFactors = FALSE)

View File

@ -24,6 +24,7 @@
#' @param ... one or more vectors (or columns) with antibiotic interpretations. They will be transformed internally with \code{\link{as.rsi}} if needed. Use multiple columns to calculate (the lack of) co-resistance: the probability where one of two drugs have a resistant or susceptible result. See Examples.
#' @param minimum the minimal amount of available isolates. Any number lower than \code{minimum} will return \code{NA} with a warning. The default number of \code{30} isolates is advised by the Clinical and Laboratory Standards Institute (CLSI) as best practice, see Source.
#' @param as_percent a logical to indicate whether the output must be returned as a hundred fold with \% sign (a character). A value of \code{0.123456} will then be returned as \code{"12.3\%"}.
#' @param also_single_tested a logical to indicate whether (in combination therapies) also observations should be included where not all antibiotics were tested, but at least one of the tested antibiotics contains a target interpretation (e.g. S in case of \code{portion_S} and R in case of \code{portion_R}). \strong{This would lead to selection bias in almost all cases.}
#' @param data a \code{data.frame} containing columns with class \code{rsi} (see \code{\link{as.rsi}})
#' @param translate_ab a column name of the \code{\link{antibiotics}} data set to translate the antibiotic abbreviations to, using \code{\link{abname}}. This can be set with \code{\link{getOption}("get_antibiotic_names")}.
#' @param combine_IR a logical to indicate whether all values of I and R must be merged into one, so the output only consists of S vs. IR (susceptible vs. non-susceptible)
@ -134,12 +135,14 @@
#' }
portion_R <- function(...,
minimum = 30,
as_percent = FALSE) {
as_percent = FALSE,
also_single_tested = FALSE) {
rsi_calc(...,
type = "R",
include_I = FALSE,
minimum = minimum,
as_percent = as_percent,
also_single_tested = also_single_tested,
only_count = FALSE)
}
@ -147,12 +150,14 @@ portion_R <- function(...,
#' @export
portion_IR <- function(...,
minimum = 30,
as_percent = FALSE) {
as_percent = FALSE,
also_single_tested = FALSE) {
rsi_calc(...,
type = "R",
include_I = TRUE,
minimum = minimum,
as_percent = as_percent,
also_single_tested = also_single_tested,
only_count = FALSE)
}
@ -160,12 +165,14 @@ portion_IR <- function(...,
#' @export
portion_I <- function(...,
minimum = 30,
as_percent = FALSE) {
as_percent = FALSE,
also_single_tested = FALSE) {
rsi_calc(...,
type = "I",
include_I = FALSE,
minimum = minimum,
as_percent = as_percent,
also_single_tested = also_single_tested,
only_count = FALSE)
}
@ -173,12 +180,14 @@ portion_I <- function(...,
#' @export
portion_SI <- function(...,
minimum = 30,
as_percent = FALSE) {
as_percent = FALSE,
also_single_tested = FALSE) {
rsi_calc(...,
type = "S",
include_I = TRUE,
minimum = minimum,
as_percent = as_percent,
also_single_tested = also_single_tested,
only_count = FALSE)
}
@ -186,12 +195,14 @@ portion_SI <- function(...,
#' @export
portion_S <- function(...,
minimum = 30,
as_percent = FALSE) {
as_percent = FALSE,
also_single_tested = FALSE) {
rsi_calc(...,
type = "S",
include_I = FALSE,
minimum = minimum,
as_percent = as_percent,
also_single_tested = also_single_tested,
only_count = FALSE)
}

View File

@ -16,12 +16,13 @@
# GNU General Public License for more details. #
# ==================================================================== #
#' @importFrom dplyr %>% pull
#' @importFrom dplyr %>% pull all_vars any_vars filter_all funs mutate_all
rsi_calc <- function(...,
type,
include_I,
minimum,
as_percent,
also_single_tested,
only_count) {
if (!is.logical(include_I)) {
@ -33,6 +34,9 @@ rsi_calc <- function(...,
if (!is.logical(as_percent)) {
stop('`as_percent` must be logical', call. = FALSE)
}
if (!is.logical(also_single_tested)) {
stop('`also_single_tested` must be logical', call. = FALSE)
}
dots_df <- ...elt(1) # it needs this evaluation
dots <- base::eval(base::substitute(base::alist(...)))
@ -67,23 +71,53 @@ rsi_calc <- function(...,
}
print_warning <- FALSE
# check integrity of columns: force rsi class
type_trans <- as.integer(as.rsi(type))
type_others <- setdiff(1:3, type_trans)
if (is.data.frame(x)) {
rsi_integrity_check <- character(0)
for (i in 1:ncol(x)) {
# check integrity of columns: force rsi class
if (!is.rsi(x %>% pull(i))) {
x[, i] <- as.rsi(x[, i])
rsi_integrity_check <- c(rsi_integrity_check, x %>% pull(i) %>% as.character())
x[, i] <- suppressWarnings(as.rsi(x[, i])) # warning will be given later
print_warning <- TRUE
}
x[, i] <- x %>% pull(i) %>% as.integer()
}
x <- apply(X = x,
MARGIN = 1,
FUN = min)
if (length(rsi_integrity_check) > 0) {
# this will give a warning for invalid results, of all input columns (so only 1 warning)
rsi_integrity_check <- as.rsi(rsi_integrity_check)
}
if (include_I == TRUE) {
x <- x %>% mutate_all(funs(ifelse(. == 2, type_trans, .)))
}
if (also_single_tested == TRUE) {
# THE CHANCE THAT AT LEAST ONE RESULT IS type
found <- x %>% filter_all(any_vars(. == type_trans)) %>% nrow()
# THE CHANCE THAT AT LEAST ONE RESULT IS type OR ALL ARE TESTED
total <- found + x %>% filter_all(all_vars(. %in% type_others)) %>% nrow()
} else {
x <- apply(X = x,
MARGIN = 1,
FUN = min)
found <- sum(as.integer(x) == type_trans, na.rm = TRUE)
total <- length(x) - sum(is.na(x))
}
} else {
if (!is.rsi(x)) {
x <- as.rsi(x)
print_warning <- TRUE
}
x <- as.integer(x)
if (include_I == TRUE) {
x[x == 2] <- type_trans
}
found <- sum(x == type_trans, na.rm = TRUE)
total <- length(x) - sum(is.na(x))
}
if (print_warning == TRUE) {
@ -91,21 +125,10 @@ rsi_calc <- function(...,
call. = FALSE)
}
if (type == "S") {
found <- sum(as.integer(x) <= 1 + include_I, na.rm = TRUE)
} else if (type == "I") {
found <- sum(as.integer(x) == 2, na.rm = TRUE)
} else if (type == "R") {
found <- sum(as.integer(x) >= 3 - include_I, na.rm = TRUE)
} else {
stop("invalid type")
}
if (only_count == TRUE) {
return(found)
}
total <- length(x) - sum(is.na(x))
if (total < minimum) {
warning("Introducing NA: only ", total, " results available (minimum set to ", minimum, ").", call. = FALSE)
result <- NA

Binary file not shown.

View File

@ -15,15 +15,15 @@
Wickham H. \strong{Tidy Data.} The Journal of Statistical Software, vol. 59, 2014. \url{http://vita.had.co.nz/papers/tidy-data.html}
}
\usage{
count_R(...)
count_R(..., also_single_tested = FALSE)
count_IR(...)
count_IR(..., also_single_tested = FALSE)
count_I(...)
count_I(..., also_single_tested = FALSE)
count_SI(...)
count_SI(..., also_single_tested = FALSE)
count_S(...)
count_S(..., also_single_tested = FALSE)
count_all(...)
@ -35,6 +35,8 @@ count_df(data, translate_ab = getOption("get_antibiotic_names",
\arguments{
\item{...}{one or more vectors (or columns) with antibiotic interpretations. They will be transformed internally with \code{\link{as.rsi}} if needed.}
\item{also_single_tested}{a logical to indicate whether (in combination therapies) also observations should be included where not all antibiotics were tested, but at least one of the tested antibiotics contains a target interpretation (e.g. S in case of \code{portion_S} and R in case of \code{portion_R}). \strong{This would lead to selection bias in almost all cases.}}
\item{data}{a \code{data.frame} containing columns with class \code{rsi} (see \code{\link{as.rsi}})}
\item{translate_ab}{a column name of the \code{\link{antibiotics}} data set to translate the antibiotic abbreviations to, using \code{\link{abname}}. This can be set with \code{\link{getOption}("get_antibiotic_names")}.}

View File

@ -4,7 +4,7 @@
\name{microorganisms.certe}
\alias{microorganisms.certe}
\title{Translation table for Certe}
\format{A \code{\link{tibble}} with 2,664 observations and 2 variables:
\format{A \code{\link{tibble}} with 2,665 observations and 2 variables:
\describe{
\item{\code{certe}}{Code of microorganism according to Certe MMB}
\item{\code{mo}}{Code of microorganism in \code{\link{microorganisms}}}

View File

@ -15,15 +15,20 @@
Wickham H. \strong{Tidy Data.} The Journal of Statistical Software, vol. 59, 2014. \url{http://vita.had.co.nz/papers/tidy-data.html}
}
\usage{
portion_R(..., minimum = 30, as_percent = FALSE)
portion_R(..., minimum = 30, as_percent = FALSE,
also_single_tested = FALSE)
portion_IR(..., minimum = 30, as_percent = FALSE)
portion_IR(..., minimum = 30, as_percent = FALSE,
also_single_tested = FALSE)
portion_I(..., minimum = 30, as_percent = FALSE)
portion_I(..., minimum = 30, as_percent = FALSE,
also_single_tested = FALSE)
portion_SI(..., minimum = 30, as_percent = FALSE)
portion_SI(..., minimum = 30, as_percent = FALSE,
also_single_tested = FALSE)
portion_S(..., minimum = 30, as_percent = FALSE)
portion_S(..., minimum = 30, as_percent = FALSE,
also_single_tested = FALSE)
portion_df(data, translate_ab = getOption("get_antibiotic_names",
"official"), minimum = 30, as_percent = FALSE, combine_IR = FALSE)
@ -35,6 +40,8 @@ portion_df(data, translate_ab = getOption("get_antibiotic_names",
\item{as_percent}{a logical to indicate whether the output must be returned as a hundred fold with \% sign (a character). A value of \code{0.123456} will then be returned as \code{"12.3\%"}.}
\item{also_single_tested}{a logical to indicate whether (in combination therapies) also observations should be included where not all antibiotics were tested, but at least one of the tested antibiotics contains a target interpretation (e.g. S in case of \code{portion_S} and R in case of \code{portion_R}). \strong{This would lead to selection bias in almost all cases.}}
\item{data}{a \code{data.frame} containing columns with class \code{rsi} (see \code{\link{as.rsi}})}
\item{translate_ab}{a column name of the \code{\link{antibiotics}} data set to translate the antibiotic abbreviations to, using \code{\link{abname}}. This can be set with \code{\link{getOption}("get_antibiotic_names")}.}

View File

@ -17,6 +17,9 @@ test_that("portions works", {
expect_equal(septic_patients %>% portion_S(amcl, gent),
0.9210074,
tolerance = 0.001)
expect_equal(septic_patients %>% portion_S(amcl, gent, also_single_tested = TRUE),
0.9239669,
tolerance = 0.001)
# amcl+genta susceptibility around 92.1%
expect_equal(suppressWarnings(rsi(septic_patients$amcl,
@ -66,6 +69,7 @@ test_that("portions works", {
expect_error(portion_I("test", as_percent = "test"))
expect_error(portion_S("test", minimum = "test"))
expect_error(portion_S("test", as_percent = "test"))
expect_error(portion_S("test", also_single_tested = "test"))
# check too low amount of isolates
expect_identical(suppressWarnings(portion_R(septic_patients$amox, minimum = nrow(septic_patients) + 1)),