1
0
mirror of https://github.com/msberends/AMR.git synced 2025-07-09 11:01:57 +02:00
This commit is contained in:
2019-05-13 10:10:16 +02:00
parent 0444c4ed9d
commit 38a4421450
36 changed files with 475 additions and 213 deletions

10
R/ab.R
View File

@ -28,7 +28,9 @@
#' @inheritSection WHOCC WHOCC
#' @export
#' @importFrom dplyr %>% filter slice pull
#' @details Use the \code{\link{ab_property}} functions to get properties based on the returned ATC code, see Examples.
#' @details All entries in the \code{\{link{antibiotics}} data set have three different identifiers: a human readable EARS-Net code (column \code{ab}, used by ECDC and WHONET), an ATC code (column \code{atc}, used by WHO), and a CID code (column \code{cid}, Compound ID, used by PubChem). The data set contains more than 5,000 official brand names from many different countries, as found in PubChem.
#'
#' Use the \code{\link{ab_property}} functions to get properties based on the returned ATC code, see Examples.
#'
#' In the ATC classification system, the active substances are classified in a hierarchy with five different levels. The system has fourteen main anatomical/pharmacological groups or 1st levels. Each ATC main group is divided into 2nd levels which could be either pharmacological or therapeutic groups. The 3rd and 4th levels are chemical, pharmacological or therapeutic subgroups and the 5th level is the chemical substance. The 2nd, 3rd and 4th levels are often used to identify pharmacological subgroups when that is considered more appropriate than therapeutic or chemical subgroups.
#' Source: \url{https://www.whocc.no/atc/structure_and_principles/}
@ -38,7 +40,7 @@
#' WHONET 2019 software: \url{http://www.whonet.org/software.html}
#'
#' European Commission Public Health PHARMACEUTICALS - COMMUNITY REGISTER: \url{http://ec.europa.eu/health/documents/community-register/html/atc.htm}
#' @return Character (vector) with class \code{"act"}. Unknown values will return \code{NA}.
#' @return Character (vector) with class \code{"ab"}. Unknown values will return \code{NA}.
#' @seealso \code{\link{antibiotics}} for the dataframe that is being used to determine ATCs.
#' @inheritSection AMR Read more on our website!
#' @examples
@ -64,9 +66,9 @@ as.ab <- function(x) {
}
x_bak <- x
# remove suffices
x_bak_clean <- gsub("_(mic|rsi)$", "", x)
x_bak_clean <- gsub("_(mic|rsi|disk|disc)$", "", x)
# remove disk concentrations, like LVX_NM -> LVX
x_bak_clean <- gsub("_[A-Z]{2}[0-9_]{0,3}$", "", x_bak_clean)
x_bak_clean <- gsub("_[A-Z]{2}[0-9_]{0,3}$", "", x_bak_clean, ignore.case = TRUE)
# clean rest of it
x_bak_clean <- gsub("[^a-zA-Z0-9/-]", "", x_bak_clean)
# keep only a-z when it's not an ATC code or only numbers

View File

@ -26,6 +26,7 @@
#' \code{count_R} and \code{count_IR} can be used to count resistant isolates, \code{count_S} and \code{count_SI} can be used to count susceptible isolates.\cr
#' @param ... one or more vectors (or columns) with antibiotic interpretations. They will be transformed internally with \code{\link{as.rsi}} if needed.
#' @inheritParams portion
#' @inheritSection as.rsi Interpretation of S, I and R
#' @details These functions are meant to count isolates. Use the \code{\link{portion}_*} functions to calculate microbial resistance.
#'
#' \code{n_rsi} is an alias of \code{count_all}. They can be used to count all available isolates, i.e. where all input antibiotics have an available result (S, I or R). Their use is equal to \code{\link{n_distinct}}. Their function is equal to \code{count_S(...) + count_IR(...)}.
@ -174,61 +175,14 @@ n_rsi <- function(...) {
count_df <- function(data,
translate_ab = "name",
language = get_locale(),
combine_SI = TRUE,
combine_IR = FALSE) {
if (!"data.frame" %in% class(data)) {
stop("`count_df` must be called on a data.frame")
}
if (data %>% select_if(is.rsi) %>% ncol() == 0) {
stop("No columns with class 'rsi' found. See ?as.rsi.")
}
if (as.character(translate_ab) %in% c("TRUE", "official")) {
translate_ab <- "name"
}
resS <- summarise_if(.tbl = data,
.predicate = is.rsi,
.funs = count_S) %>%
mutate(Interpretation = "S") %>%
select(Interpretation, everything())
if (combine_IR == FALSE) {
resI <- summarise_if(.tbl = data,
.predicate = is.rsi,
.funs = count_I) %>%
mutate(Interpretation = "I") %>%
select(Interpretation, everything())
resR <- summarise_if(.tbl = data,
.predicate = is.rsi,
.funs = count_R) %>%
mutate(Interpretation = "R") %>%
select(Interpretation, everything())
data.groups <- group_vars(data)
res <- bind_rows(resS, resI, resR) %>%
mutate(Interpretation = factor(Interpretation, levels = c("R", "I", "S"), ordered = TRUE)) %>%
tidyr::gather(Antibiotic, Value, -Interpretation, -data.groups)
} else {
resIR <- summarise_if(.tbl = data,
.predicate = is.rsi,
.funs = count_IR) %>%
mutate(Interpretation = "IR") %>%
select(Interpretation, everything())
data.groups <- group_vars(data)
res <- bind_rows(resS, resIR) %>%
mutate(Interpretation = factor(Interpretation, levels = c("IR", "S"), ordered = TRUE)) %>%
tidyr::gather(Antibiotic, Value, -Interpretation, -data.groups)
}
if (!translate_ab == FALSE) {
res <- res %>% mutate(Antibiotic = ab_property(Antibiotic, property = translate_ab, language = language))
}
res
rsi_calc_df(type = "count",
data = data,
translate_ab = translate_ab,
language = language,
combine_SI = combine_SI,
combine_IR = combine_IR,
combine_SI_missing = missing(combine_SI))
}

View File

@ -29,9 +29,8 @@
#' @param breaks numeric vector of positions
#' @param limits numeric vector of length two providing limits of the scale, use \code{NA} to refer to the existing minimum or maximum
#' @param facet variable to split plots by, either \code{"Interpretation"} (default) or \code{"Antibiotic"} or a grouping variable
#' @param translate_ab a column name of the \code{\link{antibiotics}} data set to translate the antibiotic abbreviations into, using \code{\link{ab_name}}. Default behaviour is to translate to official names according to the WHO. Use \code{translate_ab = FALSE} to disable translation.
#' @param language the language used for translation of antibiotic names
#' @param fun function to transform \code{data}, either \code{\link{count_df}} (default) or \code{\link{portion_df}}
#' @inheritParams portion
#' @param nrow (when using \code{facet}) number of rows
#' @param datalabels show datalabels using \code{labels_rsi_count}, will at default only be shown when \code{fun = count_df}
#' @param datalabels.size size of the datalabels
@ -158,6 +157,8 @@ ggplot_rsi <- function(data,
breaks = seq(0, 1, 0.1),
limits = NULL,
translate_ab = "name",
combine_SI = TRUE,
combine_IR = FALSE,
language = get_locale(),
fun = count_df,
nrow = NULL,
@ -196,7 +197,8 @@ ggplot_rsi <- function(data,
}
p <- ggplot2::ggplot(data = data) +
geom_rsi(position = position, x = x, fill = fill, translate_ab = translate_ab, fun = fun, ...) +
geom_rsi(position = position, x = x, fill = fill, translate_ab = translate_ab,
fun = fun, combine_SI = combine_SI, combine_IR = combine_IR, ...) +
theme_rsi()
if (fill == "Interpretation") {
@ -233,11 +235,17 @@ geom_rsi <- function(position = NULL,
fill = "Interpretation",
translate_ab = "name",
language = get_locale(),
combine_SI = TRUE,
combine_IR = FALSE,
fun = count_df,
...) {
stopifnot_installed_package("ggplot2")
if (is.data.frame(position)) {
stop("`position` is invalid. Did you accidentally use '%>%' instead of '+'?", call. = FALSE)
}
fun_name <- deparse(substitute(fun))
if (!fun_name %in% c("portion_df", "count_df", "fun")) {
stop("`fun` must be portion_df or count_df")
@ -272,7 +280,13 @@ geom_rsi <- function(position = NULL,
ggplot2::layer(geom = "bar", stat = "identity", position = position,
mapping = ggplot2::aes_string(x = x, y = y, fill = fill),
data = fun, params = list(...))
params = list(...), data = function(x) {
fun(data = x,
translate_ab = translate_ab,
language = language,
combine_SI = combine_SI,
combine_IR = combine_IR)
})
}
@ -320,7 +334,16 @@ scale_y_percent <- function(breaks = seq(0, 1, 0.1), limits = NULL) {
scale_rsi_colours <- function() {
stopifnot_installed_package("ggplot2")
#ggplot2::scale_fill_brewer(palette = "RdYlGn")
ggplot2::scale_fill_manual(values = c("#b22222", "#ae9c20", "#7cfc00"))
#ggplot2::scale_fill_manual(values = c("#b22222", "#ae9c20", "#7cfc00"))
# mixed using https://www.colorhexa.com/b22222
# and https://www.w3schools.com/colors/colors_mixer.asp
ggplot2::scale_fill_manual(values = c(S = "#22b222",
SI = "#22b222",
I = "#548022",
IR = "#b22222",
R = "#b22222"))
}
#' @rdname ggplot_rsi

View File

@ -22,11 +22,12 @@
#' Guess antibiotic column
#'
#' This tries to find a column name in a data set based on information from the \code{\link{antibiotics}} data set. Also supports WHONET abbreviations. You can look for an antibiotic (trade) name or abbreviation and it will search the \code{data.frame} for any column containing a name or ATC code of that antibiotic.
#' @param tbl a \code{data.frame}
#' @param col a character to look for
#' @param x a \code{data.frame}
#' @param search_string a text to search \code{x} for
#' @param verbose a logical to indicate whether additional info should be printed
#' @importFrom dplyr %>% select filter_all any_vars
#' @importFrom crayon blue
#' @return A column name of \code{x}, or \code{NULL} when no result is found.
#' @export
#' @inheritSection AMR Read more on our website!
#' @examples
@ -39,7 +40,7 @@
#' # [1] "tetr"
#'
#' guess_ab_col(df, "J01AA07", verbose = TRUE)
#' # using column `tetr` for col "J01AA07"
#' # Note: Using column `tetr` as input for "J01AA07".
#' # [1] "tetr"
#'
#' # WHONET codes
@ -51,40 +52,40 @@
#' # [1] "AMC_ED20"
#' guess_ab_col(df, as.ab("augmentin"))
#' # [1] "AMC_ED20"
guess_ab_col <- function(tbl = NULL, col = NULL, verbose = FALSE) {
if (is.null(tbl) & is.null(col)) {
guess_ab_col <- function(x = NULL, search_string = NULL, verbose = FALSE) {
if (is.null(x) & is.null(search_string)) {
return(as.name("guess_ab_col"))
}
if (length(col) > 1) {
warning("argument 'col' has length > 1 and only the first element will be used")
col <- col[1]
if (length(search_string) > 1) {
warning("argument 'search_string' has length > 1 and only the first element will be used")
search_string <- search_string[1]
}
col <- as.character(col)
if (!is.data.frame(tbl)) {
stop("`tbl` must be a data.frame")
search_string <- as.character(search_string)
if (!is.data.frame(x)) {
stop("`x` must be a data.frame")
}
if (col %in% colnames(tbl)) {
ab_result <- col
if (search_string %in% colnames(x)) {
ab_result <- search_string
} else {
# sort colnames on length - longest first
cols <- colnames(tbl[, tbl %>% colnames() %>% nchar() %>% order() %>% rev()])
cols <- colnames(x[, x %>% colnames() %>% nchar() %>% order() %>% rev()])
df_trans <- data.frame(cols = cols,
abs = suppressWarnings(as.ab(cols)),
stringsAsFactors = FALSE)
ab_result <- df_trans[which(df_trans$abs == as.ab(col)), "cols"]
ab_result <- df_trans[which(df_trans$abs == as.ab(search_string)), "cols"]
ab_result <- ab_result[!is.na(ab_result)][1L]
}
if (length(ab_result) == 0) {
if (verbose == TRUE) {
message('No column found as input for `', col, '`.')
message('No column found as input for `', search_string, '`.')
}
return(NULL)
} else {
if (verbose == TRUE) {
message(blue(paste0("NOTE: Using column `", bold(ab_result), "` as input for `", col, "`.")))
message(blue(paste0("NOTE: Using column `", bold(ab_result), "` as input for `", search_string, "`.")))
}
return(ab_result)
}

View File

@ -31,7 +31,8 @@
#' @param data a \code{data.frame} containing columns with class \code{rsi} (see \code{\link{as.rsi}})
#' @param translate_ab a column name of the \code{\link{antibiotics}} data set to translate the antibiotic abbreviations to, using \code{\link{ab_property}}
#' @inheritParams ab_property
#' @param combine_IR a logical to indicate whether all values of I and R must be merged into one, so the output only consists of S vs. IR (susceptible vs. non-susceptible)
#' @param combine_SI a logical to indicate whether all values of I and S must be merged into one, so the output only consists of S+I vs. R (susceptible vs. resistant). This used to be the parameter \code{combine_IR}, but this now follows the redefinition by EUCAST about the interpretion of I (increased exposure) in 2019, see below. Default is now \code{TRUE}.
#' @inheritSection as.rsi Interpretation of S, I and R
#' @details \strong{Remember that you should filter your table to let it contain only first isolates!} Use \code{\link{first_isolate}} to determine them in your data set.
#'
#' These functions are not meant to count isolates, but to calculate the portion of resistance/susceptibility. Use the \code{\link[AMR]{count}} functions to count isolates. \emph{Low counts can infuence the outcome - these \code{portion} functions may camouflage this, since they only return the portion albeit being dependent on the \code{minimum} parameter.}
@ -220,69 +221,16 @@ portion_df <- function(data,
language = get_locale(),
minimum = 30,
as_percent = FALSE,
combine_SI = TRUE,
combine_IR = FALSE) {
if (!"data.frame" %in% class(data)) {
stop("`portion_df` must be called on a data.frame")
}
if (data %>% select_if(is.rsi) %>% ncol() == 0) {
stop("No columns with class 'rsi' found. See ?as.rsi.")
}
if (as.character(translate_ab) %in% c("TRUE", "official")) {
translate_ab <- "name"
}
resS <- summarise_if(.tbl = data,
.predicate = is.rsi,
.funs = portion_S,
minimum = minimum,
as_percent = as_percent) %>%
mutate(Interpretation = "S") %>%
select(Interpretation, everything())
if (combine_IR == FALSE) {
resI <- summarise_if(.tbl = data,
.predicate = is.rsi,
.funs = portion_I,
minimum = minimum,
as_percent = as_percent) %>%
mutate(Interpretation = "I") %>%
select(Interpretation, everything())
resR <- summarise_if(.tbl = data,
.predicate = is.rsi,
.funs = portion_R,
minimum = minimum,
as_percent = as_percent) %>%
mutate(Interpretation = "R") %>%
select(Interpretation, everything())
data.groups <- group_vars(data)
res <- bind_rows(resS, resI, resR) %>%
mutate(Interpretation = factor(Interpretation, levels = c("R", "I", "S"), ordered = TRUE)) %>%
tidyr::gather(Antibiotic, Value, -Interpretation, -data.groups)
} else {
resIR <- summarise_if(.tbl = data,
.predicate = is.rsi,
.funs = portion_IR,
minimum = minimum,
as_percent = as_percent) %>%
mutate(Interpretation = "IR") %>%
select(Interpretation, everything())
data.groups <- group_vars(data)
res <- bind_rows(resS, resIR) %>%
mutate(Interpretation = factor(Interpretation, levels = c("IR", "S"), ordered = TRUE)) %>%
tidyr::gather(Antibiotic, Value, -Interpretation, -data.groups)
}
if (!translate_ab == FALSE) {
res <- res %>% mutate(Antibiotic = ab_property(Antibiotic, property = translate_ab, language = language))
}
res
rsi_calc_df(type = "portion",
data = data,
translate_ab = translate_ab,
language = language,
minimum = minimum,
as_percent = as_percent,
combine_SI = combine_SI,
combine_IR = combine_IR,
combine_SI_missing = missing(combine_SI))
}

42
R/rsi.R
View File

@ -35,6 +35,20 @@
#' After using \code{as.rsi}, you can use \code{\link{eucast_rules}} to (1) apply inferred susceptibility and resistance based on results of other antibiotics and (2) apply intrinsic resistance based on taxonomic properties of a microorganism.
#'
#' The function \code{is.rsi.eligible} returns \code{TRUE} when a columns contains at most 5\% invalid antimicrobial interpretations (not S and/or I and/or R), and \code{FALSE} otherwise. The threshold of 5\% can be set with the \code{threshold} parameter.
#' @section Interpretation of S, I and R:
#' In 2019, EUCAST has decided to change the definitions of susceptibility testing categories S, I and R as shown below. Results of several consultations on the new definitions are available on the EUCAST website under "Consultations".
#'
#' \itemize{
#' \item{\strong{S}}{Susceptible, standard dosing regimen: A microorganism is categorised as "Susceptible, standard dosing regimen", when there is a high likelihood of therapeutic success using a standard dosing regimen of the agent.}
#' \item{\strong{I}}{Susceptible, increased exposure: A microorganism is categorised as "Susceptible, Increased exposure" when there is a high likelihood of therapeutic success because exposure to the agent is increased by adjusting the dosing regimen or by its concentration at the site of infection.}
#' \item{\strong{R}}{Resistant: A microorganism is categorised as "Resistant" when there is a high likelihood of therapeutic failure even when there is increased exposure.}
#' }
#'
#' Exposure is a function of how the mode of administration, dose, dosing interval, infusion time, as well as distribution and excretion of the antimicrobial agent will influence the infecting organism at the site of infection.
#'
#' Source: \url{http://www.eucast.org/newsiandr/}.
#'
#' \strong{This AMR package honours this new insight.}
#' @return Ordered factor with new class \code{rsi}
#' @keywords rsi
#' @export
@ -182,17 +196,17 @@ exec_as.rsi <- function(method, x, mo, ab, guideline) {
mo_becker <- as.mo(mo, Becker = TRUE)
mo_lancefield <- as.mo(mo, Lancefield = TRUE)
guideline <- toupper(guideline)
if (guideline %in% c("CLSI", "EUCAST")) {
guideline <- AMR::rsi_translation %>%
filter(guideline %like% guideline) %>%
guideline_param <- toupper(guideline)
if (guideline_param %in% c("CLSI", "EUCAST")) {
guideline_param <- AMR::rsi_translation %>%
filter(guideline %like% guideline_param) %>%
pull(guideline) %>%
sort() %>%
rev() %>%
.[1]
}
if (!guideline %in% AMR::rsi_translation$guideline) {
if (!guideline_param %in% AMR::rsi_translation$guideline) {
stop(paste0("invalid guideline: '", guideline,
"'.\nValid guidelines are: ", paste0("'", rev(sort(unique(AMR::rsi_translation$guideline))), "'", collapse = ", ")),
call. = FALSE)
@ -200,7 +214,7 @@ exec_as.rsi <- function(method, x, mo, ab, guideline) {
new_rsi <- rep(NA_character_, length(x))
trans <- AMR::rsi_translation %>%
filter(guideline == guideline) %>%
filter(guideline == guideline_param) %>%
mutate(lookup = paste(mo, ab))
lookup_mo <- paste(mo, ab)
@ -224,15 +238,15 @@ exec_as.rsi <- function(method, x, mo, ab, guideline) {
if (NROW(get_record) > 0) {
if (method == "mic") {
new_rsi[i] <- case_when(is.na(get_record$S_mic) | is.na(get_record$R_mic) ~ NA_character_,
x[i] <= get_record$S_mic ~ "S",
x[i] >= get_record$R_mic ~ "R",
TRUE ~ "I")
new_rsi[i] <- case_when(isTRUE(x[i] <= get_record$S_mic) ~ "S",
isTRUE(x[i] >= get_record$R_mic) ~ "R",
!is.na(get_record$S_mic) & !is.na(get_record$R_mic) ~ "I",
TRUE ~ NA_character_)
} else if (method == "disk") {
new_rsi[i] <- case_when(is.na(get_record$S_disk) | is.na(get_record$R_disk) ~ NA_character_,
x[i] <= get_record$S_disk ~ "S",
x[i] >= get_record$R_disk ~ "R",
TRUE ~ "I")
new_rsi[i] <- case_when(isTRUE(x[i] >= get_record$S_disk) ~ "S",
isTRUE(x[i] <= get_record$R_disk) ~ "R",
!is.na(get_record$S_disk) & !is.na(get_record$R_disk) ~ "I",
TRUE ~ NA_character_)
}
}

View File

@ -150,3 +150,88 @@ rsi_calc <- function(...,
result
}
}
rsi_calc_df <- function(type, # "portion" or "count"
data,
translate_ab = "name",
language = get_locale(),
minimum = 30,
as_percent = FALSE,
combine_SI = TRUE,
combine_IR = FALSE,
combine_SI_missing = FALSE) {
if (!"data.frame" %in% class(data)) {
stop(paste0("`", type, "_df` must be called on a data.frame"), call. = FALSE)
}
if (isTRUE(combine_IR) & isTRUE(combine_SI_missing)) {
combine_SI <- FALSE
}
if (isTRUE(combine_SI) & isTRUE(combine_IR)) {
stop("either `combine_SI` or `combine_IR` can be TRUE", call. = FALSE)
}
if (data %>% select_if(is.rsi) %>% ncol() == 0) {
stop("No columns with class 'rsi' found. See ?as.rsi.", call. = FALSE)
}
if (as.character(translate_ab) %in% c("TRUE", "official")) {
translate_ab <- "name"
}
get_summaryfunction <- function(int) {
# look for portion_S, count_S, etc:
int_fn <- get(paste0(type, "_", int), envir = asNamespace("AMR"))
if (type == "portion") {
summ <- summarise_if(.tbl = data,
.predicate = is.rsi,
.funs = int_fn,
minimum = minimum,
as_percent = as_percent)
} else if (type == "count") {
summ <- summarise_if(.tbl = data,
.predicate = is.rsi,
.funs = int_fn)
}
summ %>%
mutate(Interpretation = int) %>%
select(Interpretation, everything())
}
resS <- get_summaryfunction("S")
resI <- get_summaryfunction("I")
resR <- get_summaryfunction("R")
resSI <- get_summaryfunction("SI")
resIR <- get_summaryfunction("IR")
data.groups <- group_vars(data)
if (isFALSE(combine_SI) & isFALSE(combine_IR)) {
res <- bind_rows(resS, resI, resR) %>%
mutate(Interpretation = factor(Interpretation,
levels = c("S", "I", "R"),
ordered = TRUE))
} else if (isTRUE(combine_IR)) {
res <- bind_rows(resS, resIR) %>%
mutate(Interpretation = factor(Interpretation,
levels = c("S", "IR"),
ordered = TRUE))
} else if (isTRUE(combine_SI)) {
res <- bind_rows(resSI, resR) %>%
mutate(Interpretation = factor(Interpretation,
levels = c("SI", "R"),
ordered = TRUE))
}
res <- res %>%
tidyr::gather(Antibiotic, Value, -Interpretation, -data.groups)
if (!translate_ab == FALSE) {
res <- res %>% mutate(Antibiotic = ab_property(Antibiotic, property = translate_ab, language = language))
}
res
}

View File

@ -24,7 +24,7 @@
#' All antimicrobial drugs and their official names, ATC codes, ATC groups and defined daily dose (DDD) are included in this package, using the WHO Collaborating Centre for Drug Statistics Methodology.
#' @section WHOCC:
#' \if{html}{\figure{logo_who.png}{options: height=60px style=margin-bottom:5px} \cr}
#' This package contains \strong{all ~500 antimicrobial drugs} and their Anatomical Therapeutic Chemical (ATC) codes, ATC groups and Defined Daily Dose (DDD) from the World Health Organization Collaborating Centre for Drug Statistics Methodology (WHOCC, \url{https://www.whocc.no}) and the Pharmaceuticals Community Register of the European Commission (\url{http://ec.europa.eu/health/documents/community-register/html/atc.htm}).
#' This package contains \strong{all ~450 antimicrobial drugs} and their Anatomical Therapeutic Chemical (ATC) codes, ATC groups and Defined Daily Dose (DDD) from the World Health Organization Collaborating Centre for Drug Statistics Methodology (WHOCC, \url{https://www.whocc.no}) and the Pharmaceuticals Community Register of the European Commission (\url{http://ec.europa.eu/health/documents/community-register/html/atc.htm}).
#'
#' These have become the gold standard for international drug utilisation monitoring and research.
#'