1
0
mirror of https://github.com/msberends/AMR.git synced 2025-01-27 04:24:37 +01:00

(v1.0.1.9000) first PCA implementation

This commit is contained in:
dr. M.S. (Matthijs) Berends 2020-03-07 21:48:21 +01:00
parent f444c24ed3
commit fa0d9c58d9
40 changed files with 2224 additions and 172 deletions

View File

@ -1,6 +1,6 @@
Package: AMR
Version: 1.0.1
Date: 2020-02-22
Version: 1.0.1.9000
Date: 2020-03-07
Title: Antimicrobial Resistance Analysis
Authors@R: c(
person(role = c("aut", "cre"),

View File

@ -37,6 +37,7 @@ S3method(pillar_shaft,rsi)
S3method(plot,mic)
S3method(plot,resistance_predict)
S3method(plot,rsi)
S3method(prcomp,data.frame)
S3method(print,ab)
S3method(print,bug_drug_combinations)
S3method(print,catalogue_of_life_version)
@ -120,6 +121,7 @@ export(g.test)
export(geom_rsi)
export(get_locale)
export(get_mo_source)
export(ggplot_pca)
export(ggplot_rsi)
export(ggplot_rsi_predict)
export(guess_ab_col)
@ -169,6 +171,7 @@ export(mrgn)
export(n_rsi)
export(p.symbol)
export(p_symbol)
export(pca)
export(portion_I)
export(portion_IR)
export(portion_R)
@ -224,6 +227,7 @@ exportMethods(kurtosis.default)
exportMethods(kurtosis.matrix)
exportMethods(plot.mic)
exportMethods(plot.rsi)
exportMethods(prcomp.data.frame)
exportMethods(print.ab)
exportMethods(print.bug_drug_combinations)
exportMethods(print.catalogue_of_life_version)
@ -318,6 +322,7 @@ importFrom(pillar,pillar_shaft)
importFrom(pillar,type_sum)
importFrom(rlang,as_label)
importFrom(rlang,enquos)
importFrom(rlang,eval_tidy)
importFrom(stats,complete.cases)
importFrom(stats,glm)
importFrom(stats,lm)

View File

@ -1,3 +1,9 @@
# AMR 1.0.1.9000
### New
* Support for easy principal component analysis for AMR, using the new `pca()` function
* Plotting biplots for principal component analysis using the new `ggplot_pca()` function
# AMR 1.0.1
### Changed

10
R/age.R
View File

@ -27,7 +27,7 @@
#' @param reference reference date(s) (defaults to today), will be coerced with [as.POSIXlt()] and cannot be lower than `x`
#' @param exact a logical to indicate whether age calculation should be exact, i.e. with decimals. It divides the number of days of [year-to-date](https://en.wikipedia.org/wiki/Year-to-date) (YTD) of `x` by the number of days in the year of `reference` (either 365 or 366).
#' @param na.rm a logical to indicate whether missing values should be removed
#' @return An integer (no decimals) if `exact = FALSE`, a double (with decimals) otherwise
#' @return An [integer] (no decimals) if `exact = FALSE`, a [double] (with decimals) otherwise
#' @seealso To split ages into groups, use the [age_groups()] function.
#' @importFrom dplyr if_else
#' @inheritSection AMR Read more on our website!
@ -95,8 +95,8 @@ age <- function(x, reference = Sys.Date(), exact = FALSE, na.rm = FALSE) {
#' @inheritSection lifecycle Stable lifecycle
#' @param x age, e.g. calculated with [age()]
#' @param split_at values to split `x` at, defaults to age groups 0-11, 12-24, 25-54, 55-74 and 75+. See Details.
#' @param na.rm a logical to indicate whether missing values should be removed
#' @details To split ages, the input can be:
#' @param na.rm a [logical] to indicate whether missing values should be removed
#' @details To split ages, the input for the `split_at` parameter can be:
#'
#' * A numeric vector. A vector of e.g. `c(10, 20)` will split on 0-9, 10-19 and 20+. A value of only `50` will split on 0-49 and 50+.
#' The default is to split on young children (0-11), youth (12-24), young adults (25-54), middle-aged adults (55-74) and elderly (75+).
@ -104,8 +104,8 @@ age <- function(x, reference = Sys.Date(), exact = FALSE, na.rm = FALSE) {
#' - `"children"` or `"kids"`, equivalent of: `c(0, 1, 2, 4, 6, 13, 18)`. This will split on 0, 1, 2-3, 4-5, 6-12, 13-17 and 18+.
#' - `"elderly"` or `"seniors"`, equivalent of: `c(65, 75, 85)`. This will split on 0-64, 65-74, 75-84, 85+.
#' - `"fives"`, equivalent of: `1:20 * 5`. This will split on 0-4, 5-9, 10-14, ..., 90-94, 95-99, 100+.
#' - `"tens"`, equivalent of: `1:10 * 10`. This will split on 0-9, 10-19, 20-29, ... 80-89, 90-99, 100+.
#' @return Ordered [`factor`]
#' - `"tens"`, equivalent of: `1:10 * 10`. This will split on 0-9, 10-19, 20-29, ..., 80-89, 90-99, 100+.
#' @return Ordered [factor]
#' @seealso To determine ages, based on one or more reference dates, use the [age()] function.
#' @export
#' @inheritSection AMR Read more on our website!

349
R/ggplot_pca.R Executable file
View File

@ -0,0 +1,349 @@
# ==================================================================== #
# TITLE #
# Antimicrobial Resistance (AMR) Analysis #
# #
# SOURCE #
# https://gitlab.com/msberends/AMR #
# #
# LICENCE #
# (c) 2018-2020 Berends MS, Luz CF et al. #
# #
# This R package is free software; you can freely use and distribute #
# it for both personal and commercial purposes under the terms of the #
# GNU General Public License version 2.0 (GNU GPL-2), as published by #
# the Free Software Foundation. #
# #
# We created this package for both routine data analysis and academic #
# research and it was publicly released in the hope that it will be #
# useful, but it comes WITHOUT ANY WARRANTY OR LIABILITY. #
# Visit our website for more info: https://msberends.gitlab.io/AMR. #
# ==================================================================== #
#' PCA biplot with `ggplot2`
#'
#' This function is to produce a `ggplot2` variant of a so-called [biplot](https://en.wikipedia.org/wiki/Biplot) for PCA (principal component analysis), but is more flexible and more appealing than the base \R [biplot()] function.
#' @inheritSection lifecycle Maturing lifecycle
#' @param x an object returned by [pca()], [prcomp()] or [princomp()]
#' @inheritParams stats::biplot.prcomp
#' @param labels an optional vector of labels for the observations. If set, the labels will be placed below their respective points. When using the [pca()] function as input for `x`, this will be determined automatically based on the attribute `non_numeric_cols`, see [pca()].
#' @param labels_textsize the size of the text used for the labels
#' @param labels_text_placement adjustment factor for the placement of the variable names (`>=1` means further away from the arrow head)
#' @param groups an optional vector of groups for the labels, with the same length as `labels`. If set, the points and labels will be coloured according to these groups. When using the [pca()] function as input for `x`, this will be determined automatically based on the attribute `non_numeric_cols`, see [pca()].
#' @param ellipse a logical to indicate whether a normal data ellipse should be drawn for each group (set with `groups`)
#' @param ellipse_prob statistical size of the ellipse in normal probability
#' @param ellipse_size the size of the ellipse line
#' @param ellipse_alpha the alpha (transparency) of the ellipse line
#' @param points_size the size of the points
#' @param points_alpha the alpha (transparency) of the points
#' @param arrows a logical to indicate whether arrows should be drawn
#' @param arrows_colour the colour of the arrow and their text
#' @param arrows_size the size (thickness) of the arrow lines
#' @param arrows_textsize the size of the text at the end of the arrows
#' @param arrows_alpha the alpha (transparency) of the arrows and their text
#' @param base_textsize the text size for all plot elements except the labels and arrows
#' @param ... Parameters passed on to functions
#' @source The [ggplot_pca()] function is based on the [ggbiplot()] function from the `ggbiplot` package by Vince Vu, as found on GitHub: <https://github.com/vqv/ggbiplot> (retrieved: 2 March 2020, their latest commit: [`7325e88`](https://github.com/vqv/ggbiplot/commit/7325e880485bea4c07465a0304c470608fffb5d9); 12 February 2015).
#'
#' As per their GPL-2 licence that demands documentation of code changes, the changes made based on the source code were:
#' 1. Rewritten code to remove the dependency on packages `plyr`, `scales` and `grid`
#' 2. Parametrised more options, like arrow and ellipse settings
#' 3. Added total amount of explained variance as a caption in the plot
#' 4. Cleaned all syntax based on the `lintr` package
#' 5. Updated documentation
#' @details The default colours for labels and points are set with [scale_colour_viridis_d()], but these can be changed by adding another scale for colour, like [scale_colour_brewer()].
#' @rdname ggplot_pca
#' @export
#' @examples
#' # `example_isolates` is a dataset available in the AMR package.
#' # See ?example_isolates.
#'
#' # See ?pca for more info about Principal Component Analysis (PCA).
#' library(dplyr)
#' pca_model <- example_isolates %>%
#' filter(mo_genus(mo) == "Staphylococcus") %>%
#' group_by(species = mo_shortname(mo)) %>%
#' summarise_if (is.rsi, resistance) %>%
#' pca(FLC, AMC, CXM, GEN, TOB, TMP, SXT, CIP, TEC, TCY, ERY)
#'
#' # old
#' biplot(pca_model)
#'
#' # new
#' ggplot_pca(pca_model)
ggplot_pca <- function(x,
                       choices = 1:2,
                       scale = TRUE,
                       labels = NULL,
                       labels_textsize = 3,
                       labels_text_placement = 1.5,
                       groups = NULL,
                       ellipse = FALSE,
                       ellipse_prob = 0.68,
                       ellipse_size = 0.5,
                       ellipse_alpha = 0.25,
                       points_size = 2,
                       points_alpha = 0.25,
                       arrows = TRUE,
                       arrows_colour = "darkblue",
                       arrows_size = 0.5,
                       arrows_textsize = 3,
                       arrows_alpha = 0.75,
                       base_textsize = 10,
                       ...) {
  # Builds the biplot in layers: points (and optional text labels) for the
  # observations, an optional ellipse per group, optional arrows for the
  # variables, and a caption with the total explained variance.

  stopifnot_installed_package("ggplot2")

  # `missing()` has to be evaluated here, before `groups`/`labels` are
  # overwritten below; it tells the helper whether it may derive them from the
  # `non_numeric_cols` attribute of `x`.
  calculations <- pca_calculations(pca_model = x,
                                   groups = groups,
                                   groups_missing = missing(groups),
                                   labels = labels,
                                   labels_missing = missing(labels),
                                   choices = choices,
                                   scale = scale,
                                   ellipse_prob = ellipse_prob,
                                   labels_text_placement = labels_text_placement)

  # only the elements that are needed for plotting are taken from the helper
  choices <- calculations$choices
  df.u <- calculations$df.u
  df.v <- calculations$df.v
  ell <- calculations$ell
  groups <- calculations$groups
  group_name <- calculations$group_name
  labels <- calculations$labels

  stopifnot(length(choices) == 2)

  # Append the proportion of explained variance to the axis labels
  if (as.integer(scale) == 1L) {
    u.axis.labs <- paste0("Standardised PC", choices)
  } else {
    u.axis.labs <- paste0("PC", choices)
  }
  explained <- x$sdev[choices] ^ 2 / sum(x$sdev ^ 2)
  u.axis.labs <- paste(u.axis.labs,
                       paste0("\n(explained var: ", percentage(explained), ")"))

  # Score labels and grouping variable
  if (!is.null(labels)) {
    df.u$labels <- labels
  }
  if (!is.null(groups)) {
    df.u$groups <- groups
  }
  has_labels <- !is.null(df.u$labels)
  has_groups <- !is.null(df.u$groups)

  # Base plot
  g <- ggplot2::ggplot(data = df.u,
                       ggplot2::aes(x = xvar, y = yvar)) +
    ggplot2::xlab(u.axis.labs[1]) +
    ggplot2::ylab(u.axis.labs[2]) +
    ggplot2::expand_limits(x = c(-1.15, 1.15),
                           y = c(-1.15, 1.15))

  # Draw points, with text labels below them if labels are available;
  # colour both by group if groups are available
  if (has_groups) {
    g <- g +
      ggplot2::geom_point(ggplot2::aes(colour = groups),
                          alpha = points_alpha,
                          size = points_size)
    if (has_labels) {
      g <- g +
        ggplot2::geom_text(ggplot2::aes(label = labels, colour = groups),
                           nudge_y = -0.05,
                           size = labels_textsize)
    }
    g <- g +
      ggplot2::scale_colour_viridis_d() +
      ggplot2::labs(colour = group_name)
  } else {
    g <- g +
      ggplot2::geom_point(alpha = points_alpha,
                          size = points_size)
    if (has_labels) {
      g <- g +
        ggplot2::geom_text(ggplot2::aes(label = labels),
                           nudge_y = -0.05,
                           size = labels_textsize)
    }
  }

  # Overlay a concentration ellipse if there are groups.
  # The transparency is taken from `ellipse_alpha`; this previously used
  # `points_alpha`, which left the `ellipse_alpha` parameter without effect.
  if (has_groups && isTRUE(ellipse) && !is.null(ell)) {
    g <- g +
      ggplot2::geom_path(data = ell,
                         ggplot2::aes(colour = groups, group = groups),
                         size = ellipse_size,
                         alpha = ellipse_alpha)
  }

  # Label the variable axes
  if (isTRUE(arrows)) {
    g <- g +
      ggplot2::geom_segment(data = df.v,
                            ggplot2::aes(x = 0, y = 0, xend = xvar, yend = yvar),
                            arrow = ggplot2::arrow(length = ggplot2::unit(0.5, "picas"),
                                                   angle = 20,
                                                   ends = "last",
                                                   type = "open"),
                            colour = arrows_colour,
                            size = arrows_size,
                            alpha = arrows_alpha) +
      ggplot2::geom_text(data = df.v,
                         ggplot2::aes(label = varname, x = xvar, y = yvar, angle = angle, hjust = hjust),
                         colour = arrows_colour,
                         size = arrows_textsize,
                         alpha = arrows_alpha)
  }

  # Add caption label about total explained variance
  g <- g +
    ggplot2::labs(caption = paste0("Total explained variance: ",
                                   percentage(sum(explained))))

  # mark-up nicely
  g <- g +
    ggplot2::theme_minimal(base_size = base_textsize) +
    ggplot2::theme(panel.grid.major = ggplot2::element_line(colour = "grey85"),
                   panel.grid.minor = ggplot2::element_blank(),
                   # centre title and subtitle
                   plot.title = ggplot2::element_text(hjust = 0.5),
                   plot.subtitle = ggplot2::element_text(hjust = 0.5))

  g
}
#' @importFrom dplyr bind_rows
pca_calculations <- function(pca_model,
                             groups = NULL,
                             groups_missing = TRUE,
                             labels = NULL,
                             labels_missing = TRUE,
                             choices = 1:2,
                             scale = 1,
                             ellipse_prob = 0.68,
                             labels_text_placement = 1.5) {
  # Internal helper of ggplot_pca(): recovers the singular value decomposition
  # from a fitted model and prepares the data needed to draw a biplot.
  #
  # pca_model: object of class prcomp, princomp, PCA or lda
  # groups, labels: optional vectors for the observations; when the matching
  #   `*_missing` flag is TRUE they are taken from the first and second column
  #   of the `non_numeric_cols` attribute of `pca_model` (if present)
  # choices: the two components to use
  # scale: exponent (0 or 1) that divides the scaling between observations
  #   and variables
  # ellipse_prob: probability that sets the size of the group ellipses
  # labels_text_placement: factor used for `hjust` of the variable names
  #
  # Returns a list with nobs.factor, d, u, v, choices, df.u (observation
  # scores), df.v (variable directions incl. varname/angle/hjust), r, ell
  # (ellipse paths per group, or NULL without groups), groups, group_name and
  # labels.

  non_numeric_cols <- attributes(pca_model)$non_numeric_cols

  # must exist in every code path since it is part of the returned list; it
  # used to be undefined when `groups` was set by the user
  group_name <- NULL
  if (groups_missing) {
    groups <- tryCatch(non_numeric_cols[[1]],
                       error = function(e) NULL)
    group_name <- tryCatch(colnames(non_numeric_cols[1]),
                           error = function(e) NULL)
  }
  if (labels_missing) {
    labels <- tryCatch(non_numeric_cols[[2]],
                       error = function(e) NULL)
  }
  if (!is.null(groups) && is.null(labels)) {
    # turn them around
    labels <- groups
    groups <- NULL
    group_name <- NULL
  }

  # Recover the SVD
  if (inherits(pca_model, "prcomp")) {
    nobs.factor <- sqrt(nrow(pca_model$x) - 1)
    d <- pca_model$sdev
    u <- sweep(pca_model$x, 2, 1 / (d * nobs.factor), FUN = "*")
    v <- pca_model$rotation
  } else if (inherits(pca_model, "princomp")) {
    nobs.factor <- sqrt(pca_model$n.obs)
    d <- pca_model$sdev
    u <- sweep(pca_model$scores, 2, 1 / (d * nobs.factor), FUN = "*")
    v <- pca_model$loadings
  } else if (inherits(pca_model, "PCA")) {
    nobs.factor <- sqrt(nrow(pca_model$call$X))
    d <- unlist(sqrt(pca_model$eig)[1])
    u <- sweep(pca_model$ind$coord, 2, 1 / (d * nobs.factor), FUN = "*")
    v <- sweep(pca_model$var$coord, 2, sqrt(pca_model$eig[seq_len(ncol(pca_model$var$coord)), 1]), FUN = "/")
  } else if (inherits(pca_model, "lda")) {
    nobs.factor <- sqrt(pca_model$N)
    d <- pca_model$svd
    u <- stats::predict(pca_model)$x / nobs.factor
    v <- pca_model$scaling
  } else {
    stop("Expected a object of class prcomp, princomp, PCA, or lda")
  }

  # Scores
  choices <- pmin(choices, ncol(u))
  obs.scale <- 1 - as.integer(scale)
  df.u <- as.data.frame(sweep(u[, choices], 2, d[choices] ^ obs.scale, FUN = "*"))

  # Directions
  v <- sweep(v, 2, d ^ as.integer(scale), FUN = "*")
  df.v <- as.data.frame(v[, choices])

  names(df.u) <- c("xvar", "yvar")
  names(df.v) <- names(df.u)

  df.u <- df.u * nobs.factor

  # Scale the radius of the correlation circle so that it corresponds to
  # a data ellipse for the standardized PC scores
  circle_prob <- 0.69
  r <- sqrt(stats::qchisq(circle_prob, df = 2)) * prod(colMeans(df.u ^ 2)) ^ (0.25)

  # Scale directions
  v.scale <- rowSums(v ^ 2)
  df.v <- r * df.v / sqrt(max(v.scale))

  # Grouping variable
  if (!is.null(groups)) {
    df.u$groups <- groups
  }

  df.v$varname <- rownames(v)

  # Variables for text label placement
  df.v$angle <- (180 / pi) * atan(df.v$yvar / df.v$xvar)
  df.v$hjust <- (1 - labels_text_placement * sign(df.v$xvar)) / 2

  if (!is.null(df.u$groups)) {
    # unit circle that is transformed into an ellipse for every group; these
    # are local objects (no `<<-`), so nothing leaks into the global environment
    theta <- c(seq(-pi, pi, length.out = 50), seq(pi, -pi, length.out = 50))
    circle <- cbind(cos(theta), sin(theta))
    ed <- sqrt(stats::qchisq(ellipse_prob, df = 2))

    # lapply() instead of sapply(): the latter may simplify data.frames of
    # equal length into a matrix of lists, which cannot be row-bound
    ell_list <- lapply(unique(df.u$groups), function(g) {
      group_rows <- df.u[which(df.u$groups == g), , drop = FALSE]
      if (nrow(group_rows) <= 2) {
        # too few observations to estimate a covariance ellipse
        return(NULL)
      }
      sigma <- stats::var(cbind(group_rows$xvar, group_rows$yvar))
      mu <- c(mean(group_rows$xvar), mean(group_rows$yvar))
      data.frame(sweep(circle %*% chol(sigma) * ed, 2, mu, FUN = "+"),
                 groups = group_rows$groups[1])
    })
    ell_list <- Filter(Negate(is.null), ell_list)

    if (length(ell_list) > 0) {
      ell <- do.call(rbind, ell_list)
      names(ell)[1:2] <- c("xvar", "yvar")
    } else {
      # no group was large enough: return an empty path, so nothing is drawn
      ell <- data.frame(xvar = numeric(0),
                        yvar = numeric(0),
                        groups = df.u$groups[0])
    }
  } else {
    ell <- NULL
  }

  list(nobs.factor = nobs.factor,
       d = d,
       u = u,
       v = v,
       choices = choices,
       df.u = df.u,
       df.v = df.v,
       r = r,
       ell = ell,
       groups = groups,
       group_name = group_name,
       labels = labels
  )
}

View File

@ -186,12 +186,12 @@ ggplot_rsi <- function(data,
x.title = "Antimicrobial",
y.title = "Proportion",
...) {
stopifnot_installed_package("ggplot2")
x <- x[1]
facet <- facet[1]
# we work with aes_string later on
x_deparse <- deparse(substitute(x))
if (x_deparse != "x") {
@ -210,16 +210,16 @@ ggplot_rsi <- function(data,
if (facet %in% c("NULL", "")) {
facet <- NULL
}
if (is.null(position)) {
position <- "fill"
}
p <- ggplot2::ggplot(data = data) +
geom_rsi(position = position, x = x, fill = fill, translate_ab = translate_ab,
combine_SI = combine_SI, combine_IR = combine_IR, ...) +
theme_rsi()
if (fill == "interpretation") {
# set RSI colours
if (isFALSE(colours) & missing(datalabels.colour)) {
@ -228,12 +228,12 @@ ggplot_rsi <- function(data,
}
p <- p + scale_rsi_colours(colours = colours)
}
if (identical(position, "fill")) {
# proportions, so use y scale with percentage
p <- p + scale_y_percent(breaks = breaks, limits = limits)
}
if (datalabels == TRUE) {
p <- p + labels_rsi_count(position = position,
x = x,
@ -243,17 +243,17 @@ ggplot_rsi <- function(data,
datalabels.size = datalabels.size,
datalabels.colour = datalabels.colour)
}
if (!is.null(facet)) {
p <- p + facet_rsi(facet = facet, nrow = nrow)
}
p <- p + ggplot2::labs(title = title,
subtitle = subtitle,
caption = caption,
x = x.title,
y = y.title)
p
}
@ -267,24 +267,24 @@ geom_rsi <- function(position = NULL,
combine_SI = TRUE,
combine_IR = FALSE,
...) {
stopifnot_installed_package("ggplot2")
if (is.data.frame(position)) {
stop("`position` is invalid. Did you accidentally use '%>%' instead of '+'?", call. = FALSE)
}
y <- "value"
if (missing(position) | is.null(position)) {
position <- "fill"
}
if (identical(position, "fill")) {
position <- ggplot2::position_fill(vjust = 0.5, reverse = TRUE)
}
x <- x[1]
# we work with aes_string later on
x_deparse <- deparse(substitute(x))
if (x_deparse != "x") {
@ -293,33 +293,33 @@ geom_rsi <- function(position = NULL,
if (x %like% '".*"') {
x <- substr(x, 2, nchar(x) - 1)
}
if (tolower(x) %in% tolower(c("ab", "abx", "antibiotics"))) {
x <- "antibiotic"
} else if (tolower(x) %in% tolower(c("SIR", "RSI", "interpretations", "result"))) {
x <- "interpretation"
}
ggplot2::layer(geom = "bar", stat = "identity", position = position,
mapping = ggplot2::aes_string(x = x, y = y, fill = fill),
params = list(...), data = function(x) {
rsi_df(data = x,
translate_ab = translate_ab,
language = language,
combine_SI = combine_SI,
combine_IR = combine_IR)
translate_ab = translate_ab,
language = language,
combine_SI = combine_SI,
combine_IR = combine_IR)
})
}
#' @rdname ggplot_rsi
#' @export
facet_rsi <- function(facet = c("interpretation", "antibiotic"), nrow = NULL) {
stopifnot_installed_package("ggplot2")
facet <- facet[1]
# we work with aes_string later on
facet_deparse <- deparse(substitute(facet))
if (facet_deparse != "facet") {
@ -328,13 +328,13 @@ facet_rsi <- function(facet = c("interpretation", "antibiotic"), nrow = NULL) {
if (facet %like% '".*"') {
facet <- substr(facet, 2, nchar(facet) - 1)
}
if (tolower(facet) %in% tolower(c("SIR", "RSI", "interpretations", "result"))) {
facet <- "interpretation"
} else if (tolower(facet) %in% tolower(c("ab", "abx", "antibiotics"))) {
facet <- "antibiotic"
}
ggplot2::facet_wrap(facets = facet, scales = "free_x", nrow = nrow)
}
@ -343,7 +343,7 @@ facet_rsi <- function(facet = c("interpretation", "antibiotic"), nrow = NULL) {
#' @export
scale_y_percent <- function(breaks = seq(0, 1, 0.1), limits = NULL) {
stopifnot_installed_package("ggplot2")
if (all(breaks[breaks != 0] > 1)) {
breaks <- breaks / 100
}
@ -362,7 +362,7 @@ scale_rsi_colours <- function(colours = c(S = "#61a8ff",
stopifnot_installed_package("ggplot2")
# previous colour: palette = "RdYlGn"
# previous colours: values = c("#b22222", "#ae9c20", "#7cfc00")
if (!identical(colours, FALSE)) {
original_cols <- c(S = "#61a8ff",
SI = "#61a8ff",

View File

@ -32,7 +32,7 @@
#' This page contains a section for every lifecycle (with text borrowed from the aforementioned `tidyverse` website), so they can be used in the manual pages of our functions.
#' @section Experimental lifecycle:
#' \if{html}{\figure{lifecycle_experimental.svg}{options: style=margin-bottom:5px} \cr}
#' The [lifecycle][AMR::lifecycle] of this function is **experimental**. An experimental function is in the very early stages of development. The unlying code might be changing frequently as we rapidly iterate and explore variations in search of the best fit. Experimental functions might be removed without deprecation, so you are generally best off waiting until a function is more mature before you use it in production code. Experimental functions will not be included in releases we submit to CRAN.
#' The [lifecycle][AMR::lifecycle] of this function is **experimental**. An experimental function is in the very early stages of development. The underlying code might be changing frequently as we rapidly iterate and explore variations in search of the best fit. Experimental functions might be removed without deprecation, so you are generally best off waiting until a function is more mature before you use it in production code. Experimental functions will not be included in releases we submit to CRAN, since they have not yet matured enough.
#' @section Maturing lifecycle:
#' \if{html}{\figure{lifecycle_maturing.svg}{options: style=margin-bottom:5px} \cr}
#' The [lifecycle][AMR::lifecycle] of this function is **maturing**. The underlying code of a maturing function has been roughed out, but finer details might still change. We will strive to maintain backward compatibility, but the function needs wider usage and more extensive testing in order to optimise the underlying code.

128
R/pca.R Executable file
View File

@ -0,0 +1,128 @@
# ==================================================================== #
# TITLE #
# Antimicrobial Resistance (AMR) Analysis #
# #
# SOURCE #
# https://gitlab.com/msberends/AMR #
# #
# LICENCE #
# (c) 2018-2020 Berends MS, Luz CF et al. #
# #
# This R package is free software; you can freely use and distribute #
# it for both personal and commercial purposes under the terms of the #
# GNU General Public License version 2.0 (GNU GPL-2), as published by #
# the Free Software Foundation. #
# #
# We created this package for both routine data analysis and academic #
# research and it was publicly released in the hope that it will be #
# useful, but it comes WITHOUT ANY WARRANTY OR LIABILITY. #
# Visit our website for more info: https://msberends.gitlab.io/AMR. #
# ==================================================================== #
#' Principal Component Analysis (for AMR)
#'
#' Performs a principal component analysis (PCA) on a data set, and automatically determines the groups and labels that can be used for plotting afterwards.
#' @inheritSection lifecycle Experimental lifecycle
#' @param x a [data.frame] containing numeric columns
#' @param ... columns of `x` to be selected for PCA
#' @inheritParams stats::prcomp
#' @details The [pca()] function takes a [data.frame] as input and performs the actual PCA with the R function [prcomp()].
#'
#' The result of the [pca()] function is a [`prcomp`] object, with an additional attribute `non_numeric_cols` which is a [data.frame] with all columns that do not contain numeric values (and that are not entirely `NA`). These are probably the groups and labels, and will be used by [ggplot_pca()].
#' @rdname pca
#' @exportMethod prcomp.data.frame
#' @export
#' @examples
#' # `example_isolates` is a dataset available in the AMR package.
#' # See ?example_isolates.
#'
#' # calculate the resistance per group first
#' library(dplyr)
#' resistance_data <- example_isolates %>%
#' group_by(order = mo_order(mo), # group on anything, like order
#' genus = mo_genus(mo)) %>% # and genus as we do here
#' summarise_if(is.rsi, resistance) # then get resistance of all drugs
#'
#' # now conduct PCA for certain antimicrobial agents
#' pca_result <- resistance_data %>%
#' pca(AMC, CXM, CTX, CAZ, GEN, TOB, TMP, SXT)
#'
#' pca_result
#' summary(pca_result)
#' biplot(pca_result)
#' ggplot_pca(pca_result) # a new and convenient plot function
prcomp.data.frame <- function(x,
                              ...,
                              retx = TRUE,
                              center = TRUE,
                              scale. = TRUE,
                              tol = NULL,
                              rank. = NULL) {
  # S3 method of prcomp() for data.frames: selects the columns given in `...`
  # (all columns if none are given), keeps the numeric ones, reports what is
  # used and returns the `prcomp` object of those columns.

  x <- pca_transform_x(x = x, ...)

  # `drop = FALSE` keeps a data.frame when only one numeric column is left, so
  # that the column name and the number of rows can still be reported below
  numeric_cols <- vapply(x, is.numeric, logical(1))
  pca_data <- x[, which(numeric_cols), drop = FALSE]

  message(blue(paste0("NOTE: Columns selected for PCA: ", paste0(bold(colnames(pca_data)), collapse = "/"),
                      ".\n      Total observations available: ", nrow(pca_data), ".")))

  # a matrix dispatches to the default method of stats::prcomp(), which avoids
  # reaching into the stats namespace with `:::`
  stats::prcomp(as.matrix(pca_data),
                retx = retx,
                center = center,
                scale. = scale.,
                tol = tol,
                rank. = rank.)
}
#' @rdname pca
#' @export
pca <- function(x, ...) {
  if (!is.data.frame(x)) {
    stop("this function only takes a data.frame as input")
  }

  # dispatches to prcomp.data.frame(), which handles the column selection
  pca_model <- prcomp(x, ...)

  # run the same transformation again to get the rows that were actually used,
  # and store their non-numeric columns for use as groups and labels in plots
  x <- pca_transform_x(x = x, ...)
  is_group_or_label <- vapply(x,
                              function(y) !is.numeric(y) && !all(is.na(y)),
                              logical(1))
  attr(pca_model, "non_numeric_cols") <- x[, is_group_or_label, drop = FALSE]

  pca_model
}
#' @importFrom dplyr ungroup %>% filter_all all_vars
#' @importFrom rlang enquos eval_tidy
pca_transform_x <- function(x, ...) {
  # Internal helper: prepares the input of pca() / prcomp.data.frame().
  #
  # x: a data.frame (or something that can be coerced to one)
  # ...: unquoted columns or expressions of `x` to select for PCA; the first
  #   item may also be a column name as a character string. Any later item
  #   that evaluates to a single value is considered to be a parameter that is
  #   meant for prcomp() (like `center`) and is ignored here.
  #
  # Returns a data.frame with the non-numeric columns of `x` (that are not
  # entirely NA) followed by the selected numeric columns, without the rows
  # that contain any NA. If nothing is selected, all columns of `x` are kept.

  # unset data.table, tbl_df, etc.
  # also removes groups made by dplyr::group_by
  x <- as.data.frame(x, stringsAsFactors = FALSE)
  x.bak <- x

  user_exprs <- enquos(...)
  if (length(user_exprs) > 0) {
    n_exprs <- length(user_exprs)
    # Values, labels and a keep-flag are stored per position, and filtered
    # afterwards. Removing items from the list inside the loop would shift the
    # positions, after which labels and columns no longer match.
    selected <- vector("list", n_exprs)
    col_labels <- character(n_exprs)
    keep <- rep(FALSE, n_exprs)

    for (i in seq_len(n_exprs)) {
      value <- tryCatch(eval_tidy(user_exprs[[i]], data = x),
                        error = function(e) stop(e$message, call. = FALSE))
      if (is.null(value)) {
        next
      }
      label <- as_label(user_exprs[[i]])
      if (length(value) == 1) {
        if (i > 1) {
          # skip item - it's a parameter like `center`
          next
        }
        # only for first item:
        if (is.character(value) && value %in% colnames(x)) {
          # this is to support: df %>% pca("mycol")
          label <- value
          value <- x[[value]]
        }
      }
      selected[[i]] <- value
      col_labels[i] <- label
      keep[i] <- TRUE
    }
    selected <- selected[keep]
    col_labels <- col_labels[keep]

    x <- as.data.frame(selected, stringsAsFactors = FALSE)
    if (any(!vapply(x, is.numeric, logical(1)))) {
      warning("Be sure to first calculate the resistance (or susceptibility) of variables with antimicrobial test results, since PCA works with numeric variables only. Please see Examples in ?pca.")
    }

    # set column names
    if (length(col_labels) == ncol(x)) {
      colnames(x) <- col_labels
    } else {
      warning("column names could not be set")
    }

    # keep only numeric columns; `drop = FALSE` keeps the column name when
    # only a single column was selected
    x <- x[, vapply(x, is.numeric, logical(1)), drop = FALSE]

    # bind the data set with the non-numeric columns
    non_numeric <- vapply(x.bak,
                          function(y) !is.numeric(y) && !all(is.na(y)),
                          logical(1))
    x <- cbind(x.bak[, non_numeric, drop = FALSE], x)
  }

  x %>%
    ungroup() %>% # would otherwise select the grouping vars
    filter_all(all_vars(!is.na(.)))
}

View File

@ -44,6 +44,9 @@ navbar:
- text: "Predict antimicrobial resistance"
icon: "fa-dice"
href: "articles/resistance_predict.html"
- text: "Conduct principal component analysis for AMR"
icon: "fa-compress"
href: "articles/PCA.html"
- text: "Determine multi-drug resistance (MDR)"
icon: "fa-skull-crossbones"
href: "articles/MDR.html"
@ -94,7 +97,6 @@ reference:
- "`guess_ab_col`"
- "`mo_source`"
- "`read.4D`"
- "`rsi_translation`"
- title: "Enhancing your data"
desc: >
Functions to add new data to your existing data, such as the determination
@ -117,28 +119,31 @@ reference:
Functions for conducting AMR analysis, like counting isolates, calculating
resistance or susceptibility, or make plots.
contents:
- "`proportion`"
- "`count`"
- "`availability`"
- "`bug_drug_combinations`"
- "`count`"
- "`resistance_predict`"
- "`pca`"
- "`filter_ab_class`"
- "`g.test`"
- "`ggplot_rsi`"
- "`ggplot_pca`"
- "`kurtosis`"
- "`portion`"
- "`resistance_predict`"
- "`skewness`"
- title: "Included data sets"
desc: >
Scientifically reliable references for microorganisms and
antibiotics, and example data sets to use for practise.
contents:
- "`microorganisms`"
- "`antibiotics`"
- "`antivirals`"
- "`example_isolates`"
- "`example_isolates_unclean`"
- "`rsi_translation`"
- "`microorganisms.codes`"
- "`microorganisms.old`"
- "`microorganisms`"
- "`WHONET`"
- title: "Background information"
desc: >

View File

@ -154,7 +154,11 @@ data %>%
origin = 'iso2c',
destination = 'country.name')) %>%
summarise(first = min(timestamp_server)) %>%
arrange(desc(first))
arrange(desc(first)) %>%
mutate(frame = case_when(first <= as.POSIXct("2019-06-30") ~ "Q1-Q2 2019",
first <= as.POSIXct("2019-12-31") ~ "Q3-Q4 2019",
TRUE ~ "Q1-Q2 2020")) %>%
View()
#
# p1 <- data %>%
# group_by(country) %>%

View File

@ -78,7 +78,7 @@
</button>
<span class="navbar-brand">
<a class="navbar-link" href="https://msberends.gitlab.io/AMR/index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.0.1</span>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.0.1.9000</span>
</span>
</div>
@ -114,6 +114,13 @@
Predict antimicrobial resistance
</a>
</li>
<li>
<a href="articles/PCA.html">
<span class="fa fa-compress"></span>
Conduct principal component analysis for AMR
</a>
</li>
<li>
<a href="articles/MDR.html">
<span class="fa fa-skull-crossbones"></span>

View File

@ -78,7 +78,7 @@
</button>
<span class="navbar-brand">
<a class="navbar-link" href="index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.0.1</span>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.0.1.9000</span>
</span>
</div>
@ -114,6 +114,13 @@
Predict antimicrobial resistance
</a>
</li>
<li>
<a href="articles/PCA.html">
<span class="fa fa-compress"></span>
Conduct principal component analysis for AMR
</a>
</li>
<li>
<a href="articles/MDR.html">
<span class="fa fa-skull-crossbones"></span>

View File

@ -39,7 +39,7 @@
</button>
<span class="navbar-brand">
<a class="navbar-link" href="../index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.0.1</span>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.0.1.9000</span>
</span>
</div>
@ -75,6 +75,13 @@
Predict antimicrobial resistance
</a>
</li>
<li>
<a href="../articles/PCA.html">
<span class="fa fa-compress-alt"></span>
Conduct Principal Component Analysis for AMR
</a>
</li>
<li>
<a href="../articles/MDR.html">
<span class="fa fa-skull-crossbones"></span>
@ -179,7 +186,7 @@
<h1>How to apply EUCAST rules</h1>
<h4 class="author">Matthijs S. Berends</h4>
<h4 class="date">23 February 2020</h4>
<h4 class="date">07 March 2020</h4>
<div class="hidden name"><code>EUCAST.Rmd</code></div>
@ -293,7 +300,7 @@
</tr>
</tbody>
</table>
<div class="sourceCode" id="cb4"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb4-1"><a href="#cb4-1"></a><span class="kw"><a href="../reference/eucast_rules.html">eucast_rules</a></span>(data, <span class="dt">info =</span> <span class="ot">FALSE</span>)</span></code></pre></div>
<div class="sourceCode" id="cb4"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb4-1"><a href="#cb4-1"></a><span class="kw"><a href="../reference/eucast_rules.html">eucast_rules</a></span>(data)</span></code></pre></div>
<table class="table">
<thead><tr class="header">
<th align="left">mo</th>

345
docs/articles/PCA.html Normal file
View File

@ -0,0 +1,345 @@
<!DOCTYPE html>
<!-- Generated by pkgdown: do not edit by hand --><html lang="en">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>How to conduct principal component analysis (PCA) for AMR • AMR (for R)</title>
<!-- favicons --><link rel="icon" type="image/png" sizes="16x16" href="../favicon-16x16.png">
<link rel="icon" type="image/png" sizes="32x32" href="../favicon-32x32.png">
<link rel="apple-touch-icon" type="image/png" sizes="180x180" href="../apple-touch-icon.png">
<link rel="apple-touch-icon" type="image/png" sizes="120x120" href="../apple-touch-icon-120x120.png">
<link rel="apple-touch-icon" type="image/png" sizes="76x76" href="../apple-touch-icon-76x76.png">
<link rel="apple-touch-icon" type="image/png" sizes="60x60" href="../apple-touch-icon-60x60.png">
<!-- jquery --><script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.3.1/jquery.min.js" integrity="sha256-FgpCb/KJQlLNfOu91ta32o/NMZxltwRo8QtmkMRdAu8=" crossorigin="anonymous"></script><!-- Bootstrap --><link href="https://cdnjs.cloudflare.com/ajax/libs/bootswatch/3.3.7/flatly/bootstrap.min.css" rel="stylesheet" crossorigin="anonymous">
<script src="https://cdnjs.cloudflare.com/ajax/libs/twitter-bootstrap/3.3.7/js/bootstrap.min.js" integrity="sha256-U5ZEeKfGNOja007MMD3YBI0A3OSZOQbeG6z2f2Y0hu8=" crossorigin="anonymous"></script><!-- Font Awesome icons --><link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.7.1/css/all.min.css" integrity="sha256-nAmazAk6vS34Xqo0BSrTb+abbtFlgsFK7NKSi6o7Y78=" crossorigin="anonymous">
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.7.1/css/v4-shims.min.css" integrity="sha256-6qHlizsOWFskGlwVOKuns+D1nB6ssZrHQrNj1wGplHc=" crossorigin="anonymous">
<!-- clipboard.js --><script src="https://cdnjs.cloudflare.com/ajax/libs/clipboard.js/2.0.4/clipboard.min.js" integrity="sha256-FiZwavyI2V6+EXO1U+xzLG3IKldpiTFf3153ea9zikQ=" crossorigin="anonymous"></script><!-- headroom.js --><script src="https://cdnjs.cloudflare.com/ajax/libs/headroom/0.9.4/headroom.min.js" integrity="sha256-DJFC1kqIhelURkuza0AvYal5RxMtpzLjFhsnVIeuk+U=" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/headroom/0.9.4/jQuery.headroom.min.js" integrity="sha256-ZX/yNShbjqsohH1k95liqY9Gd8uOiE1S4vZc+9KQ1K4=" crossorigin="anonymous"></script><!-- pkgdown --><link href="../pkgdown.css" rel="stylesheet">
<script src="../pkgdown.js"></script><link href="../extra.css" rel="stylesheet">
<script src="../extra.js"></script><meta property="og:title" content="How to conduct principal component analysis (PCA) for AMR">
<meta property="og:description" content="">
<meta property="og:image" content="https://msberends.gitlab.io/AMR/logo.png">
<meta name="twitter:card" content="summary">
<!-- mathjax --><script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/MathJax.js" integrity="sha256-nvJJv9wWKEm88qvoQl9ekL2J+k/RWIsaSScxxlsrv8k=" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/config/TeX-AMS-MML_HTMLorMML.js" integrity="sha256-84DKXVJXs0/F8OTMzX4UR909+jtl4G7SPypPavF+GfA=" crossorigin="anonymous"></script><!--[if lt IE 9]>
<script src="https://oss.maxcdn.com/html5shiv/3.7.3/html5shiv.min.js"></script>
<script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
<![endif]-->
</head>
<body>
<div class="container template-article">
<header><div class="navbar navbar-default navbar-fixed-top" role="navigation">
<div class="container">
<div class="navbar-header">
<button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#navbar" aria-expanded="false">
<span class="sr-only">Toggle navigation</span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
</button>
<span class="navbar-brand">
<a class="navbar-link" href="../index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.0.1.9000</span>
</span>
</div>
<div id="navbar" class="navbar-collapse collapse">
<ul class="nav navbar-nav">
<li>
<a href="../index.html">
<span class="fa fa-home"></span>
Home
</a>
</li>
<li class="dropdown">
<a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-expanded="false">
<span class="fa fa-question-circle"></span>
How to
<span class="caret"></span>
</a>
<ul class="dropdown-menu" role="menu">
<li>
<a href="../articles/AMR.html">
<span class="fa fa-directions"></span>
Conduct AMR analysis
</a>
</li>
<li>
<a href="../articles/resistance_predict.html">
<span class="fa fa-dice"></span>
Predict antimicrobial resistance
</a>
</li>
<li>
<a href="../articles/PCA.html">
<span class="fa fa-compress"></span>
Conduct principal component analysis for AMR
</a>
</li>
<li>
<a href="../articles/MDR.html">
<span class="fa fa-skull-crossbones"></span>
Determine multi-drug resistance (MDR)
</a>
</li>
<li>
<a href="../articles/WHONET.html">
<span class="fa fa-globe-americas"></span>
Work with WHONET data
</a>
</li>
<li>
<a href="../articles/SPSS.html">
<span class="fa fa-file-upload"></span>
Import data from SPSS/SAS/Stata
</a>
</li>
<li>
<a href="../articles/EUCAST.html">
<span class="fa fa-exchange-alt"></span>
Apply EUCAST rules
</a>
</li>
<li>
<a href="../reference/mo_property.html">
<span class="fa fa-bug"></span>
Get properties of a microorganism
</a>
</li>
<li>
<a href="../reference/ab_property.html">
<span class="fa fa-capsules"></span>
Get properties of an antibiotic
</a>
</li>
<li>
<a href="../articles/benchmarks.html">
<span class="fa fa-shipping-fast"></span>
Other: benchmarks
</a>
</li>
</ul>
</li>
<li>
<a href="../reference/">
<span class="fa fa-book-open"></span>
Manual
</a>
</li>
<li>
<a href="../authors.html">
<span class="fa fa-users"></span>
Authors
</a>
</li>
<li>
<a href="../news/">
<span class="far fa far fa-newspaper"></span>
Changelog
</a>
</li>
</ul>
<ul class="nav navbar-nav navbar-right">
<li>
<a href="https://gitlab.com/msberends/AMR">
<span class="fab fa fab fa-gitlab"></span>
Source Code
</a>
</li>
<li>
<a href="../LICENSE-text.html">
<span class="fa fa-book"></span>
Licence
</a>
</li>
</ul>
</div>
<!--/.nav-collapse -->
</div>
<!--/.container -->
</div>
<!--/.navbar -->
</header><div class="row">
<div class="col-md-9 contents">
<div class="page-header toc-ignore">
<h1>How to conduct principal component analysis (PCA) for AMR</h1>
<h4 class="author">Matthijs S. Berends</h4>
<h4 class="date">07 March 2020</h4>
<div class="hidden name"><code>PCA.Rmd</code></div>
</div>
<p><strong>NOTE: This page will be updated soon, as the pca() function is currently being developed.</strong></p>
<div id="introduction" class="section level1">
<h1 class="hasAnchor">
<a href="#introduction" class="anchor"></a>Introduction</h1>
</div>
<div id="transforming" class="section level1">
<h1 class="hasAnchor">
<a href="#transforming" class="anchor"></a>Transforming</h1>
<p>For PCA, we need to transform our AMR data first. This is what the <code>example_isolates</code> data set in this package looks like:</p>
<div class="sourceCode" id="cb1"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb1-1"><a href="#cb1-1"></a><span class="kw"><a href="https://rdrr.io/r/base/library.html">library</a></span>(AMR)</span>
<span id="cb1-2"><a href="#cb1-2"></a><span class="kw"><a href="https://rdrr.io/r/base/library.html">library</a></span>(dplyr)</span>
<span id="cb1-3"><a href="#cb1-3"></a><span class="kw"><a href="https://dplyr.tidyverse.org/reference/reexports.html">glimpse</a></span>(example_isolates)</span>
<span id="cb1-4"><a href="#cb1-4"></a><span class="co"># Observations: 2,000</span></span>
<span id="cb1-5"><a href="#cb1-5"></a><span class="co"># Variables: 49</span></span>
<span id="cb1-6"><a href="#cb1-6"></a><span class="co"># $ date &lt;date&gt; 2002-01-02, 2002-01-03, 2002-01-07, 2002-01-07, 2002…</span></span>
<span id="cb1-7"><a href="#cb1-7"></a><span class="co"># $ hospital_id &lt;fct&gt; D, D, B, B, B, B, D, D, B, B, D, D, D, D, D, B, B, B,…</span></span>
<span id="cb1-8"><a href="#cb1-8"></a><span class="co"># $ ward_icu &lt;lgl&gt; FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, FALSE, FALSE, T…</span></span>
<span id="cb1-9"><a href="#cb1-9"></a><span class="co"># $ ward_clinical &lt;lgl&gt; TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, TRUE, TRUE, F…</span></span>
<span id="cb1-10"><a href="#cb1-10"></a><span class="co"># $ ward_outpatient &lt;lgl&gt; FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALS…</span></span>
<span id="cb1-11"><a href="#cb1-11"></a><span class="co"># $ age &lt;dbl&gt; 65, 65, 45, 45, 45, 45, 78, 78, 45, 79, 67, 67, 71, 7…</span></span>
<span id="cb1-12"><a href="#cb1-12"></a><span class="co"># $ gender &lt;chr&gt; "F", "F", "F", "F", "F", "F", "M", "M", "F", "F", "M"…</span></span>
<span id="cb1-13"><a href="#cb1-13"></a><span class="co"># $ patient_id &lt;chr&gt; "A77334", "A77334", "067927", "067927", "067927", "06…</span></span>
<span id="cb1-14"><a href="#cb1-14"></a><span class="co"># $ mo &lt;mo&gt; B_ESCHR_COLI, B_ESCHR_COLI, B_STPHY_EPDR, B_STPHY_EPDR…</span></span>
<span id="cb1-15"><a href="#cb1-15"></a><span class="co"># $ PEN &lt;rsi&gt; R, R, R, R, R, R, R, R, R, R, R, R, R, R, R, R, R, R,…</span></span>
<span id="cb1-16"><a href="#cb1-16"></a><span class="co"># $ OXA &lt;rsi&gt; NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…</span></span>
<span id="cb1-17"><a href="#cb1-17"></a><span class="co"># $ FLC &lt;rsi&gt; NA, NA, R, R, R, R, S, S, R, S, S, S, NA, NA, NA, NA,…</span></span>
<span id="cb1-18"><a href="#cb1-18"></a><span class="co"># $ AMX &lt;rsi&gt; NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…</span></span>
<span id="cb1-19"><a href="#cb1-19"></a><span class="co"># $ AMC &lt;rsi&gt; I, I, NA, NA, NA, NA, S, S, NA, NA, S, S, I, I, R, I,…</span></span>
<span id="cb1-20"><a href="#cb1-20"></a><span class="co"># $ AMP &lt;rsi&gt; NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…</span></span>
<span id="cb1-21"><a href="#cb1-21"></a><span class="co"># $ TZP &lt;rsi&gt; NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…</span></span>
<span id="cb1-22"><a href="#cb1-22"></a><span class="co"># $ CZO &lt;rsi&gt; NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…</span></span>
<span id="cb1-23"><a href="#cb1-23"></a><span class="co"># $ FEP &lt;rsi&gt; NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…</span></span>
<span id="cb1-24"><a href="#cb1-24"></a><span class="co"># $ CXM &lt;rsi&gt; I, I, R, R, R, R, S, S, R, S, S, S, S, S, NA, S, S, R…</span></span>
<span id="cb1-25"><a href="#cb1-25"></a><span class="co"># $ FOX &lt;rsi&gt; NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…</span></span>
<span id="cb1-26"><a href="#cb1-26"></a><span class="co"># $ CTX &lt;rsi&gt; NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, S, S,…</span></span>
<span id="cb1-27"><a href="#cb1-27"></a><span class="co"># $ CAZ &lt;rsi&gt; NA, NA, R, R, R, R, R, R, R, R, R, R, NA, NA, NA, S, …</span></span>
<span id="cb1-28"><a href="#cb1-28"></a><span class="co"># $ CRO &lt;rsi&gt; NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, S, S,…</span></span>
<span id="cb1-29"><a href="#cb1-29"></a><span class="co"># $ GEN &lt;rsi&gt; NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…</span></span>
<span id="cb1-30"><a href="#cb1-30"></a><span class="co"># $ TOB &lt;rsi&gt; NA, NA, NA, NA, NA, NA, S, S, NA, NA, NA, NA, S, S, N…</span></span>
<span id="cb1-31"><a href="#cb1-31"></a><span class="co"># $ AMK &lt;rsi&gt; NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…</span></span>
<span id="cb1-32"><a href="#cb1-32"></a><span class="co"># $ KAN &lt;rsi&gt; NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…</span></span>
<span id="cb1-33"><a href="#cb1-33"></a><span class="co"># $ TMP &lt;rsi&gt; R, R, S, S, R, R, R, R, S, S, NA, NA, S, S, S, S, S, …</span></span>
<span id="cb1-34"><a href="#cb1-34"></a><span class="co"># $ SXT &lt;rsi&gt; R, R, S, S, NA, NA, NA, NA, S, S, NA, NA, S, S, S, S,…</span></span>
<span id="cb1-35"><a href="#cb1-35"></a><span class="co"># $ NIT &lt;rsi&gt; NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…</span></span>
<span id="cb1-36"><a href="#cb1-36"></a><span class="co"># $ FOS &lt;rsi&gt; NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…</span></span>
<span id="cb1-37"><a href="#cb1-37"></a><span class="co"># $ LNZ &lt;rsi&gt; R, R, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, R, R, R…</span></span>
<span id="cb1-38"><a href="#cb1-38"></a><span class="co"># $ CIP &lt;rsi&gt; NA, NA, NA, NA, NA, NA, NA, NA, S, S, NA, NA, NA, NA,…</span></span>
<span id="cb1-39"><a href="#cb1-39"></a><span class="co"># $ MFX &lt;rsi&gt; NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…</span></span>
<span id="cb1-40"><a href="#cb1-40"></a><span class="co"># $ VAN &lt;rsi&gt; R, R, S, S, S, S, S, S, S, S, NA, NA, R, R, R, R, R, …</span></span>
<span id="cb1-41"><a href="#cb1-41"></a><span class="co"># $ TEC &lt;rsi&gt; R, R, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, R, R, R…</span></span>
<span id="cb1-42"><a href="#cb1-42"></a><span class="co"># $ TCY &lt;rsi&gt; R, R, S, S, S, S, S, S, S, I, S, S, NA, NA, I, R, R, …</span></span>
<span id="cb1-43"><a href="#cb1-43"></a><span class="co"># $ TGC &lt;rsi&gt; NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…</span></span>
<span id="cb1-44"><a href="#cb1-44"></a><span class="co"># $ DOX &lt;rsi&gt; NA, NA, S, S, S, S, S, S, S, NA, S, S, NA, NA, NA, R,…</span></span>
<span id="cb1-45"><a href="#cb1-45"></a><span class="co"># $ ERY &lt;rsi&gt; R, R, R, R, R, R, S, S, R, S, S, S, R, R, R, R, R, R,…</span></span>
<span id="cb1-46"><a href="#cb1-46"></a><span class="co"># $ CLI &lt;rsi&gt; NA, NA, NA, NA, NA, R, NA, NA, NA, NA, NA, NA, NA, NA…</span></span>
<span id="cb1-47"><a href="#cb1-47"></a><span class="co"># $ AZM &lt;rsi&gt; R, R, R, R, R, R, S, S, R, S, S, S, R, R, R, R, R, R,…</span></span>
<span id="cb1-48"><a href="#cb1-48"></a><span class="co"># $ IPM &lt;rsi&gt; NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, S, S,…</span></span>
<span id="cb1-49"><a href="#cb1-49"></a><span class="co"># $ MEM &lt;rsi&gt; NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…</span></span>
<span id="cb1-50"><a href="#cb1-50"></a><span class="co"># $ MTR &lt;rsi&gt; NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…</span></span>
<span id="cb1-51"><a href="#cb1-51"></a><span class="co"># $ CHL &lt;rsi&gt; NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…</span></span>
<span id="cb1-52"><a href="#cb1-52"></a><span class="co"># $ COL &lt;rsi&gt; NA, NA, R, R, R, R, R, R, R, R, R, R, NA, NA, NA, R, …</span></span>
<span id="cb1-53"><a href="#cb1-53"></a><span class="co"># $ MUP &lt;rsi&gt; NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…</span></span>
<span id="cb1-54"><a href="#cb1-54"></a><span class="co"># $ RIF &lt;rsi&gt; R, R, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, R, R, R…</span></span></code></pre></div>
<p>Now to transform this to a data set with only resistance percentages per taxonomic order and genus:</p>
<div class="sourceCode" id="cb2"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb2-1"><a href="#cb2-1"></a>resistance_data &lt;-<span class="st"> </span>example_isolates <span class="op">%&gt;%</span><span class="st"> </span></span>
<span id="cb2-2"><a href="#cb2-2"></a><span class="st"> </span><span class="kw"><a href="https://dplyr.tidyverse.org/reference/group_by.html">group_by</a></span>(<span class="dt">order =</span> <span class="kw"><a href="../reference/mo_property.html">mo_order</a></span>(mo), <span class="co"># group on anything, like order</span></span>
<span id="cb2-3"><a href="#cb2-3"></a> <span class="dt">genus =</span> <span class="kw"><a href="../reference/mo_property.html">mo_genus</a></span>(mo)) <span class="op">%&gt;%</span><span class="st"> </span><span class="co"># and genus as we do here</span></span>
<span id="cb2-4"><a href="#cb2-4"></a><span class="st"> </span><span class="kw"><a href="https://dplyr.tidyverse.org/reference/summarise_all.html">summarise_if</a></span>(is.rsi, resistance) <span class="op">%&gt;%</span><span class="st"> </span><span class="co"># then get resistance of all drugs</span></span>
<span id="cb2-5"><a href="#cb2-5"></a><span class="st"> </span><span class="kw"><a href="https://dplyr.tidyverse.org/reference/select.html">select</a></span>(order, genus, AMC, CXM, CTX, </span>
<span id="cb2-6"><a href="#cb2-6"></a> CAZ, GEN, TOB, TMP, SXT) <span class="co"># and select only relevant columns</span></span>
<span id="cb2-7"><a href="#cb2-7"></a></span>
<span id="cb2-8"><a href="#cb2-8"></a><span class="kw"><a href="https://rdrr.io/r/utils/head.html">head</a></span>(resistance_data)</span>
<span id="cb2-9"><a href="#cb2-9"></a><span class="co"># # A tibble: 6 x 10</span></span>
<span id="cb2-10"><a href="#cb2-10"></a><span class="co"># # Groups: order [2]</span></span>
<span id="cb2-11"><a href="#cb2-11"></a><span class="co"># order genus AMC CXM CTX CAZ GEN TOB TMP SXT</span></span>
<span id="cb2-12"><a href="#cb2-12"></a><span class="co"># &lt;chr&gt; &lt;chr&gt; &lt;dbl&gt; &lt;dbl&gt; &lt;dbl&gt; &lt;dbl&gt; &lt;dbl&gt; &lt;dbl&gt; &lt;dbl&gt; &lt;dbl&gt;</span></span>
<span id="cb2-13"><a href="#cb2-13"></a><span class="co"># 1 (unknown orde… Micrococcoides NA NA NA NA NA NA NA NA</span></span>
<span id="cb2-14"><a href="#cb2-14"></a><span class="co"># 2 Actinomycetal… Actinomyces NA NA NA NA NA NA NA NA</span></span>
<span id="cb2-15"><a href="#cb2-15"></a><span class="co"># 3 Actinomycetal… Corynebacterium NA NA NA NA NA NA NA NA</span></span>
<span id="cb2-16"><a href="#cb2-16"></a><span class="co"># 4 Actinomycetal… Dermabacter NA NA NA NA NA NA NA NA</span></span>
<span id="cb2-17"><a href="#cb2-17"></a><span class="co"># 5 Actinomycetal… Micrococcus NA NA NA NA NA NA NA NA</span></span>
<span id="cb2-18"><a href="#cb2-18"></a><span class="co"># 6 Actinomycetal… Propionibacter… NA NA NA NA NA NA NA NA</span></span></code></pre></div>
</div>
<div id="perform-principal-component-analysis" class="section level1">
<h1 class="hasAnchor">
<a href="#perform-principal-component-analysis" class="anchor"></a>Perform principal component analysis</h1>
<p>The new <code><a href="../reference/pca.html">pca()</a></code> function will automatically filter on rows that contain numeric values in all selected variables, so we now only need to do:</p>
<div class="sourceCode" id="cb3"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb3-1"><a href="#cb3-1"></a>pca_result &lt;-<span class="st"> </span><span class="kw"><a href="../reference/pca.html">pca</a></span>(resistance_data)</span>
<span id="cb3-2"><a href="#cb3-2"></a><span class="co"># </span><span class="al">NOTE</span><span class="co">: Columns selected for PCA: AMC/CXM/CTX/CAZ/GEN/TOB/TMP/SXT.</span></span>
<span id="cb3-3"><a href="#cb3-3"></a><span class="co"># Total observations available: 7.</span></span></code></pre></div>
<p>The result can be reviewed with the good old <code><a href="https://rdrr.io/r/base/summary.html">summary()</a></code> function:</p>
<div class="sourceCode" id="cb4"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb4-1"><a href="#cb4-1"></a><span class="kw"><a href="https://rdrr.io/r/base/summary.html">summary</a></span>(pca_result)</span>
<span id="cb4-2"><a href="#cb4-2"></a><span class="co"># Importance of components:</span></span>
<span id="cb4-3"><a href="#cb4-3"></a><span class="co"># PC1 PC2 PC3 PC4 PC5 PC6 PC7</span></span>
<span id="cb4-4"><a href="#cb4-4"></a><span class="co"># Standard deviation 2.1580 1.6783 0.61282 0.33017 0.20150 0.03190 2.123e-16</span></span>
<span id="cb4-5"><a href="#cb4-5"></a><span class="co"># Proportion of Variance 0.5821 0.3521 0.04694 0.01363 0.00508 0.00013 0.000e+00</span></span>
<span id="cb4-6"><a href="#cb4-6"></a><span class="co"># Cumulative Proportion 0.5821 0.9342 0.98117 0.99480 0.99987 1.00000 1.000e+00</span></span></code></pre></div>
<p>Good news. The first two components explain a total of 93.4% of the variance (see the PC1 and PC2 values of the <em>Proportion of Variance</em>). We can create a so-called biplot with the base R <code><a href="https://rdrr.io/r/stats/biplot.html">biplot()</a></code> function, to see which antimicrobial resistances per drug explain the difference per microorganism.</p>
</div>
<div id="plotting-the-results" class="section level1">
<h1 class="hasAnchor">
<a href="#plotting-the-results" class="anchor"></a>Plotting the results</h1>
<div class="sourceCode" id="cb5"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb5-1"><a href="#cb5-1"></a><span class="kw"><a href="https://rdrr.io/r/stats/biplot.html">biplot</a></span>(pca_result)</span></code></pre></div>
<p><img src="PCA_files/figure-html/unnamed-chunk-5-1.png" width="750"></p>
<p>But we can't see the explanation of the points. Perhaps this works better with the new <code><a href="../reference/ggplot_pca.html">ggplot_pca()</a></code> function, which automatically adds the right labels and even groups:</p>
<div class="sourceCode" id="cb6"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb6-1"><a href="#cb6-1"></a><span class="kw"><a href="../reference/ggplot_pca.html">ggplot_pca</a></span>(pca_result)</span></code></pre></div>
<p><img src="PCA_files/figure-html/unnamed-chunk-6-1.png" width="750"></p>
<p>You can also print an ellipse per group, and edit the appearance:</p>
<div class="sourceCode" id="cb7"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb7-1"><a href="#cb7-1"></a><span class="kw"><a href="../reference/ggplot_pca.html">ggplot_pca</a></span>(pca_result, <span class="dt">ellipse =</span> <span class="ot">TRUE</span>) <span class="op">+</span></span>
<span id="cb7-2"><a href="#cb7-2"></a><span class="st"> </span>ggplot2<span class="op">::</span><span class="kw"><a href="https://ggplot2.tidyverse.org/reference/labs.html">labs</a></span>(<span class="dt">title =</span> <span class="st">"An AMR/PCA biplot!"</span>)</span></code></pre></div>
<p><img src="PCA_files/figure-html/unnamed-chunk-7-1.png" width="750"></p>
</div>
</div>
<div class="col-md-3 hidden-xs hidden-sm" id="sidebar">
<div id="tocnav">
<h2 class="hasAnchor">
<a href="#tocnav" class="anchor"></a>Contents</h2>
<ul class="nav nav-pills nav-stacked">
<li><a href="#introduction">Introduction</a></li>
<li><a href="#transforming">Transforming</a></li>
<li><a href="#perform-principal-component-analysis">Perform principal component analysis</a></li>
<li><a href="#plotting-the-results">Plotting the results</a></li>
</ul>
</div>
</div>
</div>
<footer><div class="copyright">
<p>Developed by <a href="https://www.rug.nl/staff/m.s.berends/">Matthijs S. Berends</a>, <a href="https://www.rug.nl/staff/c.f.luz/">Christian F. Luz</a>, <a href="https://www.rug.nl/staff/a.w.friedrich/">Alexander W. Friedrich</a>, <a href="https://www.rug.nl/staff/b.sinha/">Bhanu N. M. Sinha</a>, <a href="https://www.rug.nl/staff/c.j.albers/">Casper J. Albers</a>, <a href="https://www.rug.nl/staff/c.glasner/">Corinna Glasner</a>.</p>
</div>
<div class="pkgdown">
<p>Site built with <a href="https://pkgdown.r-lib.org/">pkgdown</a> 1.4.1.</p>
</div>
</footer>
</div>
</body>
</html>

Binary file not shown.

After

Width:  |  Height:  |  Size: 82 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 142 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 148 KiB

View File

@ -39,7 +39,7 @@
</button>
<span class="navbar-brand">
<a class="navbar-link" href="../index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.0.1</span>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.0.1.9000</span>
</span>
</div>
@ -75,6 +75,13 @@
Predict antimicrobial resistance
</a>
</li>
<li>
<a href="../articles/PCA.html">
<span class="fa fa-compress"></span>
Conduct principal component analysis for AMR
</a>
</li>
<li>
<a href="../articles/MDR.html">
<span class="fa fa-skull-crossbones"></span>
@ -179,7 +186,7 @@
<h1>Benchmarks</h1>
<h4 class="author">Matthijs S. Berends</h4>
<h4 class="date">23 February 2020</h4>
<h4 class="date">07 March 2020</h4>
<div class="hidden name"><code>benchmarks.Rmd</code></div>
@ -213,21 +220,36 @@
<span id="cb2-16"><a href="#cb2-16"></a> <span class="dt">times =</span> <span class="dv">10</span>)</span>
<span id="cb2-17"><a href="#cb2-17"></a><span class="kw"><a href="https://rdrr.io/r/base/print.html">print</a></span>(S.aureus, <span class="dt">unit =</span> <span class="st">"ms"</span>, <span class="dt">signif =</span> <span class="dv">2</span>)</span>
<span id="cb2-18"><a href="#cb2-18"></a><span class="co"># Unit: milliseconds</span></span>
<span id="cb2-19"><a href="#cb2-19"></a><span class="co"># expr min lq mean median uq max neval</span></span>
<span id="cb2-20"><a href="#cb2-20"></a><span class="co"># as.mo("sau") 8.2 8.5 12.0 9.1 9.5 34 10</span></span>
<span id="cb2-21"><a href="#cb2-21"></a><span class="co"># as.mo("stau") 36.0 37.0 50.0 40.0 65.0 82 10</span></span>
<span id="cb2-22"><a href="#cb2-22"></a><span class="co"># as.mo("STAU") 38.0 41.0 49.0 41.0 64.0 72 10</span></span>
<span id="cb2-23"><a href="#cb2-23"></a><span class="co"># as.mo("staaur") 8.4 8.9 9.2 9.2 9.3 10 10</span></span>
<span id="cb2-24"><a href="#cb2-24"></a><span class="co"># as.mo("STAAUR") 8.4 8.7 13.0 9.4 9.5 43 10</span></span>
<span id="cb2-25"><a href="#cb2-25"></a><span class="co"># as.mo("S. aureus") 14.0 15.0 30.0 37.0 40.0 44 10</span></span>
<span id="cb2-26"><a href="#cb2-26"></a><span class="co"># as.mo("S aureus") 14.0 15.0 43.0 15.0 37.0 250 10</span></span>
<span id="cb2-27"><a href="#cb2-27"></a><span class="co"># as.mo("Staphylococcus aureus") 4.7 5.0 7.4 5.1 5.5 28 10</span></span>
<span id="cb2-28"><a href="#cb2-28"></a><span class="co"># as.mo("Staphylococcus aureus (MRSA)") 660.0 690.0 720.0 730.0 730.0 790 10</span></span>
<span id="cb2-29"><a href="#cb2-29"></a><span class="co"># as.mo("Sthafilokkockus aaureuz") 350.0 380.0 420.0 400.0 440.0 520 10</span></span>
<span id="cb2-30"><a href="#cb2-30"></a><span class="co"># as.mo("MRSA") 8.1 8.5 12.0 9.1 9.3 38 10</span></span>
<span id="cb2-31"><a href="#cb2-31"></a><span class="co"># as.mo("VISA") 24.0 26.0 35.0 28.0 46.0 52 10</span></span>
<span id="cb2-32"><a href="#cb2-32"></a><span class="co"># as.mo("VRSA") 24.0 26.0 35.0 29.0 47.0 57 10</span></span>
<span id="cb2-33"><a href="#cb2-33"></a><span class="co"># as.mo(22242419) 130.0 130.0 150.0 140.0 150.0 240 10</span></span></code></pre></div>
<span id="cb2-19"><a href="#cb2-19"></a><span class="co"># expr min lq mean median uq max</span></span>
<span id="cb2-20"><a href="#cb2-20"></a><span class="co"># as.mo("sau") 8.0 8.2 9.1 8.4 8.5 16</span></span>
<span id="cb2-21"><a href="#cb2-21"></a><span class="co"># as.mo("stau") 37.0 40.0 51.0 52.0 60.0 76</span></span>
<span id="cb2-22"><a href="#cb2-22"></a><span class="co"># as.mo("STAU") 36.0 38.0 58.0 60.0 68.0 100</span></span>
<span id="cb2-23"><a href="#cb2-23"></a><span class="co"># as.mo("staaur") 8.2 8.4 9.5 8.6 8.9 14</span></span>
<span id="cb2-24"><a href="#cb2-24"></a><span class="co"># as.mo("STAAUR") 8.2 8.3 15.0 9.2 14.0 53</span></span>
<span id="cb2-25"><a href="#cb2-25"></a><span class="co"># as.mo("S. aureus") 13.0 21.0 64.0 21.0 45.0 260</span></span>
<span id="cb2-26"><a href="#cb2-26"></a><span class="co"># as.mo("S aureus") 13.0 14.0 33.0 24.0 44.0 76</span></span>
<span id="cb2-27"><a href="#cb2-27"></a><span class="co"># as.mo("Staphylococcus aureus") 4.7 4.8 9.9 6.8 7.9 42</span></span>
<span id="cb2-28"><a href="#cb2-28"></a><span class="co"># as.mo("Staphylococcus aureus (MRSA)") 620.0 640.0 770.0 700.0 860.0 1100</span></span>
<span id="cb2-29"><a href="#cb2-29"></a><span class="co"># as.mo("Sthafilokkockus aaureuz") 330.0 350.0 460.0 490.0 560.0 570</span></span>
<span id="cb2-30"><a href="#cb2-30"></a><span class="co"># as.mo("MRSA") 8.1 8.3 14.0 12.0 13.0 48</span></span>
<span id="cb2-31"><a href="#cb2-31"></a><span class="co"># as.mo("VISA") 24.0 25.0 34.0 26.0 38.0 59</span></span>
<span id="cb2-32"><a href="#cb2-32"></a><span class="co"># as.mo("VRSA") 23.0 24.0 37.0 27.0 39.0 78</span></span>
<span id="cb2-33"><a href="#cb2-33"></a><span class="co"># as.mo(22242419) 120.0 130.0 150.0 140.0 160.0 240</span></span>
<span id="cb2-34"><a href="#cb2-34"></a><span class="co"># neval</span></span>
<span id="cb2-35"><a href="#cb2-35"></a><span class="co"># 10</span></span>
<span id="cb2-36"><a href="#cb2-36"></a><span class="co"># 10</span></span>
<span id="cb2-37"><a href="#cb2-37"></a><span class="co"># 10</span></span>
<span id="cb2-38"><a href="#cb2-38"></a><span class="co"># 10</span></span>
<span id="cb2-39"><a href="#cb2-39"></a><span class="co"># 10</span></span>
<span id="cb2-40"><a href="#cb2-40"></a><span class="co"># 10</span></span>
<span id="cb2-41"><a href="#cb2-41"></a><span class="co"># 10</span></span>
<span id="cb2-42"><a href="#cb2-42"></a><span class="co"># 10</span></span>
<span id="cb2-43"><a href="#cb2-43"></a><span class="co"># 10</span></span>
<span id="cb2-44"><a href="#cb2-44"></a><span class="co"># 10</span></span>
<span id="cb2-45"><a href="#cb2-45"></a><span class="co"># 10</span></span>
<span id="cb2-46"><a href="#cb2-46"></a><span class="co"># 10</span></span>
<span id="cb2-47"><a href="#cb2-47"></a><span class="co"># 10</span></span>
<span id="cb2-48"><a href="#cb2-48"></a><span class="co"># 10</span></span></code></pre></div>
<p><img src="benchmarks_files/figure-html/unnamed-chunk-4-1.png" width="562.5"></p>
<p>In the table above, all measurements are shown in milliseconds (thousandths of a second). A value of 5 milliseconds means it can determine 200 input values per second. In case of 100 milliseconds, this is only 10 input values per second.</p>
<p>To achieve this speed, the <code>as.mo</code> function also takes into account the prevalence of human pathogenic microorganisms. The downside of this is of course that less prevalent microorganisms will be determined more slowly. See this example for the ID of <em>Methanosarcina semesiae</em> (<code>B_MTHNSR_SEMS</code>), a bug probably never found before in humans:</p>
@ -239,19 +261,19 @@
<span id="cb3-6"><a href="#cb3-6"></a> <span class="dt">times =</span> <span class="dv">10</span>)</span>
<span id="cb3-7"><a href="#cb3-7"></a><span class="kw"><a href="https://rdrr.io/r/base/print.html">print</a></span>(M.semesiae, <span class="dt">unit =</span> <span class="st">"ms"</span>, <span class="dt">signif =</span> <span class="dv">4</span>)</span>
<span id="cb3-8"><a href="#cb3-8"></a><span class="co"># Unit: milliseconds</span></span>
<span id="cb3-9"><a href="#cb3-9"></a><span class="co"># expr min lq mean median uq</span></span>
<span id="cb3-10"><a href="#cb3-10"></a><span class="co"># as.mo("metsem") 1497.000 1536.000 1604.00 1575.00 1693.000</span></span>
<span id="cb3-11"><a href="#cb3-11"></a><span class="co"># as.mo("METSEM") 1472.000 1510.000 1563.00 1563.00 1615.000</span></span>
<span id="cb3-12"><a href="#cb3-12"></a><span class="co"># as.mo("M. semesiae") 14.520 14.760 22.18 15.39 36.430</span></span>
<span id="cb3-13"><a href="#cb3-13"></a><span class="co"># as.mo("M. semesiae") 14.310 14.630 19.94 15.18 16.080</span></span>
<span id="cb3-14"><a href="#cb3-14"></a><span class="co"># as.mo("Methanosarcina semesiae") 5.376 5.482 8.41 5.81 5.911</span></span>
<span id="cb3-15"><a href="#cb3-15"></a><span class="co"># max neval</span></span>
<span id="cb3-16"><a href="#cb3-16"></a><span class="co"># 1709.00 10</span></span>
<span id="cb3-17"><a href="#cb3-17"></a><span class="co"># 1641.00 10</span></span>
<span id="cb3-18"><a href="#cb3-18"></a><span class="co"># 40.57 10</span></span>
<span id="cb3-19"><a href="#cb3-19"></a><span class="co"># 40.27 10</span></span>
<span id="cb3-20"><a href="#cb3-20"></a><span class="co"># 32.27 10</span></span></code></pre></div>
<p>That takes 5.7 times as much time on average. We can conclude that looking up arbitrary codes of less prevalent microorganisms is the worst way to go, in terms of calculation performance. Full names (like <em>Methanosarcina semesiae</em>) are always very fast and only take a few thousandths of a second to coerce - they are the most probable input from most data sets.</p>
<span id="cb3-9"><a href="#cb3-9"></a><span class="co"># expr min lq mean median uq</span></span>
<span id="cb3-10"><a href="#cb3-10"></a><span class="co"># as.mo("metsem") 1349.000 1352.000 1597.000 1411.000 1983.000</span></span>
<span id="cb3-11"><a href="#cb3-11"></a><span class="co"># as.mo("METSEM") 1316.000 2146.000 2069.000 2226.000 2245.000</span></span>
<span id="cb3-12"><a href="#cb3-12"></a><span class="co"># as.mo("M. semesiae") 13.330 14.110 32.960 21.840 53.090</span></span>
<span id="cb3-13"><a href="#cb3-13"></a><span class="co"># as.mo("M. semesiae") 13.730 20.960 29.720 21.430 40.000</span></span>
<span id="cb3-14"><a href="#cb3-14"></a><span class="co"># as.mo("Methanosarcina semesiae") 4.802 5.171 6.667 6.551 8.036</span></span>
<span id="cb3-15"><a href="#cb3-15"></a><span class="co"># max neval</span></span>
<span id="cb3-16"><a href="#cb3-16"></a><span class="co"># 2184.000 10</span></span>
<span id="cb3-17"><a href="#cb3-17"></a><span class="co"># 2337.000 10</span></span>
<span id="cb3-18"><a href="#cb3-18"></a><span class="co"># 62.780 10</span></span>
<span id="cb3-19"><a href="#cb3-19"></a><span class="co"># 64.510 10</span></span>
<span id="cb3-20"><a href="#cb3-20"></a><span class="co"># 8.735 10</span></span></code></pre></div>
<p>That takes 6.1 times as much time on average. We can conclude that looking up arbitrary codes of less prevalent microorganisms is the worst way to go, in terms of calculation performance. Full names (like <em>Methanosarcina semesiae</em>) are always very fast and only take a few thousandths of a second to coerce - they are the most probable input from most data sets.</p>
<p>In the figure below, we compare <em>Escherichia coli</em> (which is very common) with <em>Prevotella brevis</em> (which is moderately common) and with <em>Methanosarcina semesiae</em> (which is uncommon):</p>
<p><img src="benchmarks_files/figure-html/unnamed-chunk-6-1.png" width="900"></p>
<p>Uncommon microorganisms take a lot more time than common microorganisms. To relieve this pitfall and further improve performance, two important calculations take almost no time at all: <strong>repetitive results</strong> and <strong>already precalculated results</strong>.</p>
@ -285,8 +307,8 @@
<span id="cb4-24"><a href="#cb4-24"></a><span class="kw"><a href="https://rdrr.io/r/base/print.html">print</a></span>(run_it, <span class="dt">unit =</span> <span class="st">"ms"</span>, <span class="dt">signif =</span> <span class="dv">3</span>)</span>
<span id="cb4-25"><a href="#cb4-25"></a><span class="co"># Unit: milliseconds</span></span>
<span id="cb4-26"><a href="#cb4-26"></a><span class="co"># expr min lq mean median uq max neval</span></span>
<span id="cb4-27"><a href="#cb4-27"></a><span class="co"># mo_name(x) 568 614 651 634 657 1170 100</span></span></code></pre></div>
<p>So transforming 500,000 values (!!) of 50 unique values only takes 0.63 seconds (634 ms). You only lose time on your unique input values.</p>
<span id="cb4-27"><a href="#cb4-27"></a><span class="co"># mo_name(x) 564 605 673 630 657 1100 100</span></span></code></pre></div>
<p>So transforming 500,000 values (!!) of 50 unique values only takes 0.63 seconds (630 ms). You only lose time on your unique input values.</p>
</div>
<div id="precalculated-results" class="section level3">
<h3 class="hasAnchor">
@ -298,10 +320,10 @@
<span id="cb5-4"><a href="#cb5-4"></a> <span class="dt">times =</span> <span class="dv">10</span>)</span>
<span id="cb5-5"><a href="#cb5-5"></a><span class="kw"><a href="https://rdrr.io/r/base/print.html">print</a></span>(run_it, <span class="dt">unit =</span> <span class="st">"ms"</span>, <span class="dt">signif =</span> <span class="dv">3</span>)</span>
<span id="cb5-6"><a href="#cb5-6"></a><span class="co"># Unit: milliseconds</span></span>
<span id="cb5-7"><a href="#cb5-7"></a><span class="co"># expr min lq mean median uq max neval</span></span>
<span id="cb5-8"><a href="#cb5-8"></a><span class="co"># A 6.630 6.790 7.330 7.230 7.690 8.39 10</span></span>
<span id="cb5-9"><a href="#cb5-9"></a><span class="co"># B 13.900 14.300 19.000 14.700 17.000 53.00 10</span></span>
<span id="cb5-10"><a href="#cb5-10"></a><span class="co"># C 0.847 0.875 0.947 0.901 0.977 1.16 10</span></span></code></pre></div>
<span id="cb5-7"><a href="#cb5-7"></a><span class="co"># expr min lq mean median uq max neval</span></span>
<span id="cb5-8"><a href="#cb5-8"></a><span class="co"># A 6.58 6.590 7.340 6.630 6.780 13.00 10</span></span>
<span id="cb5-9"><a href="#cb5-9"></a><span class="co"># B 13.50 13.700 18.700 13.900 14.600 60.80 10</span></span>
<span id="cb5-10"><a href="#cb5-10"></a><span class="co"># C 0.72 0.863 0.917 0.898 0.935 1.26 10</span></span></code></pre></div>
<p>So going from <code><a href="../reference/mo_property.html">mo_name("Staphylococcus aureus")</a></code> to <code>"Staphylococcus aureus"</code> takes 0.0009 seconds - it doesn't even start calculating <em>if the result would be the same as the expected resulting value</em>. That goes for all helper functions:</p>
<div class="sourceCode" id="cb6"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb6-1"><a href="#cb6-1"></a>run_it &lt;-<span class="st"> </span><span class="kw"><a href="https://rdrr.io/pkg/microbenchmark/man/microbenchmark.html">microbenchmark</a></span>(<span class="dt">A =</span> <span class="kw"><a href="../reference/mo_property.html">mo_species</a></span>(<span class="st">"aureus"</span>),</span>
<span id="cb6-2"><a href="#cb6-2"></a> <span class="dt">B =</span> <span class="kw"><a href="../reference/mo_property.html">mo_genus</a></span>(<span class="st">"Staphylococcus"</span>),</span>
@ -315,14 +337,14 @@
<span id="cb6-10"><a href="#cb6-10"></a><span class="kw"><a href="https://rdrr.io/r/base/print.html">print</a></span>(run_it, <span class="dt">unit =</span> <span class="st">"ms"</span>, <span class="dt">signif =</span> <span class="dv">3</span>)</span>
<span id="cb6-11"><a href="#cb6-11"></a><span class="co"># Unit: milliseconds</span></span>
<span id="cb6-12"><a href="#cb6-12"></a><span class="co"># expr min lq mean median uq max neval</span></span>
<span id="cb6-13"><a href="#cb6-13"></a><span class="co"># A 0.476 0.485 0.501 0.498 0.504 0.554 10</span></span>
<span id="cb6-14"><a href="#cb6-14"></a><span class="co"># B 0.515 0.521 0.548 0.545 0.553 0.614 10</span></span>
<span id="cb6-15"><a href="#cb6-15"></a><span class="co"># C 0.710 0.791 0.870 0.842 0.855 1.330 10</span></span>
<span id="cb6-16"><a href="#cb6-16"></a><span class="co"># D 0.491 0.524 0.539 0.535 0.546 0.613 10</span></span>
<span id="cb6-17"><a href="#cb6-17"></a><span class="co"># E 0.488 0.500 0.583 0.541 0.635 0.830 10</span></span>
<span id="cb6-18"><a href="#cb6-18"></a><span class="co"># F 0.477 0.488 0.509 0.495 0.519 0.569 10</span></span>
<span id="cb6-19"><a href="#cb6-19"></a><span class="co"># G 0.473 0.490 0.507 0.498 0.534 0.547 10</span></span>
<span id="cb6-20"><a href="#cb6-20"></a><span class="co"># H 0.477 0.486 0.500 0.494 0.509 0.561 10</span></span></code></pre></div>
<span id="cb6-13"><a href="#cb6-13"></a><span class="co"># A 0.499 0.511 0.516 0.517 0.522 0.544 10</span></span>
<span id="cb6-14"><a href="#cb6-14"></a><span class="co"># B 0.532 0.539 0.550 0.542 0.563 0.592 10</span></span>
<span id="cb6-15"><a href="#cb6-15"></a><span class="co"># C 0.718 0.787 0.832 0.843 0.889 0.904 10</span></span>
<span id="cb6-16"><a href="#cb6-16"></a><span class="co"># D 0.538 0.548 0.566 0.567 0.571 0.607 10</span></span>
<span id="cb6-17"><a href="#cb6-17"></a><span class="co"># E 0.503 0.509 0.515 0.513 0.516 0.549 10</span></span>
<span id="cb6-18"><a href="#cb6-18"></a><span class="co"># F 0.502 0.504 0.514 0.511 0.519 0.539 10</span></span>
<span id="cb6-19"><a href="#cb6-19"></a><span class="co"># G 0.493 0.513 0.538 0.514 0.536 0.684 10</span></span>
<span id="cb6-20"><a href="#cb6-20"></a><span class="co"># H 0.499 0.501 0.509 0.505 0.516 0.531 10</span></span></code></pre></div>
<p>Of course, when running <code><a href="../reference/mo_property.html">mo_phylum("Firmicutes")</a></code> the function has zero knowledge about the actual microorganism, namely <em>S. aureus</em>. But since the result would be <code>"Firmicutes"</code> anyway, there is no point in calculating the result. And because this package knows all phyla of all known bacteria (according to the Catalogue of Life), it can just return the initial value immediately.</p>
</div>
<div id="results-in-other-languages" class="section level3">
@ -349,13 +371,13 @@
<span id="cb7-18"><a href="#cb7-18"></a><span class="kw"><a href="https://rdrr.io/r/base/print.html">print</a></span>(run_it, <span class="dt">unit =</span> <span class="st">"ms"</span>, <span class="dt">signif =</span> <span class="dv">4</span>)</span>
<span id="cb7-19"><a href="#cb7-19"></a><span class="co"># Unit: milliseconds</span></span>
<span id="cb7-20"><a href="#cb7-20"></a><span class="co"># expr min lq mean median uq max neval</span></span>
<span id="cb7-21"><a href="#cb7-21"></a><span class="co"># en 24.69 25.88 32.93 26.49 28.24 165.60 100</span></span>
<span id="cb7-22"><a href="#cb7-22"></a><span class="co"># de 26.07 27.22 32.82 28.08 29.90 75.78 100</span></span>
<span id="cb7-23"><a href="#cb7-23"></a><span class="co"># nl 32.01 33.49 39.63 34.78 36.68 78.48 100</span></span>
<span id="cb7-24"><a href="#cb7-24"></a><span class="co"># es 25.66 27.31 32.25 28.04 29.53 68.15 100</span></span>
<span id="cb7-25"><a href="#cb7-25"></a><span class="co"># it 25.69 27.13 32.40 28.22 30.01 62.64 100</span></span>
<span id="cb7-26"><a href="#cb7-26"></a><span class="co"># fr 25.77 27.22 33.72 28.03 30.46 71.35 100</span></span>
<span id="cb7-27"><a href="#cb7-27"></a><span class="co"># pt 25.65 27.12 31.94 27.78 29.08 60.06 100</span></span></code></pre></div>
<span id="cb7-21"><a href="#cb7-21"></a><span class="co"># en 23.72 25.30 30.59 25.77 26.99 76.03 100</span></span>
<span id="cb7-22"><a href="#cb7-22"></a><span class="co"># de 24.88 26.81 31.11 27.47 28.93 69.86 100</span></span>
<span id="cb7-23"><a href="#cb7-23"></a><span class="co"># nl 30.65 32.77 38.07 33.70 35.23 74.79 100</span></span>
<span id="cb7-24"><a href="#cb7-24"></a><span class="co"># es 24.89 26.33 32.10 27.13 28.87 68.79 100</span></span>
<span id="cb7-25"><a href="#cb7-25"></a><span class="co"># it 24.78 26.72 33.51 27.53 28.91 166.60 100</span></span>
<span id="cb7-26"><a href="#cb7-26"></a><span class="co"># fr 24.84 26.58 31.50 27.13 28.29 67.38 100</span></span>
<span id="cb7-27"><a href="#cb7-27"></a><span class="co"># pt 24.88 26.58 32.38 27.50 29.20 79.30 100</span></span></code></pre></div>
<p>Currently supported are German, Dutch, Spanish, Italian, French and Portuguese.</p>
</div>
</div>

Binary file not shown.

Before

Width:  |  Height:  |  Size: 94 KiB

After

Width:  |  Height:  |  Size: 94 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 57 KiB

After

Width:  |  Height:  |  Size: 56 KiB

View File

@ -78,7 +78,7 @@
</button>
<span class="navbar-brand">
<a class="navbar-link" href="../index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.0.1</span>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.0.1.9000</span>
</span>
</div>
@ -114,6 +114,13 @@
Predict antimicrobial resistance
</a>
</li>
<li>
<a href="../articles/PCA.html">
<span class="fa fa-compress"></span>
Conduct principal component analysis for AMR
</a>
</li>
<li>
<a href="../articles/MDR.html">
<span class="fa fa-skull-crossbones"></span>
@ -226,6 +233,7 @@
<li><a href="AMR.html">How to conduct AMR analysis</a></li>
<li><a href="EUCAST.html">How to apply EUCAST rules</a></li>
<li><a href="MDR.html">How to determine multi-drug resistance (MDR)</a></li>
<li><a href="PCA.html">How to conduct principal component analysis (PCA) for AMR</a></li>
<li><a href="SPSS.html">How to import data from SPSS / SAS / Stata</a></li>
<li><a href="WHONET.html">How to work with WHONET data</a></li>
<li><a href="benchmarks.html">Benchmarks</a></li>

View File

@ -78,7 +78,7 @@
</button>
<span class="navbar-brand">
<a class="navbar-link" href="index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.0.1</span>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.0.1.9000</span>
</span>
</div>
@ -114,6 +114,13 @@
Predict antimicrobial resistance
</a>
</li>
<li>
<a href="articles/PCA.html">
<span class="fa fa-compress"></span>
Conduct principal component analysis for AMR
</a>
</li>
<li>
<a href="articles/MDR.html">
<span class="fa fa-skull-crossbones"></span>

View File

@ -43,7 +43,7 @@
</button>
<span class="navbar-brand">
<a class="navbar-link" href="index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.0.1</span>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.0.1.9000</span>
</span>
</div>
@ -79,6 +79,13 @@
Predict antimicrobial resistance
</a>
</li>
<li>
<a href="articles/PCA.html">
<span class="fa fa-compress"></span>
Conduct principal component analysis for AMR
</a>
</li>
<li>
<a href="articles/MDR.html">
<span class="fa fa-skull-crossbones"></span>

View File

@ -78,7 +78,7 @@
</button>
<span class="navbar-brand">
<a class="navbar-link" href="../index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.0.1</span>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.0.1.9000</span>
</span>
</div>
@ -114,6 +114,13 @@
Predict antimicrobial resistance
</a>
</li>
<li>
<a href="../articles/PCA.html">
<span class="fa fa-compress"></span>
Conduct principal component analysis for AMR
</a>
</li>
<li>
<a href="../articles/MDR.html">
<span class="fa fa-skull-crossbones"></span>
@ -219,6 +226,19 @@
</div>
<div id="amr-1019000" class="section level1">
<h1 class="page-header">
<a href="#amr-1019000" class="anchor"></a>AMR 1.0.1.9000<small> Unreleased </small>
</h1>
<div id="new" class="section level3">
<h3 class="hasAnchor">
<a href="#new" class="anchor"></a>New</h3>
<ul>
<li>Support for easy principal component analysis for AMR, using the new <code><a href="../reference/pca.html">pca()</a></code> function</li>
<li>Plotting biplots for principal component analysis using the new <code><a href="../reference/ggplot_pca.html">ggplot_pca()</a></code> function</li>
</ul>
</div>
</div>
<div id="amr-101" class="section level1">
<h1 class="page-header">
<a href="#amr-101" class="anchor"></a>AMR 1.0.1<small> 2020-02-23 </small>
@ -247,9 +267,9 @@
<a href="#amr-100" class="anchor"></a>AMR 1.0.0<small> 2020-02-17 </small>
</h1>
<p>This software is now out of beta and considered stable. Nonetheless, this package will be developed continually.</p>
<div id="new" class="section level3">
<div id="new-1" class="section level3">
<h3 class="hasAnchor">
<a href="#new" class="anchor"></a>New</h3>
<a href="#new-1" class="anchor"></a>New</h3>
<ul>
<li>Support for the newest <a href="http://www.eucast.org/clinical_breakpoints/">EUCAST Clinical Breakpoint Tables v.10.0</a>, valid from 1 January 2020. This affects translation of MIC and disk zones using <code><a href="../reference/as.rsi.html">as.rsi()</a></code> and inferred resistance and susceptibility using <code><a href="../reference/eucast_rules.html">eucast_rules()</a></code>.</li>
<li>The repository of this package now contains a clean version of the EUCAST and CLSI guidelines from 2011-2020 to translate MIC and disk diffusion values to R/SI: <a href="https://gitlab.com/msberends/AMR/blob/master/data-raw/rsi_translation.txt" class="uri">https://gitlab.com/msberends/AMR/blob/master/data-raw/rsi_translation.txt</a>. This <strong>allows for machine reading these guidelines</strong>, which is almost impossible with the Excel and PDF files distributed by EUCAST and CLSI. The file used to process the EUCAST Clinical Breakpoints Excel file <a href="https://gitlab.com/msberends/AMR/blob/master/data-raw/read_EUCAST.R">can be found here</a>.</li>
@ -337,9 +357,9 @@
</li>
</ul>
</div>
<div id="new-1" class="section level3">
<div id="new-2" class="section level3">
<h3 class="hasAnchor">
<a href="#new-1" class="anchor"></a>New</h3>
<a href="#new-2" class="anchor"></a>New</h3>
<ul>
<li>
<p>Functions <code><a href="../reference/proportion.html">susceptibility()</a></code> and <code><a href="../reference/proportion.html">resistance()</a></code> as aliases of <code><a href="../reference/proportion.html">proportion_SI()</a></code> and <code><a href="../reference/proportion.html">proportion_R()</a></code>, respectively. These functions were added to make it more clear that “I” should be considered susceptible and not resistant.</p>
@ -449,9 +469,9 @@
<li><p>Renamed data set <code>septic_patients</code> to <code>example_isolates</code></p></li>
</ul>
</div>
<div id="new-2" class="section level3">
<div id="new-3" class="section level3">
<h3 class="hasAnchor">
<a href="#new-2" class="anchor"></a>New</h3>
<a href="#new-3" class="anchor"></a>New</h3>
<ul>
<li>
<p>Function <code><a href="../reference/bug_drug_combinations.html">bug_drug_combinations()</a></code> to quickly get a <code>data.frame</code> with the results of all bug-drug combinations in a data set. The column containing microorganism codes is guessed automatically and its input is transformed with <code><a href="../reference/mo_property.html">mo_shortname()</a></code> at default:</p>
@ -575,9 +595,9 @@
<h1 class="page-header">
<a href="#amr-071" class="anchor"></a>AMR 0.7.1<small> 2019-06-23 </small>
</h1>
<div id="new-3" class="section level4">
<div id="new-4" class="section level4">
<h4 class="hasAnchor">
<a href="#new-3" class="anchor"></a>New</h4>
<a href="#new-4" class="anchor"></a>New</h4>
<ul>
<li>
<p>Function <code><a href="../reference/proportion.html">rsi_df()</a></code> to transform a <code>data.frame</code> to a data set containing only the microbial interpretation (S, I, R), the antibiotic, the percentage of S/I/R and the number of available isolates. This is a convenient combination of the existing functions <code><a href="../reference/count.html">count_df()</a></code> and <code><a href="../reference/AMR-deprecated.html">portion_df()</a></code> to immediately show resistance percentages and number of available isolates:</p>
@ -656,9 +676,9 @@
<h1 class="page-header">
<a href="#amr-070" class="anchor"></a>AMR 0.7.0<small> 2019-06-03 </small>
</h1>
<div id="new-4" class="section level4">
<div id="new-5" class="section level4">
<h4 class="hasAnchor">
<a href="#new-4" class="anchor"></a>New</h4>
<a href="#new-5" class="anchor"></a>New</h4>
<ul>
<li>Support for translation of disk diffusion and MIC values to RSI values (i.e. antimicrobial interpretations). Supported guidelines are EUCAST (2011 to 2019) and CLSI (2011 to 2019). Use <code><a href="../reference/as.rsi.html">as.rsi()</a></code> on an MIC value (created with <code><a href="../reference/as.mic.html">as.mic()</a></code>), a disk diffusion value (created with the new <code><a href="../reference/as.disk.html">as.disk()</a></code>) or on a complete data set containing columns with MIC or disk diffusion values.</li>
<li>Function <code><a href="../reference/mo_property.html">mo_name()</a></code> as alias of <code><a href="../reference/mo_property.html">mo_fullname()</a></code>
@ -773,9 +793,9 @@
<li>Contains the complete manual of this package and all of its functions with an explanation of their parameters</li>
<li>Contains a comprehensive tutorial about how to conduct antimicrobial resistance analysis, import data from WHONET or SPSS and many more.</li>
</ul>
<div id="new-5" class="section level4">
<div id="new-6" class="section level4">
<h4 class="hasAnchor">
<a href="#new-5" class="anchor"></a>New</h4>
<a href="#new-6" class="anchor"></a>New</h4>
<ul>
<li><p><strong>BREAKING</strong>: removed deprecated functions, parameters and references to bactid. Use <code><a href="../reference/as.mo.html">as.mo()</a></code> to identify an MO code.</p></li>
<li>
@ -999,9 +1019,9 @@
<h1 class="page-header">
<a href="#amr-050" class="anchor"></a>AMR 0.5.0<small> 2018-11-30 </small>
</h1>
<div id="new-6" class="section level4">
<div id="new-7" class="section level4">
<h4 class="hasAnchor">
<a href="#new-6" class="anchor"></a>New</h4>
<a href="#new-7" class="anchor"></a>New</h4>
<ul>
<li>Repository moved to GitLab: <a href="https://gitlab.com/msberends/AMR" class="uri">https://gitlab.com/msberends/AMR</a>
</li>
@ -1118,9 +1138,9 @@
<h1 class="page-header">
<a href="#amr-040" class="anchor"></a>AMR 0.4.0<small> 2018-10-01 </small>
</h1>
<div id="new-7" class="section level4">
<div id="new-8" class="section level4">
<h4 class="hasAnchor">
<a href="#new-7" class="anchor"></a>New</h4>
<a href="#new-8" class="anchor"></a>New</h4>
<ul>
<li><p>The data set <code>microorganisms</code> now contains <strong>all microbial taxonomic data from ITIS</strong> (kingdoms Bacteria, Fungi and Protozoa), the Integrated Taxonomy Information System, available via <a href="https://itis.gov" class="uri">https://itis.gov</a>. The data set now contains more than 18,000 microorganisms with all known bacteria, fungi and protozoa according to ITIS with genus, species, subspecies, family, order, class, phylum and subkingdom. The new data set <code>microorganisms.old</code> contains all previously known taxonomic names from those kingdoms.</p></li>
<li>
@ -1251,9 +1271,9 @@
<h1 class="page-header">
<a href="#amr-030" class="anchor"></a>AMR 0.3.0<small> 2018-08-14 </small>
</h1>
<div id="new-8" class="section level4">
<div id="new-9" class="section level4">
<h4 class="hasAnchor">
<a href="#new-8" class="anchor"></a>New</h4>
<a href="#new-9" class="anchor"></a>New</h4>
<ul>
<li>
<strong>BREAKING</strong>: <code>rsi_df</code> was removed in favour of new functions <code>portion_R</code>, <code>portion_IR</code>, <code>portion_I</code>, <code>portion_SI</code> and <code>portion_S</code> to selectively calculate resistance or susceptibility. These functions are 20 to 30 times faster than the old <code>rsi</code> function. The old function still works, but is deprecated.
@ -1388,9 +1408,9 @@
<h1 class="page-header">
<a href="#amr-020" class="anchor"></a>AMR 0.2.0<small> 2018-05-03 </small>
</h1>
<div id="new-9" class="section level4">
<div id="new-10" class="section level4">
<h4 class="hasAnchor">
<a href="#new-9" class="anchor"></a>New</h4>
<a href="#new-10" class="anchor"></a>New</h4>
<ul>
<li>Full support for Windows, Linux and macOS</li>
<li>Full support for old R versions, only R-3.0.0 (April 2013) or later is needed (needed packages may have other dependencies)</li>
@ -1469,6 +1489,7 @@
<div id="tocnav">
<h2>Contents</h2>
<ul class="nav nav-pills nav-stacked">
<li><a href="#amr-1019000">1.0.1.9000</a></li>
<li><a href="#amr-101">1.0.1</a></li>
<li><a href="#amr-100">1.0.0</a></li>
<li><a href="#amr-090">0.9.0</a></li>

View File

@ -5,6 +5,7 @@ articles:
AMR: AMR.html
EUCAST: EUCAST.html
MDR: MDR.html
PCA: PCA.html
SPSS: SPSS.html
WHONET: WHONET.html
benchmarks: benchmarks.html

View File

@ -79,7 +79,7 @@
</button>
<span class="navbar-brand">
<a class="navbar-link" href="../index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.0.1</span>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.0.1.9000</span>
</span>
</div>
@ -115,6 +115,13 @@
Predict antimicrobial resistance
</a>
</li>
<li>
<a href="../articles/PCA.html">
<span class="fa fa-compress-alt"></span>
Conduct Principal Component Analysis for AMR
</a>
</li>
<li>
<a href="../articles/MDR.html">
<span class="fa fa-skull-crossbones"></span>
@ -250,7 +257,7 @@
<h2 class="hasAnchor" id="value"><a class="anchor" href="#value"></a>Value</h2>
<p>An integer (no decimals) if <code>exact = FALSE</code>, a double (with decimals) otherwise</p>
<p>An <a href='https://rdrr.io/r/base/integer.html'>integer</a> (no decimals) if <code>exact = FALSE</code>, a <a href='https://rdrr.io/r/base/double.html'>double</a> (with decimals) otherwise</p>
<h2 class="hasAnchor" id="stable-lifecycle"><a class="anchor" href="#stable-lifecycle"></a>Stable lifecycle</h2>

View File

@ -79,7 +79,7 @@
</button>
<span class="navbar-brand">
<a class="navbar-link" href="../index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.0.1</span>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.0.1.9000</span>
</span>
</div>
@ -115,6 +115,13 @@
Predict antimicrobial resistance
</a>
</li>
<li>
<a href="../articles/PCA.html">
<span class="fa fa-compress-alt"></span>
Conduct Principal Component Analysis for AMR
</a>
</li>
<li>
<a href="../articles/MDR.html">
<span class="fa fa-skull-crossbones"></span>
@ -240,23 +247,23 @@
</tr>
<tr>
<th>na.rm</th>
<td><p>a logical to indicate whether missing values should be removed</p></td>
<td><p>a <a href='https://rdrr.io/r/base/logical.html'>logical</a> to indicate whether missing values should be removed</p></td>
</tr>
</table>
<h2 class="hasAnchor" id="value"><a class="anchor" href="#value"></a>Value</h2>
<p>Ordered <code><a href='https://rdrr.io/r/base/factor.html'>factor</a></code></p>
<p>Ordered <a href='https://rdrr.io/r/base/factor.html'>factor</a></p>
<h2 class="hasAnchor" id="details"><a class="anchor" href="#details"></a>Details</h2>
<p>To split ages, the input can be:</p><ul>
<p>To split ages, the input for the <code>split_at</code> parameter can be:</p><ul>
<li><p>A numeric vector. A vector of e.g. <code><a href='https://rdrr.io/r/base/c.html'>c(10, 20)</a></code> will split on 0-9, 10-19 and 20+. A value of only <code>50</code> will split on 0-49 and 50+.
The default is to split on young children (0-11), youth (12-24), young adults (25-54), middle-aged adults (55-74) and elderly (75+).</p></li>
<li><p>A character:</p><ul>
<li><p><code>"children"</code> or <code>"kids"</code>, equivalent of: <code><a href='https://rdrr.io/r/base/c.html'>c(0, 1, 2, 4, 6, 13, 18)</a></code>. This will split on 0, 1, 2-3, 4-5, 6-12, 13-17 and 18+.</p></li>
<li><p><code>"elderly"</code> or <code>"seniors"</code>, equivalent of: <code><a href='https://rdrr.io/r/base/c.html'>c(65, 75, 85)</a></code>. This will split on 0-64, 65-74, 75-84, 85+.</p></li>
<li><p><code>"fives"</code>, equivalent of: <code>1:20 * 5</code>. This will split on 0-4, 5-9, 10-14, ..., 90-94, 95-99, 100+.</p></li>
<li><p><code>"tens"</code>, equivalent of: <code>1:10 * 10</code>. This will split on 0-9, 10-19, 20-29, ... 80-89, 90-99, 100+.</p></li>
<li><p><code>"tens"</code>, equivalent of: <code>1:10 * 10</code>. This will split on 0-9, 10-19, 20-29, ..., 80-89, 90-99, 100+.</p></li>
</ul></li>
</ul>

View File

@ -0,0 +1,415 @@
<!-- Generated by pkgdown: do not edit by hand -->
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>PCA biplot with ggplot2 — ggplot_pca • AMR (for R)</title>
<!-- favicons -->
<link rel="icon" type="image/png" sizes="16x16" href="../favicon-16x16.png">
<link rel="icon" type="image/png" sizes="32x32" href="../favicon-32x32.png">
<link rel="apple-touch-icon" type="image/png" sizes="180x180" href="../apple-touch-icon.png" />
<link rel="apple-touch-icon" type="image/png" sizes="120x120" href="../apple-touch-icon-120x120.png" />
<link rel="apple-touch-icon" type="image/png" sizes="76x76" href="../apple-touch-icon-76x76.png" />
<link rel="apple-touch-icon" type="image/png" sizes="60x60" href="../apple-touch-icon-60x60.png" />
<!-- jquery -->
<script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.3.1/jquery.min.js" integrity="sha256-FgpCb/KJQlLNfOu91ta32o/NMZxltwRo8QtmkMRdAu8=" crossorigin="anonymous"></script>
<!-- Bootstrap -->
<link href="https://cdnjs.cloudflare.com/ajax/libs/bootswatch/3.3.7/flatly/bootstrap.min.css" rel="stylesheet" crossorigin="anonymous" />
<script src="https://cdnjs.cloudflare.com/ajax/libs/twitter-bootstrap/3.3.7/js/bootstrap.min.js" integrity="sha256-U5ZEeKfGNOja007MMD3YBI0A3OSZOQbeG6z2f2Y0hu8=" crossorigin="anonymous"></script>
<!-- Font Awesome icons -->
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.7.1/css/all.min.css" integrity="sha256-nAmazAk6vS34Xqo0BSrTb+abbtFlgsFK7NKSi6o7Y78=" crossorigin="anonymous" />
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.7.1/css/v4-shims.min.css" integrity="sha256-6qHlizsOWFskGlwVOKuns+D1nB6ssZrHQrNj1wGplHc=" crossorigin="anonymous" />
<!-- clipboard.js -->
<script src="https://cdnjs.cloudflare.com/ajax/libs/clipboard.js/2.0.4/clipboard.min.js" integrity="sha256-FiZwavyI2V6+EXO1U+xzLG3IKldpiTFf3153ea9zikQ=" crossorigin="anonymous"></script>
<!-- headroom.js -->
<script src="https://cdnjs.cloudflare.com/ajax/libs/headroom/0.9.4/headroom.min.js" integrity="sha256-DJFC1kqIhelURkuza0AvYal5RxMtpzLjFhsnVIeuk+U=" crossorigin="anonymous"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/headroom/0.9.4/jQuery.headroom.min.js" integrity="sha256-ZX/yNShbjqsohH1k95liqY9Gd8uOiE1S4vZc+9KQ1K4=" crossorigin="anonymous"></script>
<!-- pkgdown -->
<link href="../pkgdown.css" rel="stylesheet">
<script src="../pkgdown.js"></script>
<link href="../extra.css" rel="stylesheet">
<script src="../extra.js"></script>
<meta property="og:title" content="PCA biplot with ggplot2 — ggplot_pca" />
<meta property="og:description" content="Produces a ggplot2 variant of a so-called biplot for PCA (principal component analysis) that is more flexible and more appealing than the base R biplot() function." />
<meta property="og:image" content="https://msberends.gitlab.io/AMR/logo.png" />
<meta name="twitter:card" content="summary" />
<!-- mathjax -->
<script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/MathJax.js" integrity="sha256-nvJJv9wWKEm88qvoQl9ekL2J+k/RWIsaSScxxlsrv8k=" crossorigin="anonymous"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/config/TeX-AMS-MML_HTMLorMML.js" integrity="sha256-84DKXVJXs0/F8OTMzX4UR909+jtl4G7SPypPavF+GfA=" crossorigin="anonymous"></script>
<!--[if lt IE 9]>
<script src="https://oss.maxcdn.com/html5shiv/3.7.3/html5shiv.min.js"></script>
<script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
<![endif]-->
</head>
<body>
<div class="container template-reference-topic">
<header>
<div class="navbar navbar-default navbar-fixed-top" role="navigation">
<div class="container">
<div class="navbar-header">
<button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#navbar" aria-expanded="false">
<span class="sr-only">Toggle navigation</span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
</button>
<span class="navbar-brand">
<a class="navbar-link" href="../index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.0.1.9000</span>
</span>
</div>
<div id="navbar" class="navbar-collapse collapse">
<ul class="nav navbar-nav">
<li>
<a href="../index.html">
<span class="fa fa-home"></span>
Home
</a>
</li>
<li class="dropdown">
<a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-expanded="false">
<span class="fa fa-question-circle"></span>
How to
<span class="caret"></span>
</a>
<ul class="dropdown-menu" role="menu">
<li>
<a href="../articles/AMR.html">
<span class="fa fa-directions"></span>
Conduct AMR analysis
</a>
</li>
<li>
<a href="../articles/resistance_predict.html">
<span class="fa fa-dice"></span>
Predict antimicrobial resistance
</a>
</li>
<li>
<a href="../articles/PCA.html">
<span class="fa fa-compress-alt"></span>
Conduct Principal Component Analysis for AMR
</a>
</li>
<li>
<a href="../articles/MDR.html">
<span class="fa fa-skull-crossbones"></span>
Determine multi-drug resistance (MDR)
</a>
</li>
<li>
<a href="../articles/WHONET.html">
<span class="fa fa-globe-americas"></span>
Work with WHONET data
</a>
</li>
<li>
<a href="../articles/SPSS.html">
<span class="fa fa-file-upload"></span>
Import data from SPSS/SAS/Stata
</a>
</li>
<li>
<a href="../articles/EUCAST.html">
<span class="fa fa-exchange-alt"></span>
Apply EUCAST rules
</a>
</li>
<li>
<a href="../reference/mo_property.html">
<span class="fa fa-bug"></span>
Get properties of a microorganism
</a>
</li>
<li>
<a href="../reference/ab_property.html">
<span class="fa fa-capsules"></span>
Get properties of an antibiotic
</a>
</li>
<li>
<a href="../articles/benchmarks.html">
<span class="fa fa-shipping-fast"></span>
Other: benchmarks
</a>
</li>
</ul>
</li>
<li>
<a href="../reference/">
<span class="fa fa-book-open"></span>
Manual
</a>
</li>
<li>
<a href="../authors.html">
<span class="fa fa-users"></span>
Authors
</a>
</li>
<li>
<a href="../news/">
<span class="far fa far fa-newspaper"></span>
Changelog
</a>
</li>
</ul>
<ul class="nav navbar-nav navbar-right">
<li>
<a href="https://gitlab.com/msberends/AMR">
<span class="fab fa fab fa-gitlab"></span>
Source Code
</a>
</li>
<li>
<a href="../LICENSE-text.html">
<span class="fa fa-book"></span>
Licence
</a>
</li>
</ul>
</div><!--/.nav-collapse -->
</div><!--/.container -->
</div><!--/.navbar -->
</header>
<div class="row">
<div class="col-md-9 contents">
<div class="page-header">
<h1>PCA biplot with <code>ggplot2</code></h1>
<div class="hidden name"><code>ggplot_pca.Rd</code></div>
</div>
<div class="ref-description">
<p>Produces a <code>ggplot2</code> variant of a so-called <a href='https://en.wikipedia.org/wiki/Biplot'>biplot</a> for PCA (principal component analysis) that is more flexible and more appealing than the base <span style="R">R</span> <code><a href='https://rdrr.io/r/stats/biplot.html'>biplot()</a></code> function.</p>
</div>
<pre class="usage"><span class='fu'>ggplot_pca</span>(
<span class='no'>x</span>,
<span class='kw'>choices</span> <span class='kw'>=</span> <span class='fl'>1</span>:<span class='fl'>2</span>,
<span class='kw'>scale</span> <span class='kw'>=</span> <span class='fl'>TRUE</span>,
<span class='kw'>labels</span> <span class='kw'>=</span> <span class='kw'>NULL</span>,
<span class='kw'>labels_textsize</span> <span class='kw'>=</span> <span class='fl'>3</span>,
<span class='kw'>labels_text_placement</span> <span class='kw'>=</span> <span class='fl'>1.5</span>,
<span class='kw'>groups</span> <span class='kw'>=</span> <span class='kw'>NULL</span>,
<span class='kw'>ellipse</span> <span class='kw'>=</span> <span class='fl'>FALSE</span>,
<span class='kw'>ellipse_prob</span> <span class='kw'>=</span> <span class='fl'>0.68</span>,
<span class='kw'>ellipse_size</span> <span class='kw'>=</span> <span class='fl'>0.5</span>,
<span class='kw'>ellipse_alpha</span> <span class='kw'>=</span> <span class='fl'>0.25</span>,
<span class='kw'>points_size</span> <span class='kw'>=</span> <span class='fl'>2</span>,
<span class='kw'>points_alpha</span> <span class='kw'>=</span> <span class='fl'>0.25</span>,
<span class='kw'>arrows</span> <span class='kw'>=</span> <span class='fl'>TRUE</span>,
<span class='kw'>arrows_colour</span> <span class='kw'>=</span> <span class='st'>"darkblue"</span>,
<span class='kw'>arrows_size</span> <span class='kw'>=</span> <span class='fl'>0.5</span>,
<span class='kw'>arrows_textsize</span> <span class='kw'>=</span> <span class='fl'>3</span>,
<span class='kw'>arrows_alpha</span> <span class='kw'>=</span> <span class='fl'>0.75</span>,
<span class='kw'>base_textsize</span> <span class='kw'>=</span> <span class='fl'>10</span>,
<span class='no'>...</span>
)</pre>
<h2 class="hasAnchor" id="arguments"><a class="anchor" href="#arguments"></a>Arguments</h2>
<table class="ref-arguments">
<colgroup><col class="name" /><col class="desc" /></colgroup>
<tr>
<th>x</th>
<td><p>an object returned by <code><a href='pca.html'>pca()</a></code>, <code><a href='https://rdrr.io/r/stats/prcomp.html'>prcomp()</a></code> or <code><a href='https://rdrr.io/r/stats/princomp.html'>princomp()</a></code></p></td>
</tr>
<tr>
<th>choices</th>
<td><p>length 2 vector specifying the components to plot. Only the default
is a biplot in the strict sense.</p></td>
</tr>
<tr>
<th>scale</th>
<td><p>The variables are scaled by <code>lambda ^ scale</code> and the
observations are scaled by <code>lambda ^ (1-scale)</code> where
<code>lambda</code> are the singular values as computed by
<code><a href='https://rdrr.io/r/stats/princomp.html'>princomp</a></code>. Normally <code>0 &lt;= scale &lt;= 1</code>, and a warning
will be issued if the specified <code>scale</code> is outside this range.</p></td>
</tr>
<tr>
<th>labels</th>
<td><p>an optional vector of labels for the observations. If set, the labels will be placed below their respective points. When using the <code><a href='pca.html'>pca()</a></code> function as input for <code>x</code>, this will be determined automatically based on the attribute <code>non_numeric_cols</code>, see <code><a href='pca.html'>pca()</a></code>.</p></td>
</tr>
<tr>
<th>labels_textsize</th>
<td><p>the size of the text used for the labels</p></td>
</tr>
<tr>
<th>labels_text_placement</th>
<td><p>adjustment factor for the placement of the variable names (<code>&gt;=1</code> means further away from the arrow head)</p></td>
</tr>
<tr>
<th>groups</th>
<td><p>an optional vector of groups for the labels, with the same length as <code>labels</code>. If set, the points and labels will be coloured according to these groups. When using the <code><a href='pca.html'>pca()</a></code> function as input for <code>x</code>, this will be determined automatically based on the attribute <code>non_numeric_cols</code>, see <code><a href='pca.html'>pca()</a></code>.</p></td>
</tr>
<tr>
<th>ellipse</th>
<td><p>a logical to indicate whether a normal data ellipse should be drawn for each group (set with <code>groups</code>)</p></td>
</tr>
<tr>
<th>ellipse_prob</th>
<td><p>statistical size of the ellipse in normal probability</p></td>
</tr>
<tr>
<th>ellipse_size</th>
<td><p>the size of the ellipse line</p></td>
</tr>
<tr>
<th>ellipse_alpha</th>
<td><p>the alpha (transparency) of the ellipse line</p></td>
</tr>
<tr>
<th>points_size</th>
<td><p>the size of the points</p></td>
</tr>
<tr>
<th>points_alpha</th>
<td><p>the alpha (transparency) of the points</p></td>
</tr>
<tr>
<th>arrows</th>
<td><p>a logical to indicate whether arrows should be drawn</p></td>
</tr>
<tr>
<th>arrows_colour</th>
<td><p>the colour of the arrows and their text</p></td>
</tr>
<tr>
<th>arrows_size</th>
<td><p>the size (thickness) of the arrow lines</p></td>
</tr>
<tr>
<th>arrows_textsize</th>
<td><p>the size of the text at the end of the arrows</p></td>
</tr>
<tr>
<th>arrows_alpha</th>
<td><p>the alpha (transparency) of the arrows and their text</p></td>
</tr>
<tr>
<th>base_textsize</th>
<td><p>the text size for all plot elements except the labels and arrows</p></td>
</tr>
<tr>
<th>...</th>
<td><p>Parameters passed on to functions</p></td>
</tr>
</table>
<h2 class="hasAnchor" id="source"><a class="anchor" href="#source"></a>Source</h2>
<p>The <code>ggplot_pca()</code> function is based on the <code>ggbiplot()</code> function from the <code>ggbiplot</code> package by Vince Vu, as found on GitHub: <a href='https://github.com/vqv/ggbiplot'>https://github.com/vqv/ggbiplot</a> (retrieved: 2 March 2020, their latest commit: <a href='https://github.com/vqv/ggbiplot/commit/7325e880485bea4c07465a0304c470608fffb5d9'><code>7325e88</code></a>; 12 February 2015).</p>
<p>As per their GPL-2 licence that demands documentation of code changes, the changes made based on the source code were:</p><ol>
<li><p>Rewritten code to remove the dependency on packages <code>plyr</code>, <code>scales</code> and <code>grid</code></p></li>
<li><p>Parametrised more options, like arrow and ellipse settings</p></li>
<li><p>Added total amount of explained variance as a caption in the plot</p></li>
<li><p>Cleaned all syntax based on the <code>lintr</code> package</p></li>
<li><p>Updated documentation</p></li>
</ol>
<h2 class="hasAnchor" id="details"><a class="anchor" href="#details"></a>Details</h2>
<p>The default colours for labels and points are set with <code>scale_colour_viridis_d()</code>, but these can be changed by adding another scale for colour, like <code>scale_colour_brewer()</code>.</p>
<h2 class="hasAnchor" id="maturing-lifecycle"><a class="anchor" href="#maturing-lifecycle"></a>Maturing lifecycle</h2>
<p><img src='figures/lifecycle_maturing.svg' style=margin-bottom:5px /> <br />
The <a href='lifecycle.html'>lifecycle</a> of this function is <strong>maturing</strong>. The underlying code of a maturing function has been roughed out, but finer details might still change. We will strive to maintain backward compatibility, but the function needs wider usage and more extensive testing in order to optimise the underlying code.</p>
<h2 class="hasAnchor" id="examples"><a class="anchor" href="#examples"></a>Examples</h2>
<pre class="examples"><span class='co'># `example_isolates` is a dataset available in the AMR package.</span>
<span class='co'># See ?example_isolates.</span>
<span class='co'># See ?pca for more info about Principal Component Analysis (PCA).</span>
<span class='fu'><a href='https://rdrr.io/r/base/library.html'>library</a></span>(<span class='no'>dplyr</span>)
<span class='no'>pca_model</span> <span class='kw'>&lt;-</span> <span class='no'>example_isolates</span> <span class='kw'>%&gt;%</span>
<span class='fu'><a href='https://dplyr.tidyverse.org/reference/filter.html'>filter</a></span>(<span class='fu'><a href='mo_property.html'>mo_genus</a></span>(<span class='no'>mo</span>) <span class='kw'>==</span> <span class='st'>"Staphylococcus"</span>) <span class='kw'>%&gt;%</span>
<span class='fu'><a href='https://dplyr.tidyverse.org/reference/group_by.html'>group_by</a></span>(<span class='kw'>species</span> <span class='kw'>=</span> <span class='fu'><a href='mo_property.html'>mo_shortname</a></span>(<span class='no'>mo</span>)) <span class='kw'>%&gt;%</span>
<span class='fu'><a href='https://dplyr.tidyverse.org/reference/summarise_all.html'>summarise_if</a></span> (<span class='no'>is.rsi</span>, <span class='no'>resistance</span>) <span class='kw'>%&gt;%</span>
<span class='fu'><a href='pca.html'>pca</a></span>(<span class='no'>FLC</span>, <span class='no'>AMC</span>, <span class='no'>CXM</span>, <span class='no'>GEN</span>, <span class='no'>TOB</span>, <span class='no'>TMP</span>, <span class='no'>SXT</span>, <span class='no'>CIP</span>, <span class='no'>TEC</span>, <span class='no'>TCY</span>, <span class='no'>ERY</span>)
<span class='co'># old</span>
<span class='fu'><a href='https://rdrr.io/r/stats/biplot.html'>biplot</a></span>(<span class='no'>pca_model</span>)
<span class='co'># new </span>
<span class='fu'>ggplot_pca</span>(<span class='no'>pca_model</span>)</pre>
</div>
<div class="col-md-3 hidden-xs hidden-sm" id="sidebar">
<h2>Contents</h2>
<ul class="nav nav-pills nav-stacked">
<li><a href="#arguments">Arguments</a></li>
<li><a href="#source">Source</a></li>
<li><a href="#details">Details</a></li>
<li><a href="#maturing-lifecycle">Maturing lifecycle</a></li>
<li><a href="#examples">Examples</a></li>
</ul>
</div>
</div>
<footer>
<div class="copyright">
<p>Developed by <a href='https://www.rug.nl/staff/m.s.berends/'>Matthijs S. Berends</a>, <a href='https://www.rug.nl/staff/c.f.luz/'>Christian F. Luz</a>, <a href='https://www.rug.nl/staff/a.w.friedrich/'>Alexander W. Friedrich</a>, <a href='https://www.rug.nl/staff/b.sinha/'>Bhanu N. M. Sinha</a>, <a href='https://www.rug.nl/staff/c.j.albers/'>Casper J. Albers</a>, <a href='https://www.rug.nl/staff/c.glasner/'>Corinna Glasner</a>.</p>
</div>
<div class="pkgdown">
<p>Site built with <a href="https://pkgdown.r-lib.org/">pkgdown</a> 1.4.1.</p>
</div>
</footer>
</div>
</body>
</html>

View File

@ -78,7 +78,7 @@
</button>
<span class="navbar-brand">
<a class="navbar-link" href="../index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.0.1</span>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.0.1.9000</span>
</span>
</div>
@ -114,6 +114,13 @@
Predict antimicrobial resistance
</a>
</li>
<li>
<a href="../articles/PCA.html">
<span class="fa fa-compress"></span>
Conduct principal component analysis for AMR
</a>
</li>
<li>
<a href="../articles/MDR.html">
<span class="fa fa-skull-crossbones"></span>
@ -287,12 +294,6 @@
<p><code><a href="read.4D.html">read.4D()</a></code> </p>
</td>
<td><p>Read data from 4D database</p></td>
</tr><tr>
<td>
<p><code><a href="rsi_translation.html">rsi_translation</a></code> </p>
</td>
<td><p>Data set for R/SI interpretation</p></td>
</tr>
</tbody><tbody>
<tr>
@ -371,6 +372,18 @@
</tr>
<tr>
<td>
<p><code><a href="proportion.html">resistance()</a></code> <code><a href="proportion.html">susceptibility()</a></code> <code><a href="proportion.html">proportion_R()</a></code> <code><a href="proportion.html">proportion_IR()</a></code> <code><a href="proportion.html">proportion_I()</a></code> <code><a href="proportion.html">proportion_SI()</a></code> <code><a href="proportion.html">proportion_S()</a></code> <code><a href="proportion.html">proportion_df()</a></code> <code><a href="proportion.html">rsi_df()</a></code> </p>
</td>
<td><p>Calculate microbial resistance</p></td>
</tr><tr>
<td>
<p><code><a href="count.html">count_resistant()</a></code> <code><a href="count.html">count_susceptible()</a></code> <code><a href="count.html">count_R()</a></code> <code><a href="count.html">count_IR()</a></code> <code><a href="count.html">count_I()</a></code> <code><a href="count.html">count_SI()</a></code> <code><a href="count.html">count_S()</a></code> <code><a href="count.html">count_all()</a></code> <code><a href="count.html">n_rsi()</a></code> <code><a href="count.html">count_df()</a></code> </p>
</td>
<td><p>Count available isolates</p></td>
</tr><tr>
<td>
<p><code><a href="availability.html">availability()</a></code> </p>
</td>
@ -384,9 +397,15 @@
</tr><tr>
<td>
<p><code><a href="count.html">count_resistant()</a></code> <code><a href="count.html">count_susceptible()</a></code> <code><a href="count.html">count_R()</a></code> <code><a href="count.html">count_IR()</a></code> <code><a href="count.html">count_I()</a></code> <code><a href="count.html">count_SI()</a></code> <code><a href="count.html">count_S()</a></code> <code><a href="count.html">count_all()</a></code> <code><a href="count.html">n_rsi()</a></code> <code><a href="count.html">count_df()</a></code> </p>
<p><code><a href="resistance_predict.html">resistance_predict()</a></code> <code><a href="resistance_predict.html">rsi_predict()</a></code> <code><a href="resistance_predict.html">plot(<i>&lt;resistance_predict&gt;</i>)</a></code> <code><a href="resistance_predict.html">ggplot_rsi_predict()</a></code> </p>
</td>
<td><p>Count available isolates</p></td>
<td><p>Predict antimicrobial resistance</p></td>
</tr><tr>
<td>
<p><code><a href="pca.html">prcomp(<i>&lt;data.frame&gt;</i>)</a></code> <code><a href="pca.html">pca()</a></code> </p>
</td>
<td><p>Principal Component Analysis (for AMR)</p></td>
</tr><tr>
<td>
@ -407,24 +426,18 @@
<td><p>AMR plots with <code>ggplot2</code></p></td>
</tr><tr>
<td>
<p><code><a href="ggplot_pca.html">ggplot_pca()</a></code> </p>
</td>
<td><p>PCA biplot with <code>ggplot2</code></p></td>
</tr><tr>
<td>
<p><code><a href="kurtosis.html">kurtosis()</a></code> </p>
</td>
<td><p>Kurtosis of the sample</p></td>
</tr><tr>
<td>
<p><code><a href="proportion.html">resistance()</a></code> <code><a href="proportion.html">susceptibility()</a></code> <code><a href="proportion.html">proportion_R()</a></code> <code><a href="proportion.html">proportion_IR()</a></code> <code><a href="proportion.html">proportion_I()</a></code> <code><a href="proportion.html">proportion_SI()</a></code> <code><a href="proportion.html">proportion_S()</a></code> <code><a href="proportion.html">proportion_df()</a></code> <code><a href="proportion.html">rsi_df()</a></code> </p>
</td>
<td><p>Calculate microbial resistance</p></td>
</tr><tr>
<td>
<p><code><a href="resistance_predict.html">resistance_predict()</a></code> <code><a href="resistance_predict.html">rsi_predict()</a></code> <code><a href="resistance_predict.html">plot(<i>&lt;resistance_predict&gt;</i>)</a></code> <code><a href="resistance_predict.html">ggplot_rsi_predict()</a></code> </p>
</td>
<td><p>Predict antimicrobial resistance</p></td>
</tr><tr>
<td>
<p><code><a href="skewness.html">skewness()</a></code> </p>
</td>
@ -439,6 +452,12 @@
</tr>
<tr>
<td>
<p><code><a href="microorganisms.html">microorganisms</a></code> </p>
</td>
<td><p>Data set with ~70,000 microorganisms</p></td>
</tr><tr>
<td>
<p><code><a href="antibiotics.html">antibiotics</a></code> <code><a href="antibiotics.html">antivirals</a></code> </p>
</td>
@ -457,6 +476,12 @@
<td><p>Data set with unclean data</p></td>
</tr><tr>
<td>
<p><code><a href="rsi_translation.html">rsi_translation</a></code> </p>
</td>
<td><p>Data set for R/SI interpretation</p></td>
</tr><tr>
<td>
<p><code><a href="microorganisms.codes.html">microorganisms.codes</a></code> </p>
</td>
@ -469,12 +494,6 @@
<td><p>Data set with previously accepted taxonomic names</p></td>
</tr><tr>
<td>
<p><code><a href="microorganisms.html">microorganisms</a></code> </p>
</td>
<td><p>Data set with ~70,000 microorganisms</p></td>
</tr><tr>
<td>
<p><code><a href="WHONET.html">WHONET</a></code> </p>
</td>

View File

@ -81,7 +81,7 @@ This page contains a section for every lifecycle (with text borrowed from the af
</button>
<span class="navbar-brand">
<a class="navbar-link" href="../index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.0.1</span>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.0.1.9000</span>
</span>
</div>
@ -117,6 +117,13 @@ This page contains a section for every lifecycle (with text borrowed from the af
Predict antimicrobial resistance
</a>
</li>
<li>
<a href="../articles/PCA.html">
<span class="fa fa-compress-alt"></span>
Conduct Principal Component Analysis for AMR
</a>
</li>
<li>
<a href="../articles/MDR.html">
<span class="fa fa-skull-crossbones"></span>
@ -236,7 +243,7 @@ This page contains a section for every lifecycle (with text borrowed from the af
<p><img src='figures/lifecycle_experimental.svg' style=margin-bottom:5px /> <br />
The lifecycle of this function is <strong>experimental</strong>. An experimental function is in the very early stages of development. The underlying code might be changing frequently as we rapidly iterate and explore variations in search of the best fit. Experimental functions might be removed without deprecation, so you are generally best off waiting until a function is more mature before you use it in production code. Experimental functions will not be included in releases we submit to CRAN.</p>
The lifecycle of this function is <strong>experimental</strong>. An experimental function is in the very early stages of development. The underlying code might be changing frequently as we rapidly iterate and explore variations in search of the best fit. Experimental functions might be removed without deprecation, so you are generally best off waiting until a function is more mature before you use it in production code. Experimental functions will not be included in releases we submit to CRAN, since they have not yet matured enough.</p>
<h2 class="hasAnchor" id="maturing-lifecycle"><a class="anchor" href="#maturing-lifecycle"></a>Maturing lifecycle</h2>

362
docs/reference/pca.html Normal file
View File

@ -0,0 +1,362 @@
<!-- Generated by pkgdown: do not edit by hand -->
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Principal Component Analysis (for AMR) — prcomp.data.frame • AMR (for R)</title>
<!-- favicons -->
<link rel="icon" type="image/png" sizes="16x16" href="../favicon-16x16.png">
<link rel="icon" type="image/png" sizes="32x32" href="../favicon-32x32.png">
<link rel="apple-touch-icon" type="image/png" sizes="180x180" href="../apple-touch-icon.png" />
<link rel="apple-touch-icon" type="image/png" sizes="120x120" href="../apple-touch-icon-120x120.png" />
<link rel="apple-touch-icon" type="image/png" sizes="76x76" href="../apple-touch-icon-76x76.png" />
<link rel="apple-touch-icon" type="image/png" sizes="60x60" href="../apple-touch-icon-60x60.png" />
<!-- jquery -->
<script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.3.1/jquery.min.js" integrity="sha256-FgpCb/KJQlLNfOu91ta32o/NMZxltwRo8QtmkMRdAu8=" crossorigin="anonymous"></script>
<!-- Bootstrap -->
<link href="https://cdnjs.cloudflare.com/ajax/libs/bootswatch/3.3.7/flatly/bootstrap.min.css" rel="stylesheet" crossorigin="anonymous" />
<script src="https://cdnjs.cloudflare.com/ajax/libs/twitter-bootstrap/3.3.7/js/bootstrap.min.js" integrity="sha256-U5ZEeKfGNOja007MMD3YBI0A3OSZOQbeG6z2f2Y0hu8=" crossorigin="anonymous"></script>
<!-- Font Awesome icons -->
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.7.1/css/all.min.css" integrity="sha256-nAmazAk6vS34Xqo0BSrTb+abbtFlgsFK7NKSi6o7Y78=" crossorigin="anonymous" />
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.7.1/css/v4-shims.min.css" integrity="sha256-6qHlizsOWFskGlwVOKuns+D1nB6ssZrHQrNj1wGplHc=" crossorigin="anonymous" />
<!-- clipboard.js -->
<script src="https://cdnjs.cloudflare.com/ajax/libs/clipboard.js/2.0.4/clipboard.min.js" integrity="sha256-FiZwavyI2V6+EXO1U+xzLG3IKldpiTFf3153ea9zikQ=" crossorigin="anonymous"></script>
<!-- headroom.js -->
<script src="https://cdnjs.cloudflare.com/ajax/libs/headroom/0.9.4/headroom.min.js" integrity="sha256-DJFC1kqIhelURkuza0AvYal5RxMtpzLjFhsnVIeuk+U=" crossorigin="anonymous"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/headroom/0.9.4/jQuery.headroom.min.js" integrity="sha256-ZX/yNShbjqsohH1k95liqY9Gd8uOiE1S4vZc+9KQ1K4=" crossorigin="anonymous"></script>
<!-- pkgdown -->
<link href="../pkgdown.css" rel="stylesheet">
<script src="../pkgdown.js"></script>
<link href="../extra.css" rel="stylesheet">
<script src="../extra.js"></script>
<meta property="og:title" content="Principal Component Analysis (for AMR) — prcomp.data.frame" />
<meta property="og:description" content="Performs a principal component analysis (PCA) based on a data set, automatically determining the groups and labels for plotting afterwards." />
<meta property="og:image" content="https://msberends.gitlab.io/AMR/logo.png" />
<meta name="twitter:card" content="summary" />
<!-- mathjax -->
<script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/MathJax.js" integrity="sha256-nvJJv9wWKEm88qvoQl9ekL2J+k/RWIsaSScxxlsrv8k=" crossorigin="anonymous"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/config/TeX-AMS-MML_HTMLorMML.js" integrity="sha256-84DKXVJXs0/F8OTMzX4UR909+jtl4G7SPypPavF+GfA=" crossorigin="anonymous"></script>
<!--[if lt IE 9]>
<script src="https://oss.maxcdn.com/html5shiv/3.7.3/html5shiv.min.js"></script>
<script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
<![endif]-->
</head>
<body>
<div class="container template-reference-topic">
<header>
<div class="navbar navbar-default navbar-fixed-top" role="navigation">
<div class="container">
<div class="navbar-header">
<button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#navbar" aria-expanded="false">
<span class="sr-only">Toggle navigation</span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
</button>
<span class="navbar-brand">
<a class="navbar-link" href="../index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.0.1.9000</span>
</span>
</div>
<div id="navbar" class="navbar-collapse collapse">
<ul class="nav navbar-nav">
<li>
<a href="../index.html">
<span class="fa fa-home"></span>
Home
</a>
</li>
<li class="dropdown">
<a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-expanded="false">
<span class="fa fa-question-circle"></span>
How to
<span class="caret"></span>
</a>
<ul class="dropdown-menu" role="menu">
<li>
<a href="../articles/AMR.html">
<span class="fa fa-directions"></span>
Conduct AMR analysis
</a>
</li>
<li>
<a href="../articles/resistance_predict.html">
<span class="fa fa-dice"></span>
Predict antimicrobial resistance
</a>
</li>
<li>
<a href="../articles/PCA.html">
<span class="fa fa-compress"></span>
Conduct principal component analysis for AMR
</a>
</li>
<li>
<a href="../articles/MDR.html">
<span class="fa fa-skull-crossbones"></span>
Determine multi-drug resistance (MDR)
</a>
</li>
<li>
<a href="../articles/WHONET.html">
<span class="fa fa-globe-americas"></span>
Work with WHONET data
</a>
</li>
<li>
<a href="../articles/SPSS.html">
<span class="fa fa-file-upload"></span>
Import data from SPSS/SAS/Stata
</a>
</li>
<li>
<a href="../articles/EUCAST.html">
<span class="fa fa-exchange-alt"></span>
Apply EUCAST rules
</a>
</li>
<li>
<a href="../reference/mo_property.html">
<span class="fa fa-bug"></span>
Get properties of a microorganism
</a>
</li>
<li>
<a href="../reference/ab_property.html">
<span class="fa fa-capsules"></span>
Get properties of an antibiotic
</a>
</li>
<li>
<a href="../articles/benchmarks.html">
<span class="fa fa-shipping-fast"></span>
Other: benchmarks
</a>
</li>
</ul>
</li>
<li>
<a href="../reference/">
<span class="fa fa-book-open"></span>
Manual
</a>
</li>
<li>
<a href="../authors.html">
<span class="fa fa-users"></span>
Authors
</a>
</li>
<li>
<a href="../news/">
<span class="far fa far fa-newspaper"></span>
Changelog
</a>
</li>
</ul>
<ul class="nav navbar-nav navbar-right">
<li>
<a href="https://gitlab.com/msberends/AMR">
<span class="fab fa fab fa-gitlab"></span>
Source Code
</a>
</li>
<li>
<a href="../LICENSE-text.html">
<span class="fa fa-book"></span>
Licence
</a>
</li>
</ul>
</div><!--/.nav-collapse -->
</div><!--/.container -->
</div><!--/.navbar -->
</header>
<div class="row">
<div class="col-md-9 contents">
<div class="page-header">
<h1>Principal Component Analysis (for AMR)</h1>
<div class="hidden name"><code>pca.Rd</code></div>
</div>
<div class="ref-description">
<p>Performs a principal component analysis (PCA) based on a data set, automatically determining the groups and labels for plotting afterwards.</p>
</div>
<pre class="usage"><span class='co'># S3 method for data.frame</span>
<span class='fu'><a href='https://rdrr.io/r/stats/prcomp.html'>prcomp</a></span>(
<span class='no'>x</span>,
<span class='no'>...</span>,
<span class='kw'>retx</span> <span class='kw'>=</span> <span class='fl'>TRUE</span>,
<span class='kw'>center</span> <span class='kw'>=</span> <span class='fl'>TRUE</span>,
<span class='kw'>scale.</span> <span class='kw'>=</span> <span class='fl'>TRUE</span>,
<span class='kw'>tol</span> <span class='kw'>=</span> <span class='kw'>NULL</span>,
<span class='kw'>rank.</span> <span class='kw'>=</span> <span class='kw'>NULL</span>
)
<span class='fu'>pca</span>(<span class='no'>x</span>, <span class='no'>...</span>)</pre>
<h2 class="hasAnchor" id="arguments"><a class="anchor" href="#arguments"></a>Arguments</h2>
<table class="ref-arguments">
<colgroup><col class="name" /><col class="desc" /></colgroup>
<tr>
<th>x</th>
<td><p>a <a href='https://rdrr.io/r/base/data.frame.html'>data.frame</a> containing numeric columns</p></td>
</tr>
<tr>
<th>...</th>
<td><p>columns of <code>x</code> to be selected for PCA</p></td>
</tr>
<tr>
<th>retx</th>
<td><p>a logical value indicating whether the rotated variables
should be returned.</p></td>
</tr>
<tr>
<th>center</th>
<td><p>a logical value indicating whether the variables
should be shifted to be zero centered. Alternately, a vector of
length equal the number of columns of <code>x</code> can be supplied.
The value is passed to <code>scale</code>.</p></td>
</tr>
<tr>
<th>scale.</th>
<td><p>a logical value indicating whether the variables should
be scaled to have unit variance before the analysis takes
place. The default here is <code>TRUE</code> (unlike <code>stats::prcomp()</code>, where it is <code>FALSE</code> for consistency with S), since
in general scaling is advisable. Alternatively, a vector of length
equal the number of columns of <code>x</code> can be supplied. The
value is passed to <code><a href='https://rdrr.io/r/base/scale.html'>scale</a></code>.</p></td>
</tr>
<tr>
<th>tol</th>
<td><p>a value indicating the magnitude below which components
should be omitted. (Components are omitted if their
standard deviations are less than or equal to <code>tol</code> times the
standard deviation of the first component.) With the default null
setting, no components are omitted (unless <code>rank.</code> is specified
less than <code><a href='https://rdrr.io/r/base/Extremes.html'>min(dim(x))</a></code>.). Other settings for tol could be
<code>tol = 0</code> or <code>tol = sqrt(.Machine$double.eps)</code>, which
would omit essentially constant components.</p></td>
</tr>
<tr>
<th>rank.</th>
<td><p>optionally, a number specifying the maximal rank, i.e.,
maximal number of principal components to be used. Can be set as
alternative or in addition to <code>tol</code>, useful notably when the
desired rank is considerably smaller than the dimensions of the matrix.</p></td>
</tr>
</table>
<h2 class="hasAnchor" id="details"><a class="anchor" href="#details"></a>Details</h2>
<p>The <code>pca()</code> function takes a <a href='https://rdrr.io/r/base/data.frame.html'>data.frame</a> as input and performs the actual PCA with the R function <code><a href='https://rdrr.io/r/stats/prcomp.html'>prcomp()</a></code>.</p>
<p>The result of the <code>pca()</code> function is a <code><a href='https://rdrr.io/r/stats/prcomp.html'>prcomp</a></code> object, with an additional attribute <code>non_numeric_cols</code> which is a vector with the column names of all columns that do not contain numeric values. These are probably the groups and labels, and will be used by <code><a href='ggplot_pca.html'>ggplot_pca()</a></code>.</p>
<h2 class="hasAnchor" id="experimental-lifecycle"><a class="anchor" href="#experimental-lifecycle"></a>Experimental lifecycle</h2>
<p><img src='figures/lifecycle_experimental.svg' style=margin-bottom:5px /> <br />
The <a href='lifecycle.html'>lifecycle</a> of this function is <strong>experimental</strong>. An experimental function is in the very early stages of development. The underlying code might be changing frequently as we rapidly iterate and explore variations in search of the best fit. Experimental functions might be removed without deprecation, so you are generally best off waiting until a function is more mature before you use it in production code. Experimental functions will not be included in releases we submit to CRAN, since they have not yet matured enough.</p>
<h2 class="hasAnchor" id="examples"><a class="anchor" href="#examples"></a>Examples</h2>
<pre class="examples"><span class='co'># `example_isolates` is a dataset available in the AMR package.</span>
<span class='co'># See ?example_isolates.</span>
<span class='co'># calculate the resistance per group first</span>
<span class='fu'><a href='https://rdrr.io/r/base/library.html'>library</a></span>(<span class='no'>dplyr</span>)
<span class='no'>resistance_data</span> <span class='kw'>&lt;-</span> <span class='no'>example_isolates</span> <span class='kw'>%&gt;%</span>
<span class='fu'><a href='https://dplyr.tidyverse.org/reference/group_by.html'>group_by</a></span>(<span class='kw'>order</span> <span class='kw'>=</span> <span class='fu'><a href='mo_property.html'>mo_order</a></span>(<span class='no'>mo</span>), <span class='co'># group on anything, like order</span>
<span class='kw'>genus</span> <span class='kw'>=</span> <span class='fu'><a href='mo_property.html'>mo_genus</a></span>(<span class='no'>mo</span>)) <span class='kw'>%&gt;%</span> <span class='co'># and genus as we do here</span>
<span class='fu'><a href='https://dplyr.tidyverse.org/reference/summarise_all.html'>summarise_if</a></span>(<span class='no'>is.rsi</span>, <span class='no'>resistance</span>) <span class='co'># then get resistance of all drugs</span>
<span class='co'># now conduct PCA for certain antimicrobial agents</span>
<span class='no'>pca_result</span> <span class='kw'>&lt;-</span> <span class='no'>resistance_data</span> <span class='kw'>%&gt;%</span>
<span class='fu'>pca</span>(<span class='no'>AMC</span>, <span class='no'>CXM</span>, <span class='no'>CTX</span>, <span class='no'>CAZ</span>, <span class='no'>GEN</span>, <span class='no'>TOB</span>, <span class='no'>TMP</span>, <span class='no'>SXT</span>)
<span class='no'>pca_result</span>
<span class='fu'><a href='https://rdrr.io/r/base/summary.html'>summary</a></span>(<span class='no'>pca_result</span>)
<span class='fu'><a href='https://rdrr.io/r/stats/biplot.html'>biplot</a></span>(<span class='no'>pca_result</span>)
<span class='fu'><a href='ggplot_pca.html'>ggplot_pca</a></span>(<span class='no'>pca_result</span>) <span class='co'># a new and convenient plot function</span></pre>
</div>
<div class="col-md-3 hidden-xs hidden-sm" id="sidebar">
<h2>Contents</h2>
<ul class="nav nav-pills nav-stacked">
<li><a href="#arguments">Arguments</a></li>
<li><a href="#details">Details</a></li>
<li><a href="#experimental-lifecycle">Experimental lifecycle</a></li>
<li><a href="#examples">Examples</a></li>
</ul>
</div>
</div>
<footer>
<div class="copyright">
<p>Developed by <a href='https://www.rug.nl/staff/m.s.berends/'>Matthijs S. Berends</a>, <a href='https://www.rug.nl/staff/c.f.luz/'>Christian F. Luz</a>, <a href='https://www.rug.nl/staff/a.w.friedrich/'>Alexander W. Friedrich</a>, <a href='https://www.rug.nl/staff/b.sinha/'>Bhanu N. M. Sinha</a>, <a href='https://www.rug.nl/staff/c.j.albers/'>Casper J. Albers</a>, <a href='https://www.rug.nl/staff/c.glasner/'>Corinna Glasner</a>.</p>
</div>
<div class="pkgdown">
<p>Site built with <a href="https://pkgdown.r-lib.org/">pkgdown</a> 1.4.1.</p>
</div>
</footer>
</div>
</body>
</html>

View File

@ -81,6 +81,9 @@
<url>
<loc>https://msberends.gitlab.io/AMR/reference/g.test.html</loc>
</url>
<url>
<loc>https://msberends.gitlab.io/AMR/reference/ggplot_pca.html</loc>
</url>
<url>
<loc>https://msberends.gitlab.io/AMR/reference/ggplot_rsi.html</loc>
</url>
@ -123,6 +126,9 @@
<url>
<loc>https://msberends.gitlab.io/AMR/reference/p_symbol.html</loc>
</url>
<url>
<loc>https://msberends.gitlab.io/AMR/reference/pca.html</loc>
</url>
<url>
<loc>https://msberends.gitlab.io/AMR/reference/proportion.html</loc>
</url>
@ -153,6 +159,9 @@
<url>
<loc>https://msberends.gitlab.io/AMR/articles/MDR.html</loc>
</url>
<url>
<loc>https://msberends.gitlab.io/AMR/articles/PCA.html</loc>
</url>
<url>
<loc>https://msberends.gitlab.io/AMR/articles/SPSS.html</loc>
</url>

View File

@ -16,7 +16,7 @@ age(x, reference = Sys.Date(), exact = FALSE, na.rm = FALSE)
\item{na.rm}{a logical to indicate whether missing values should be removed}
}
\value{
An integer (no decimals) if \code{exact = FALSE}, a double (with decimals) otherwise
An \link{integer} (no decimals) if \code{exact = FALSE}, a \link{double} (with decimals) otherwise
}
\description{
Calculates age in years based on a reference date, which is the system date by default.

View File

@ -11,16 +11,16 @@ age_groups(x, split_at = c(12, 25, 55, 75), na.rm = FALSE)
\item{split_at}{values to split \code{x} at, defaults to age groups 0-11, 12-24, 25-54, 55-74 and 75+. See Details.}
\item{na.rm}{a logical to indicate whether missing values should be removed}
\item{na.rm}{a \link{logical} to indicate whether missing values should be removed}
}
\value{
Ordered \code{\link{factor}}
Ordered \link{factor}
}
\description{
Split ages into age groups defined by the \code{split_at} parameter. This allows for easier demographic (antimicrobial resistance) analysis.
}
\details{
To split ages, the input can be:
To split ages, the input for the \code{split_at} parameter can be:
\itemize{
\item A numeric vector. A vector of e.g. \code{c(10, 20)} will split on 0-9, 10-19 and 20+. A value of only \code{50} will split on 0-49 and 50+.
The default is to split on young children (0-11), youth (12-24), young adults (25-54), middle-aged adults (55-74) and elderly (75+).
@ -29,7 +29,7 @@ The default is to split on young children (0-11), youth (12-24), young adults (2
\item \code{"children"} or \code{"kids"}, equivalent of: \code{c(0, 1, 2, 4, 6, 13, 18)}. This will split on 0, 1, 2-3, 4-5, 6-12, 13-17 and 18+.
\item \code{"elderly"} or \code{"seniors"}, equivalent of: \code{c(65, 75, 85)}. This will split on 0-64, 65-74, 75-84, 85+.
\item \code{"fives"}, equivalent of: \code{1:20 * 5}. This will split on 0-4, 5-9, 10-14, ..., 90-94, 95-99, 100+.
\item \code{"tens"}, equivalent of: \code{1:10 * 10}. This will split on 0-9, 10-19, 20-29, ... 80-89, 90-99, 100+.
\item \code{"tens"}, equivalent of: \code{1:10 * 10}. This will split on 0-9, 10-19, 20-29, ..., 80-89, 90-99, 100+.
}
}
}

119
man/ggplot_pca.Rd Normal file
View File

@ -0,0 +1,119 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/ggplot_pca.R
\name{ggplot_pca}
\alias{ggplot_pca}
\title{PCA biplot with \code{ggplot2}}
\source{
The \code{\link[=ggplot_pca]{ggplot_pca()}} function is based on the \code{\link[=ggbiplot]{ggbiplot()}} function from the \code{ggbiplot} package by Vince Vu, as found on GitHub: \url{https://github.com/vqv/ggbiplot} (retrieved: 2 March 2020, their latest commit: \href{https://github.com/vqv/ggbiplot/commit/7325e880485bea4c07465a0304c470608fffb5d9}{\code{7325e88}}; 12 February 2015).
As per their GPL-2 licence that demands documentation of code changes, the changes made based on the source code were:
\enumerate{
\item Rewritten code to remove the dependency on packages \code{plyr}, \code{scales} and \code{grid}
\item Parametrised more options, like arrow and ellipse settings
\item Added total amount of explained variance as a caption in the plot
\item Cleaned all syntax based on the \code{lintr} package
\item Updated documentation
}
}
\usage{
ggplot_pca(
x,
choices = 1:2,
scale = TRUE,
labels = NULL,
labels_textsize = 3,
labels_text_placement = 1.5,
groups = NULL,
ellipse = FALSE,
ellipse_prob = 0.68,
ellipse_size = 0.5,
ellipse_alpha = 0.25,
points_size = 2,
points_alpha = 0.25,
arrows = TRUE,
arrows_colour = "darkblue",
arrows_size = 0.5,
arrows_textsize = 3,
arrows_alpha = 0.75,
base_textsize = 10,
...
)
}
\arguments{
\item{x}{an object returned by \code{\link[=pca]{pca()}}, \code{\link[=prcomp]{prcomp()}} or \code{\link[=princomp]{princomp()}}}
\item{choices}{
length 2 vector specifying the components to plot. Only the default
is a biplot in the strict sense.
}
\item{scale}{
The variables are scaled by \code{lambda ^ scale} and the
observations are scaled by \code{lambda ^ (1-scale)} where
\code{lambda} are the singular values as computed by
\code{\link[stats]{princomp}}. Normally \code{0 <= scale <= 1}, and a warning
will be issued if the specified \code{scale} is outside this range.
}
\item{labels}{an optional vector of labels for the observations. If set, the labels will be placed below their respective points. When using the \code{\link[=pca]{pca()}} function as input for \code{x}, this will be determined automatically based on the attribute \code{non_numeric_cols}, see \code{\link[=pca]{pca()}}.}
\item{labels_textsize}{the size of the text used for the labels}
\item{labels_text_placement}{adjustment factor for the placement of the variable names (\verb{>=1} means further away from the arrow head)}
\item{groups}{an optional vector of groups for the labels, with the same length as \code{labels}. If set, the points and labels will be coloured according to these groups. When using the \code{\link[=pca]{pca()}} function as input for \code{x}, this will be determined automatically based on the attribute \code{non_numeric_cols}, see \code{\link[=pca]{pca()}}.}
\item{ellipse}{a logical to indicate whether a normal data ellipse should be drawn for each group (set with \code{groups})}
\item{ellipse_prob}{statistical size of the ellipse in normal probability}
\item{ellipse_size}{the size of the ellipse line}
\item{ellipse_alpha}{the alpha (transparency) of the ellipse line}
\item{points_size}{the size of the points}
\item{points_alpha}{the alpha (transparency) of the points}
\item{arrows}{a logical to indicate whether arrows should be drawn}
\item{arrows_colour}{the colour of the arrow and their text}
\item{arrows_size}{the size (thickness) of the arrow lines}
\item{arrows_textsize}{the size of the text at the end of the arrows}
\item{arrows_alpha}{the alpha (transparency) of the arrows and their text}
\item{base_textsize}{the text size for all plot elements except the labels and arrows}
\item{...}{Parameters passed on to functions}
}
\description{
Produces a \code{ggplot2} variant of a so-called \href{https://en.wikipedia.org/wiki/Biplot}{biplot} for PCA (principal component analysis), which is more flexible and more appealing than the base \R \code{\link[=biplot]{biplot()}} function.
}
\details{
The default colours for labels and points are set with \code{\link[=scale_colour_viridis_d]{scale_colour_viridis_d()}}, but these can be changed by adding another scale for colour, like \code{\link[=scale_colour_brewer]{scale_colour_brewer()}}.
}
\section{Maturing lifecycle}{
\if{html}{\figure{lifecycle_maturing.svg}{options: style=margin-bottom:5px} \cr}
The \link[AMR:lifecycle]{lifecycle} of this function is \strong{maturing}. The underlying code of a maturing function has been roughed out, but finer details might still change. We will strive to maintain backward compatibility, but the function needs wider usage and more extensive testing in order to optimise the underlying code.
}
\examples{
# `example_isolates` is a dataset available in the AMR package.
# See ?example_isolates.
# See ?pca for more info about Principal Component Analysis (PCA).
library(dplyr)
pca_model <- example_isolates \%>\%
filter(mo_genus(mo) == "Staphylococcus") \%>\%
group_by(species = mo_shortname(mo)) \%>\%
summarise_if (is.rsi, resistance) \%>\%
pca(FLC, AMC, CXM, GEN, TOB, TMP, SXT, CIP, TEC, TCY, ERY)
# old
biplot(pca_model)
# new
ggplot_pca(pca_model)
}

View File

@ -12,7 +12,7 @@ This page contains a section for every lifecycle (with text borrowed from the af
\section{Experimental lifecycle}{
\if{html}{\figure{lifecycle_experimental.svg}{options: style=margin-bottom:5px} \cr}
The \link[AMR:lifecycle]{lifecycle} of this function is \strong{experimental}. An experimental function is in the very early stages of development. The unlying code might be changing frequently as we rapidly iterate and explore variations in search of the best fit. Experimental functions might be removed without deprecation, so you are generally best off waiting until a function is more mature before you use it in production code. Experimental functions will not be included in releases we submit to CRAN.
The \link[AMR:lifecycle]{lifecycle} of this function is \strong{experimental}. An experimental function is in the very early stages of development. The underlying code might be changing frequently as we rapidly iterate and explore variations in search of the best fit. Experimental functions might be removed without deprecation, so you are generally best off waiting until a function is more mature before you use it in production code. Experimental functions will not be included in releases we submit to CRAN, since they have not yet matured enough.
}
\section{Maturing lifecycle}{

87
man/pca.Rd Normal file
View File

@ -0,0 +1,87 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/pca.R
\name{prcomp.data.frame}
\alias{prcomp.data.frame}
\alias{pca}
\title{Principal Component Analysis (for AMR)}
\usage{
\method{prcomp}{data.frame}(
x,
...,
retx = TRUE,
center = TRUE,
scale. = TRUE,
tol = NULL,
rank. = NULL
)
pca(x, ...)
}
\arguments{
\item{x}{a \link{data.frame} containing numeric columns}
\item{...}{columns of \code{x} to be selected for PCA}
\item{retx}{a logical value indicating whether the rotated variables
should be returned.}
\item{center}{a logical value indicating whether the variables
should be shifted to be zero centered. Alternately, a vector of
length equal the number of columns of \code{x} can be supplied.
The value is passed to \code{scale}.}
\item{scale.}{a logical value indicating whether the variables should
be scaled to have unit variance before the analysis takes
place. The default here is \code{TRUE} (unlike \code{stats::prcomp()}, where it is \code{FALSE} for consistency with S), since
in general scaling is advisable. Alternatively, a vector of length
equal the number of columns of \code{x} can be supplied. The
value is passed to \code{\link{scale}}.}
\item{tol}{a value indicating the magnitude below which components
should be omitted. (Components are omitted if their
standard deviations are less than or equal to \code{tol} times the
standard deviation of the first component.) With the default null
setting, no components are omitted (unless \code{rank.} is specified
less than \code{min(dim(x))}.). Other settings for tol could be
\code{tol = 0} or \code{tol = sqrt(.Machine$double.eps)}, which
would omit essentially constant components.}
\item{rank.}{optionally, a number specifying the maximal rank, i.e.,
maximal number of principal components to be used. Can be set as
alternative or in addition to \code{tol}, useful notably when the
desired rank is considerably smaller than the dimensions of the matrix.}
}
\description{
Performs a principal component analysis (PCA) based on a data set, and automatically determines the groups and labels for plotting afterwards.
}
\details{
The \code{\link[=pca]{pca()}} function takes a \link{data.frame} as input and performs the actual PCA with the R function \code{\link[=prcomp]{prcomp()}}.
The result of the \code{\link[=pca]{pca()}} function is a \code{\link{prcomp}} object, with an additional attribute \code{non_numeric_cols} which is a vector with the column names of all columns that do not contain numeric values. These are probably the groups and labels, and will be used by \code{\link[=ggplot_pca]{ggplot_pca()}}.
}
\section{Experimental lifecycle}{
\if{html}{\figure{lifecycle_experimental.svg}{options: style=margin-bottom:5px} \cr}
The \link[AMR:lifecycle]{lifecycle} of this function is \strong{experimental}. An experimental function is in the very early stages of development. The underlying code might be changing frequently as we rapidly iterate and explore variations in search of the best fit. Experimental functions might be removed without deprecation, so you are generally best off waiting until a function is more mature before you use it in production code. Experimental functions will not be included in releases we submit to CRAN, since they have not yet matured enough.
}
\examples{
# `example_isolates` is a dataset available in the AMR package.
# See ?example_isolates.
# calculate the resistance per group first
library(dplyr)
resistance_data <- example_isolates \%>\%
group_by(order = mo_order(mo), # group on anything, like order
genus = mo_genus(mo)) \%>\% # and genus as we do here
summarise_if(is.rsi, resistance) # then get resistance of all drugs
# now conduct PCA for certain antimicrobial agents
pca_result <- resistance_data \%>\%
pca(AMC, CXM, CTX, CAZ, GEN, TOB, TMP, SXT)
pca_result
summary(pca_result)
biplot(pca_result)
ggplot_pca(pca_result) # a new and convenient plot function
}

View File

@ -71,8 +71,8 @@ data
knitr::kable(data, align = "lccccccc")
```
```{r, eval = FALSE}
eucast_rules(data, info = FALSE)
eucast_rules(data)
```
```{r, echo = FALSE, message = FALSE}
knitr::kable(eucast_rules(data, info = FALSE), align = "lccccccc")
knitr::kable(eucast_rules(data), align = "lccccccc")
```

91
vignettes/PCA.Rmd Executable file
View File

@ -0,0 +1,91 @@
---
title: "How to conduct principal component analysis (PCA) for AMR"
author: "Matthijs S. Berends"
date: '`r format(Sys.Date(), "%d %B %Y")`'
output:
rmarkdown::html_vignette:
toc: true
toc_depth: 3
vignette: >
%\VignetteIndexEntry{How to conduct principal component analysis (PCA) for AMR}
%\VignetteEncoding{UTF-8}
%\VignetteEngine{knitr::rmarkdown}
editor_options:
chunk_output_type: console
---
```{r setup, include = FALSE, results = 'markup'}
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#",
fig.width = 7.5,
fig.height = 4.5,
dpi = 100
)
```
**NOTE: This page will be updated soon, as the pca() function is currently being developed.**
# Introduction
# Transforming
For PCA, we need to transform our AMR data first. This is what the `example_isolates` data set in this package looks like:
```{r, message = FALSE}
library(AMR)
library(dplyr)
glimpse(example_isolates)
```
Now to transform this to a data set with only resistance percentages per taxonomic order and genus:
```{r, warning = FALSE}
resistance_data <- example_isolates %>%
group_by(order = mo_order(mo), # group on anything, like order
genus = mo_genus(mo)) %>% # and genus as we do here
summarise_if(is.rsi, resistance) %>% # then get resistance of all drugs
select(order, genus, AMC, CXM, CTX,
CAZ, GEN, TOB, TMP, SXT) # and select only relevant columns
head(resistance_data)
```
# Perform principal component analysis
The new `pca()` function will automatically filter on rows that contain numeric values in all selected variables, so we now only need to do:
```{r pca}
pca_result <- pca(resistance_data)
```
The result can be reviewed with the good old `summary()` function:
```{r}
summary(pca_result)
```
```{r, echo = FALSE}
proportion_of_variance <- summary(pca_result)$importance[2, ]
```
Good news. The first two components explain a total of `r cleaner::percentage(sum(proportion_of_variance[1:2]))` of the variance (see the PC1 and PC2 values of the *Proportion of Variance*). We can create a so-called biplot with the base R `biplot()` function, to see which antimicrobial resistances per drug explain the differences between microorganisms.
# Plotting the results
```{r}
biplot(pca_result)
```
But we can't see the explanation of the points. Perhaps this works better with the new `ggplot_pca()` function, which automatically adds the right labels and even groups:
```{r}
ggplot_pca(pca_result)
```
You can also print an ellipse per group, and edit the appearance:
```{r}
ggplot_pca(pca_result, ellipse = TRUE) +
ggplot2::labs(title = "An AMR/PCA biplot!")
```

View File

@ -112,9 +112,9 @@ In the figure below, we compare *Escherichia coli* (which is very common) with *
```{r, echo = FALSE, fig.width=12}
par(mar = c(5, 16, 4, 2))
boxplot(microbenchmark(
as.mo("M. semesiae"),
as.mo("P. brevis"),
as.mo("E. coli"),
as.mo("Meth. semesiae"),
as.mo("Prev. brevis"),
as.mo("Esc. coli"),
times = 10),
horizontal = TRUE, las = 1, unit = "s", log = TRUE,
xlab = "", ylab = "Time in seconds (log)",