(v1.0.1.9002) PCA unit tests

This commit is contained in:
dr. M.S. (Matthijs) Berends 2020-03-08 11:18:59 +01:00
parent 9fc858f208
commit 77656a676c
20 changed files with 182 additions and 135 deletions

View File

@ -1,5 +1,5 @@
Package: AMR
Version: 1.0.1.9001
Version: 1.0.1.9002
Date: 2020-03-08
Title: Antimicrobial Resistance Analysis
Authors@R: c(

View File

@ -37,7 +37,6 @@ S3method(pillar_shaft,rsi)
S3method(plot,mic)
S3method(plot,resistance_predict)
S3method(plot,rsi)
S3method(prcomp,data.frame)
S3method(print,ab)
S3method(print,bug_drug_combinations)
S3method(print,catalogue_of_life_version)
@ -227,7 +226,6 @@ exportMethods(kurtosis.default)
exportMethods(kurtosis.matrix)
exportMethods(plot.mic)
exportMethods(plot.rsi)
exportMethods(prcomp.data.frame)
exportMethods(print.ab)
exportMethods(print.bug_drug_combinations)
exportMethods(print.catalogue_of_life_version)
@ -329,6 +327,8 @@ importFrom(stats,lm)
importFrom(stats,pchisq)
importFrom(stats,prcomp)
importFrom(stats,predict)
importFrom(stats,qchisq)
importFrom(stats,var)
importFrom(tidyr,pivot_longer)
importFrom(tidyr,pivot_wider)
importFrom(utils,adist)

View File

@ -1,4 +1,5 @@
# AMR 1.0.1.9001
# AMR 1.0.1.9002
## <small>Last updated: 08-Mar-2020</small>
### New
* Support for easy principal component analysis for AMR, using the new `pca()` function

View File

@ -43,7 +43,7 @@ check_dataset_integrity <- function() {
"iv_ddd", "iv_units", "loinc") %in% colnames(antibiotics),
na.rm = TRUE)
}, error = function(e)
stop('Please use the command \'library("AMR")\' before using this function, to load the needed reference data.', call. = FALSE)
stop('Please use the command \'library("AMR")\' before using this function, to load the required reference data.', call. = FALSE)
)
if (!check_microorganisms | !check_antibiotics) {
stop("Data set `microorganisms` or data set `antibiotics` is overwritten by your global environment and prevents the AMR package from working correctly. Please rename your object before using this function.", call. = FALSE)
@ -154,6 +154,13 @@ stopifnot_installed_package <- function(package) {
return(invisible())
}
stopifnot_msg <- function(expr, msg) {
if (!isTRUE(expr)) {
stop(msg, call. = FALSE)
}
}
"%or%" <- function(x, y) {
if (is.null(x) | is.null(y)) {
if (is.null(x)) {

View File

@ -49,9 +49,9 @@
#' 1. Rewritten code to remove the dependency on packages `plyr`, `scales` and `grid`
#' 2. Parametrised more options, like arrow and ellipse settings
#' 3. Added total amount of explained variance as a caption in the plot
#' 4. Cleaned all syntax based on the `lintr` package
#' 4. Cleaned all syntax based on the `lintr` package and added integrity checks
#' 5. Updated documentation
#' @details The default colours for labels and points is set with [scale_colour_viridis_d()], but these can be changed by adding another scale for colour, like [scale_colour_brewer()].
#' @details The colours for labels and points can be changed by adding another scale layer for colour, like [scale_colour_viridis_d()] or [scale_colour_brewer()].
#' @rdname ggplot_pca
#' @export
#' @examples
@ -74,14 +74,15 @@
ggplot_pca <- function(x,
choices = 1:2,
scale = TRUE,
pc.biplot = TRUE,
labels = NULL,
labels_textsize = 3,
labels_text_placement = 1.5,
groups = NULL,
ellipse = FALSE,
ellipse = TRUE,
ellipse_prob = 0.68,
ellipse_size = 0.5,
ellipse_alpha = 0.25,
ellipse_alpha = 0.5,
points_size = 2,
points_alpha = 0.25,
arrows = TRUE,
@ -93,6 +94,21 @@ ggplot_pca <- function(x,
...) {
stopifnot_installed_package("ggplot2")
stopifnot_msg(length(choices) == 2, "`choices` must be of length 2")
stopifnot_msg(is.logical(scale), "`scale` must be TRUE or FALSE")
stopifnot_msg(is.logical(pc.biplot), "`pc.biplot` must be TRUE or FALSE")
stopifnot_msg(is.numeric(choices), "`choices` must be numeric")
stopifnot_msg(is.numeric(labels_textsize), "`labels_textsize` must be numeric")
stopifnot_msg(is.numeric(labels_text_placement), "`labels_text_placement` must be numeric")
stopifnot_msg(is.logical(ellipse), "`ellipse` must be TRUE or FALSE")
stopifnot_msg(is.numeric(ellipse_prob), "`ellipse_prob` must be numeric")
stopifnot_msg(is.numeric(ellipse_size), "`ellipse_size` must be numeric")
stopifnot_msg(is.numeric(ellipse_alpha), "`ellipse_alpha` must be numeric")
stopifnot_msg(is.logical(arrows), "`arrows` must be TRUE or FALSE")
stopifnot_msg(is.numeric(arrows_size), "`arrows_size` must be numeric")
stopifnot_msg(is.numeric(arrows_textsize), "`arrows_textsize` must be numeric")
stopifnot_msg(is.numeric(arrows_alpha), "`arrows_alpha` must be numeric")
stopifnot_msg(is.numeric(base_textsize), "`base_textsize` must be numeric")
calculations <- pca_calculations(pca_model = x,
groups = groups,
@ -101,6 +117,7 @@ ggplot_pca <- function(x,
labels_missing = missing(labels),
choices = choices,
scale = scale,
pc.biplot = pc.biplot,
ellipse_prob = ellipse_prob,
labels_text_placement = labels_text_placement)
nobs.factor <- calculations$nobs.factor
@ -116,17 +133,16 @@ ggplot_pca <- function(x,
group_name <- calculations$group_name
labels <- calculations$labels
stopifnot(length(choices) == 2)
# Append the proportion of explained variance to the axis labels
if ((1 - as.integer(scale)) == 0) {
u.axis.labs <- paste("Standardised PC", choices, sep = "")
u.axis.labs <- paste0("Standardised PC", choices)
} else {
u.axis.labs <- paste("PC", choices, sep = "")
u.axis.labs <- paste0("PC", choices)
}
u.axis.labs <- paste(u.axis.labs,
paste0("\n(explained var: ",
percentage(x$sdev[choices] ^ 2 / sum(x$sdev ^ 2)), ")"))
u.axis.labs <- paste0(u.axis.labs,
paste0("\n(explained var: ",
percentage(x$sdev[choices] ^ 2 / sum(x$sdev ^ 2)),
")"))
# Score Labels
if (!is.null(labels)) {
@ -138,7 +154,6 @@ ggplot_pca <- function(x,
df.u$groups <- groups
}
# Base plot
g <- ggplot2::ggplot(data = df.u,
ggplot2::aes(x = xvar, y = yvar)) +
@ -150,30 +165,25 @@ ggplot_pca <- function(x,
# Draw either labels or points
if (!is.null(df.u$labels)) {
if (!is.null(df.u$groups)) {
g <- g +
ggplot2::geom_point(ggplot2::aes(colour = groups),
alpha = points_alpha,
size = points_size) +
g <- g + ggplot2::geom_point(ggplot2::aes(colour = groups),
alpha = points_alpha,
size = points_size) +
ggplot2::geom_text(ggplot2::aes(label = labels, colour = groups),
nudge_y = -0.05,
size = labels_textsize) +
ggplot2::scale_colour_viridis_d() +
ggplot2::labs(colour = group_name)
} else {
g <- g +
ggplot2::geom_point(alpha = points_alpha,
size = points_size) +
g <- g + ggplot2::geom_point(alpha = points_alpha,
size = points_size) +
ggplot2::geom_text(ggplot2::aes(label = labels),
nudge_y = -0.05,
size = labels_textsize)
}
} else {
if (!is.null(df.u$groups)) {
g <- g +
ggplot2::geom_point(ggplot2::aes(colour = groups),
alpha = points_alpha,
size = points_size) +
ggplot2::scale_colour_viridis_d() +
g <- g + ggplot2::geom_point(ggplot2::aes(colour = groups),
alpha = points_alpha,
size = points_size) +
ggplot2::labs(colour = group_name)
} else {
g <- g + ggplot2::geom_point(alpha = points_alpha,
@ -182,26 +192,24 @@ ggplot_pca <- function(x,
}
# Overlay a concentration ellipse if there are groups
if (!is.null(df.u$groups) & isTRUE(ellipse)) {
g <- g +
ggplot2::geom_path(data = ell,
ggplot2::aes(colour = groups, group = groups),
size = ellipse_size,
alpha = points_alpha)
if (!is.null(df.u$groups) & !is.null(ell) & isTRUE(ellipse)) {
g <- g + ggplot2::geom_path(data = ell,
ggplot2::aes(colour = groups, group = groups),
size = ellipse_size,
alpha = points_alpha)
}
# Label the variable axes
if (arrows == TRUE) {
g <- g +
ggplot2::geom_segment(data = df.v,
ggplot2::aes(x = 0, y = 0, xend = xvar, yend = yvar),
arrow = ggplot2::arrow(length = ggplot2::unit(0.5, "picas"),
angle = 20,
ends = "last",
type = "open"),
colour = arrows_colour,
size = arrows_size,
alpha = arrows_alpha) +
g <- g + ggplot2::geom_segment(data = df.v,
ggplot2::aes(x = 0, y = 0, xend = xvar, yend = yvar),
arrow = ggplot2::arrow(length = ggplot2::unit(0.5, "picas"),
angle = 20,
ends = "last",
type = "open"),
colour = arrows_colour,
size = arrows_size,
alpha = arrows_alpha) +
ggplot2::geom_text(data = df.v,
ggplot2::aes(label = varname, x = xvar, y = yvar, angle = angle, hjust = hjust),
colour = arrows_colour,
@ -225,6 +233,7 @@ ggplot_pca <- function(x,
}
#' @importFrom dplyr bind_rows
#' @importFrom stats qchisq var
pca_calculations <- function(pca_model,
groups = NULL,
groups_missing = TRUE,
@ -232,6 +241,7 @@ pca_calculations <- function(pca_model,
labels_missing = TRUE,
choices = 1:2,
scale = 1,
pc.biplot = TRUE,
ellipse_prob = 0.68,
labels_text_placement = 1.5) {
@ -291,7 +301,9 @@ pca_calculations <- function(pca_model,
names(df.u) <- c("xvar", "yvar")
names(df.v) <- names(df.u)
df.u <- df.u * nobs.factor
if (isTRUE(pc.biplot)) {
df.u <- df.u * nobs.factor
}
# Scale the radius of the correlation circle so that it corresponds to
# a data ellipse for the standardized PC scores
@ -314,8 +326,8 @@ pca_calculations <- function(pca_model,
df.v$hjust <- with(df.v, (1 - labels_text_placement * sign(xvar)) / 2)
if (!is.null(df.u$groups)) {
theta <<- c(seq(-pi, pi, length = 50), seq(pi, -pi, length = 50))
circle <<- cbind(cos(theta), sin(theta))
theta <- c(seq(-pi, pi, length = 50), seq(pi, -pi, length = 50))
circle <- cbind(cos(theta), sin(theta))
ell <- bind_rows(
sapply(unique(df.u$groups), function(g, df = df.u) {
x <- df[which(df$groups == g), , drop = FALSE]
@ -325,18 +337,18 @@ pca_calculations <- function(pca_model,
sigma <- var(cbind(x$xvar, x$yvar))
mu <- c(mean(x$xvar), mean(x$yvar))
ed <- sqrt(qchisq(ellipse_prob, df = 2))
el <- data.frame(sweep(circle %*% chol(sigma) * ed, 2, mu, FUN = "+"),
groups = x$groups[1])
names(el)[1:2] <- c("xvar", "yvar")
el
data.frame(sweep(circle %*% chol(sigma) * ed, 2, mu, FUN = "+"),
groups = x$groups[1])
}))
if (NROW(ell) == 0) {
ell <- NULL
} else {
names(ell)[1:2] <- c("xvar", "yvar")
}
} else {
ell <- NULL
}
list(nobs.factor = nobs.factor,
d = d,
u = u,
@ -350,5 +362,4 @@ pca_calculations <- function(pca_model,
group_name = group_name,
labels = labels
)
}

View File

@ -24,6 +24,7 @@ globalVariables(c(".",
"ab",
"ab_txt",
"abbreviations",
"angle",
"antibiotic",
"antibiotics",
"CNS_CPS",
@ -40,6 +41,7 @@ globalVariables(c(".",
"genus",
"gramstain",
"group",
"hjust",
"index",
"input",
"interpretation",
@ -100,7 +102,10 @@ globalVariables(c(".",
"txt",
"uncertainty_level",
"value",
"varname",
"x",
"xdr",
"xvar",
"y",
"year"))
"year",
"yvar"))

60
R/pca.R
View File

@ -21,17 +21,18 @@
#' Principal Component Analysis (for AMR)
#'
#' Performs a principal component analysis (PCA) based on a data set with automatic determination for afterwards plotting the groups and labels.
#' Performs a principal component analysis (PCA) based on a data set with automatic determination for afterwards plotting the groups and labels, and automatic filtering on only suitable (i.e. non-empty and numeric) variables.
#' @inheritSection lifecycle Experimental lifecycle
#' @param x a [data.frame] containing numeric columns
#' @param ... columns of `x` to be selected for PCA
#' @inheritParams stats::prcomp
#' @details The [pca()] function takes a [data.frame] as input and performs the actual PCA with the R function [prcomp()].
#' @details The [pca()] function takes a [data.frame] as input and performs the actual PCA with the \R function [prcomp()].
#'
#' The result of the [pca()] function is a [`prcomp`] object, with an additional attribute `non_numeric_cols` which is a vector with the column names of all columns that do not contain numeric values. These are probably the groups and labels, and will be used by [ggplot_pca()].
#' @rdname pca
#' @exportMethod prcomp.data.frame
#' The result of the [pca()] function is a [prcomp] object, with an additional attribute `non_numeric_cols` which is a vector with the column names of all columns that do not contain numeric values. These are probably the groups and labels, and will be used by [ggplot_pca()].
#' @return An object of classes [pca] and [prcomp]
#' @importFrom stats prcomp
#' @importFrom dplyr ungroup %>% filter_all all_vars
#' @importFrom rlang enquos eval_tidy
#' @export
#' @examples
#' # `example_isolates` is a dataset available in the AMR package.
@ -52,40 +53,19 @@
#' summary(pca_result)
#' biplot(pca_result)
#' ggplot_pca(pca_result) # a new and convenient plot function
prcomp.data.frame <- function(x,
...,
retx = TRUE,
center = TRUE,
scale. = TRUE,
tol = NULL,
rank. = NULL) {
pca <- function(x,
...,
retx = TRUE,
center = TRUE,
scale. = TRUE,
tol = NULL,
rank. = NULL) {
x <- pca_transform_x(x = x, ... = ...)
pca_data <- x[, which(sapply(x, function(x) is.numeric(x)))]
message(blue(paste0("NOTE: Columns selected for PCA: ", paste0(bold(colnames(pca_data)), collapse = "/"),
".\n Total observations available: ", nrow(pca_data), ".")))
stats:::prcomp.default(pca_data, retx = retx, center = center, scale. = scale., tol = tol, rank. = rank.)
}
#' @rdname pca
#' @export
pca <- function(x, ...) {
if (!is.data.frame(x)) {
stop("this function only takes a data.frame as input")
}
pca_model <- prcomp(x, ...)
x <- pca_transform_x(x = x, ... = ...)
attr(pca_model, "non_numeric_cols") <- x[, sapply(x, function(y) !is.numeric(y) & !all(is.na(y))), drop = FALSE]
pca_model
}
#' @importFrom dplyr ungroup %>% filter_all all_vars
#' @importFrom rlang enquos eval_tidy
pca_transform_x <- function(x, ...) {
# unset data.table, tbl_df, etc.
# unset data.table, tibble, etc.
# also removes groups made by dplyr::group_by
x <- as.data.frame(x, stringsAsFactors = FALSE)
x.bak <- x
@ -123,7 +103,17 @@ pca_transform_x <- function(x, ...) {
x <- cbind(x.bak[, sapply(x.bak, function(y) !is.numeric(y) & !all(is.na(y))), drop = FALSE], x)
}
x %>%
x <- x %>%
ungroup() %>% # would otherwise select the grouping vars
filter_all(all_vars(!is.na(.)))
pca_data <- x[, which(sapply(x, function(x) is.numeric(x)))]
message(blue(paste0("NOTE: Columns selected for PCA: ", paste0(bold(colnames(pca_data)), collapse = "/"),
".\n Total observations available: ", nrow(pca_data), ".")))
pca_model <- prcomp(pca_data, retx = retx, center = center, scale. = scale., tol = tol, rank. = rank.)
attr(pca_model, "non_numeric_cols") <- x[, sapply(x, function(y) !is.numeric(y) & !all(is.na(y))), drop = FALSE]
class(pca_model) <- c("pca", class(pca_model))
pca_model
}

View File

@ -78,7 +78,7 @@
</button>
<span class="navbar-brand">
<a class="navbar-link" href="https://msberends.gitlab.io/AMR/index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.0.1.9001</span>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.0.1.9002</span>
</span>
</div>

View File

@ -78,7 +78,7 @@
</button>
<span class="navbar-brand">
<a class="navbar-link" href="index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.0.1.9001</span>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.0.1.9002</span>
</span>
</div>

View File

@ -78,7 +78,7 @@
</button>
<span class="navbar-brand">
<a class="navbar-link" href="../index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.0.1.9001</span>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.0.1.9002</span>
</span>
</div>

View File

@ -78,7 +78,7 @@
</button>
<span class="navbar-brand">
<a class="navbar-link" href="index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.0.1.9001</span>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.0.1.9002</span>
</span>
</div>

View File

@ -43,7 +43,7 @@
</button>
<span class="navbar-brand">
<a class="navbar-link" href="index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.0.1.9001</span>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.0.1.9002</span>
</span>
</div>

View File

@ -78,7 +78,7 @@
</button>
<span class="navbar-brand">
<a class="navbar-link" href="../index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.0.1.9001</span>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.0.1.9002</span>
</span>
</div>
@ -226,10 +226,14 @@
</div>
<div id="amr-1019001" class="section level1">
<div id="amr-1019002" class="section level1">
<h1 class="page-header">
<a href="#amr-1019001" class="anchor"></a>AMR 1.0.1.9001<small> Unreleased </small>
<a href="#amr-1019002" class="anchor"></a>AMR 1.0.1.9002<small> Unreleased </small>
</h1>
<div id="last-updated-08-mar-2020" class="section level2">
<h2 class="hasAnchor">
<a href="#last-updated-08-mar-2020" class="anchor"></a><small>Last updated: 08-Mar-2020</small>
</h2>
<div id="new" class="section level3">
<h3 class="hasAnchor">
<a href="#new" class="anchor"></a>New</h3>
@ -238,6 +242,7 @@
<li>Plotting biplots for principal component analysis using the new <code><a href="../reference/ggplot_pca.html">ggplot_pca()</a></code> function</li>
</ul>
</div>
</div>
</div>
<div id="amr-101" class="section level1">
<h1 class="page-header">
@ -1489,7 +1494,7 @@
<div id="tocnav">
<h2>Contents</h2>
<ul class="nav nav-pills nav-stacked">
<li><a href="#amr-1019001">1.0.1.9001</a></li>
<li><a href="#amr-1019002">1.0.1.9002</a></li>
<li><a href="#amr-101">1.0.1</a></li>
<li><a href="#amr-100">1.0.0</a></li>
<li><a href="#amr-090">0.9.0</a></li>

View File

@ -79,7 +79,7 @@
</button>
<span class="navbar-brand">
<a class="navbar-link" href="../index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.0.1.9001</span>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.0.1.9002</span>
</span>
</div>
@ -236,14 +236,15 @@
<span class='no'>x</span>,
<span class='kw'>choices</span> <span class='kw'>=</span> <span class='fl'>1</span>:<span class='fl'>2</span>,
<span class='kw'>scale</span> <span class='kw'>=</span> <span class='fl'>TRUE</span>,
<span class='kw'>pc.biplot</span> <span class='kw'>=</span> <span class='fl'>TRUE</span>,
<span class='kw'>labels</span> <span class='kw'>=</span> <span class='kw'>NULL</span>,
<span class='kw'>labels_textsize</span> <span class='kw'>=</span> <span class='fl'>3</span>,
<span class='kw'>labels_text_placement</span> <span class='kw'>=</span> <span class='fl'>1.5</span>,
<span class='kw'>groups</span> <span class='kw'>=</span> <span class='kw'>NULL</span>,
<span class='kw'>ellipse</span> <span class='kw'>=</span> <span class='fl'>FALSE</span>,
<span class='kw'>ellipse</span> <span class='kw'>=</span> <span class='fl'>TRUE</span>,
<span class='kw'>ellipse_prob</span> <span class='kw'>=</span> <span class='fl'>0.68</span>,
<span class='kw'>ellipse_size</span> <span class='kw'>=</span> <span class='fl'>0.5</span>,
<span class='kw'>ellipse_alpha</span> <span class='kw'>=</span> <span class='fl'>0.25</span>,
<span class='kw'>ellipse_alpha</span> <span class='kw'>=</span> <span class='fl'>0.5</span>,
<span class='kw'>points_size</span> <span class='kw'>=</span> <span class='fl'>2</span>,
<span class='kw'>points_alpha</span> <span class='kw'>=</span> <span class='fl'>0.25</span>,
<span class='kw'>arrows</span> <span class='kw'>=</span> <span class='fl'>TRUE</span>,
@ -275,6 +276,14 @@
<code><a href='https://rdrr.io/r/stats/princomp.html'>princomp</a></code>. Normally <code>0 &lt;= scale &lt;= 1</code>, and a warning
will be issued if the specified <code>scale</code> is outside this range.</p></td>
</tr>
<tr>
<th>pc.biplot</th>
<td><p>If true, use what Gabriel (1971) refers to as a "principal component
biplot", with <code>lambda = 1</code> and observations scaled up by sqrt(n) and
variables scaled down by sqrt(n). Then inner products between
variables approximate covariances and distances between observations
approximate Mahalanobis distance.</p></td>
</tr>
<tr>
<th>labels</th>
<td><p>an optional vector of labels for the observations. If set, the labels will be placed below their respective points. When using the <code><a href='pca.html'>pca()</a></code> function as input for <code>x</code>, this will be determined automatically based on the attribute <code>non_numeric_cols</code>, see <code><a href='pca.html'>pca()</a></code>.</p></td>
@ -352,13 +361,13 @@
<li><p>Rewritten code to remove the dependency on packages <code>plyr</code>, <code>scales</code> and <code>grid</code></p></li>
<li><p>Parametrised more options, like arrow and ellipse settings</p></li>
<li><p>Added total amount of explained variance as a caption in the plot</p></li>
<li><p>Cleaned all syntax based on the <code>lintr</code> package</p></li>
<li><p>Cleaned all syntax based on the <code>lintr</code> package and added integrity checks</p></li>
<li><p>Updated documentation</p></li>
</ol>
<h2 class="hasAnchor" id="details"><a class="anchor" href="#details"></a>Details</h2>
<p>The default colours for labels and points is set with <code>scale_colour_viridis_d()</code>, but these can be changed by adding another scale for colour, like <code>scale_colour_brewer()</code>.</p>
<p>The colours for labels and points can be changed by adding another scale layer for colour, like <code>scale_colour_viridis_d()</code> or <code>scale_colour_brewer()</code>.</p>
<h2 class="hasAnchor" id="maturing-lifecycle"><a class="anchor" href="#maturing-lifecycle"></a>Maturing lifecycle</h2>

View File

@ -78,7 +78,7 @@
</button>
<span class="navbar-brand">
<a class="navbar-link" href="../index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.0.1.9001</span>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.0.1.9002</span>
</span>
</div>
@ -403,7 +403,7 @@
</tr><tr>
<td>
<p><code><a href="pca.html">prcomp(<i>&lt;data.frame&gt;</i>)</a></code> <code><a href="pca.html">pca()</a></code> </p>
<p><code><a href="pca.html">pca()</a></code> </p>
</td>
<td><p>Principal Component Analysis (for AMR)</p></td>
</tr><tr>

View File

@ -6,7 +6,7 @@
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Principal Component Analysis (for AMR) — prcomp.data.frame • AMR (for R)</title>
<title>Principal Component Analysis (for AMR) — pca • AMR (for R)</title>
<!-- favicons -->
<link rel="icon" type="image/png" sizes="16x16" href="../favicon-16x16.png">
@ -44,8 +44,8 @@
<link href="../extra.css" rel="stylesheet">
<script src="../extra.js"></script>
<meta property="og:title" content="Principal Component Analysis (for AMR) — prcomp.data.frame" />
<meta property="og:description" content="Performs a principal component analysis (PCA) based on a data set with automatic determination for afterwards plotting the groups and labels." />
<meta property="og:title" content="Principal Component Analysis (for AMR) — pca" />
<meta property="og:description" content="Performs a principal component analysis (PCA) based on a data set with automatic determination for afterwards plotting the groups and labels, and automatic filtering on only suitable (i.e. non-empty and numeric) variables." />
<meta property="og:image" content="https://msberends.gitlab.io/AMR/logo.png" />
<meta name="twitter:card" content="summary" />
@ -79,7 +79,7 @@
</button>
<span class="navbar-brand">
<a class="navbar-link" href="../index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.0.1.9000</span>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.0.1.9002</span>
</span>
</div>
@ -229,11 +229,10 @@
</div>
<div class="ref-description">
<p>Performs a principal component analysis (PCA) based on a data set with automatic determination for afterwards plotting the groups and labels.</p>
<p>Performs a principal component analysis (PCA) based on a data set with automatic determination for afterwards plotting the groups and labels, and automatic filtering on only suitable (i.e. non-empty and numeric) variables.</p>
</div>
<pre class="usage"><span class='co'># S3 method for data.frame</span>
<span class='fu'><a href='https://rdrr.io/r/stats/prcomp.html'>prcomp</a></span>(
<pre class="usage"><span class='fu'>pca</span>(
<span class='no'>x</span>,
<span class='no'>...</span>,
<span class='kw'>retx</span> <span class='kw'>=</span> <span class='fl'>TRUE</span>,
@ -241,9 +240,7 @@
<span class='kw'>scale.</span> <span class='kw'>=</span> <span class='fl'>TRUE</span>,
<span class='kw'>tol</span> <span class='kw'>=</span> <span class='kw'>NULL</span>,
<span class='kw'>rank.</span> <span class='kw'>=</span> <span class='kw'>NULL</span>
)
<span class='fu'>pca</span>(<span class='no'>x</span>, <span class='no'>...</span>)</pre>
)</pre>
<h2 class="hasAnchor" id="arguments"><a class="anchor" href="#arguments"></a>Arguments</h2>
<table class="ref-arguments">
@ -297,10 +294,13 @@
</tr>
</table>
<h2 class="hasAnchor" id="value"><a class="anchor" href="#value"></a>Value</h2>
<p>An object of classes pca and <a href='https://rdrr.io/r/stats/prcomp.html'>prcomp</a></p>
<h2 class="hasAnchor" id="details"><a class="anchor" href="#details"></a>Details</h2>
<p>The <code>pca()</code> function takes a <a href='https://rdrr.io/r/base/data.frame.html'>data.frame</a> as input and performs the actual PCA with the R function <code><a href='https://rdrr.io/r/stats/prcomp.html'>prcomp()</a></code>.</p>
<p>The result of the <code>pca()</code> function is a <code><a href='https://rdrr.io/r/stats/prcomp.html'>prcomp</a></code> object, with an additional attribute <code>non_numeric_cols</code> which is a vector with the column names of all columns that do not contain numeric values. These are probably the groups and labels, and will be used by <code><a href='ggplot_pca.html'>ggplot_pca()</a></code>.</p>
<p>The <code>pca()</code> function takes a <a href='https://rdrr.io/r/base/data.frame.html'>data.frame</a> as input and performs the actual PCA with the <span style="R">R</span> function <code><a href='https://rdrr.io/r/stats/prcomp.html'>prcomp()</a></code>.</p>
<p>The result of the <code>pca()</code> function is a <a href='https://rdrr.io/r/stats/prcomp.html'>prcomp</a> object, with an additional attribute <code>non_numeric_cols</code> which is a vector with the column names of all columns that do not contain numeric values. These are probably the groups and labels, and will be used by <code><a href='ggplot_pca.html'>ggplot_pca()</a></code>.</p>
<h2 class="hasAnchor" id="experimental-lifecycle"><a class="anchor" href="#experimental-lifecycle"></a>Experimental lifecycle</h2>
@ -332,6 +332,7 @@ The <a href='lifecycle.html'>lifecycle</a> of this function is <strong>experimen
<h2>Contents</h2>
<ul class="nav nav-pills nav-stacked">
<li><a href="#arguments">Arguments</a></li>
<li><a href="#value">Value</a></li>
<li><a href="#details">Details</a></li>
<li><a href="#experimental-lifecycle">Experimental lifecycle</a></li>
<li><a href="#examples">Examples</a></li>

View File

@ -50,6 +50,14 @@ if [ -z "$3" ]; then
git pull --tags --quiet
current_tag=`git describe --tags --abbrev=0 | sed 's/v//'`
current_commit=`git describe --tags | sed 's/.*-\(.*\)-.*/\1/'`
if [ -z "current_tag" ]; then
echo "FATAL - could not determine current tag"
exit 1
fi
if [ -z "current_commit" ]; then
echo "FATAL - could not determine last commit index number"
exit 1
fi
# combine tag (e.g. 0.1.0) and commit number (like 40) increased by 9000 to indicate beta version
new_version="$current_tag.$((current_commit + 9000))" # results in 0.1.0.9040
if [ -z "$new_version" ]; then
@ -99,6 +107,7 @@ echo
echo "•••••••••••••••••••••••••"
echo "• List of changed files •"
echo "•••••••••••••••••••••••••"
git add .
git status --short
echo
read -p "Uploading version ${new_version}. Continue (Y/n)? " choice
@ -111,7 +120,6 @@ echo
echo "•••••••••••••••••••••••••••"
echo "• Uploading to repository •"
echo "•••••••••••••••••••••••••••"
git add .
git commit -a -m "(v$new_version) $1" --quiet
git push --quiet
echo "Comparison:"

View File

@ -11,7 +11,7 @@ As per their GPL-2 licence that demands documentation of code changes, the chang
\item Rewritten code to remove the dependency on packages \code{plyr}, \code{scales} and \code{grid}
\item Parametrised more options, like arrow and ellipse settings
\item Added total amount of explained variance as a caption in the plot
\item Cleaned all syntax based on the \code{lintr} package
\item Cleaned all syntax based on the \code{lintr} package and added integrity checks
\item Updated documentation
}
}
@ -20,14 +20,15 @@ ggplot_pca(
x,
choices = 1:2,
scale = TRUE,
pc.biplot = TRUE,
labels = NULL,
labels_textsize = 3,
labels_text_placement = 1.5,
groups = NULL,
ellipse = FALSE,
ellipse = TRUE,
ellipse_prob = 0.68,
ellipse_size = 0.5,
ellipse_alpha = 0.25,
ellipse_alpha = 0.5,
points_size = 2,
points_alpha = 0.25,
arrows = TRUE,
@ -55,6 +56,14 @@ ggplot_pca(
will be issued if the specified \code{scale} is outside this range.
}
\item{pc.biplot}{
If true, use what Gabriel (1971) refers to as a "principal component
biplot", with \code{lambda = 1} and observations scaled up by sqrt(n) and
variables scaled down by sqrt(n). Then inner products between
variables approximate covariances and distances between observations
approximate Mahalanobis distance.
}
\item{labels}{an optional vector of labels for the observations. If set, the labels will be placed below their respective points. When using the \code{\link[=pca]{pca()}} function as input for \code{x}, this will be determined automatically based on the attribute \code{non_numeric_cols}, see \code{\link[=pca]{pca()}}.}
\item{labels_textsize}{the size of the text used for the labels}
@ -93,7 +102,7 @@ ggplot_pca(
This function is to produce a \code{ggplot2} variant of a so-called \href{https://en.wikipedia.org/wiki/Biplot}{biplot} for PCA (principal component analysis), but is more flexible and more appealing than the base \R \code{\link[=biplot]{biplot()}} function.
}
\details{
The default colours for labels and points is set with \code{\link[=scale_colour_viridis_d]{scale_colour_viridis_d()}}, but these can be changed by adding another scale for colour, like \code{\link[=scale_colour_brewer]{scale_colour_brewer()}}.
The colours for labels and points can be changed by adding another scale layer for colour, like \code{\link[=scale_colour_viridis_d]{scale_colour_viridis_d()}} or \code{\link[=scale_colour_brewer]{scale_colour_brewer()}}.
}
\section{Maturing lifecycle}{

View File

@ -1,11 +1,10 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/pca.R
\name{prcomp.data.frame}
\alias{prcomp.data.frame}
\name{pca}
\alias{pca}
\title{Principal Component Analysis (for AMR)}
\usage{
\method{prcomp}{data.frame}(
pca(
x,
...,
retx = TRUE,
@ -14,8 +13,6 @@
tol = NULL,
rank. = NULL
)
pca(x, ...)
}
\arguments{
\item{x}{a \link{data.frame} containing numeric columns}
@ -51,13 +48,16 @@ pca(x, ...)
alternative or in addition to \code{tol}, useful notably when the
desired rank is considerably smaller than the dimensions of the matrix.}
}
\value{
An object of classes \link{pca} and \link{prcomp}
}
\description{
Performs a principal component analysis (PCA) based on a data set with automatic determination for afterwards plotting the groups and labels.
Performs a principal component analysis (PCA) based on a data set with automatic determination for afterwards plotting the groups and labels, and automatic filtering on only suitable (i.e. non-empty and numeric) variables.
}
\details{
The \code{\link[=pca]{pca()}} function takes a \link{data.frame} as input and performs the actual PCA with the R function \code{\link[=prcomp]{prcomp()}}.
The \code{\link[=pca]{pca()}} function takes a \link{data.frame} as input and performs the actual PCA with the \R function \code{\link[=prcomp]{prcomp()}}.
The result of the \code{\link[=pca]{pca()}} function is a \code{\link{prcomp}} object, with an additional attribute \code{non_numeric_cols} which is a vector with the column names of all columns that do not contain numeric values. These are probably the groups and labels, and will be used by \code{\link[=ggplot_pca]{ggplot_pca()}}.
The result of the \code{\link[=pca]{pca()}} function is a \link{prcomp} object, with an additional attribute \code{non_numeric_cols} which is a vector with the column names of all columns that do not contain numeric values. These are probably the groups and labels, and will be used by \code{\link[=ggplot_pca]{ggplot_pca()}}.
}
\section{Experimental lifecycle}{

View File

@ -30,8 +30,9 @@ test_that("PCA works", {
genus = mo_genus(mo)) %>% # and genus as we do here
summarise_if(is.rsi, resistance, minimum = 0)
expect_s3_class(pca(resistance_data), "prcomp")
expect_s3_class(prcomp(resistance_data), "prcomp")
pca_model <- pca(resistance_data)
ggplot_pca(pca(resistance_data), ellipse = TRUE)
expect_s3_class(pca_model, "pca")
ggplot_pca(pca_model, ellipse = TRUE)
})