mirror of
https://github.com/msberends/AMR.git
synced 2024-12-26 05:26:13 +01:00
(v1.0.1.9002) PCA unit tests
This commit is contained in:
parent
9fc858f208
commit
77656a676c
@ -1,5 +1,5 @@
|
||||
Package: AMR
|
||||
Version: 1.0.1.9001
|
||||
Version: 1.0.1.9002
|
||||
Date: 2020-03-08
|
||||
Title: Antimicrobial Resistance Analysis
|
||||
Authors@R: c(
|
||||
|
@ -37,7 +37,6 @@ S3method(pillar_shaft,rsi)
|
||||
S3method(plot,mic)
|
||||
S3method(plot,resistance_predict)
|
||||
S3method(plot,rsi)
|
||||
S3method(prcomp,data.frame)
|
||||
S3method(print,ab)
|
||||
S3method(print,bug_drug_combinations)
|
||||
S3method(print,catalogue_of_life_version)
|
||||
@ -227,7 +226,6 @@ exportMethods(kurtosis.default)
|
||||
exportMethods(kurtosis.matrix)
|
||||
exportMethods(plot.mic)
|
||||
exportMethods(plot.rsi)
|
||||
exportMethods(prcomp.data.frame)
|
||||
exportMethods(print.ab)
|
||||
exportMethods(print.bug_drug_combinations)
|
||||
exportMethods(print.catalogue_of_life_version)
|
||||
@ -329,6 +327,8 @@ importFrom(stats,lm)
|
||||
importFrom(stats,pchisq)
|
||||
importFrom(stats,prcomp)
|
||||
importFrom(stats,predict)
|
||||
importFrom(stats,qchisq)
|
||||
importFrom(stats,var)
|
||||
importFrom(tidyr,pivot_longer)
|
||||
importFrom(tidyr,pivot_wider)
|
||||
importFrom(utils,adist)
|
||||
|
3
NEWS.md
3
NEWS.md
@ -1,4 +1,5 @@
|
||||
# AMR 1.0.1.9001
|
||||
# AMR 1.0.1.9002
|
||||
## <small>Last updated: 08-Mar-2020</small>
|
||||
|
||||
### New
|
||||
* Support for easy principal component analysis for AMR, using the new `pca()` function
|
||||
|
@ -43,7 +43,7 @@ check_dataset_integrity <- function() {
|
||||
"iv_ddd", "iv_units", "loinc") %in% colnames(antibiotics),
|
||||
na.rm = TRUE)
|
||||
}, error = function(e)
|
||||
stop('Please use the command \'library("AMR")\' before using this function, to load the needed reference data.', call. = FALSE)
|
||||
stop('Please use the command \'library("AMR")\' before using this function, to load the required reference data.', call. = FALSE)
|
||||
)
|
||||
if (!check_microorganisms | !check_antibiotics) {
|
||||
stop("Data set `microorganisms` or data set `antibiotics` is overwritten by your global environment and prevents the AMR package from working correctly. Please rename your object before using this function.", call. = FALSE)
|
||||
@ -154,6 +154,13 @@ stopifnot_installed_package <- function(package) {
|
||||
return(invisible())
|
||||
}
|
||||
|
||||
stopifnot_msg <- function(expr, msg) {
|
||||
if (!isTRUE(expr)) {
|
||||
stop(msg, call. = FALSE)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
"%or%" <- function(x, y) {
|
||||
if (is.null(x) | is.null(y)) {
|
||||
if (is.null(x)) {
|
111
R/ggplot_pca.R
111
R/ggplot_pca.R
@ -49,9 +49,9 @@
|
||||
#' 1. Rewritten code to remove the dependency on packages `plyr`, `scales` and `grid`
|
||||
#' 2. Parametrised more options, like arrow and ellipse settings
|
||||
#' 3. Added total amount of explained variance as a caption in the plot
|
||||
#' 4. Cleaned all syntax based on the `lintr` package
|
||||
#' 4. Cleaned all syntax based on the `lintr` package and added integrity checks
|
||||
#' 5. Updated documentation
|
||||
#' @details The default colours for labels and points is set with [scale_colour_viridis_d()], but these can be changed by adding another scale for colour, like [scale_colour_brewer()].
|
||||
#' @details The colours for labels and points can be changed by adding another scale layer for colour, like [scale_colour_viridis_d()] or [scale_colour_brewer()].
|
||||
#' @rdname ggplot_pca
|
||||
#' @export
|
||||
#' @examples
|
||||
@ -74,14 +74,15 @@
|
||||
ggplot_pca <- function(x,
|
||||
choices = 1:2,
|
||||
scale = TRUE,
|
||||
pc.biplot = TRUE,
|
||||
labels = NULL,
|
||||
labels_textsize = 3,
|
||||
labels_text_placement = 1.5,
|
||||
groups = NULL,
|
||||
ellipse = FALSE,
|
||||
ellipse = TRUE,
|
||||
ellipse_prob = 0.68,
|
||||
ellipse_size = 0.5,
|
||||
ellipse_alpha = 0.25,
|
||||
ellipse_alpha = 0.5,
|
||||
points_size = 2,
|
||||
points_alpha = 0.25,
|
||||
arrows = TRUE,
|
||||
@ -93,6 +94,21 @@ ggplot_pca <- function(x,
|
||||
...) {
|
||||
|
||||
stopifnot_installed_package("ggplot2")
|
||||
stopifnot_msg(length(choices) == 2, "`choices` must be of length 2")
|
||||
stopifnot_msg(is.logical(scale), "`scale` must be TRUE or FALSE")
|
||||
stopifnot_msg(is.logical(pc.biplot), "`pc.biplot` must be TRUE or FALSE")
|
||||
stopifnot_msg(is.numeric(choices), "`choices` must be numeric")
|
||||
stopifnot_msg(is.numeric(labels_textsize), "`labels_textsize` must be numeric")
|
||||
stopifnot_msg(is.numeric(labels_text_placement), "`labels_text_placement` must be numeric")
|
||||
stopifnot_msg(is.logical(ellipse), "`ellipse` must be TRUE or FALSE")
|
||||
stopifnot_msg(is.numeric(ellipse_prob), "`ellipse_prob` must be numeric")
|
||||
stopifnot_msg(is.numeric(ellipse_size), "`ellipse_size` must be numeric")
|
||||
stopifnot_msg(is.numeric(ellipse_alpha), "`ellipse_alpha` must be numeric")
|
||||
stopifnot_msg(is.logical(arrows), "`arrows` must be TRUE or FALSE")
|
||||
stopifnot_msg(is.numeric(arrows_size), "`arrows_size` must be numeric")
|
||||
stopifnot_msg(is.numeric(arrows_textsize), "`arrows_textsize` must be numeric")
|
||||
stopifnot_msg(is.numeric(arrows_alpha), "`arrows_alpha` must be numeric")
|
||||
stopifnot_msg(is.numeric(base_textsize), "`base_textsize` must be numeric")
|
||||
|
||||
calculations <- pca_calculations(pca_model = x,
|
||||
groups = groups,
|
||||
@ -101,6 +117,7 @@ ggplot_pca <- function(x,
|
||||
labels_missing = missing(labels),
|
||||
choices = choices,
|
||||
scale = scale,
|
||||
pc.biplot = pc.biplot,
|
||||
ellipse_prob = ellipse_prob,
|
||||
labels_text_placement = labels_text_placement)
|
||||
nobs.factor <- calculations$nobs.factor
|
||||
@ -116,17 +133,16 @@ ggplot_pca <- function(x,
|
||||
group_name <- calculations$group_name
|
||||
labels <- calculations$labels
|
||||
|
||||
stopifnot(length(choices) == 2)
|
||||
|
||||
# Append the proportion of explained variance to the axis labels
|
||||
if ((1 - as.integer(scale)) == 0) {
|
||||
u.axis.labs <- paste("Standardised PC", choices, sep = "")
|
||||
u.axis.labs <- paste0("Standardised PC", choices)
|
||||
} else {
|
||||
u.axis.labs <- paste("PC", choices, sep = "")
|
||||
u.axis.labs <- paste0("PC", choices)
|
||||
}
|
||||
u.axis.labs <- paste(u.axis.labs,
|
||||
paste0("\n(explained var: ",
|
||||
percentage(x$sdev[choices] ^ 2 / sum(x$sdev ^ 2)), ")"))
|
||||
u.axis.labs <- paste0(u.axis.labs,
|
||||
paste0("\n(explained var: ",
|
||||
percentage(x$sdev[choices] ^ 2 / sum(x$sdev ^ 2)),
|
||||
")"))
|
||||
|
||||
# Score Labels
|
||||
if (!is.null(labels)) {
|
||||
@ -138,7 +154,6 @@ ggplot_pca <- function(x,
|
||||
df.u$groups <- groups
|
||||
}
|
||||
|
||||
|
||||
# Base plot
|
||||
g <- ggplot2::ggplot(data = df.u,
|
||||
ggplot2::aes(x = xvar, y = yvar)) +
|
||||
@ -150,30 +165,25 @@ ggplot_pca <- function(x,
|
||||
# Draw either labels or points
|
||||
if (!is.null(df.u$labels)) {
|
||||
if (!is.null(df.u$groups)) {
|
||||
g <- g +
|
||||
ggplot2::geom_point(ggplot2::aes(colour = groups),
|
||||
alpha = points_alpha,
|
||||
size = points_size) +
|
||||
g <- g + ggplot2::geom_point(ggplot2::aes(colour = groups),
|
||||
alpha = points_alpha,
|
||||
size = points_size) +
|
||||
ggplot2::geom_text(ggplot2::aes(label = labels, colour = groups),
|
||||
nudge_y = -0.05,
|
||||
size = labels_textsize) +
|
||||
ggplot2::scale_colour_viridis_d() +
|
||||
ggplot2::labs(colour = group_name)
|
||||
} else {
|
||||
g <- g +
|
||||
ggplot2::geom_point(alpha = points_alpha,
|
||||
size = points_size) +
|
||||
g <- g + ggplot2::geom_point(alpha = points_alpha,
|
||||
size = points_size) +
|
||||
ggplot2::geom_text(ggplot2::aes(label = labels),
|
||||
nudge_y = -0.05,
|
||||
size = labels_textsize)
|
||||
}
|
||||
} else {
|
||||
if (!is.null(df.u$groups)) {
|
||||
g <- g +
|
||||
ggplot2::geom_point(ggplot2::aes(colour = groups),
|
||||
alpha = points_alpha,
|
||||
size = points_size) +
|
||||
ggplot2::scale_colour_viridis_d() +
|
||||
g <- g + ggplot2::geom_point(ggplot2::aes(colour = groups),
|
||||
alpha = points_alpha,
|
||||
size = points_size) +
|
||||
ggplot2::labs(colour = group_name)
|
||||
} else {
|
||||
g <- g + ggplot2::geom_point(alpha = points_alpha,
|
||||
@ -182,26 +192,24 @@ ggplot_pca <- function(x,
|
||||
}
|
||||
|
||||
# Overlay a concentration ellipse if there are groups
|
||||
if (!is.null(df.u$groups) & isTRUE(ellipse)) {
|
||||
g <- g +
|
||||
ggplot2::geom_path(data = ell,
|
||||
ggplot2::aes(colour = groups, group = groups),
|
||||
size = ellipse_size,
|
||||
alpha = points_alpha)
|
||||
if (!is.null(df.u$groups) & !is.null(ell) & isTRUE(ellipse)) {
|
||||
g <- g + ggplot2::geom_path(data = ell,
|
||||
ggplot2::aes(colour = groups, group = groups),
|
||||
size = ellipse_size,
|
||||
alpha = points_alpha)
|
||||
}
|
||||
|
||||
# Label the variable axes
|
||||
if (arrows == TRUE) {
|
||||
g <- g +
|
||||
ggplot2::geom_segment(data = df.v,
|
||||
ggplot2::aes(x = 0, y = 0, xend = xvar, yend = yvar),
|
||||
arrow = ggplot2::arrow(length = ggplot2::unit(0.5, "picas"),
|
||||
angle = 20,
|
||||
ends = "last",
|
||||
type = "open"),
|
||||
colour = arrows_colour,
|
||||
size = arrows_size,
|
||||
alpha = arrows_alpha) +
|
||||
g <- g + ggplot2::geom_segment(data = df.v,
|
||||
ggplot2::aes(x = 0, y = 0, xend = xvar, yend = yvar),
|
||||
arrow = ggplot2::arrow(length = ggplot2::unit(0.5, "picas"),
|
||||
angle = 20,
|
||||
ends = "last",
|
||||
type = "open"),
|
||||
colour = arrows_colour,
|
||||
size = arrows_size,
|
||||
alpha = arrows_alpha) +
|
||||
ggplot2::geom_text(data = df.v,
|
||||
ggplot2::aes(label = varname, x = xvar, y = yvar, angle = angle, hjust = hjust),
|
||||
colour = arrows_colour,
|
||||
@ -225,6 +233,7 @@ ggplot_pca <- function(x,
|
||||
}
|
||||
|
||||
#' @importFrom dplyr bind_rows
|
||||
#' @importFrom stats qchisq var
|
||||
pca_calculations <- function(pca_model,
|
||||
groups = NULL,
|
||||
groups_missing = TRUE,
|
||||
@ -232,6 +241,7 @@ pca_calculations <- function(pca_model,
|
||||
labels_missing = TRUE,
|
||||
choices = 1:2,
|
||||
scale = 1,
|
||||
pc.biplot = TRUE,
|
||||
ellipse_prob = 0.68,
|
||||
labels_text_placement = 1.5) {
|
||||
|
||||
@ -291,7 +301,9 @@ pca_calculations <- function(pca_model,
|
||||
names(df.u) <- c("xvar", "yvar")
|
||||
names(df.v) <- names(df.u)
|
||||
|
||||
df.u <- df.u * nobs.factor
|
||||
if (isTRUE(pc.biplot)) {
|
||||
df.u <- df.u * nobs.factor
|
||||
}
|
||||
|
||||
# Scale the radius of the correlation circle so that it corresponds to
|
||||
# a data ellipse for the standardized PC scores
|
||||
@ -314,8 +326,8 @@ pca_calculations <- function(pca_model,
|
||||
df.v$hjust <- with(df.v, (1 - labels_text_placement * sign(xvar)) / 2)
|
||||
|
||||
if (!is.null(df.u$groups)) {
|
||||
theta <<- c(seq(-pi, pi, length = 50), seq(pi, -pi, length = 50))
|
||||
circle <<- cbind(cos(theta), sin(theta))
|
||||
theta <- c(seq(-pi, pi, length = 50), seq(pi, -pi, length = 50))
|
||||
circle <- cbind(cos(theta), sin(theta))
|
||||
ell <- bind_rows(
|
||||
sapply(unique(df.u$groups), function(g, df = df.u) {
|
||||
x <- df[which(df$groups == g), , drop = FALSE]
|
||||
@ -325,18 +337,18 @@ pca_calculations <- function(pca_model,
|
||||
sigma <- var(cbind(x$xvar, x$yvar))
|
||||
mu <- c(mean(x$xvar), mean(x$yvar))
|
||||
ed <- sqrt(qchisq(ellipse_prob, df = 2))
|
||||
el <- data.frame(sweep(circle %*% chol(sigma) * ed, 2, mu, FUN = "+"),
|
||||
groups = x$groups[1])
|
||||
names(el)[1:2] <- c("xvar", "yvar")
|
||||
el
|
||||
data.frame(sweep(circle %*% chol(sigma) * ed, 2, mu, FUN = "+"),
|
||||
groups = x$groups[1])
|
||||
}))
|
||||
if (NROW(ell) == 0) {
|
||||
ell <- NULL
|
||||
} else {
|
||||
names(ell)[1:2] <- c("xvar", "yvar")
|
||||
}
|
||||
} else {
|
||||
ell <- NULL
|
||||
}
|
||||
|
||||
|
||||
list(nobs.factor = nobs.factor,
|
||||
d = d,
|
||||
u = u,
|
||||
@ -350,5 +362,4 @@ pca_calculations <- function(pca_model,
|
||||
group_name = group_name,
|
||||
labels = labels
|
||||
)
|
||||
|
||||
}
|
||||
|
@ -24,6 +24,7 @@ globalVariables(c(".",
|
||||
"ab",
|
||||
"ab_txt",
|
||||
"abbreviations",
|
||||
"angle",
|
||||
"antibiotic",
|
||||
"antibiotics",
|
||||
"CNS_CPS",
|
||||
@ -40,6 +41,7 @@ globalVariables(c(".",
|
||||
"genus",
|
||||
"gramstain",
|
||||
"group",
|
||||
"hjust",
|
||||
"index",
|
||||
"input",
|
||||
"interpretation",
|
||||
@ -100,7 +102,10 @@ globalVariables(c(".",
|
||||
"txt",
|
||||
"uncertainty_level",
|
||||
"value",
|
||||
"varname",
|
||||
"x",
|
||||
"xdr",
|
||||
"xvar",
|
||||
"y",
|
||||
"year"))
|
||||
"year",
|
||||
"yvar"))
|
||||
|
60
R/pca.R
60
R/pca.R
@ -21,17 +21,18 @@
|
||||
|
||||
#' Principal Component Analysis (for AMR)
|
||||
#'
|
||||
#' Performs a principal component analysis (PCA) based on a data set with automatic determination for afterwards plotting the groups and labels.
|
||||
#' Performs a principal component analysis (PCA) based on a data set with automatic determination for afterwards plotting the groups and labels, and automatic filtering on only suitable (i.e. non-empty and numeric) variables.
|
||||
#' @inheritSection lifecycle Experimental lifecycle
|
||||
#' @param x a [data.frame] containing numeric columns
|
||||
#' @param ... columns of `x` to be selected for PCA
|
||||
#' @inheritParams stats::prcomp
|
||||
#' @details The [pca()] function takes a [data.frame] as input and performs the actual PCA with the R function [prcomp()].
|
||||
#' @details The [pca()] function takes a [data.frame] as input and performs the actual PCA with the \R function [prcomp()].
|
||||
#'
|
||||
#' The result of the [pca()] function is a [`prcomp`] object, with an additional attribute `non_numeric_cols` which is a vector with the column names of all columns that do not contain numeric values. These are probably the groups and labels, and will be used by [ggplot_pca()].
|
||||
#' @rdname pca
|
||||
#' @exportMethod prcomp.data.frame
|
||||
#' The result of the [pca()] function is a [prcomp] object, with an additional attribute `non_numeric_cols` which is a vector with the column names of all columns that do not contain numeric values. These are probably the groups and labels, and will be used by [ggplot_pca()].
|
||||
#' @return An object of classes [pca] and [prcomp]
|
||||
#' @importFrom stats prcomp
|
||||
#' @importFrom dplyr ungroup %>% filter_all all_vars
|
||||
#' @importFrom rlang enquos eval_tidy
|
||||
#' @export
|
||||
#' @examples
|
||||
#' # `example_isolates` is a dataset available in the AMR package.
|
||||
@ -52,40 +53,19 @@
|
||||
#' summary(pca_result)
|
||||
#' biplot(pca_result)
|
||||
#' ggplot_pca(pca_result) # a new and convenient plot function
|
||||
prcomp.data.frame <- function(x,
|
||||
...,
|
||||
retx = TRUE,
|
||||
center = TRUE,
|
||||
scale. = TRUE,
|
||||
tol = NULL,
|
||||
rank. = NULL) {
|
||||
pca <- function(x,
|
||||
...,
|
||||
retx = TRUE,
|
||||
center = TRUE,
|
||||
scale. = TRUE,
|
||||
tol = NULL,
|
||||
rank. = NULL) {
|
||||
|
||||
x <- pca_transform_x(x = x, ... = ...)
|
||||
pca_data <- x[, which(sapply(x, function(x) is.numeric(x)))]
|
||||
|
||||
message(blue(paste0("NOTE: Columns selected for PCA: ", paste0(bold(colnames(pca_data)), collapse = "/"),
|
||||
".\n Total observations available: ", nrow(pca_data), ".")))
|
||||
|
||||
stats:::prcomp.default(pca_data, retx = retx, center = center, scale. = scale., tol = tol, rank. = rank.)
|
||||
}
|
||||
|
||||
#' @rdname pca
|
||||
#' @export
|
||||
pca <- function(x, ...) {
|
||||
if (!is.data.frame(x)) {
|
||||
stop("this function only takes a data.frame as input")
|
||||
}
|
||||
pca_model <- prcomp(x, ...)
|
||||
|
||||
x <- pca_transform_x(x = x, ... = ...)
|
||||
attr(pca_model, "non_numeric_cols") <- x[, sapply(x, function(y) !is.numeric(y) & !all(is.na(y))), drop = FALSE]
|
||||
pca_model
|
||||
}
|
||||
|
||||
#' @importFrom dplyr ungroup %>% filter_all all_vars
|
||||
#' @importFrom rlang enquos eval_tidy
|
||||
pca_transform_x <- function(x, ...) {
|
||||
# unset data.table, tbl_df, etc.
|
||||
# unset data.table, tibble, etc.
|
||||
# also removes groups made by dplyr::group_by
|
||||
x <- as.data.frame(x, stringsAsFactors = FALSE)
|
||||
x.bak <- x
|
||||
@ -123,7 +103,17 @@ pca_transform_x <- function(x, ...) {
|
||||
x <- cbind(x.bak[, sapply(x.bak, function(y) !is.numeric(y) & !all(is.na(y))), drop = FALSE], x)
|
||||
}
|
||||
|
||||
x %>%
|
||||
x <- x %>%
|
||||
ungroup() %>% # would otherwise select the grouping vars
|
||||
filter_all(all_vars(!is.na(.)))
|
||||
|
||||
pca_data <- x[, which(sapply(x, function(x) is.numeric(x)))]
|
||||
|
||||
message(blue(paste0("NOTE: Columns selected for PCA: ", paste0(bold(colnames(pca_data)), collapse = "/"),
|
||||
".\n Total observations available: ", nrow(pca_data), ".")))
|
||||
|
||||
pca_model <- prcomp(pca_data, retx = retx, center = center, scale. = scale., tol = tol, rank. = rank.)
|
||||
attr(pca_model, "non_numeric_cols") <- x[, sapply(x, function(y) !is.numeric(y) & !all(is.na(y))), drop = FALSE]
|
||||
class(pca_model) <- c("pca", class(pca_model))
|
||||
pca_model
|
||||
}
|
||||
|
@ -78,7 +78,7 @@
|
||||
</button>
|
||||
<span class="navbar-brand">
|
||||
<a class="navbar-link" href="https://msberends.gitlab.io/AMR/index.html">AMR (for R)</a>
|
||||
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.0.1.9001</span>
|
||||
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.0.1.9002</span>
|
||||
</span>
|
||||
</div>
|
||||
|
||||
|
@ -78,7 +78,7 @@
|
||||
</button>
|
||||
<span class="navbar-brand">
|
||||
<a class="navbar-link" href="index.html">AMR (for R)</a>
|
||||
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.0.1.9001</span>
|
||||
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.0.1.9002</span>
|
||||
</span>
|
||||
</div>
|
||||
|
||||
|
@ -78,7 +78,7 @@
|
||||
</button>
|
||||
<span class="navbar-brand">
|
||||
<a class="navbar-link" href="../index.html">AMR (for R)</a>
|
||||
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.0.1.9001</span>
|
||||
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.0.1.9002</span>
|
||||
</span>
|
||||
</div>
|
||||
|
||||
|
@ -78,7 +78,7 @@
|
||||
</button>
|
||||
<span class="navbar-brand">
|
||||
<a class="navbar-link" href="index.html">AMR (for R)</a>
|
||||
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.0.1.9001</span>
|
||||
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.0.1.9002</span>
|
||||
</span>
|
||||
</div>
|
||||
|
||||
|
@ -43,7 +43,7 @@
|
||||
</button>
|
||||
<span class="navbar-brand">
|
||||
<a class="navbar-link" href="index.html">AMR (for R)</a>
|
||||
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.0.1.9001</span>
|
||||
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.0.1.9002</span>
|
||||
</span>
|
||||
</div>
|
||||
|
||||
|
@ -78,7 +78,7 @@
|
||||
</button>
|
||||
<span class="navbar-brand">
|
||||
<a class="navbar-link" href="../index.html">AMR (for R)</a>
|
||||
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.0.1.9001</span>
|
||||
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.0.1.9002</span>
|
||||
</span>
|
||||
</div>
|
||||
|
||||
@ -226,10 +226,14 @@
|
||||
|
||||
</div>
|
||||
|
||||
<div id="amr-1019001" class="section level1">
|
||||
<div id="amr-1019002" class="section level1">
|
||||
<h1 class="page-header">
|
||||
<a href="#amr-1019001" class="anchor"></a>AMR 1.0.1.9001<small> Unreleased </small>
|
||||
<a href="#amr-1019002" class="anchor"></a>AMR 1.0.1.9002<small> Unreleased </small>
|
||||
</h1>
|
||||
<div id="last-updated-08-mar-2020" class="section level2">
|
||||
<h2 class="hasAnchor">
|
||||
<a href="#last-updated-08-mar-2020" class="anchor"></a><small>Last updated: 08-Mar-2020</small>
|
||||
</h2>
|
||||
<div id="new" class="section level3">
|
||||
<h3 class="hasAnchor">
|
||||
<a href="#new" class="anchor"></a>New</h3>
|
||||
@ -238,6 +242,7 @@
|
||||
<li>Plotting biplots for principal component analysis using the new <code><a href="../reference/ggplot_pca.html">ggplot_pca()</a></code> function</li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div id="amr-101" class="section level1">
|
||||
<h1 class="page-header">
|
||||
@ -1489,7 +1494,7 @@
|
||||
<div id="tocnav">
|
||||
<h2>Contents</h2>
|
||||
<ul class="nav nav-pills nav-stacked">
|
||||
<li><a href="#amr-1019001">1.0.1.9001</a></li>
|
||||
<li><a href="#amr-1019002">1.0.1.9002</a></li>
|
||||
<li><a href="#amr-101">1.0.1</a></li>
|
||||
<li><a href="#amr-100">1.0.0</a></li>
|
||||
<li><a href="#amr-090">0.9.0</a></li>
|
||||
|
@ -79,7 +79,7 @@
|
||||
</button>
|
||||
<span class="navbar-brand">
|
||||
<a class="navbar-link" href="../index.html">AMR (for R)</a>
|
||||
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.0.1.9001</span>
|
||||
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.0.1.9002</span>
|
||||
</span>
|
||||
</div>
|
||||
|
||||
@ -236,14 +236,15 @@
|
||||
<span class='no'>x</span>,
|
||||
<span class='kw'>choices</span> <span class='kw'>=</span> <span class='fl'>1</span>:<span class='fl'>2</span>,
|
||||
<span class='kw'>scale</span> <span class='kw'>=</span> <span class='fl'>TRUE</span>,
|
||||
<span class='kw'>pc.biplot</span> <span class='kw'>=</span> <span class='fl'>TRUE</span>,
|
||||
<span class='kw'>labels</span> <span class='kw'>=</span> <span class='kw'>NULL</span>,
|
||||
<span class='kw'>labels_textsize</span> <span class='kw'>=</span> <span class='fl'>3</span>,
|
||||
<span class='kw'>labels_text_placement</span> <span class='kw'>=</span> <span class='fl'>1.5</span>,
|
||||
<span class='kw'>groups</span> <span class='kw'>=</span> <span class='kw'>NULL</span>,
|
||||
<span class='kw'>ellipse</span> <span class='kw'>=</span> <span class='fl'>FALSE</span>,
|
||||
<span class='kw'>ellipse</span> <span class='kw'>=</span> <span class='fl'>TRUE</span>,
|
||||
<span class='kw'>ellipse_prob</span> <span class='kw'>=</span> <span class='fl'>0.68</span>,
|
||||
<span class='kw'>ellipse_size</span> <span class='kw'>=</span> <span class='fl'>0.5</span>,
|
||||
<span class='kw'>ellipse_alpha</span> <span class='kw'>=</span> <span class='fl'>0.25</span>,
|
||||
<span class='kw'>ellipse_alpha</span> <span class='kw'>=</span> <span class='fl'>0.5</span>,
|
||||
<span class='kw'>points_size</span> <span class='kw'>=</span> <span class='fl'>2</span>,
|
||||
<span class='kw'>points_alpha</span> <span class='kw'>=</span> <span class='fl'>0.25</span>,
|
||||
<span class='kw'>arrows</span> <span class='kw'>=</span> <span class='fl'>TRUE</span>,
|
||||
@ -275,6 +276,14 @@
|
||||
<code><a href='https://rdrr.io/r/stats/princomp.html'>princomp</a></code>. Normally <code>0 <= scale <= 1</code>, and a warning
|
||||
will be issued if the specified <code>scale</code> is outside this range.</p></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>pc.biplot</th>
|
||||
<td><p>If true, use what Gabriel (1971) refers to as a "principal component
|
||||
biplot", with <code>lambda = 1</code> and observations scaled up by sqrt(n) and
|
||||
variables scaled down by sqrt(n). Then inner products between
|
||||
variables approximate covariances and distances between observations
|
||||
approximate Mahalanobis distance.</p></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>labels</th>
|
||||
<td><p>an optional vector of labels for the observations. If set, the labels will be placed below their respective points. When using the <code><a href='pca.html'>pca()</a></code> function as input for <code>x</code>, this will be determined automatically based on the attribute <code>non_numeric_cols</code>, see <code><a href='pca.html'>pca()</a></code>.</p></td>
|
||||
@ -352,13 +361,13 @@
|
||||
<li><p>Rewritten code to remove the dependency on packages <code>plyr</code>, <code>scales</code> and <code>grid</code></p></li>
|
||||
<li><p>Parametrised more options, like arrow and ellipse settings</p></li>
|
||||
<li><p>Added total amount of explained variance as a caption in the plot</p></li>
|
||||
<li><p>Cleaned all syntax based on the <code>lintr</code> package</p></li>
|
||||
<li><p>Cleaned all syntax based on the <code>lintr</code> package and added integrity checks</p></li>
|
||||
<li><p>Updated documentation</p></li>
|
||||
</ol>
|
||||
|
||||
<h2 class="hasAnchor" id="details"><a class="anchor" href="#details"></a>Details</h2>
|
||||
|
||||
<p>The default colours for labels and points is set with <code>scale_colour_viridis_d()</code>, but these can be changed by adding another scale for colour, like <code>scale_colour_brewer()</code>.</p>
|
||||
<p>The colours for labels and points can be changed by adding another scale layer for colour, like <code>scale_colour_viridis_d()</code> or <code>scale_colour_brewer()</code>.</p>
|
||||
<h2 class="hasAnchor" id="maturing-lifecycle"><a class="anchor" href="#maturing-lifecycle"></a>Maturing lifecycle</h2>
|
||||
|
||||
|
||||
|
@ -78,7 +78,7 @@
|
||||
</button>
|
||||
<span class="navbar-brand">
|
||||
<a class="navbar-link" href="../index.html">AMR (for R)</a>
|
||||
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.0.1.9001</span>
|
||||
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.0.1.9002</span>
|
||||
</span>
|
||||
</div>
|
||||
|
||||
@ -403,7 +403,7 @@
|
||||
</tr><tr>
|
||||
|
||||
<td>
|
||||
<p><code><a href="pca.html">prcomp(<i><data.frame></i>)</a></code> <code><a href="pca.html">pca()</a></code> </p>
|
||||
<p><code><a href="pca.html">pca()</a></code> </p>
|
||||
</td>
|
||||
<td><p>Principal Component Analysis (for AMR)</p></td>
|
||||
</tr><tr>
|
||||
|
@ -6,7 +6,7 @@
|
||||
<meta http-equiv="X-UA-Compatible" content="IE=edge">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
<title>Principal Component Analysis (for AMR) — prcomp.data.frame • AMR (for R)</title>
|
||||
<title>Principal Component Analysis (for AMR) — pca • AMR (for R)</title>
|
||||
|
||||
<!-- favicons -->
|
||||
<link rel="icon" type="image/png" sizes="16x16" href="../favicon-16x16.png">
|
||||
@ -44,8 +44,8 @@
|
||||
<link href="../extra.css" rel="stylesheet">
|
||||
<script src="../extra.js"></script>
|
||||
|
||||
<meta property="og:title" content="Principal Component Analysis (for AMR) — prcomp.data.frame" />
|
||||
<meta property="og:description" content="Performs a principal component analysis (PCA) based on a data set with automatic determination for afterwards plotting the groups and labels." />
|
||||
<meta property="og:title" content="Principal Component Analysis (for AMR) — pca" />
|
||||
<meta property="og:description" content="Performs a principal component analysis (PCA) based on a data set with automatic determination for afterwards plotting the groups and labels, and automatic filtering on only suitable (i.e. non-empty and numeric) variables." />
|
||||
<meta property="og:image" content="https://msberends.gitlab.io/AMR/logo.png" />
|
||||
<meta name="twitter:card" content="summary" />
|
||||
|
||||
@ -79,7 +79,7 @@
|
||||
</button>
|
||||
<span class="navbar-brand">
|
||||
<a class="navbar-link" href="../index.html">AMR (for R)</a>
|
||||
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.0.1.9000</span>
|
||||
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.0.1.9002</span>
|
||||
</span>
|
||||
</div>
|
||||
|
||||
@ -229,11 +229,10 @@
|
||||
</div>
|
||||
|
||||
<div class="ref-description">
|
||||
<p>Performs a principal component analysis (PCA) based on a data set with automatic determination for afterwards plotting the groups and labels.</p>
|
||||
<p>Performs a principal component analysis (PCA) based on a data set with automatic determination for afterwards plotting the groups and labels, and automatic filtering on only suitable (i.e. non-empty and numeric) variables.</p>
|
||||
</div>
|
||||
|
||||
<pre class="usage"><span class='co'># S3 method for data.frame</span>
|
||||
<span class='fu'><a href='https://rdrr.io/r/stats/prcomp.html'>prcomp</a></span>(
|
||||
<pre class="usage"><span class='fu'>pca</span>(
|
||||
<span class='no'>x</span>,
|
||||
<span class='no'>...</span>,
|
||||
<span class='kw'>retx</span> <span class='kw'>=</span> <span class='fl'>TRUE</span>,
|
||||
@ -241,9 +240,7 @@
|
||||
<span class='kw'>scale.</span> <span class='kw'>=</span> <span class='fl'>TRUE</span>,
|
||||
<span class='kw'>tol</span> <span class='kw'>=</span> <span class='kw'>NULL</span>,
|
||||
<span class='kw'>rank.</span> <span class='kw'>=</span> <span class='kw'>NULL</span>
|
||||
)
|
||||
|
||||
<span class='fu'>pca</span>(<span class='no'>x</span>, <span class='no'>...</span>)</pre>
|
||||
)</pre>
|
||||
|
||||
<h2 class="hasAnchor" id="arguments"><a class="anchor" href="#arguments"></a>Arguments</h2>
|
||||
<table class="ref-arguments">
|
||||
@ -297,10 +294,13 @@
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
<h2 class="hasAnchor" id="value"><a class="anchor" href="#value"></a>Value</h2>
|
||||
|
||||
<p>An object of classes pca and <a href='https://rdrr.io/r/stats/prcomp.html'>prcomp</a></p>
|
||||
<h2 class="hasAnchor" id="details"><a class="anchor" href="#details"></a>Details</h2>
|
||||
|
||||
<p>The <code>pca()</code> function takes a <a href='https://rdrr.io/r/base/data.frame.html'>data.frame</a> as input and performs the actual PCA with the R function <code><a href='https://rdrr.io/r/stats/prcomp.html'>prcomp()</a></code>.</p>
|
||||
<p>The result of the <code>pca()</code> function is a <code><a href='https://rdrr.io/r/stats/prcomp.html'>prcomp</a></code> object, with an additional attribute <code>non_numeric_cols</code> which is a vector with the column names of all columns that do not contain numeric values. These are probably the groups and labels, and will be used by <code><a href='ggplot_pca.html'>ggplot_pca()</a></code>.</p>
|
||||
<p>The <code>pca()</code> function takes a <a href='https://rdrr.io/r/base/data.frame.html'>data.frame</a> as input and performs the actual PCA with the <span style="R">R</span> function <code><a href='https://rdrr.io/r/stats/prcomp.html'>prcomp()</a></code>.</p>
|
||||
<p>The result of the <code>pca()</code> function is a <a href='https://rdrr.io/r/stats/prcomp.html'>prcomp</a> object, with an additional attribute <code>non_numeric_cols</code> which is a vector with the column names of all columns that do not contain numeric values. These are probably the groups and labels, and will be used by <code><a href='ggplot_pca.html'>ggplot_pca()</a></code>.</p>
|
||||
<h2 class="hasAnchor" id="experimental-lifecycle"><a class="anchor" href="#experimental-lifecycle"></a>Experimental lifecycle</h2>
|
||||
|
||||
|
||||
@ -332,6 +332,7 @@ The <a href='lifecycle.html'>lifecycle</a> of this function is <strong>experimen
|
||||
<h2>Contents</h2>
|
||||
<ul class="nav nav-pills nav-stacked">
|
||||
<li><a href="#arguments">Arguments</a></li>
|
||||
<li><a href="#value">Value</a></li>
|
||||
<li><a href="#details">Details</a></li>
|
||||
<li><a href="#experimental-lifecycle">Experimental lifecycle</a></li>
|
||||
<li><a href="#examples">Examples</a></li>
|
||||
|
@ -50,6 +50,14 @@ if [ -z "$3" ]; then
|
||||
git pull --tags --quiet
|
||||
current_tag=`git describe --tags --abbrev=0 | sed 's/v//'`
|
||||
current_commit=`git describe --tags | sed 's/.*-\(.*\)-.*/\1/'`
|
||||
if [ -z "current_tag" ]; then
|
||||
echo "FATAL - could not determine current tag"
|
||||
exit 1
|
||||
fi
|
||||
if [ -z "current_commit" ]; then
|
||||
echo "FATAL - could not determine last commit index number"
|
||||
exit 1
|
||||
fi
|
||||
# combine tag (e.g. 0.1.0) and commit number (like 40) increased by 9000 to indicate beta version
|
||||
new_version="$current_tag.$((current_commit + 9000))" # results in 0.1.0.9040
|
||||
if [ -z "$new_version" ]; then
|
||||
@ -99,6 +107,7 @@ echo
|
||||
echo "•••••••••••••••••••••••••"
|
||||
echo "• List of changed files •"
|
||||
echo "•••••••••••••••••••••••••"
|
||||
git add .
|
||||
git status --short
|
||||
echo
|
||||
read -p "Uploading version ${new_version}. Continue (Y/n)? " choice
|
||||
@ -111,7 +120,6 @@ echo
|
||||
echo "•••••••••••••••••••••••••••"
|
||||
echo "• Uploading to repository •"
|
||||
echo "•••••••••••••••••••••••••••"
|
||||
git add .
|
||||
git commit -a -m "(v$new_version) $1" --quiet
|
||||
git push --quiet
|
||||
echo "Comparison:"
|
||||
|
@ -11,7 +11,7 @@ As per their GPL-2 licence that demands documentation of code changes, the chang
|
||||
\item Rewritten code to remove the dependency on packages \code{plyr}, \code{scales} and \code{grid}
|
||||
\item Parametrised more options, like arrow and ellipse settings
|
||||
\item Added total amount of explained variance as a caption in the plot
|
||||
\item Cleaned all syntax based on the \code{lintr} package
|
||||
\item Cleaned all syntax based on the \code{lintr} package and added integrity checks
|
||||
\item Updated documentation
|
||||
}
|
||||
}
|
||||
@ -20,14 +20,15 @@ ggplot_pca(
|
||||
x,
|
||||
choices = 1:2,
|
||||
scale = TRUE,
|
||||
pc.biplot = TRUE,
|
||||
labels = NULL,
|
||||
labels_textsize = 3,
|
||||
labels_text_placement = 1.5,
|
||||
groups = NULL,
|
||||
ellipse = FALSE,
|
||||
ellipse = TRUE,
|
||||
ellipse_prob = 0.68,
|
||||
ellipse_size = 0.5,
|
||||
ellipse_alpha = 0.25,
|
||||
ellipse_alpha = 0.5,
|
||||
points_size = 2,
|
||||
points_alpha = 0.25,
|
||||
arrows = TRUE,
|
||||
@ -55,6 +56,14 @@ ggplot_pca(
|
||||
will be issued if the specified \code{scale} is outside this range.
|
||||
}
|
||||
|
||||
\item{pc.biplot}{
|
||||
If true, use what Gabriel (1971) refers to as a "principal component
|
||||
biplot", with \code{lambda = 1} and observations scaled up by sqrt(n) and
|
||||
variables scaled down by sqrt(n). Then inner products between
|
||||
variables approximate covariances and distances between observations
|
||||
approximate Mahalanobis distance.
|
||||
}
|
||||
|
||||
\item{labels}{an optional vector of labels for the observations. If set, the labels will be placed below their respective points. When using the \code{\link[=pca]{pca()}} function as input for \code{x}, this will be determined automatically based on the attribute \code{non_numeric_cols}, see \code{\link[=pca]{pca()}}.}
|
||||
|
||||
\item{labels_textsize}{the size of the text used for the labels}
|
||||
@ -93,7 +102,7 @@ ggplot_pca(
|
||||
This function is to produce a \code{ggplot2} variant of a so-called \href{https://en.wikipedia.org/wiki/Biplot}{biplot} for PCA (principal component analysis), but is more flexible and more appealing than the base \R \code{\link[=biplot]{biplot()}} function.
|
||||
}
|
||||
\details{
|
||||
The default colours for labels and points is set with \code{\link[=scale_colour_viridis_d]{scale_colour_viridis_d()}}, but these can be changed by adding another scale for colour, like \code{\link[=scale_colour_brewer]{scale_colour_brewer()}}.
|
||||
The colours for labels and points can be changed by adding another scale layer for colour, like \code{\link[=scale_colour_viridis_d]{scale_colour_viridis_d()}} or \code{\link[=scale_colour_brewer]{scale_colour_brewer()}}.
|
||||
}
|
||||
\section{Maturing lifecycle}{
|
||||
|
||||
|
16
man/pca.Rd
16
man/pca.Rd
@ -1,11 +1,10 @@
|
||||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/pca.R
|
||||
\name{prcomp.data.frame}
|
||||
\alias{prcomp.data.frame}
|
||||
\name{pca}
|
||||
\alias{pca}
|
||||
\title{Principal Component Analysis (for AMR)}
|
||||
\usage{
|
||||
\method{prcomp}{data.frame}(
|
||||
pca(
|
||||
x,
|
||||
...,
|
||||
retx = TRUE,
|
||||
@ -14,8 +13,6 @@
|
||||
tol = NULL,
|
||||
rank. = NULL
|
||||
)
|
||||
|
||||
pca(x, ...)
|
||||
}
|
||||
\arguments{
|
||||
\item{x}{a \link{data.frame} containing numeric columns}
|
||||
@ -51,13 +48,16 @@ pca(x, ...)
|
||||
alternative or in addition to \code{tol}, useful notably when the
|
||||
desired rank is considerably smaller than the dimensions of the matrix.}
|
||||
}
|
||||
\value{
|
||||
An object of classes \link{pca} and \link{prcomp}
|
||||
}
|
||||
\description{
|
||||
Performs a principal component analysis (PCA) based on a data set with automatic determination for afterwards plotting the groups and labels.
|
||||
Performs a principal component analysis (PCA) based on a data set with automatic determination for afterwards plotting the groups and labels, and automatic filtering on only suitable (i.e. non-empty and numeric) variables.
|
||||
}
|
||||
\details{
|
||||
The \code{\link[=pca]{pca()}} function takes a \link{data.frame} as input and performs the actual PCA with the R function \code{\link[=prcomp]{prcomp()}}.
|
||||
The \code{\link[=pca]{pca()}} function takes a \link{data.frame} as input and performs the actual PCA with the \R function \code{\link[=prcomp]{prcomp()}}.
|
||||
|
||||
The result of the \code{\link[=pca]{pca()}} function is a \code{\link{prcomp}} object, with an additional attribute \code{non_numeric_cols} which is a vector with the column names of all columns that do not contain numeric values. These are probably the groups and labels, and will be used by \code{\link[=ggplot_pca]{ggplot_pca()}}.
|
||||
The result of the \code{\link[=pca]{pca()}} function is a \link{prcomp} object, with an additional attribute \code{non_numeric_cols} which is a vector with the column names of all columns that do not contain numeric values. These are probably the groups and labels, and will be used by \code{\link[=ggplot_pca]{ggplot_pca()}}.
|
||||
}
|
||||
\section{Experimental lifecycle}{
|
||||
|
||||
|
@ -30,8 +30,9 @@ test_that("PCA works", {
|
||||
genus = mo_genus(mo)) %>% # and genus as we do here
|
||||
summarise_if(is.rsi, resistance, minimum = 0)
|
||||
|
||||
expect_s3_class(pca(resistance_data), "prcomp")
|
||||
expect_s3_class(prcomp(resistance_data), "prcomp")
|
||||
pca_model <- pca(resistance_data)
|
||||
|
||||
ggplot_pca(pca(resistance_data), ellipse = TRUE)
|
||||
expect_s3_class(pca_model, "pca")
|
||||
|
||||
ggplot_pca(pca_model, ellipse = TRUE)
|
||||
})
|
||||
|
Loading…
Reference in New Issue
Block a user