(v1.0.1.9002) PCA unit tests

2026-08-03 18:10:54 +02:00 · 2020-03-08 11:18:59 +01:00
parent 9fc858f208
commit 77656a676c
20 changed files with 182 additions and 135 deletions
--- a/2
+++ b/2
@@ -1,5 +1,5 @@
 Package: AMR
-Version: 1.0.1.9001
+Version: 1.0.1.9002
 Date: 2020-03-08
 Title: Antimicrobial Resistance Analysis
 Authors@R: c(
--- a/4
+++ b/4
@@ -37,7 +37,6 @@ S3method(pillar_shaft,rsi)
 S3method(plot,mic)
 S3method(plot,resistance_predict)
 S3method(plot,rsi)
-S3method(prcomp,data.frame)
 S3method(print,ab)
 S3method(print,bug_drug_combinations)
 S3method(print,catalogue_of_life_version)
@@ -227,7 +226,6 @@ exportMethods(kurtosis.default)
 exportMethods(kurtosis.matrix)
 exportMethods(plot.mic)
 exportMethods(plot.rsi)
-exportMethods(prcomp.data.frame)
 exportMethods(print.ab)
 exportMethods(print.bug_drug_combinations)
 exportMethods(print.catalogue_of_life_version)
@@ -329,6 +327,8 @@ importFrom(stats,lm)
 importFrom(stats,pchisq)
 importFrom(stats,prcomp)
 importFrom(stats,predict)
+importFrom(stats,qchisq)
+importFrom(stats,var)
 importFrom(tidyr,pivot_longer)
 importFrom(tidyr,pivot_wider)
 importFrom(utils,adist)
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,4 +1,5 @@
-# AMR 1.0.1.9001
+# AMR 1.0.1.9002
+## <small>Last updated: 08-Mar-2020</small>

 ### New
 * Support for easy principal component analysis for AMR, using the new `pca()` function 
--- a/R/aa_helper_functions.R
+++ b/R/aa_helper_functions.R
@@ -43,7 +43,7 @@ check_dataset_integrity <- function() {
                               "iv_ddd", "iv_units", "loinc") %in% colnames(antibiotics),
                             na.rm = TRUE)
  }, error = function(e)
-    stop('Please use the command \'library("AMR")\' before using this function, to load the needed reference data.', call. = FALSE)
+    stop('Please use the command \'library("AMR")\' before using this function, to load the required reference data.', call. = FALSE)
  )
  if (!check_microorganisms | !check_antibiotics) {
    stop("Data set `microorganisms` or data set `antibiotics` is overwritten by your global environment and prevents the AMR package from working correctly. Please rename your object before using this function.", call. = FALSE)
@@ -154,6 +154,13 @@ stopifnot_installed_package <- function(package) {
  return(invisible())
 }

+stopifnot_msg <- function(expr, msg) {
+  if (!isTRUE(expr)) {
+    stop(msg, call. = FALSE)
+  }
+}
+
+
 "%or%" <- function(x, y) {
  if (is.null(x) | is.null(y)) {
    if (is.null(x)) {
--- a/R/ggplot_pca.R
+++ b/R/ggplot_pca.R
@@ -49,9 +49,9 @@
 #' 1. Rewritten code to remove the dependency on packages `plyr`, `scales` and `grid`
 #' 2. Parametrised more options, like arrow and ellipse settings
 #' 3. Added total amount of explained variance as a caption in the plot
-#' 4. Cleaned all syntax based on the `lintr` package
+#' 4. Cleaned all syntax based on the `lintr` package and added integrity checks
 #' 5. Updated documentation
-#' @details The default colours for labels and points is set with [scale_colour_viridis_d()], but these can be changed by adding another scale for colour, like [scale_colour_brewer()].
+#' @details The colours for labels and points can be changed by adding another scale layer for colour, like [scale_colour_viridis_d()] or [scale_colour_brewer()].
 #' @rdname ggplot_pca
 #' @export
 #' @examples 
@@ -74,14 +74,15 @@
 ggplot_pca <- function(x,
                       choices = 1:2,
                       scale = TRUE,
+                       pc.biplot = TRUE,
                       labels = NULL,
                       labels_textsize = 3,
                       labels_text_placement = 1.5,
                       groups = NULL,
-                       ellipse = FALSE,
+                       ellipse = TRUE,
                       ellipse_prob = 0.68,
                       ellipse_size = 0.5,
-                       ellipse_alpha = 0.25,
+                       ellipse_alpha = 0.5,
                       points_size = 2,
                       points_alpha = 0.25,
                       arrows = TRUE,
@@ -93,6 +94,21 @@ ggplot_pca <- function(x,
                       ...) {
  
  stopifnot_installed_package("ggplot2")
+  stopifnot_msg(length(choices) == 2, "`choices` must be of length 2")
+  stopifnot_msg(is.logical(scale), "`scale` must be TRUE or FALSE")
+  stopifnot_msg(is.logical(pc.biplot), "`pc.biplot` must be TRUE or FALSE")
+  stopifnot_msg(is.numeric(choices), "`choices` must be numeric")
+  stopifnot_msg(is.numeric(labels_textsize), "`labels_textsize` must be numeric")
+  stopifnot_msg(is.numeric(labels_text_placement), "`labels_text_placement` must be numeric")
+  stopifnot_msg(is.logical(ellipse), "`ellipse` must be TRUE or FALSE")
+  stopifnot_msg(is.numeric(ellipse_prob), "`ellipse_prob` must be numeric")
+  stopifnot_msg(is.numeric(ellipse_size), "`ellipse_size` must be numeric")
+  stopifnot_msg(is.numeric(ellipse_alpha), "`ellipse_alpha` must be numeric")
+  stopifnot_msg(is.logical(arrows), "`arrows` must be TRUE or FALSE")
+  stopifnot_msg(is.numeric(arrows_size), "`arrows_size` must be numeric")
+  stopifnot_msg(is.numeric(arrows_textsize), "`arrows_textsize` must be numeric")
+  stopifnot_msg(is.numeric(arrows_alpha), "`arrows_alpha` must be numeric")
+  stopifnot_msg(is.numeric(base_textsize), "`base_textsize` must be numeric")
  
  calculations <- pca_calculations(pca_model = x,
                                   groups = groups, 
@@ -101,6 +117,7 @@ ggplot_pca <- function(x,
                                   labels_missing = missing(labels),
                                   choices = choices,
                                   scale = scale,
+                                   pc.biplot = pc.biplot,
                                   ellipse_prob = ellipse_prob,
                                   labels_text_placement = labels_text_placement)
  nobs.factor <- calculations$nobs.factor
@@ -116,17 +133,16 @@ ggplot_pca <- function(x,
  group_name <- calculations$group_name
  labels <- calculations$labels
  
-  stopifnot(length(choices) == 2)
-  
  # Append the proportion of explained variance to the axis labels
  if ((1 - as.integer(scale)) == 0) {
-    u.axis.labs <- paste("Standardised PC", choices, sep = "")
+    u.axis.labs <- paste0("Standardised PC", choices)
  } else {
-    u.axis.labs <- paste("PC", choices, sep = "")
+    u.axis.labs <- paste0("PC", choices)
  }
-  u.axis.labs <- paste(u.axis.labs,
-                       paste0("\n(explained var: ", 
-                              percentage(x$sdev[choices] ^ 2 / sum(x$sdev ^ 2)), ")"))
+  u.axis.labs <- paste0(u.axis.labs,
+                        paste0("\n(explained var: ", 
+                               percentage(x$sdev[choices] ^ 2 / sum(x$sdev ^ 2)),
+                               ")"))
  
  # Score Labels
  if (!is.null(labels)) {
@@ -138,7 +154,6 @@ ggplot_pca <- function(x,
    df.u$groups <- groups
  }
  
-  
  # Base plot
  g <- ggplot2::ggplot(data = df.u, 
                       ggplot2::aes(x = xvar, y = yvar)) + 
@@ -150,30 +165,25 @@ ggplot_pca <- function(x,
  # Draw either labels or points
  if (!is.null(df.u$labels)) {
    if (!is.null(df.u$groups)) {
-      g <- g + 
-        ggplot2::geom_point(ggplot2::aes(colour = groups),
-                            alpha = points_alpha, 
-                            size = points_size) +
+      g <- g + ggplot2::geom_point(ggplot2::aes(colour = groups),
+                                   alpha = points_alpha, 
+                                   size = points_size) +
        ggplot2::geom_text(ggplot2::aes(label = labels, colour = groups),
                           nudge_y = -0.05,
                           size = labels_textsize) +
-        ggplot2::scale_colour_viridis_d() +
        ggplot2::labs(colour = group_name)
    } else {
-      g <- g + 
-        ggplot2::geom_point(alpha = points_alpha, 
-                            size = points_size) +
+      g <- g + ggplot2::geom_point(alpha = points_alpha, 
+                                   size = points_size) +
        ggplot2::geom_text(ggplot2::aes(label = labels),
                           nudge_y = -0.05,
                           size = labels_textsize)      
    }
  } else {
    if (!is.null(df.u$groups)) {
-      g <- g + 
-        ggplot2::geom_point(ggplot2::aes(colour = groups),
-                            alpha = points_alpha,
-                            size = points_size) +
-        ggplot2::scale_colour_viridis_d() +
+      g <- g + ggplot2::geom_point(ggplot2::aes(colour = groups),
+                                   alpha = points_alpha,
+                                   size = points_size) +
        ggplot2::labs(colour = group_name)
    } else {
      g <- g + ggplot2::geom_point(alpha = points_alpha,
@@ -182,26 +192,24 @@ ggplot_pca <- function(x,
  }
  
  # Overlay a concentration ellipse if there are groups
-  if (!is.null(df.u$groups) & isTRUE(ellipse)) {
-    g <- g + 
-      ggplot2::geom_path(data = ell, 
-                         ggplot2::aes(colour = groups, group = groups),
-                         size = ellipse_size,
-                         alpha = points_alpha)
+  if (!is.null(df.u$groups) & !is.null(ell) & isTRUE(ellipse)) {
+    g <- g + ggplot2::geom_path(data = ell, 
+                                ggplot2::aes(colour = groups, group = groups),
+                                size = ellipse_size,
+                                alpha = points_alpha)
  }
  
  # Label the variable axes
  if (arrows == TRUE) {
-    g <- g + 
-      ggplot2::geom_segment(data = df.v,
-                            ggplot2::aes(x = 0, y = 0, xend = xvar, yend = yvar),
-                            arrow = ggplot2::arrow(length = ggplot2::unit(0.5, "picas"),
-                                                   angle = 20,
-                                                   ends = "last",
-                                                   type = "open"), 
-                            colour = arrows_colour, 
-                            size = arrows_size,
-                            alpha = arrows_alpha) +
+    g <- g + ggplot2::geom_segment(data = df.v,
+                                   ggplot2::aes(x = 0, y = 0, xend = xvar, yend = yvar),
+                                   arrow = ggplot2::arrow(length = ggplot2::unit(0.5, "picas"),
+                                                          angle = 20,
+                                                          ends = "last",
+                                                          type = "open"), 
+                                   colour = arrows_colour, 
+                                   size = arrows_size,
+                                   alpha = arrows_alpha) +
      ggplot2::geom_text(data = df.v, 
                         ggplot2::aes(label = varname, x = xvar, y = yvar, angle = angle, hjust = hjust), 
                         colour = arrows_colour,
@@ -225,6 +233,7 @@ ggplot_pca <- function(x,
 }

 #' @importFrom dplyr bind_rows
+#' @importFrom stats qchisq var
 pca_calculations <- function(pca_model,
                             groups = NULL,
                             groups_missing = TRUE,
@@ -232,6 +241,7 @@ pca_calculations <- function(pca_model,
                             labels_missing = TRUE,
                             choices = 1:2,
                             scale = 1,
+                             pc.biplot = TRUE,
                             ellipse_prob = 0.68,
                             labels_text_placement = 1.5) {
  
@@ -291,7 +301,9 @@ pca_calculations <- function(pca_model,
  names(df.u) <- c("xvar", "yvar")
  names(df.v) <- names(df.u)
  
-  df.u <- df.u * nobs.factor
+  if (isTRUE(pc.biplot)) {
+    df.u <- df.u * nobs.factor
+  }
  
  # Scale the radius of the correlation circle so that it corresponds to 
  # a data ellipse for the standardized PC scores
@@ -314,8 +326,8 @@ pca_calculations <- function(pca_model,
  df.v$hjust <- with(df.v, (1 - labels_text_placement * sign(xvar)) / 2)
  
  if (!is.null(df.u$groups)) {
-    theta <<- c(seq(-pi, pi, length = 50), seq(pi, -pi, length = 50))
-    circle <<- cbind(cos(theta), sin(theta))
+    theta <- c(seq(-pi, pi, length = 50), seq(pi, -pi, length = 50))
+    circle <- cbind(cos(theta), sin(theta))
    ell <- bind_rows(
      sapply(unique(df.u$groups), function(g, df = df.u) {
        x <- df[which(df$groups == g), , drop = FALSE]
@@ -325,18 +337,18 @@ pca_calculations <- function(pca_model,
        sigma <- var(cbind(x$xvar, x$yvar))
        mu <- c(mean(x$xvar), mean(x$yvar))
        ed <- sqrt(qchisq(ellipse_prob, df = 2))
-        el <- data.frame(sweep(circle %*% chol(sigma) * ed, 2, mu, FUN = "+"), 
-                   groups = x$groups[1])
-        names(el)[1:2] <- c("xvar", "yvar")
-        el
+        data.frame(sweep(circle %*% chol(sigma) * ed, 2, mu, FUN = "+"), 
+                         groups = x$groups[1])
      }))
    if (NROW(ell) == 0) {
      ell <- NULL
+    } else {
+      names(ell)[1:2] <- c("xvar", "yvar")
    }
  } else {
    ell <- NULL
  }
-
+  
  list(nobs.factor = nobs.factor,
       d = d,
       u = u,
@@ -350,5 +362,4 @@ pca_calculations <- function(pca_model,
       group_name = group_name,
       labels = labels
  )
-  
 }
--- a/R/globals.R
+++ b/R/globals.R
@@ -24,6 +24,7 @@ globalVariables(c(".",
                  "ab",
                  "ab_txt",
                  "abbreviations",
+                  "angle",
                  "antibiotic",
                  "antibiotics",
                  "CNS_CPS",
@@ -40,6 +41,7 @@ globalVariables(c(".",
                  "genus",
                  "gramstain",
                  "group",
+                  "hjust",
                  "index",
                  "input",
                  "interpretation",
@@ -100,7 +102,10 @@ globalVariables(c(".",
                  "txt",
                  "uncertainty_level",
                  "value",
+                  "varname",
                  "x",
                  "xdr",
+                  "xvar",
                  "y",
-                  "year"))
+                  "year",
+                  "yvar"))
--- a/R/pca.R
+++ b/R/pca.R
@@ -21,17 +21,18 @@

 #' Principal Component Analysis (for AMR)
 #' 
-#' Performs a principal component analysis (PCA) based on a data set with automatic determination for afterwards plotting the groups and labels.
+#' Performs a principal component analysis (PCA) based on a data set with automatic determination for afterwards plotting the groups and labels, and automatic filtering on only suitable (i.e. non-empty and numeric) variables.
 #' @inheritSection lifecycle Experimental lifecycle
 #' @param x a [data.frame] containing numeric columns
 #' @param ... columns of `x` to be selected for PCA
 #' @inheritParams stats::prcomp
-#' @details The [pca()] function takes a [data.frame] as input and performs the actual PCA with the R function [prcomp()].
+#' @details The [pca()] function takes a [data.frame] as input and performs the actual PCA with the \R function [prcomp()].
 #' 
-#' The result of the [pca()] function is a [`prcomp`] object, with an additional attribute `non_numeric_cols` which is a vector with the column names of all columns that do not contain numeric values. These are probably the groups and labels, and will be used by [ggplot_pca()].
-#' @rdname pca
-#' @exportMethod prcomp.data.frame
+#' The result of the [pca()] function is a [prcomp] object, with an additional attribute `non_numeric_cols` which is a vector with the column names of all columns that do not contain numeric values. These are probably the groups and labels, and will be used by [ggplot_pca()].
+#' @return An object of classes [pca] and [prcomp]
 #' @importFrom stats prcomp
+#' @importFrom dplyr ungroup %>% filter_all all_vars
+#' @importFrom rlang enquos eval_tidy
 #' @export
 #' @examples 
 #' # `example_isolates` is a dataset available in the AMR package.
@@ -52,40 +53,19 @@
 #' summary(pca_result)
 #' biplot(pca_result)
 #' ggplot_pca(pca_result) # a new and convenient plot function
-prcomp.data.frame <- function(x,
-                              ...,
-                              retx = TRUE,
-                              center = TRUE, 
-                              scale. = TRUE,
-                              tol = NULL,
-                              rank. = NULL) {
+pca <- function(x,
+                ...,
+                retx = TRUE,
+                center = TRUE, 
+                scale. = TRUE,
+                tol = NULL,
+                rank. = NULL) {
  
-  x <- pca_transform_x(x = x, ... = ...)
-  pca_data <- x[, which(sapply(x, function(x) is.numeric(x)))]
-
-  message(blue(paste0("NOTE: Columns selected for PCA: ", paste0(bold(colnames(pca_data)), collapse = "/"),
-                      ".\n      Total observations available: ", nrow(pca_data), ".")))
-  
-  stats:::prcomp.default(pca_data, retx = retx, center = center, scale. = scale., tol = tol, rank. = rank.)
-}
-
-#' @rdname pca
-#' @export
-pca <- function(x, ...) {
  if (!is.data.frame(x)) {
    stop("this function only takes a data.frame as input")
  }
-  pca_model <- prcomp(x, ...)
  
-  x <- pca_transform_x(x = x, ... = ...)
-  attr(pca_model, "non_numeric_cols") <- x[, sapply(x, function(y) !is.numeric(y) & !all(is.na(y))), drop = FALSE]
-  pca_model
-}
-
-#' @importFrom dplyr ungroup %>% filter_all all_vars
-#' @importFrom rlang enquos eval_tidy
-pca_transform_x <- function(x, ...) {
-  # unset data.table, tbl_df, etc.
+  # unset data.table, tibble, etc.
  # also removes groups made by dplyr::group_by
  x <- as.data.frame(x, stringsAsFactors = FALSE)
  x.bak <- x
@@ -123,7 +103,17 @@ pca_transform_x <- function(x, ...) {
    x <- cbind(x.bak[, sapply(x.bak, function(y) !is.numeric(y) & !all(is.na(y))), drop = FALSE], x)
  }
  
-  x %>%
+  x <- x %>%
    ungroup() %>% # would otherwise select the grouping vars
    filter_all(all_vars(!is.na(.)))
+  
+  pca_data <- x[, which(sapply(x, function(x) is.numeric(x)))]
+  
+  message(blue(paste0("NOTE: Columns selected for PCA: ", paste0(bold(colnames(pca_data)), collapse = "/"),
+                      ".\n      Total observations available: ", nrow(pca_data), ".")))
+  
+  pca_model <- prcomp(pca_data, retx = retx, center = center, scale. = scale., tol = tol, rank. = rank.)
+  attr(pca_model, "non_numeric_cols") <- x[, sapply(x, function(y) !is.numeric(y) & !all(is.na(y))), drop = FALSE]
+  class(pca_model) <- c("pca", class(pca_model))
+  pca_model
 }
--- a/docs/404.html
+++ b/docs/404.html
@@ -78,7 +78,7 @@
      </button>
      <span class="navbar-brand">
        <a class="navbar-link" href="https://msberends.gitlab.io/AMR/index.html">AMR (for R)</a>
-        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.0.1.9001</span>
+        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.0.1.9002</span>
      </span>
    </div>

--- a/docs/LICENSE-text.html
+++ b/docs/LICENSE-text.html
@@ -78,7 +78,7 @@
      </button>
      <span class="navbar-brand">
        <a class="navbar-link" href="index.html">AMR (for R)</a>
-        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.0.1.9001</span>
+        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.0.1.9002</span>
      </span>
    </div>

--- a/docs/articles/index.html
+++ b/docs/articles/index.html
@@ -78,7 +78,7 @@
      </button>
      <span class="navbar-brand">
        <a class="navbar-link" href="../index.html">AMR (for R)</a>
-        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.0.1.9001</span>
+        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.0.1.9002</span>
      </span>
    </div>

--- a/docs/authors.html
+++ b/docs/authors.html
@@ -78,7 +78,7 @@
      </button>
      <span class="navbar-brand">
        <a class="navbar-link" href="index.html">AMR (for R)</a>
-        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.0.1.9001</span>
+        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.0.1.9002</span>
      </span>
    </div>

--- a/docs/index.html
+++ b/docs/index.html
@@ -43,7 +43,7 @@
      </button>
      <span class="navbar-brand">
        <a class="navbar-link" href="index.html">AMR (for R)</a>
-        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.0.1.9001</span>
+        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.0.1.9002</span>
      </span>
    </div>

--- a/docs/news/index.html
+++ b/docs/news/index.html
@@ -78,7 +78,7 @@
      </button>
      <span class="navbar-brand">
        <a class="navbar-link" href="../index.html">AMR (for R)</a>
-        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.0.1.9001</span>
+        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.0.1.9002</span>
      </span>
    </div>

@@ -226,10 +226,14 @@
      
    </div>

-    <div id="amr-1019001" class="section level1">
+    <div id="amr-1019002" class="section level1">
 <h1 class="page-header">
-<a href="#amr-1019001" class="anchor"></a>AMR 1.0.1.9001<small> Unreleased </small>
+<a href="#amr-1019002" class="anchor"></a>AMR 1.0.1.9002<small> Unreleased </small>
 </h1>
+<div id="last-updated-08-mar-2020" class="section level2">
+<h2 class="hasAnchor">
+<a href="#last-updated-08-mar-2020" class="anchor"></a><small>Last updated: 08-Mar-2020</small>
+</h2>
 <div id="new" class="section level3">
 <h3 class="hasAnchor">
 <a href="#new" class="anchor"></a>New</h3>
@@ -238,6 +242,7 @@
 <li>Plotting biplots for principal component analysis using the new <code><a href="../reference/ggplot_pca.html">ggplot_pca()</a></code> function</li>
 </ul>
 </div>
+</div>
 </div>
    <div id="amr-101" class="section level1">
 <h1 class="page-header">
@@ -1489,7 +1494,7 @@
    <div id="tocnav">
      <h2>Contents</h2>
      <ul class="nav nav-pills nav-stacked">
-        <li><a href="#amr-1019001">1.0.1.9001</a></li>
+        <li><a href="#amr-1019002">1.0.1.9002</a></li>
        <li><a href="#amr-101">1.0.1</a></li>
        <li><a href="#amr-100">1.0.0</a></li>
        <li><a href="#amr-090">0.9.0</a></li>
--- a/docs/reference/ggplot_pca.html
+++ b/docs/reference/ggplot_pca.html
@@ -79,7 +79,7 @@
      </button>
      <span class="navbar-brand">
        <a class="navbar-link" href="../index.html">AMR (for R)</a>
-        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.0.1.9001</span>
+        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.0.1.9002</span>
      </span>
    </div>

@@ -236,14 +236,15 @@
  <span class='no'>x</span>,
  <span class='kw'>choices</span> <span class='kw'>=</span> <span class='fl'>1</span>:<span class='fl'>2</span>,
  <span class='kw'>scale</span> <span class='kw'>=</span> <span class='fl'>TRUE</span>,
+  <span class='kw'>pc.biplot</span> <span class='kw'>=</span> <span class='fl'>TRUE</span>,
  <span class='kw'>labels</span> <span class='kw'>=</span> <span class='kw'>NULL</span>,
  <span class='kw'>labels_textsize</span> <span class='kw'>=</span> <span class='fl'>3</span>,
  <span class='kw'>labels_text_placement</span> <span class='kw'>=</span> <span class='fl'>1.5</span>,
  <span class='kw'>groups</span> <span class='kw'>=</span> <span class='kw'>NULL</span>,
-  <span class='kw'>ellipse</span> <span class='kw'>=</span> <span class='fl'>FALSE</span>,
+  <span class='kw'>ellipse</span> <span class='kw'>=</span> <span class='fl'>TRUE</span>,
  <span class='kw'>ellipse_prob</span> <span class='kw'>=</span> <span class='fl'>0.68</span>,
  <span class='kw'>ellipse_size</span> <span class='kw'>=</span> <span class='fl'>0.5</span>,
-  <span class='kw'>ellipse_alpha</span> <span class='kw'>=</span> <span class='fl'>0.25</span>,
+  <span class='kw'>ellipse_alpha</span> <span class='kw'>=</span> <span class='fl'>0.5</span>,
  <span class='kw'>points_size</span> <span class='kw'>=</span> <span class='fl'>2</span>,
  <span class='kw'>points_alpha</span> <span class='kw'>=</span> <span class='fl'>0.25</span>,
  <span class='kw'>arrows</span> <span class='kw'>=</span> <span class='fl'>TRUE</span>,
@@ -275,6 +276,14 @@
    <code><a href='https://rdrr.io/r/stats/princomp.html'>princomp</a></code>. Normally <code>0 &lt;= scale &lt;= 1</code>, and a warning
    will be issued if the specified <code>scale</code> is outside this range.</p></td>
    </tr>
+    <tr>
+      <th>pc.biplot</th>
+      <td><p>If true, use what Gabriel (1971) refers to as a "principal component
+    biplot", with <code>lambda = 1</code> and observations scaled up by sqrt(n) and
+    variables scaled down by sqrt(n).  Then inner products between
+    variables approximate covariances and distances between observations
+    approximate Mahalanobis distance.</p></td>
+    </tr>
    <tr>
      <th>labels</th>
      <td><p>an optional vector of labels for the observations. If set, the labels will be placed below their respective points. When using the <code><a href='pca.html'>pca()</a></code> function as input for <code>x</code>, this will be determined automatically based on the attribute <code>non_numeric_cols</code>, see <code><a href='pca.html'>pca()</a></code>.</p></td>
@@ -352,13 +361,13 @@
 <li><p>Rewritten code to remove the dependency on packages <code>plyr</code>, <code>scales</code> and <code>grid</code></p></li>
 <li><p>Parametrised more options, like arrow and ellipse settings</p></li>
 <li><p>Added total amount of explained variance as a caption in the plot</p></li>
-<li><p>Cleaned all syntax based on the <code>lintr</code> package</p></li>
+<li><p>Cleaned all syntax based on the <code>lintr</code> package and added integrity checks</p></li>
 <li><p>Updated documentation</p></li>
 </ol>

    <h2 class="hasAnchor" id="details"><a class="anchor" href="#details"></a>Details</h2>

-    <p>The default colours for labels and points is set with <code>scale_colour_viridis_d()</code>, but these can be changed by adding another scale for colour, like <code>scale_colour_brewer()</code>.</p>
+    <p>The colours for labels and points can be changed by adding another scale layer for colour, like <code>scale_colour_viridis_d()</code> or <code>scale_colour_brewer()</code>.</p>
    <h2 class="hasAnchor" id="maturing-lifecycle"><a class="anchor" href="#maturing-lifecycle"></a>Maturing lifecycle</h2>

    
--- a/docs/reference/index.html
+++ b/docs/reference/index.html
@@ -78,7 +78,7 @@
      </button>
      <span class="navbar-brand">
        <a class="navbar-link" href="../index.html">AMR (for R)</a>
-        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.0.1.9001</span>
+        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.0.1.9002</span>
      </span>
    </div>

@@ -403,7 +403,7 @@
      </tr><tr>
        
        <td>
-          <p><code><a href="pca.html">prcomp(<i>&lt;data.frame&gt;</i>)</a></code> <code><a href="pca.html">pca()</a></code> </p>
+          <p><code><a href="pca.html">pca()</a></code> </p>
        </td>
        <td><p>Principal Component Analysis (for AMR)</p></td>
      </tr><tr>
--- a/docs/reference/pca.html
+++ b/docs/reference/pca.html
@@ -6,7 +6,7 @@
 <meta http-equiv="X-UA-Compatible" content="IE=edge">
 <meta name="viewport" content="width=device-width, initial-scale=1.0">

-<title>Principal Component Analysis (for AMR) — prcomp.data.frame • AMR (for R)</title>
+<title>Principal Component Analysis (for AMR) — pca • AMR (for R)</title>

 <!-- favicons -->
 <link rel="icon" type="image/png" sizes="16x16" href="../favicon-16x16.png">
@@ -44,8 +44,8 @@
  <link href="../extra.css" rel="stylesheet">
  <script src="../extra.js"></script>

-<meta property="og:title" content="Principal Component Analysis (for AMR) — prcomp.data.frame" />
-<meta property="og:description" content="Performs a principal component analysis (PCA) based on a data set with automatic determination for afterwards plotting the groups and labels." />
+<meta property="og:title" content="Principal Component Analysis (for AMR) — pca" />
+<meta property="og:description" content="Performs a principal component analysis (PCA) based on a data set with automatic determination for afterwards plotting the groups and labels, and automatic filtering on only suitable (i.e. non-empty and numeric) variables." />
 <meta property="og:image" content="https://msberends.gitlab.io/AMR/logo.png" />
 <meta name="twitter:card" content="summary" />

@@ -79,7 +79,7 @@
      </button>
      <span class="navbar-brand">
        <a class="navbar-link" href="../index.html">AMR (for R)</a>
-        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.0.1.9000</span>
+        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.0.1.9002</span>
      </span>
    </div>

@@ -229,11 +229,10 @@
    </div>

    <div class="ref-description">
-    <p>Performs a principal component analysis (PCA) based on a data set with automatic determination for afterwards plotting the groups and labels.</p>
+    <p>Performs a principal component analysis (PCA) based on a data set with automatic determination for afterwards plotting the groups and labels, and automatic filtering on only suitable (i.e. non-empty and numeric) variables.</p>
    </div>

-    <pre class="usage"><span class='co'># S3 method for data.frame</span>
-<span class='fu'><a href='https://rdrr.io/r/stats/prcomp.html'>prcomp</a></span>(
+    <pre class="usage"><span class='fu'>pca</span>(
  <span class='no'>x</span>,
  <span class='no'>...</span>,
  <span class='kw'>retx</span> <span class='kw'>=</span> <span class='fl'>TRUE</span>,
@@ -241,9 +240,7 @@
  <span class='kw'>scale.</span> <span class='kw'>=</span> <span class='fl'>TRUE</span>,
  <span class='kw'>tol</span> <span class='kw'>=</span> <span class='kw'>NULL</span>,
  <span class='kw'>rank.</span> <span class='kw'>=</span> <span class='kw'>NULL</span>
-)
-
-<span class='fu'>pca</span>(<span class='no'>x</span>, <span class='no'>...</span>)</pre>
+)</pre>

    <h2 class="hasAnchor" id="arguments"><a class="anchor" href="#arguments"></a>Arguments</h2>
    <table class="ref-arguments">
@@ -297,10 +294,13 @@
    </tr>
    </table>

+    <h2 class="hasAnchor" id="value"><a class="anchor" href="#value"></a>Value</h2>
+
+    <p>An object of classes pca and <a href='https://rdrr.io/r/stats/prcomp.html'>prcomp</a></p>
    <h2 class="hasAnchor" id="details"><a class="anchor" href="#details"></a>Details</h2>

-    <p>The <code>pca()</code> function takes a <a href='https://rdrr.io/r/base/data.frame.html'>data.frame</a> as input and performs the actual PCA with the R function <code><a href='https://rdrr.io/r/stats/prcomp.html'>prcomp()</a></code>.</p>
-<p>The result of the <code>pca()</code> function is a <code><a href='https://rdrr.io/r/stats/prcomp.html'>prcomp</a></code> object, with an additional attribute <code>non_numeric_cols</code> which is a vector with the column names of all columns that do not contain numeric values. These are probably the groups and labels, and will be used by <code><a href='ggplot_pca.html'>ggplot_pca()</a></code>.</p>
+    <p>The <code>pca()</code> function takes a <a href='https://rdrr.io/r/base/data.frame.html'>data.frame</a> as input and performs the actual PCA with the <span style="R">R</span> function <code><a href='https://rdrr.io/r/stats/prcomp.html'>prcomp()</a></code>.</p>
+<p>The result of the <code>pca()</code> function is a <a href='https://rdrr.io/r/stats/prcomp.html'>prcomp</a> object, with an additional attribute <code>non_numeric_cols</code> which is a vector with the column names of all columns that do not contain numeric values. These are probably the groups and labels, and will be used by <code><a href='ggplot_pca.html'>ggplot_pca()</a></code>.</p>
    <h2 class="hasAnchor" id="experimental-lifecycle"><a class="anchor" href="#experimental-lifecycle"></a>Experimental lifecycle</h2>

    
@@ -332,6 +332,7 @@ The <a href='lifecycle.html'>lifecycle</a> of this function is <strong>experimen
    <h2>Contents</h2>
    <ul class="nav nav-pills nav-stacked">
      <li><a href="#arguments">Arguments</a></li>
+      <li><a href="#value">Value</a></li>
      <li><a href="#details">Details</a></li>
      <li><a href="#experimental-lifecycle">Experimental lifecycle</a></li>
      <li><a href="#examples">Examples</a></li>
--- a/git_premaster.sh
+++ b/git_premaster.sh
@@ -50,6 +50,14 @@ if [ -z "$3" ]; then
  git pull --tags --quiet
  current_tag=`git describe --tags --abbrev=0 | sed 's/v//'`
  current_commit=`git describe --tags | sed 's/.*-\(.*\)-.*/\1/'`
+  if [ -z "current_tag" ]; then
+    echo "FATAL - could not determine current tag"
+    exit 1
+  fi
+  if [ -z "current_commit" ]; then
+    echo "FATAL - could not determine last commit index number"
+    exit 1
+  fi
  # combine tag (e.g. 0.1.0) and commit number (like 40) increased by 9000 to indicate beta version
  new_version="$current_tag.$((current_commit + 9000))" # results in 0.1.0.9040
  if [ -z "$new_version" ]; then
@@ -99,6 +107,7 @@ echo
 echo "•••••••••••••••••••••••••"
 echo "• List of changed files •"
 echo "•••••••••••••••••••••••••"
+git add .
 git status --short
 echo
 read -p "Uploading version ${new_version}. Continue (Y/n)? " choice
@@ -111,7 +120,6 @@ echo
 echo "•••••••••••••••••••••••••••"
 echo "• Uploading to repository •"
 echo "•••••••••••••••••••••••••••"
-git add .
 git commit -a -m "(v$new_version) $1" --quiet
 git push --quiet
 echo "Comparison:"
--- a/man/ggplot_pca.Rd
+++ b/man/ggplot_pca.Rd
@@ -11,7 +11,7 @@ As per their GPL-2 licence that demands documentation of code changes, the chang
 \item Rewritten code to remove the dependency on packages \code{plyr}, \code{scales} and \code{grid}
 \item Parametrised more options, like arrow and ellipse settings
 \item Added total amount of explained variance as a caption in the plot
-\item Cleaned all syntax based on the \code{lintr} package
+\item Cleaned all syntax based on the \code{lintr} package and added integrity checks
 \item Updated documentation
 }
 }
@@ -20,14 +20,15 @@ ggplot_pca(
  x,
  choices = 1:2,
  scale = TRUE,
+  pc.biplot = TRUE,
  labels = NULL,
  labels_textsize = 3,
  labels_text_placement = 1.5,
  groups = NULL,
-  ellipse = FALSE,
+  ellipse = TRUE,
  ellipse_prob = 0.68,
  ellipse_size = 0.5,
-  ellipse_alpha = 0.25,
+  ellipse_alpha = 0.5,
  points_size = 2,
  points_alpha = 0.25,
  arrows = TRUE,
@@ -55,6 +56,14 @@ ggplot_pca(
    will be issued if the specified \code{scale} is outside this range.
  }

+\item{pc.biplot}{
+    If true, use what Gabriel (1971) refers to as a "principal component
+    biplot", with \code{lambda = 1} and observations scaled up by sqrt(n) and
+    variables scaled down by sqrt(n).  Then inner products between
+    variables approximate covariances and distances between observations
+    approximate Mahalanobis distance.
+  }
+
 \item{labels}{an optional vector of labels for the observations. If set, the labels will be placed below their respective points. When using the \code{\link[=pca]{pca()}} function as input for \code{x}, this will be determined automatically based on the attribute \code{non_numeric_cols}, see \code{\link[=pca]{pca()}}.}

 \item{labels_textsize}{the size of the text used for the labels}
@@ -93,7 +102,7 @@ ggplot_pca(
 This function is to produce a \code{ggplot2} variant of a so-called \href{https://en.wikipedia.org/wiki/Biplot}{biplot} for PCA (principal component analysis), but is more flexible and more appealing than the base \R \code{\link[=biplot]{biplot()}} function.
 }
 \details{
-The default colours for labels and points is set with \code{\link[=scale_colour_viridis_d]{scale_colour_viridis_d()}}, but these can be changed by adding another scale for colour, like \code{\link[=scale_colour_brewer]{scale_colour_brewer()}}.
+The colours for labels and points can be changed by adding another scale layer for colour, like \code{\link[=scale_colour_viridis_d]{scale_colour_viridis_d()}} or \code{\link[=scale_colour_brewer]{scale_colour_brewer()}}.
 }
 \section{Maturing lifecycle}{

--- a/man/pca.Rd
+++ b/man/pca.Rd
@@ -1,11 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/pca.R
-\name{prcomp.data.frame}
-\alias{prcomp.data.frame}
+\name{pca}
 \alias{pca}
 \title{Principal Component Analysis (for AMR)}
 \usage{
-\method{prcomp}{data.frame}(
+pca(
  x,
  ...,
  retx = TRUE,
@@ -14,8 +13,6 @@
  tol = NULL,
  rank. = NULL
 )
-
-pca(x, ...)
 }
 \arguments{
 \item{x}{a \link{data.frame} containing numeric columns}
@@ -51,13 +48,16 @@ pca(x, ...)
    alternative or in addition to \code{tol}, useful notably when the
    desired rank is considerably smaller than the dimensions of the matrix.}
 }
+\value{
+An object of classes \link{pca} and \link{prcomp}
+}
 \description{
-Performs a principal component analysis (PCA) based on a data set with automatic determination for afterwards plotting the groups and labels.
+Performs a principal component analysis (PCA) based on a data set with automatic determination for afterwards plotting the groups and labels, and automatic filtering on only suitable (i.e. non-empty and numeric) variables.
 }
 \details{
-The \code{\link[=pca]{pca()}} function takes a \link{data.frame} as input and performs the actual PCA with the R function \code{\link[=prcomp]{prcomp()}}.
+The \code{\link[=pca]{pca()}} function takes a \link{data.frame} as input and performs the actual PCA with the \R function \code{\link[=prcomp]{prcomp()}}.

-The result of the \code{\link[=pca]{pca()}} function is a \code{\link{prcomp}} object, with an additional attribute \code{non_numeric_cols} which is a vector with the column names of all columns that do not contain numeric values. These are probably the groups and labels, and will be used by \code{\link[=ggplot_pca]{ggplot_pca()}}.
+The result of the \code{\link[=pca]{pca()}} function is a \link{prcomp} object, with an additional attribute \code{non_numeric_cols} which is a vector with the column names of all columns that do not contain numeric values. These are probably the groups and labels, and will be used by \code{\link[=ggplot_pca]{ggplot_pca()}}.
 }
 \section{Experimental lifecycle}{

--- a/tests/testthat/test-pca.R
+++ b/tests/testthat/test-pca.R
@@ -30,8 +30,9 @@ test_that("PCA works", {
             genus = mo_genus(mo)) %>%   #  and genus as we do here
    summarise_if(is.rsi, resistance, minimum = 0)
  
-  expect_s3_class(pca(resistance_data), "prcomp")
-  expect_s3_class(prcomp(resistance_data), "prcomp")
+  pca_model <- pca(resistance_data)
  
-  ggplot_pca(pca(resistance_data), ellipse = TRUE)
+  expect_s3_class(pca_model, "pca")
+  
+  ggplot_pca(pca_model, ellipse = TRUE)
 })