2020-03-07 21:48:21 +01:00
# ==================================================================== #
2023-07-08 17:30:05 +02:00
# TITLE: #
2022-10-05 09:12:22 +02:00
# AMR: An R Package for Working with Antimicrobial Resistance Data #
2020-03-07 21:48:21 +01:00
# #
2023-07-08 17:30:05 +02:00
# SOURCE CODE: #
2020-07-08 14:48:06 +02:00
# https://github.com/msberends/AMR #
2020-03-07 21:48:21 +01:00
# #
2023-07-08 17:30:05 +02:00
# PLEASE CITE THIS SOFTWARE AS: #
2024-07-16 14:51:57 +02:00
# Berends MS, Luz CF, Friedrich AW, et al. (2022). #
# AMR: An R Package for Working with Antimicrobial Resistance Data. #
# Journal of Statistical Software, 104(3), 1-31. #
2023-05-27 10:39:22 +02:00
# https://doi.org/10.18637/jss.v104.i03 #
2022-10-05 09:12:22 +02:00
# #
2022-12-27 15:16:15 +01:00
# Developed at the University of Groningen and the University Medical #
# Center Groningen in The Netherlands, in collaboration with many #
# colleagues from around the world, see our website. #
2020-03-07 21:48:21 +01:00
# #
# This R package is free software; you can freely use and distribute #
# it for both personal and commercial purposes under the terms of the #
# GNU General Public License version 2.0 (GNU GPL-2), as published by #
# the Free Software Foundation. #
# We created this package for both routine data analysis and academic #
# research and it was publicly released in the hope that it will be #
# useful, but it comes WITHOUT ANY WARRANTY OR LIABILITY. #
2020-10-08 11:16:03 +02:00
# #
# Visit our website for the full manual and a complete tutorial about #
2021-02-02 23:57:35 +01:00
# how to conduct AMR data analysis: https://msberends.github.io/AMR/ #
2020-03-07 21:48:21 +01:00
# ==================================================================== #
2021-01-18 16:57:56 +01:00
#' PCA Biplot with `ggplot2`
2020-03-07 21:48:21 +01:00
#'
2020-04-13 21:09:56 +02:00
#' Produces a `ggplot2` variant of a so-called [biplot](https://en.wikipedia.org/wiki/Biplot) for PCA (principal component analysis), but is more flexible and more appealing than the base \R [biplot()] function.
2020-03-07 21:48:21 +01:00
#' @param x an object returned by [pca()], [prcomp()] or [princomp()]
#' @inheritParams stats::biplot.prcomp
#' @param labels an optional vector of labels for the observations. If set, the labels will be placed below their respective points. When using the [pca()] function as input for `x`, this will be determined automatically based on the attribute `non_numeric_cols`, see [pca()].
#' @param labels_textsize the size of the text used for the labels
#' @param labels_text_placement adjustment factor the placement of the variable names (`>=1` means further away from the arrow head)
#' @param groups an optional vector of groups for the labels, with the same length as `labels`. If set, the points and labels will be coloured according to these groups. When using the [pca()] function as input for `x`, this will be determined automatically based on the attribute `non_numeric_cols`, see [pca()].
2021-05-12 18:15:03 +02:00
#' @param ellipse a [logical] to indicate whether a normal data ellipse should be drawn for each group (set with `groups`)
2020-03-07 21:48:21 +01:00
#' @param ellipse_prob statistical size of the ellipse in normal probability
#' @param ellipse_size the size of the ellipse line
#' @param ellipse_alpha the alpha (transparency) of the ellipse line
2020-03-08 09:12:11 +01:00
#' @param points_size the size of the points
2020-03-07 21:48:21 +01:00
#' @param points_alpha the alpha (transparency) of the points
2021-05-12 18:15:03 +02:00
#' @param arrows a [logical] to indicate whether arrows should be drawn
2020-03-07 21:48:21 +01:00
#' @param arrows_textsize the size of the text for variable names
#' @param arrows_colour the colour of the arrow and their text
#' @param arrows_size the size (thickness) of the arrow lines
#' @param arrows_textsize the size of the text at the end of the arrows
2021-05-12 18:15:03 +02:00
#' @param arrows_textangled a [logical] whether the text at the end of the arrows should be angled
2020-03-07 21:48:21 +01:00
#' @param arrows_alpha the alpha (transparency) of the arrows and their text
#' @param base_textsize the text size for all plot elements except the labels and arrows
2021-04-29 17:16:30 +02:00
#' @param ... arguments passed on to functions
2020-03-08 09:12:11 +01:00
#' @source The [ggplot_pca()] function is based on the `ggbiplot()` function from the `ggbiplot` package by Vince Vu, as found on GitHub: <https://github.com/vqv/ggbiplot> (retrieved: 2 March 2020, their latest commit: [`7325e88`](https://github.com/vqv/ggbiplot/commit/7325e880485bea4c07465a0304c470608fffb5d9); 12 February 2015).
2022-08-28 10:31:50 +02:00
#'
#' As per their GPL-2 licence that demands documentation of code changes, the changes made based on the source code were:
2020-03-07 21:48:21 +01:00
#' 1. Rewritten code to remove the dependency on packages `plyr`, `scales` and `grid`
#' 2. Parametrised more options, like arrow and ellipse settings
2020-12-22 00:51:17 +01:00
#' 3. Hardened all input possibilities by defining the exact type of user input for every argument
2020-10-19 17:09:19 +02:00
#' 4. Added total amount of explained variance as a caption in the plot
#' 5. Cleaned all syntax based on the `lintr` package, fixed grammatical errors and added integrity checks
#' 6. Updated documentation
2021-03-15 07:23:21 +01:00
#' @details The colours for labels and points can be changed by adding another scale layer for colour, such as `scale_colour_viridis_d()` and `scale_colour_brewer()`.
2020-03-07 21:48:21 +01:00
#' @rdname ggplot_pca
#' @export
2022-08-28 10:31:50 +02:00
#' @examples
2021-01-24 14:48:56 +01:00
#' # `example_isolates` is a data set available in the AMR package.
2020-03-07 21:48:21 +01:00
#' # See ?example_isolates.
#'
2021-05-24 09:00:11 +02:00
#' \donttest{
2020-09-29 23:35:46 +02:00
#' if (require("dplyr")) {
2022-08-28 10:31:50 +02:00
#' # calculate the resistance per group first
#' resistance_data <- example_isolates %>%
#' group_by(
#' order = mo_order(mo), # group on anything, like order
#' genus = mo_genus(mo)
#' ) %>% # and genus as we do here;
#' filter(n() >= 30) %>% # filter on only 30 results per group
2023-01-21 23:47:20 +01:00
#' summarise_if(is.sir, resistance) # then get resistance of all drugs
2022-08-28 10:31:50 +02:00
#'
2022-11-13 13:44:25 +01:00
#' # now conduct PCA for certain antimicrobial drugs
2022-08-28 10:31:50 +02:00
#' pca_result <- resistance_data %>%
#' pca(AMC, CXM, CTX, CAZ, GEN, TOB, TMP, SXT)
#'
2022-08-21 16:37:20 +02:00
#' summary(pca_result)
2022-08-28 10:31:50 +02:00
#'
2022-08-21 16:37:20 +02:00
#' # old base R plotting method:
2023-12-03 11:34:48 +01:00
#' biplot(pca_result, main = "Base R biplot")
2022-08-28 10:31:50 +02:00
#'
2022-11-05 09:31:19 +01:00
#' # new ggplot2 plotting method using this package:
2020-07-28 18:39:57 +02:00
#' if (require("ggplot2")) {
2023-12-03 11:34:48 +01:00
#' ggplot_pca(pca_result) +
#' labs(title = "ggplot2 biplot")
#' }
#' if (require("ggplot2")) {
2022-11-05 09:31:19 +01:00
#' # still extendible with any ggplot2 function
2022-08-21 16:37:20 +02:00
#' ggplot_pca(pca_result) +
2020-07-28 18:39:57 +02:00
#' scale_colour_viridis_d() +
2023-12-03 11:34:48 +01:00
#' labs(title = "ggplot2 biplot")
2020-07-28 18:39:57 +02:00
#' }
2020-05-16 13:05:47 +02:00
#' }
2021-05-24 09:00:11 +02:00
#' }
2020-03-07 21:48:21 +01:00
ggplot_pca <- function ( x ,
choices = 1 : 2 ,
2020-10-19 17:09:19 +02:00
scale = 1 ,
2020-03-08 11:18:59 +01:00
pc.biplot = TRUE ,
2020-03-07 21:48:21 +01:00
labels = NULL ,
labels_textsize = 3 ,
labels_text_placement = 1.5 ,
groups = NULL ,
2020-03-08 11:18:59 +01:00
ellipse = TRUE ,
2020-03-07 21:48:21 +01:00
ellipse_prob = 0.68 ,
ellipse_size = 0.5 ,
2020-03-08 11:18:59 +01:00
ellipse_alpha = 0.5 ,
2020-03-07 21:48:21 +01:00
points_size = 2 ,
points_alpha = 0.25 ,
arrows = TRUE ,
arrows_colour = " darkblue" ,
arrows_size = 0.5 ,
arrows_textsize = 3 ,
2020-07-28 18:39:57 +02:00
arrows_textangled = TRUE ,
2020-03-07 21:48:21 +01:00
arrows_alpha = 0.75 ,
base_textsize = 10 ,
... ) {
2020-06-22 11:18:40 +02:00
stop_ifnot_installed ( " ggplot2" )
2020-10-19 17:09:19 +02:00
meet_criteria ( x , allow_class = c ( " prcomp" , " princomp" , " PCA" , " lda" ) )
2021-01-24 14:48:56 +01:00
meet_criteria ( choices , allow_class = c ( " numeric" , " integer" ) , has_length = 2 , is_positive = TRUE , is_finite = TRUE )
2020-10-19 17:09:19 +02:00
meet_criteria ( scale , allow_class = c ( " numeric" , " integer" , " logical" ) , has_length = 1 )
meet_criteria ( pc.biplot , allow_class = " logical" , has_length = 1 )
meet_criteria ( labels , allow_class = " character" , allow_NULL = TRUE )
2021-01-24 14:48:56 +01:00
meet_criteria ( labels_textsize , allow_class = c ( " numeric" , " integer" ) , has_length = 1 , is_positive = TRUE , is_finite = TRUE )
meet_criteria ( labels_text_placement , allow_class = c ( " numeric" , " integer" ) , has_length = 1 , is_positive = TRUE , is_finite = TRUE )
2020-10-19 17:09:19 +02:00
meet_criteria ( groups , allow_class = " character" , allow_NULL = TRUE )
meet_criteria ( ellipse , allow_class = " logical" , has_length = 1 )
2021-01-24 14:48:56 +01:00
meet_criteria ( ellipse_prob , allow_class = c ( " numeric" , " integer" ) , has_length = 1 , is_positive = TRUE , is_finite = TRUE )
meet_criteria ( ellipse_size , allow_class = c ( " numeric" , " integer" ) , has_length = 1 , is_positive = TRUE , is_finite = TRUE )
meet_criteria ( ellipse_alpha , allow_class = c ( " numeric" , " integer" ) , has_length = 1 , is_positive = TRUE , is_finite = TRUE )
meet_criteria ( points_size , allow_class = c ( " numeric" , " integer" ) , has_length = 1 , is_positive = TRUE , is_finite = TRUE )
meet_criteria ( points_alpha , allow_class = c ( " numeric" , " integer" ) , has_length = 1 , is_positive = TRUE , is_finite = TRUE )
2020-10-19 17:09:19 +02:00
meet_criteria ( arrows , allow_class = " logical" , has_length = 1 )
meet_criteria ( arrows_colour , allow_class = " character" , has_length = 1 )
2021-01-24 14:48:56 +01:00
meet_criteria ( arrows_size , allow_class = c ( " numeric" , " integer" ) , has_length = 1 , is_positive = TRUE , is_finite = TRUE )
meet_criteria ( arrows_textsize , allow_class = c ( " numeric" , " integer" ) , has_length = 1 , is_positive = TRUE , is_finite = TRUE )
2020-10-19 17:09:19 +02:00
meet_criteria ( arrows_textangled , allow_class = " logical" , has_length = 1 )
2021-01-24 14:48:56 +01:00
meet_criteria ( arrows_alpha , allow_class = c ( " numeric" , " integer" ) , has_length = 1 , is_positive = TRUE , is_finite = TRUE )
meet_criteria ( base_textsize , allow_class = c ( " numeric" , " integer" ) , has_length = 1 , is_positive = TRUE , is_finite = TRUE )
2022-08-28 10:31:50 +02:00
calculations <- pca_calculations (
pca_model = x ,
groups = groups ,
groups_missing = missing ( groups ) ,
labels = labels ,
labels_missing = missing ( labels ) ,
choices = choices ,
scale = scale ,
pc.biplot = pc.biplot ,
ellipse_prob = ellipse_prob ,
labels_text_placement = labels_text_placement
)
2020-03-07 21:48:21 +01:00
choices <- calculations $ choices
df.u <- calculations $ df.u
df.v <- calculations $ df.v
ell <- calculations $ ell
groups <- calculations $ groups
group_name <- calculations $ group_name
labels <- calculations $ labels
2022-08-28 10:31:50 +02:00
2020-03-07 21:48:21 +01:00
# Append the proportion of explained variance to the axis labels
if ( ( 1 - as.integer ( scale ) ) == 0 ) {
2020-03-08 11:18:59 +01:00
u.axis.labs <- paste0 ( " Standardised PC" , choices )
2020-03-07 21:48:21 +01:00
} else {
2020-03-08 11:18:59 +01:00
u.axis.labs <- paste0 ( " PC" , choices )
2020-03-07 21:48:21 +01:00
}
2022-08-28 10:31:50 +02:00
u.axis.labs <- paste0 (
u.axis.labs ,
paste0 (
" \n(explained var: " ,
percentage ( x $ sdev [choices ] ^2 / sum ( x $ sdev ^2 ) ) ,
" )"
)
)
2020-03-07 21:48:21 +01:00
# Score Labels
if ( ! is.null ( labels ) ) {
df.u $ labels <- labels
}
2022-08-28 10:31:50 +02:00
2020-03-07 21:48:21 +01:00
# Grouping variable
if ( ! is.null ( groups ) ) {
df.u $ groups <- groups
}
2022-08-28 10:31:50 +02:00
2020-03-07 21:48:21 +01:00
# Base plot
2022-08-28 10:31:50 +02:00
g <- ggplot2 :: ggplot (
data = df.u ,
ggplot2 :: aes ( x = xvar , y = yvar )
) +
ggplot2 :: xlab ( u.axis.labs [1 ] ) +
ggplot2 :: ylab ( u.axis.labs [2 ] ) +
ggplot2 :: expand_limits (
x = c ( -1.15 , 1.15 ) ,
y = c ( -1.15 , 1.15 )
)
2020-03-07 21:48:21 +01:00
# Draw either labels or points
if ( ! is.null ( df.u $ labels ) ) {
if ( ! is.null ( df.u $ groups ) ) {
2020-03-08 11:18:59 +01:00
g <- g + ggplot2 :: geom_point ( ggplot2 :: aes ( colour = groups ) ,
2022-08-28 10:31:50 +02:00
alpha = points_alpha ,
size = points_size
) +
2020-03-07 21:48:21 +01:00
ggplot2 :: geom_text ( ggplot2 :: aes ( label = labels , colour = groups ) ,
2022-08-28 10:31:50 +02:00
nudge_y = -0.05 ,
size = labels_textsize
) +
2020-03-07 21:48:21 +01:00
ggplot2 :: labs ( colour = group_name )
} else {
2022-08-28 10:31:50 +02:00
g <- g + ggplot2 :: geom_point (
alpha = points_alpha ,
size = points_size
) +
2020-03-07 21:48:21 +01:00
ggplot2 :: geom_text ( ggplot2 :: aes ( label = labels ) ,
2022-08-28 10:31:50 +02:00
nudge_y = -0.05 ,
size = labels_textsize
)
2020-03-07 21:48:21 +01:00
}
} else {
if ( ! is.null ( df.u $ groups ) ) {
2020-03-08 11:18:59 +01:00
g <- g + ggplot2 :: geom_point ( ggplot2 :: aes ( colour = groups ) ,
2022-08-28 10:31:50 +02:00
alpha = points_alpha ,
size = points_size
) +
2020-03-07 21:48:21 +01:00
ggplot2 :: labs ( colour = group_name )
} else {
2022-08-28 10:31:50 +02:00
g <- g + ggplot2 :: geom_point (
alpha = points_alpha ,
size = points_size
)
2020-03-07 21:48:21 +01:00
}
}
2022-08-28 10:31:50 +02:00
2020-03-07 21:48:21 +01:00
# Overlay a concentration ellipse if there are groups
2022-10-05 09:12:22 +02:00
if ( ! is.null ( df.u $ groups ) && ! is.null ( ell ) && isTRUE ( ellipse ) ) {
2022-08-28 10:31:50 +02:00
g <- g + ggplot2 :: geom_path (
data = ell ,
ggplot2 :: aes ( colour = groups , group = groups ) ,
size = ellipse_size ,
alpha = points_alpha
)
2020-03-07 21:48:21 +01:00
}
2022-08-28 10:31:50 +02:00
2020-03-07 21:48:21 +01:00
# Label the variable axes
if ( arrows == TRUE ) {
2022-08-28 10:31:50 +02:00
g <- g + ggplot2 :: geom_segment (
data = df.v ,
ggplot2 :: aes ( x = 0 , y = 0 , xend = xvar , yend = yvar ) ,
arrow = ggplot2 :: arrow (
length = ggplot2 :: unit ( 0.5 , " picas" ) ,
angle = 20 ,
ends = " last" ,
type = " open"
) ,
colour = arrows_colour ,
size = arrows_size ,
alpha = arrows_alpha
)
2020-07-28 18:39:57 +02:00
if ( arrows_textangled == TRUE ) {
2022-08-28 10:31:50 +02:00
g <- g + ggplot2 :: geom_text (
data = df.v ,
ggplot2 :: aes ( label = varname , x = xvar , y = yvar , angle = angle , hjust = hjust ) ,
colour = arrows_colour ,
size = arrows_textsize ,
alpha = arrows_alpha
)
2020-07-28 18:39:57 +02:00
} else {
2022-08-28 10:31:50 +02:00
g <- g + ggplot2 :: geom_text (
data = df.v ,
ggplot2 :: aes ( label = varname , x = xvar , y = yvar , hjust = hjust ) ,
colour = arrows_colour ,
size = arrows_textsize ,
alpha = arrows_alpha
)
2020-07-28 18:39:57 +02:00
}
2020-03-07 21:48:21 +01:00
}
2022-08-28 10:31:50 +02:00
2020-03-07 21:48:21 +01:00
# Add caption label about total explained variance
2022-08-28 10:31:50 +02:00
g <- g + ggplot2 :: labs ( caption = paste0 (
" Total explained variance: " ,
percentage ( sum ( x $ sdev [choices ] ^2 / sum ( x $ sdev ^2 ) ) )
) )
2020-03-07 21:48:21 +01:00
# mark-up nicely
g <- g + ggplot2 :: theme_minimal ( base_size = base_textsize ) +
2022-08-28 10:31:50 +02:00
ggplot2 :: theme (
panel.grid.major = ggplot2 :: element_line ( colour = " grey85" ) ,
panel.grid.minor = ggplot2 :: element_blank ( ) ,
# centre title and subtitle
plot.title = ggplot2 :: element_text ( hjust = 0.5 ) ,
plot.subtitle = ggplot2 :: element_text ( hjust = 0.5 )
)
2020-03-07 21:48:21 +01:00
g
}
2020-03-08 11:18:59 +01:00
#' @importFrom stats qchisq var
2020-03-07 21:48:21 +01:00
pca_calculations <- function ( pca_model ,
groups = NULL ,
groups_missing = TRUE ,
labels = NULL ,
labels_missing = TRUE ,
choices = 1 : 2 ,
scale = 1 ,
2020-03-08 11:18:59 +01:00
pc.biplot = TRUE ,
2020-03-07 21:48:21 +01:00
ellipse_prob = 0.68 ,
labels_text_placement = 1.5 ) {
non_numeric_cols <- attributes ( pca_model ) $ non_numeric_cols
if ( groups_missing ) {
groups <- tryCatch ( non_numeric_cols [ [1 ] ] ,
2022-08-28 10:31:50 +02:00
error = function ( e ) NULL
)
2020-03-07 21:48:21 +01:00
group_name <- tryCatch ( colnames ( non_numeric_cols [1 ] ) ,
2022-08-28 10:31:50 +02:00
error = function ( e ) NULL
)
2020-03-07 21:48:21 +01:00
}
if ( labels_missing ) {
labels <- tryCatch ( non_numeric_cols [ [2 ] ] ,
2022-08-28 10:31:50 +02:00
error = function ( e ) NULL
)
2020-03-07 21:48:21 +01:00
}
2022-10-05 09:12:22 +02:00
if ( ! is.null ( groups ) && is.null ( labels ) ) {
2020-03-07 21:48:21 +01:00
# turn them around
labels <- groups
groups <- NULL
group_name <- NULL
}
2022-08-28 10:31:50 +02:00
2020-03-07 21:48:21 +01:00
# Recover the SVD
if ( inherits ( pca_model , " prcomp" ) ) {
nobs.factor <- sqrt ( nrow ( pca_model $ x ) - 1 )
d <- pca_model $ sdev
u <- sweep ( pca_model $ x , 2 , 1 / ( d * nobs.factor ) , FUN = " *" )
v <- pca_model $ rotation
} else if ( inherits ( pca_model , " princomp" ) ) {
nobs.factor <- sqrt ( pca_model $ n.obs )
d <- pca_model $ sdev
u <- sweep ( pca_model $ scores , 2 , 1 / ( d * nobs.factor ) , FUN = " *" )
v <- pca_model $ loadings
} else if ( inherits ( pca_model , " PCA" ) ) {
nobs.factor <- sqrt ( nrow ( pca_model $ call $ X ) )
d <- unlist ( sqrt ( pca_model $ eig ) [1 ] )
u <- sweep ( pca_model $ ind $ coord , 2 , 1 / ( d * nobs.factor ) , FUN = " *" )
v <- sweep ( pca_model $ var $ coord , 2 , sqrt ( pca_model $ eig [seq_len ( ncol ( pca_model $ var $ coord ) ) , 1 ] ) , FUN = " /" )
} else if ( inherits ( pca_model , " lda" ) ) {
nobs.factor <- sqrt ( pca_model $ N )
d <- pca_model $ svd
u <- predict ( pca_model ) $ x / nobs.factor
v <- pca_model $ scaling
} else {
2020-10-19 17:09:19 +02:00
stop ( " Expected an object of class prcomp, princomp, PCA, or lda" )
2020-03-07 21:48:21 +01:00
}
2022-08-28 10:31:50 +02:00
2020-03-07 21:48:21 +01:00
# Scores
choices <- pmin ( choices , ncol ( u ) )
obs.scale <- 1 - as.integer ( scale )
2022-08-28 10:31:50 +02:00
df.u <- as.data.frame ( sweep ( u [ , choices ] , 2 , d [choices ] ^obs.scale , FUN = " *" ) ,
stringsAsFactors = FALSE
)
2020-03-07 21:48:21 +01:00
# Directions
2022-08-28 10:31:50 +02:00
v <- sweep ( v , 2 , d ^as.integer ( scale ) , FUN = " *" )
2020-11-11 16:49:27 +01:00
df.v <- as.data.frame ( v [ , choices ] ,
2022-08-28 10:31:50 +02:00
stringsAsFactors = FALSE
)
2020-03-07 21:48:21 +01:00
names ( df.u ) <- c ( " xvar" , " yvar" )
names ( df.v ) <- names ( df.u )
2022-08-28 10:31:50 +02:00
2020-03-08 11:18:59 +01:00
if ( isTRUE ( pc.biplot ) ) {
df.u <- df.u * nobs.factor
}
2022-08-28 10:31:50 +02:00
# Scale the radius of the correlation circle so that it corresponds to
2020-03-07 21:48:21 +01:00
# a data ellipse for the standardized PC scores
circle_prob <- 0.69
2022-08-28 10:31:50 +02:00
r <- sqrt ( qchisq ( circle_prob , df = 2 ) ) * prod ( colMeans ( df.u ^2 ) ) ^ ( 0.25 )
2020-03-07 21:48:21 +01:00
# Scale directions
2022-08-28 10:31:50 +02:00
v.scale <- rowSums ( v ^2 )
2020-03-07 21:48:21 +01:00
df.v <- r * df.v / sqrt ( max ( v.scale ) )
2022-08-28 10:31:50 +02:00
2020-03-07 21:48:21 +01:00
# Grouping variable
if ( ! is.null ( groups ) ) {
df.u $ groups <- groups
}
2022-08-28 10:31:50 +02:00
2020-03-07 21:48:21 +01:00
df.v $ varname <- rownames ( v )
2022-08-28 10:31:50 +02:00
2020-03-07 21:48:21 +01:00
# Variables for text label placement
df.v $ angle <- with ( df.v , ( 180 / pi ) * atan ( yvar / xvar ) )
df.v $ hjust <- with ( df.v , ( 1 - labels_text_placement * sign ( xvar ) ) / 2 )
2022-08-28 10:31:50 +02:00
2020-03-07 21:48:21 +01:00
if ( ! is.null ( df.u $ groups ) ) {
2020-03-08 11:18:59 +01:00
theta <- c ( seq ( - pi , pi , length = 50 ) , seq ( pi , - pi , length = 50 ) )
circle <- cbind ( cos ( theta ) , sin ( theta ) )
2022-08-28 10:31:50 +02:00
2020-05-16 13:05:47 +02:00
df.groups <- lapply ( unique ( df.u $ groups ) , function ( g , df = df.u ) {
x <- df [which ( df $ groups == g ) , , drop = FALSE ]
if ( nrow ( x ) <= 2 ) {
2022-08-28 10:31:50 +02:00
return ( data.frame (
X1 = numeric ( 0 ) ,
X2 = numeric ( 0 ) ,
groups = character ( 0 ) ,
stringsAsFactors = FALSE
) )
2020-05-16 13:05:47 +02:00
}
sigma <- var ( cbind ( x $ xvar , x $ yvar ) )
mu <- c ( mean ( x $ xvar ) , mean ( x $ yvar ) )
ed <- sqrt ( qchisq ( ellipse_prob , df = 2 ) )
2023-01-23 15:01:21 +01:00
data.frame (
sweep ( circle %*% chol ( sigma ) * ed ,
MARGIN = 2 ,
STATS = mu ,
FUN = " +"
) ,
groups = x $ groups [1 ] ,
stringsAsFactors = FALSE
2022-08-28 10:31:50 +02:00
)
2020-05-16 13:05:47 +02:00
} )
ell <- do.call ( rbind , df.groups )
2020-03-08 09:12:11 +01:00
if ( NROW ( ell ) == 0 ) {
ell <- NULL
2020-03-08 11:18:59 +01:00
} else {
names ( ell ) [1 : 2 ] <- c ( " xvar" , " yvar" )
2020-03-08 09:12:11 +01:00
}
2020-03-07 21:48:21 +01:00
} else {
ell <- NULL
}
2022-08-28 10:31:50 +02:00
list (
choices = choices ,
df.u = df.u ,
df.v = df.v ,
ell = ell ,
groups = groups ,
group_name = group_name ,
labels = labels
2020-03-07 21:48:21 +01:00
)
}