forked from GRIAC/system_genetics
201 lines
4.3 KiB
R
201 lines
4.3 KiB
R
|
# Principle Component Analysis
|
||
|
# Normalized with limma::voom
|
||
|
|
||
|
source("__ - Preloader.R", verbose=T)
|
||
|
|
||
|
|
||
|
# PCA variables
|
||
|
do.center = TRUE
|
||
|
do.scale = FALSE
|
||
|
|
||
|
|
||
|
# The analysis
|
||
|
master.Table %>% readr::write_csv(
|
||
|
file.path(results.dir, "patient.table.csv")
|
||
|
)
|
||
|
|
||
|
norm.expr.data <- expression.data %>%
|
||
|
tibble::column_to_rownames("Gene")
|
||
|
norm.expr.data <- norm.expr.data[rowSums(norm.expr.data) >= 10,] %>%
|
||
|
limma::voom() %>%
|
||
|
as.matrix()
|
||
|
|
||
|
# Principle Component analysis
|
||
|
results.dir.pca <- file.path(results.dir, "principle.components")
|
||
|
dir.create(results.dir.pca, recursive=TRUE)
|
||
|
|
||
|
norm.expr.data.pcs <- norm.expr.data %>%
|
||
|
t() %>%
|
||
|
stats::prcomp(
|
||
|
center = do.center,
|
||
|
scale. = do.scale
|
||
|
)
|
||
|
|
||
|
# Write summary of PCAs to files
|
||
|
pcs.summery <- summary(norm.expr.data.pcs)
|
||
|
pcs.summery$importance %>%
|
||
|
t() %>%
|
||
|
as.data.frame() %>%
|
||
|
tibble::rownames_to_column("PC.name") %>%
|
||
|
readr::write_csv(
|
||
|
file.path(results.dir.pca, "importance.csv")
|
||
|
)
|
||
|
|
||
|
pcs.summery$x %>%
|
||
|
t() %>%
|
||
|
as.data.frame() %>%
|
||
|
tibble::rownames_to_column("ensembl.id") %>%
|
||
|
readr::write_csv(
|
||
|
file.path(results.dir.pca, "values.csv")
|
||
|
)
|
||
|
|
||
|
pcs.summery$rotation %>%
|
||
|
t() %>%
|
||
|
as.data.frame() %>%
|
||
|
tibble::rownames_to_column("sample.id") %>%
|
||
|
readr::write_csv(
|
||
|
file.path(results.dir.pca, "rotation.csv")
|
||
|
)
|
||
|
|
||
|
data.frame(
|
||
|
rownames = names(pcs.summery$center),
|
||
|
center = pcs.summery$center,
|
||
|
scale = pcs.summery$scale
|
||
|
) %>%
|
||
|
readr::write_csv(
|
||
|
file.path(results.dir.pca, "rest.csv")
|
||
|
)
|
||
|
|
||
|
# Not saved: pcs.summery$sdev,
|
||
|
|
||
|
|
||
|
|
||
|
# Plot PCAs
|
||
|
# https://github.com/kevinblighe/PCAtools
|
||
|
results.dir.pca.plot <- file.path(results.dir.pca, "img")
|
||
|
dir.create(results.dir.pca.plot)
|
||
|
|
||
|
metadata <- master.Table %>%
|
||
|
dplyr::filter(
|
||
|
!is.na(GenomeScan_ID)
|
||
|
) %>%
|
||
|
tibble::column_to_rownames("GenomeScan_ID") %>%
|
||
|
select.rows.in.order(
|
||
|
colnames(norm.expr.data)
|
||
|
)
|
||
|
|
||
|
|
||
|
p <- pca(norm.expr.data,
|
||
|
metadata = metadata,
|
||
|
center = do.center,
|
||
|
scale = do.scale
|
||
|
)
|
||
|
elbow <- findElbowPoint(p$variance)
|
||
|
metavars <- c('Age','Gender','Smoking_status','COPD_Y_or_N','SEO_COPD_Y_or_N','GOLD_stage')
|
||
|
|
||
|
|
||
|
png(filename = file.path(results.dir.pca.plot,"scree_plot.png"),
|
||
|
width = 800, height = 800,
|
||
|
units = "px", pointsize = 12,
|
||
|
type = "Xlib")
|
||
|
print(screeplot(p,
|
||
|
axisLabSize = 12,
|
||
|
titleLabSize = 12,
|
||
|
components = getComponents(p, 1:(elbow+5)),
|
||
|
vline = c(elbow)
|
||
|
) +
|
||
|
geom_label(
|
||
|
aes(
|
||
|
x = elbow + 1,
|
||
|
y = 50,
|
||
|
label = 'Elbow method',
|
||
|
vjust = -1,
|
||
|
size = 8
|
||
|
)
|
||
|
))
|
||
|
dev.off()
|
||
|
|
||
|
|
||
|
png(filename = file.path(results.dir.pca.plot, "eigen_corr_plot.png"),
|
||
|
width = 1200, height = 1200,
|
||
|
units = "px", pointsize = 12,
|
||
|
type = "Xlib")
|
||
|
print(eigencorplot(p,
|
||
|
metavars = metavars
|
||
|
))
|
||
|
dev.off()
|
||
|
|
||
|
|
||
|
dir.create(file.path(results.dir.pca.plot, "pairsplot"))
|
||
|
for (var in metavars) {
|
||
|
png(
|
||
|
filename = file.path(results.dir.pca.plot, "pairsplot", paste0(var,".png")),
|
||
|
width = 1200, height = 1200,
|
||
|
units = "px", pointsize = 12,
|
||
|
type = "Xlib"
|
||
|
)
|
||
|
print(pairsplot(p,
|
||
|
components = getComponents(p, c(1:(elbow+1))),
|
||
|
triangle = TRUE,
|
||
|
trianglelabSize = 12,
|
||
|
hline = 0, vline = 0,
|
||
|
pointSize = 0.4,
|
||
|
gridlines.major = FALSE,
|
||
|
gridlines.minor = FALSE,
|
||
|
colby = var,
|
||
|
title = paste0('Pairs plot: ',var),
|
||
|
plotaxes = TRUE
|
||
|
))
|
||
|
dev.off()
|
||
|
}
|
||
|
|
||
|
|
||
|
|
||
|
# Plot PCAs - old failure
|
||
|
pca.combinations <- combinations(
|
||
|
n = (elbow+1),
|
||
|
r = 2,
|
||
|
v = 1:(elbow+1),
|
||
|
repeats.allowed = FALSE
|
||
|
)
|
||
|
|
||
|
dir.create(file.path(results.dir.pca.plot, "biplots"))
|
||
|
for (var in metavars) {
|
||
|
for (i in 1:nrow(pca.combinations)) {
|
||
|
pca.combi <- pca.combinations[i,]
|
||
|
pca.title <- paste(paste0("PC", pca.combi), collapse="_")
|
||
|
png(
|
||
|
filename = file.path(results.dir.pca.plot, "biplots", paste0(var, "-", pca.title, ".png")),
|
||
|
width = 800, height = 800,
|
||
|
units = "px", pointsize = 12,
|
||
|
type = "Xlib"
|
||
|
)
|
||
|
print(
|
||
|
autoplot(
|
||
|
norm.expr.data.pcs,
|
||
|
data = master.Table %>%
|
||
|
dplyr::filter(
|
||
|
!is.na(GenomeScan_ID)
|
||
|
) %>%
|
||
|
tibble::column_to_rownames("GenomeScan_ID") %>%
|
||
|
select.rows.in.order(
|
||
|
rownames(norm.expr.data.pcs$x)
|
||
|
),
|
||
|
x = pca.combi[1],
|
||
|
y = pca.combi[2],
|
||
|
colour = var,
|
||
|
loadings = FALSE,
|
||
|
loadings.label = FALSE,
|
||
|
#label = FALSE,
|
||
|
label.size = 3
|
||
|
) +
|
||
|
ggprism::theme_prism() +
|
||
|
#ggprism::scale_colour_prism() +
|
||
|
ggprism::scale_shape_prism() +
|
||
|
ggplot2::labs(subtitle = paste0(str_to_title(var), " (", pca.title,")"))
|
||
|
)
|
||
|
dev.off()
|
||
|
}
|
||
|
}
|
||
|
|