system_genetics/rnaseq/step6_overall_QC/01 - Principle Component An...

201 lines
4.3 KiB
R

# Principle Component Analysis
# Normalized with limma::voom
source("__ - Preloader.R", verbose=T)
# PCA variables
do.center = TRUE
do.scale = FALSE
# The analysis
master.Table %>% readr::write_csv(
file.path(results.dir, "patient.table.csv")
)
norm.expr.data <- expression.data %>%
tibble::column_to_rownames("Gene")
norm.expr.data <- norm.expr.data[rowSums(norm.expr.data) >= 10,] %>%
limma::voom() %>%
as.matrix()
# Principle Component analysis
results.dir.pca <- file.path(results.dir, "principle.components")
dir.create(results.dir.pca, recursive=TRUE)
norm.expr.data.pcs <- norm.expr.data %>%
t() %>%
stats::prcomp(
center = do.center,
scale. = do.scale
)
# Write summary of PCAs to files
pcs.summery <- summary(norm.expr.data.pcs)
pcs.summery$importance %>%
t() %>%
as.data.frame() %>%
tibble::rownames_to_column("PC.name") %>%
readr::write_csv(
file.path(results.dir.pca, "importance.csv")
)
pcs.summery$x %>%
t() %>%
as.data.frame() %>%
tibble::rownames_to_column("ensembl.id") %>%
readr::write_csv(
file.path(results.dir.pca, "values.csv")
)
pcs.summery$rotation %>%
t() %>%
as.data.frame() %>%
tibble::rownames_to_column("sample.id") %>%
readr::write_csv(
file.path(results.dir.pca, "rotation.csv")
)
data.frame(
rownames = names(pcs.summery$center),
center = pcs.summery$center,
scale = pcs.summery$scale
) %>%
readr::write_csv(
file.path(results.dir.pca, "rest.csv")
)
# Not saved: pcs.summery$sdev,
# Plot PCAs
# https://github.com/kevinblighe/PCAtools
results.dir.pca.plot <- file.path(results.dir.pca, "img")
dir.create(results.dir.pca.plot)
metadata <- master.Table %>%
dplyr::filter(
!is.na(GenomeScan_ID)
) %>%
tibble::column_to_rownames("GenomeScan_ID") %>%
select.rows.in.order(
colnames(norm.expr.data)
)
p <- pca(norm.expr.data,
metadata = metadata,
center = do.center,
scale = do.scale
)
elbow <- findElbowPoint(p$variance)
metavars <- c('Age','Gender','Smoking_status','COPD_Y_or_N','SEO_COPD_Y_or_N','GOLD_stage')
png(filename = file.path(results.dir.pca.plot,"scree_plot.png"),
width = 800, height = 800,
units = "px", pointsize = 12,
type = "Xlib")
print(screeplot(p,
axisLabSize = 12,
titleLabSize = 12,
components = getComponents(p, 1:(elbow+5)),
vline = c(elbow)
) +
geom_label(
aes(
x = elbow + 1,
y = 50,
label = 'Elbow method',
vjust = -1,
size = 8
)
))
dev.off()
png(filename = file.path(results.dir.pca.plot, "eigen_corr_plot.png"),
width = 1200, height = 1200,
units = "px", pointsize = 12,
type = "Xlib")
print(eigencorplot(p,
metavars = metavars
))
dev.off()
dir.create(file.path(results.dir.pca.plot, "pairsplot"))
for (var in metavars) {
png(
filename = file.path(results.dir.pca.plot, "pairsplot", paste0(var,".png")),
width = 1200, height = 1200,
units = "px", pointsize = 12,
type = "Xlib"
)
print(pairsplot(p,
components = getComponents(p, c(1:(elbow+1))),
triangle = TRUE,
trianglelabSize = 12,
hline = 0, vline = 0,
pointSize = 0.4,
gridlines.major = FALSE,
gridlines.minor = FALSE,
colby = var,
title = paste0('Pairs plot: ',var),
plotaxes = TRUE
))
dev.off()
}
# Plot PCAs - old failure
pca.combinations <- combinations(
n = (elbow+1),
r = 2,
v = 1:(elbow+1),
repeats.allowed = FALSE
)
dir.create(file.path(results.dir.pca.plot, "biplots"))
for (var in metavars) {
for (i in 1:nrow(pca.combinations)) {
pca.combi <- pca.combinations[i,]
pca.title <- paste(paste0("PC", pca.combi), collapse="_")
png(
filename = file.path(results.dir.pca.plot, "biplots", paste0(var, "-", pca.title, ".png")),
width = 800, height = 800,
units = "px", pointsize = 12,
type = "Xlib"
)
print(
autoplot(
norm.expr.data.pcs,
data = master.Table %>%
dplyr::filter(
!is.na(GenomeScan_ID)
) %>%
tibble::column_to_rownames("GenomeScan_ID") %>%
select.rows.in.order(
rownames(norm.expr.data.pcs$x)
),
x = pca.combi[1],
y = pca.combi[2],
colour = var,
loadings = FALSE,
loadings.label = FALSE,
#label = FALSE,
label.size = 3
) +
ggprism::theme_prism() +
#ggprism::scale_colour_prism() +
ggprism::scale_shape_prism() +
ggplot2::labs(subtitle = paste0(str_to_title(var), " (", pca.title,")"))
)
dev.off()
}
}