# Principle Component Analysis # Normalized with limma::voom source("__ - Preloader.R", verbose=T) # PCA variables do.center = TRUE do.scale = FALSE # The analysis master.Table %>% readr::write_csv( file.path(results.dir, "patient.table.csv") ) norm.expr.data <- expression.data %>% tibble::column_to_rownames("Gene") norm.expr.data <- norm.expr.data[rowSums(norm.expr.data) >= 10,] %>% limma::voom() %>% as.matrix() # Principle Component analysis results.dir.pca <- file.path(results.dir, "principle.components") dir.create(results.dir.pca, recursive=TRUE) norm.expr.data.pcs <- norm.expr.data %>% t() %>% stats::prcomp( center = do.center, scale. = do.scale ) # Write summary of PCAs to files pcs.summery <- summary(norm.expr.data.pcs) pcs.summery$importance %>% t() %>% as.data.frame() %>% tibble::rownames_to_column("PC.name") %>% readr::write_csv( file.path(results.dir.pca, "importance.csv") ) pcs.summery$x %>% t() %>% as.data.frame() %>% tibble::rownames_to_column("ensembl.id") %>% readr::write_csv( file.path(results.dir.pca, "values.csv") ) pcs.summery$rotation %>% t() %>% as.data.frame() %>% tibble::rownames_to_column("sample.id") %>% readr::write_csv( file.path(results.dir.pca, "rotation.csv") ) data.frame( rownames = names(pcs.summery$center), center = pcs.summery$center, scale = pcs.summery$scale ) %>% readr::write_csv( file.path(results.dir.pca, "rest.csv") ) # Not saved: pcs.summery$sdev, # Plot PCAs # https://github.com/kevinblighe/PCAtools results.dir.pca.plot <- file.path(results.dir.pca, "img") dir.create(results.dir.pca.plot) metadata <- master.Table %>% dplyr::filter( !is.na(GenomeScan_ID) ) %>% tibble::column_to_rownames("GenomeScan_ID") %>% select.rows.in.order( colnames(norm.expr.data) ) p <- pca(norm.expr.data, metadata = metadata, center = do.center, scale = do.scale ) elbow <- findElbowPoint(p$variance) metavars <- c('Age','Gender','Smoking_status','COPD_Y_or_N','SEO_COPD_Y_or_N','GOLD_stage') png(filename = file.path(results.dir.pca.plot,"scree_plot.png"), width = 800, height = 800, units = "px", pointsize = 12, type = "Xlib") print(screeplot(p, axisLabSize = 12, titleLabSize = 12, components = getComponents(p, 1:(elbow+5)), vline = c(elbow) ) + geom_label( aes( x = elbow + 1, y = 50, label = 'Elbow method', vjust = -1, size = 8 ) )) dev.off() png(filename = file.path(results.dir.pca.plot, "eigen_corr_plot.png"), width = 1200, height = 1200, units = "px", pointsize = 12, type = "Xlib") print(eigencorplot(p, metavars = metavars )) dev.off() dir.create(file.path(results.dir.pca.plot, "pairsplot")) for (var in metavars) { png( filename = file.path(results.dir.pca.plot, "pairsplot", paste0(var,".png")), width = 1200, height = 1200, units = "px", pointsize = 12, type = "Xlib" ) print(pairsplot(p, components = getComponents(p, c(1:(elbow+1))), triangle = TRUE, trianglelabSize = 12, hline = 0, vline = 0, pointSize = 0.4, gridlines.major = FALSE, gridlines.minor = FALSE, colby = var, title = paste0('Pairs plot: ',var), plotaxes = TRUE )) dev.off() } # Plot PCAs - old failure pca.combinations <- combinations( n = (elbow+1), r = 2, v = 1:(elbow+1), repeats.allowed = FALSE ) dir.create(file.path(results.dir.pca.plot, "biplots")) for (var in metavars) { for (i in 1:nrow(pca.combinations)) { pca.combi <- pca.combinations[i,] pca.title <- paste(paste0("PC", pca.combi), collapse="_") png( filename = file.path(results.dir.pca.plot, "biplots", paste0(var, "-", pca.title, ".png")), width = 800, height = 800, units = "px", pointsize = 12, type = "Xlib" ) print( autoplot( norm.expr.data.pcs, data = master.Table %>% dplyr::filter( !is.na(GenomeScan_ID) ) %>% tibble::column_to_rownames("GenomeScan_ID") %>% select.rows.in.order( rownames(norm.expr.data.pcs$x) ), x = pca.combi[1], y = pca.combi[2], colour = var, loadings = FALSE, loadings.label = FALSE, #label = FALSE, label.size = 3 ) + ggprism::theme_prism() + #ggprism::scale_colour_prism() + ggprism::scale_shape_prism() + ggplot2::labs(subtitle = paste0(str_to_title(var), " (", pca.title,")")) ) dev.off() } }