# Principal Component Analysis
# Normalized with limma::voom

library(limma)
library(tidyverse)

# Input ----

# expression.data. Path to a CSV file with raw counts. Has columns:
# - Gene (gene identifier)
# - [Sample identifiers]
expression.data <- "mrna_counts_table.csv"

# results.dir. The directory of where to put the resulting tables
# (and later your plots.)
results.dir <- "results"

# PCA variables (passed to stats::prcomp as `center` and `scale.`)
do.center <- TRUE
do.scale <- FALSE

# The analysis ----
# We use prcomp to calculate the PCs. Afterwards you should plot the results.

# Read the counts table from disk; `expression.data` only holds the file
# path, so it has to be loaded before it can be reshaped. The "Gene" column
# becomes the row names, leaving one column per sample.
norm.expr.data <- readr::read_csv(expression.data) %>%
  tibble::column_to_rownames("Gene")

# Keep genes with a total count of at least 10 across all samples, then
# normalize with limma::voom and convert the result to a plain matrix
# (genes in rows, samples in columns).
norm.expr.data <- norm.expr.data[rowSums(norm.expr.data) >= 10, ] %>%
  limma::voom() %>%
  as.matrix()

# Principal Component analysis ----

# Directory name is kept as-is so existing output locations do not change.
results.dir.pca <- file.path(results.dir, "principle.components")
# showWarnings = FALSE: re-running the script with an existing output
# directory is expected and should not raise a warning.
dir.create(results.dir.pca, recursive = TRUE, showWarnings = FALSE)

# prcomp expects observations in rows, so transpose to samples x genes.
norm.expr.data.pcs <- norm.expr.data %>%
  t() %>%
  stats::prcomp(
    center = do.center,
    scale. = do.scale
  )

# Write summary of PCs to files ----
pcs.summery <- summary(norm.expr.data.pcs)

# importance.csv: one row per PC with the importance statistics as columns.
pcs.summery$importance %>%
  t() %>%
  as.data.frame() %>%
  tibble::rownames_to_column("PC.name") %>%
  readr::write_csv(
    file.path(results.dir.pca, "importance.csv")
  )

# values.csv: PC scores, transposed so that PCs are rows and samples are
# columns.
# NOTE(review): after t() the row names are PC names, not gene identifiers,
# so the "ensembl.id" header is a misnomer. It is left unchanged here to keep
# the output format stable; confirm with downstream consumers before renaming.
pcs.summery$x %>%
  t() %>%
  as.data.frame() %>%
  tibble::rownames_to_column("ensembl.id") %>%
  readr::write_csv(
    file.path(results.dir.pca, "values.csv")
  )

# rotation.csv: loadings, transposed so that PCs are rows and genes are
# columns.
# NOTE(review): after t() the row names are PC names, not sample identifiers,
# so the "sample.id" header is a misnomer. It is left unchanged here to keep
# the output format stable; confirm with downstream consumers before renaming.
pcs.summery$rotation %>%
  t() %>%
  as.data.frame() %>%
  tibble::rownames_to_column("sample.id") %>%
  readr::write_csv(
    file.path(results.dir.pca, "rotation.csv")
  )

# rest.csv: the centering and scaling values used by prcomp. When centering
# or scaling is disabled prcomp stores FALSE, which data.frame() recycles
# over all rows.
data.frame(
  rownames = names(pcs.summery$center),
  center = pcs.summery$center,
  scale = pcs.summery$scale
) %>%
  readr::write_csv(
    file.path(results.dir.pca, "rest.csv")
  )

# Not saved: pcs.summery$sdev,

# Next thing to do:
# - (Optional) scree plot - to determine the optimal cutoff for PC inclusion
#   based on explanation of variance
# - (Optional) eigencorplot - to correlate PCs to clinical variables so that
#   you know which PC to include for which analysis
# - (Optional) pairsplot - plot multiple PCs against each other in a single
#   figure
# - Plot the first couple of PCs against each other