# Total counts per sample ----
# Normalized with limma::voom
# NOTE(review): no voom normalization is performed in this section; the sums
# below are taken over the values exactly as they appear in the counts table.

library(limma)
library(tidyverse)

# Inputs ----

# expression.data. Has columns:
# - Gene (gene identifier)
# - [Sample identifiers]
# The table has to be read from disk: piping the bare file name into
# column_to_rownames() fails because a character string is not a data frame.
expression.data.file <- "mrna_counts_table.csv"
expression.data <- readr::read_csv(expression.data.file)

# master.Table. Has columns:
# - GenomeScan_ID
# - gender, levels = c("male", "female")
# - age
# - factor(smoking.status, levels = c("Ex-smoker", "Current smoker"))
# NOTE(review): the columns are loaded as read_csv() parses them; converting
# gender and smoking.status to factors with the levels listed above is left to
# the code that uses them -- confirm against the downstream analysis.
master.Table.file <- "patient_table.csv"
master.Table <- readr::read_csv(master.Table.file)

# results.dir. The directory of where to put the resulting tables (and later
# your plots.)
results.dir <- "results"

# write_csv() does not create missing directories, so make sure it exists.
dir.create(results.dir, showWarnings = FALSE, recursive = TRUE)

# The analysis ----

# We calculate the number of mapped reads per sample: move the gene identifier
# into the row names so that only the sample columns remain, then sum each
# sample column over all genes.
total.count.per.sample <- expression.data %>%
  tibble::column_to_rownames("Gene") %>%
  colSums()

# One row per sample: the sample identifier and its summed count.
data.frame(
  sample = names(total.count.per.sample),
  counts = as.numeric(total.count.per.sample)
) %>%
  readr::write_csv(file.path(results.dir, "total.counts.per.sample.csv"))

# Next thing to do:
# - Check the number of reads per sample in total.counts.per.sample.csv
# - Plot the reads distribution (all reads) per sample in a boxplot.
# - (Optional) Calculate the number of unmapped, multimapped, unique mapped to
#   feature and unique mapped to no feature and plot these in a stacked bar
#   graph.