# system_genetics/rnaseq/step6_overall_QC/03 - Sample Counts.R
#
# 40 lines
# 1.2 KiB
# R

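# Total counts per sample
# Normalized with limma::voom
# NOTE(review): no limma::voom call appears in this script; the counts are
# summed as provided. Confirm whether normalization is expected to happen
# elsewhere.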
library(limma)
library(tidyverse)
# ---- Inputs ----
# expression.data: the mRNA counts table. Expected columns:
#   - Gene: the gene identifier
#   - one column per sample, named by its sample identifier
expression.data <- "mrna_counts_table.csv"

# master.Table: the patient table. Expected columns:
#   - GenomeScan_ID
#   - gender: levels = c("male", "female")
#   - age
#   - smoking.status: factor with levels = c("Ex-smoker", "Current smoker")
master.Table <- "patient_table.csv"

# ---- Output ----
# results.dir: the directory in which the resulting tables (and, later, your
# plots) are stored.
results.dir <- "results"
# The analysis
# We calculate the number of mapped reads per sample by summing every sample
# column of the counts table.
#
# `expression.data` may hold either the path to the counts CSV or an already
# loaded data frame. A path has to be read from disk first: a bare file name
# is a character vector and cannot be passed to column_to_rownames().
counts.table <- if (is.character(expression.data) &&
  length(expression.data) == 1L) {
  readr::read_csv(expression.data)
} else {
  expression.data
}

# Move the gene identifiers out of the way so that only the sample columns
# remain, then sum each column.
total.count.per.sample <- counts.table %>%
  tibble::column_to_rownames("Gene") %>%
  colSums()

# write_csv() does not create missing directories, so make sure the results
# directory exists before writing into it.
dir.create(results.dir, showWarnings = FALSE, recursive = TRUE)

# One row per sample: the sample identifier and its total count.
data.frame(
  sample = names(total.count.per.sample),
  counts = as.numeric(total.count.per.sample)
) %>%
  readr::write_csv(file.path(results.dir, "total.counts.per.sample.csv"))
# Next steps:
# - Check the number of reads per sample in total.counts.per.sample.csv.
# - Plot the distribution of reads (all reads) per sample in a boxplot.
# - (Optional) Calculate the number of reads that are unmapped, multimapped,
#   uniquely mapped to a feature, and uniquely mapped to no feature, and plot
#   these in a stacked bar graph.