# Total counts per sample
# Normalized with limma::voom
library(limma)
library(tidyverse)


# expression.data. The count table, loaded from CSV. Has columns:
# - Gene (gene identifier)
# - [Sample identifiers]
# It must be a data frame (not a file name), because the analysis below pipes
# it directly into tibble::column_to_rownames("Gene").
expression.data <- readr::read_csv("mrna_counts_table.csv")
# master.Table. The patient/sample annotation table, loaded from CSV. Has columns:
# - GenomeScan_ID
# - gender, levels = c("male", "female")
# - age
# - factor(smoking.status, levels = c("Ex-smoker", "Current smoker"))
# NOTE(review): read_csv() does not create factors; convert gender and
# smoking.status to factors with the levels above where they are needed.
master.Table <- readr::read_csv("patient_table.csv")
# results.dir. The directory in which to put the resulting tables (and later your plots).
results.dir <- "results"


# The analysis
# We calculate the number of mapped reads per sample: the "Gene" column is
# moved into the row names so that only the sample columns remain, and
# colSums() then adds up the counts of all genes for each sample.
total.count.per.sample <- expression.data %>%
	tibble::column_to_rownames("Gene") %>%
	colSums()

# write_csv() does not create missing directories, so make sure the output
# directory exists (this is a no-op when it is already there).
dir.create(results.dir, showWarnings = FALSE, recursive = TRUE)

# Store the totals as a two-column table: one row per sample.
data.frame(
		sample = names(total.count.per.sample),
		counts = as.numeric(total.count.per.sample)
	) %>%
	readr::write_csv(file.path(results.dir, "total.counts.per.sample.csv"))


# Next things to do:
# - Check the number of reads per sample in total.counts.per.sample.csv
# - Plot the read-count distribution (all reads) per sample in a boxplot.
# - (Optional) Calculate the number of unmapped, multimapped, uniquely mapped to
#   a feature and uniquely mapped to no feature reads, and plot these in a
#   stacked bar graph.