2021-02-09 12:52:44 +01:00
|
|
|
# Total counts per sample
|
|
|
|
# Normalized with limma::voom
|
2021-02-10 13:42:51 +01:00
|
|
|
library(limma)
|
|
|
|
library(tidyverse)
|
2021-02-09 12:52:44 +01:00
|
|
|
|
|
|
|
|
2021-02-10 13:42:51 +01:00
|
|
|
# expression.data: the raw counts table, loaded from CSV. Expected columns:
#   - Gene (gene identifier)
#   - [Sample identifiers] (one column per sample)
# It is read into a data frame here, rather than kept as a file name, because
# the analysis pipes it straight into tibble::column_to_rownames(), which only
# accepts a data frame.
expression.data <- readr::read_csv("mrna_counts_table.csv")

# master.Table: the patient table, loaded from CSV. Expected columns:
#   - GenomeScan_ID
#   - gender, levels = c("male", "female")
#   - age
#   - factor(smoking.status, levels = c("Ex-smoker", "Current smoker"))
# NOTE(review): columns arrive with the types readr guesses; gender and
# smoking.status presumably still need converting to factors with the levels
# listed above before modelling - confirm against the downstream code.
master.Table <- readr::read_csv("patient_table.csv")

# results.dir: the directory that receives the resulting tables (and later
# your plots).
results.dir <- "results"
|
2021-02-09 12:52:44 +01:00
|
|
|
|
|
|
|
|
2021-02-10 13:42:51 +01:00
|
|
|
# The analysis
|
|
|
|
# We calculate the number of mapped reads per sample.
|
2021-02-09 12:52:44 +01:00
|
|
|
# Total counts per sample: move the Gene column into the row names so that
# only the sample columns remain, then sum every column. The result is a named
# numeric vector (names = sample identifiers).
total.count.per.sample <- expression.data %>%
  tibble::column_to_rownames("Gene") %>%
  colSums()

# readr::write_csv() does not create missing directories, so make sure the
# results directory exists first. showWarnings = FALSE keeps this silent when
# the directory is already there.
dir.create(results.dir, showWarnings = FALSE, recursive = TRUE)

# Write the totals as a two-column table (sample, counts) for inspection.
data.frame(
  sample = names(total.count.per.sample),
  counts = as.numeric(total.count.per.sample)
) %>%
  readr::write_csv(file.path(results.dir, "total.counts.per.sample.csv"))
|
|
|
|
|
|
|
|
|
2021-02-10 13:42:51 +01:00
|
|
|
# Next thing to do:
|
|
|
|
# - Check the number of reads per sample in total.counts.per.sample.csv
|
|
|
|
# - Plot the reads distribution (all reads) per sample in a boxplot.
|
|
|
|
# - (Optional) Calculate the number of reads that are unmapped, multimapped,
#   uniquely mapped to a feature, and uniquely mapped to no feature, and plot
#   these in a stacked bar graph.
|
2021-02-09 12:52:44 +01:00
|
|
|
|