fastqc: simple comment. trimming: reverted back. overall qc: simplified the scripts and made sure to add instructions.
This commit is contained in:
@@ -1,17 +1,25 @@
|
||||
# Total counts per sample
|
||||
# Normalized with limma::voom
|
||||
library(limma)
|
||||
library(tidyverse)
|
||||
|
||||
# Run the preloader script with verbose diagnostics. TRUE is spelled out
# because T is an ordinary variable that can be reassigned.
source("__ - Preloader.R", verbose = TRUE)
|
||||
|
||||
# expression.data: the count table. Has columns:
#  - Gene (gene identifier)
#  - [Sample identifiers]
# The table is read from disk here because the analysis below pipes
# expression.data into data-frame verbs; a bare file name would make those
# calls fail.
expression.data <- readr::read_csv("mrna_counts_table.csv")

# master.Table: the patient table. Has columns:
#  - GenomeScan_ID
#  - gender, levels = c("male", "female")
#  - age
#  - factor(smoking.status, levels = c("Ex-smoker", "Current smoker"))
# NOTE(review): readr::read_csv() turns empty fields into NA by default;
# confirm that this suits any downstream handling of blank values.
master.Table <- readr::read_csv("patient_table.csv")

# results.dir: the directory in which to store the resulting tables
# (and, later, your plots).
results.dir <- "results"
|
||||
|
||||
|
||||
# The analysis
|
||||
# Expression matrix normalised with limma::voom, with genes as row names.
# Genes whose counts sum to less than 10 over all samples are dropped
# before normalising.
norm.expr.data <- tibble::column_to_rownames(expression.data, "Gene")
norm.expr.data <- as.matrix(
  limma::voom(norm.expr.data[rowSums(norm.expr.data) >= 10, ])
)
|
||||
|
||||
# Total counts per sample
# The number of mapped reads per sample is the column sum of the count
# table, after moving the gene identifiers into the row names.
total.count.per.sample <- colSums(
  tibble::column_to_rownames(expression.data, "Gene")
)
|
||||
@@ -23,141 +31,9 @@ data.frame(
|
||||
readr::write_csv(file.path(results.dir, "total.counts.per.sample.csv"))
|
||||
|
||||
|
||||
# Long-format table of the normalised expression values (one row per
# gene/sample pair), joined with the gender and plot label of each sample.
norm.data <- local({
  # Per-sample annotation taken from master.Table. Rows whose gender is an
  # empty string get the gender "water" and a numbered "Water <row>" label.
  # NOTE(review): `sample.id` is looked up inside master.Table here; confirm
  # that master.Table provides such a column.
  sample.annotation <- master.Table %>%
    dplyr::filter(!is.na(GenomeScan_ID)) %>%
    dplyr::mutate(
      id = dplyr::case_when(
        stringr::str_trim(gender) == "" ~ paste0("Water ", dplyr::row_number()),
        TRUE ~ sample.id
      ),
      gender = dplyr::case_when(
        stringr::str_trim(gender) == "" ~ "water",
        !is.na(gender) ~ as.character(gender)
      )
    ) %>%
    dplyr::select(GenomeScan_ID, gender, id)

  # Reshape the gene-by-sample matrix into Gene / sample.id / expr.value rows.
  long.values <- norm.expr.data %>%
    as.data.frame() %>%
    tibble::rownames_to_column("Gene") %>%
    tidyr::gather(key = "sample.id", value = "expr.value", -Gene)

  dplyr::left_join(
    long.values,
    y = sample.annotation,
    by = c("sample.id" = "GenomeScan_ID")
  )
})
|
||||
|
||||
# Boxplot of the normalised expression values per sample, filled by gender.
norm.plot <- norm.data %>%
  ggplot2::ggplot(
    mapping = ggplot2::aes(x = id, y = expr.value, fill = gender)
  ) +
  ggplot2::geom_boxplot() +
  ggplot2::scale_fill_manual(
    values = c("male" = "blue", "female" = "red", "water" = "green")
  ) +
  ggplot2::labs(
    title = "Normalized expression values distribution",
    y = "Normalized expression values (limma::voom)",
    x = "Sample",
    # labs() is keyed by aesthetic name. The gender column is mapped to
    # `fill`, so the legend label has to be given as `fill`, not `gender`.
    fill = "Gender"
  ) +
  ggprism::theme_prism() +
  # Rotate the sample labels so they do not overlap on the x axis.
  ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 90))
|
||||
|
||||
# Write the normalised-values boxplot to the results directory as a
# 40 cm x 20 cm PNG.
ggplot2::ggsave(
  file.path(results.dir, "counts.per.sample.normalised.png"),
  plot = norm.plot,
  units = "cm",
  width = 40,
  height = 20
)
|
||||
|
||||
|
||||
|
||||
# Long-format table of the raw expression values (one row per gene/sample
# pair, zero values removed), joined with the gender and plot label of each
# sample.
expr.data <- local({
  # Per-sample annotation taken from master.Table. Rows whose gender is an
  # empty string get the gender "water" and a numbered "Water <row>" label.
  # NOTE(review): `sample.id` is looked up inside master.Table here; confirm
  # that master.Table provides such a column.
  sample.annotation <- master.Table %>%
    dplyr::filter(!is.na(GenomeScan_ID)) %>%
    dplyr::mutate(
      id = dplyr::case_when(
        stringr::str_trim(gender) == "" ~ paste0("Water ", dplyr::row_number()),
        TRUE ~ sample.id
      ),
      gender = dplyr::case_when(
        stringr::str_trim(gender) == "" ~ "water",
        !is.na(gender) ~ as.character(gender)
      )
    ) %>%
    dplyr::select(GenomeScan_ID, gender, id)

  # Reshape to Gene / sample.id / expr.value rows and drop the zeros.
  nonzero.values <- expression.data %>%
    tidyr::gather(key = "sample.id", value = "expr.value", -Gene) %>%
    dplyr::filter(expr.value != 0)

  dplyr::left_join(
    nonzero.values,
    y = sample.annotation,
    by = c("sample.id" = "GenomeScan_ID")
  )
})
|
||||
|
||||
# Boxplot of the non-zero raw expression values per sample on a log2 y axis,
# filled by gender.
expr.plot <- expr.data %>%
  ggplot2::ggplot(
    mapping = ggplot2::aes(x = id, y = expr.value, fill = gender)
  ) +
  ggplot2::geom_boxplot() +
  ggplot2::scale_fill_manual(
    values = c("male" = "blue", "female" = "red", "water" = "green")
  ) +
  ggplot2::scale_y_continuous(trans = "log2") +
  ggplot2::labs(
    title = "Raw expression values distribution, without zero's",
    y = "Expression values",
    x = "Sample",
    # labs() is keyed by aesthetic name. The gender column is mapped to
    # `fill`, so the legend label has to be given as `fill`, not `gender`.
    fill = "Gender"
  ) +
  ggprism::theme_prism() +
  # Rotate the sample labels so they do not overlap on the x axis.
  ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 90))
|
||||
|
||||
# Write the raw-values boxplot to the results directory as a
# 40 cm x 20 cm PNG.
ggplot2::ggsave(
  file.path(results.dir, "counts.per.sample.raw.zeros.removed.png"),
  plot = expr.plot,
  units = "cm",
  width = 40,
  height = 20
)
|
||||
# Next thing to do:
|
||||
# - Check the number of reads per sample in total.counts.per.sample.csv
|
||||
# - Plot the reads distribution (all reads) per sample in a boxplot.
|
||||
# - (Optional) Calculate the number of reads that are unmapped, multimapped, uniquely mapped to
|
||||
# a feature, or uniquely mapped to no feature, and plot these in a stacked bar graph.
|
||||
|
||||
|
Reference in New Issue
Block a user