# system_genetics/rnaseq/step6_overall_QC/03 - Sample Counts.R
#
# 164 lines
# 3.5 KiB
# R

# Per-sample QC of the expression counts:
#   * total counts per sample (written to CSV)
#   * per-sample distribution of limma::voom-normalized expression values
#   * per-sample distribution of raw, non-zero expression values
# This script reads `expression.data`, `master.Table` and `results.dir`, none
# of which are defined here; presumably the preloader provides them (confirm).
# `TRUE` is spelled out because `T` is an ordinary, reassignable variable.
source("__ - Preloader.R", verbose = TRUE)
# Normalization input: move the gene identifiers into the row names so that
# only the per-sample count columns remain.
norm.expr.data <- tibble::column_to_rownames(expression.data, var = "Gene")
# Keep the genes whose counts summed over all samples reach at least 10, run
# limma::voom on them and coerce the result to a plain matrix.
norm.expr.data <- as.matrix(
  limma::voom(
    norm.expr.data[rowSums(norm.expr.data) >= 10, ]
  )
)
# Library size per sample: column sums of the raw counts over all genes
# (no gene filtering is applied here).
total.count.per.sample <- colSums(
  tibble::column_to_rownames(expression.data, var = "Gene")
)
# Export as a two-column table: sample name and its total count.
readr::write_csv(
  data.frame(
    sample = names(total.count.per.sample),
    counts = as.numeric(total.count.per.sample)
  ),
  file.path(results.dir, "total.counts.per.sample.csv")
)
# Long-format table of the voom values: one row per gene/sample pair, with the
# sample's gender and plotting label joined in from the master table.
norm.data <- dplyr::left_join(
  # Matrix -> data frame with an explicit Gene column -> long format.
  x = tidyr::gather(
    tibble::rownames_to_column(as.data.frame(norm.expr.data), var = "Gene"),
    key = "sample.id",
    value = "expr.value",
    -Gene
  ),
  # Sample annotation, restricted to rows that carry a GenomeScan_ID.
  y = master.Table %>%
    dplyr::filter(!is.na(GenomeScan_ID)) %>%
    dplyr::mutate(
      # Helper column (dropped by the select below): TRUE where gender is
      # blank after trimming; such rows are labelled as water.
      gender.is.blank = stringr::str_trim(gender) == "",
      # Plot label: "Water <row number>" for blank-gender rows, otherwise the
      # master table's own sample.id. Must be computed before gender is
      # overwritten.
      id = dplyr::case_when(
        gender.is.blank ~ paste0("Water ", dplyr::row_number()),
        TRUE ~ sample.id
      ),
      # Blank gender becomes "water"; a missing gender stays NA.
      gender = dplyr::case_when(
        gender.is.blank ~ "water",
        !is.na(gender) ~ as.character(gender)
      )
    ) %>%
    dplyr::select(GenomeScan_ID, gender, id),
  # Expression column names are matched against GenomeScan_ID.
  by = c("sample.id" = "GenomeScan_ID")
)
# Box plot of the voom-normalized expression values: one box per sample label,
# filled by gender (rows labelled "water" are drawn in green).
norm.plot <- norm.data %>%
  ggplot2::ggplot(
    mapping = ggplot2::aes(
      x = id,
      y = expr.value,
      fill = gender
    )
  ) +
  ggplot2::geom_boxplot() +
  ggplot2::scale_fill_manual(
    values = c(
      "male" = "blue",
      "female" = "red",
      "water" = "green"
    )
  ) +
  ggplot2::labs(
    title = "Normalized expression values distribution",
    y = "Normalized expression values (limma::voom)",
    x = "Sample",
    # labs() is keyed by aesthetic name; the legend belongs to the `fill`
    # aesthetic, so its title has to be set through `fill`, not `gender`.
    fill = "Gender"
  ) +
  ggprism::theme_prism() +
  ggplot2::theme(
    # Rotate the sample labels so they do not overlap.
    axis.text.x = ggplot2::element_text(angle = 90)
  )
# Write the figure next to the other results (40 x 20 cm).
ggplot2::ggsave(
  filename = file.path(results.dir, "counts.per.sample.normalised.png"),
  plot = norm.plot,
  width = 40,
  height = 20,
  units = "cm"
)
# Long-format table of the raw expression values with zeros removed, annotated
# with each sample's gender and plotting label from the master table.
expr.data <- dplyr::left_join(
  # Wide -> long, then drop zero values (a later plot uses a log2 axis).
  x = dplyr::filter(
    tidyr::gather(
      expression.data,
      key = "sample.id",
      value = "expr.value",
      -Gene
    ),
    expr.value != 0
  ),
  # Sample annotation, restricted to rows that carry a GenomeScan_ID.
  y = master.Table %>%
    dplyr::filter(!is.na(GenomeScan_ID)) %>%
    dplyr::mutate(
      # Helper column (dropped by the select below): TRUE where gender is
      # blank after trimming; such rows are labelled as water.
      gender.is.blank = stringr::str_trim(gender) == "",
      # Plot label: "Water <row number>" for blank-gender rows, otherwise the
      # master table's own sample.id. Must be computed before gender is
      # overwritten.
      id = dplyr::case_when(
        gender.is.blank ~ paste0("Water ", dplyr::row_number()),
        TRUE ~ sample.id
      ),
      # Blank gender becomes "water"; a missing gender stays NA.
      gender = dplyr::case_when(
        gender.is.blank ~ "water",
        !is.na(gender) ~ as.character(gender)
      )
    ) %>%
    dplyr::select(GenomeScan_ID, gender, id),
  # Expression column names are matched against GenomeScan_ID.
  by = c("sample.id" = "GenomeScan_ID")
)
# Box plot of the raw, non-zero expression values on a log2 y axis: one box
# per sample label, filled by gender (rows labelled "water" are drawn in
# green).
expr.plot <- expr.data %>%
  ggplot2::ggplot(
    mapping = ggplot2::aes(
      x = id,
      y = expr.value,
      fill = gender
    )
  ) +
  ggplot2::geom_boxplot() +
  ggplot2::scale_fill_manual(
    values = c(
      "male" = "blue",
      "female" = "red",
      "water" = "green"
    )
  ) +
  # Zeros were filtered out of expr.data beforehand, so log2 is defined for
  # every remaining value.
  ggplot2::scale_y_continuous(trans = "log2") +
  ggplot2::labs(
    title = "Raw expression values distribution, without zero's",
    y = "Expression values",
    x = "Sample",
    # labs() is keyed by aesthetic name; the legend belongs to the `fill`
    # aesthetic, so its title has to be set through `fill`, not `gender`.
    fill = "Gender"
  ) +
  ggprism::theme_prism() +
  ggplot2::theme(
    # Rotate the sample labels so they do not overlap.
    axis.text.x = ggplot2::element_text(angle = 90)
  )
# Write the figure next to the other results (40 x 20 cm).
ggplot2::ggsave(
  filename = file.path(results.dir, "counts.per.sample.raw.zeros.removed.png"),
  plot = expr.plot,
  width = 40,
  height = 20,
  units = "cm"
)