# Forked from GRIAC/system_genetics.
#
# Total counts per sample
# Normalized with limma::voom

# Run the preloader script before the analysis. The objects used further down
# (expression.data, master.Table, results.dir) and the %>% pipe are not defined
# in this file -- presumably the preloader provides them (TODO confirm).
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned, whereas `TRUE` is a reserved constant.
source("__ - Preloader.R", verbose = TRUE)

# The analysis
# Use the Gene column as row names so that only the count columns remain.
norm.expr.data <- tibble::column_to_rownames(expression.data, "Gene")

# Keep the genes whose counts summed over all samples reach at least 10, pass
# them through limma::voom() and convert the result with as.matrix().
norm.expr.data <- as.matrix(
  limma::voom(norm.expr.data[rowSums(norm.expr.data) >= 10, ])
)

# Total counts per sample
# Column sums of the expression table once Gene has been moved to the row
# names; the result is a named vector with one entry per remaining column.
total.count.per.sample <- colSums(
  tibble::column_to_rownames(expression.data, "Gene")
)

# Write the totals to results.dir as a two-column CSV (sample, counts).
readr::write_csv(
  data.frame(
    sample = names(total.count.per.sample),
    counts = as.numeric(total.count.per.sample)
  ),
  file.path(results.dir, "total.counts.per.sample.csv")
)

# Long-format table of norm.expr.data (one row per gene/sample pair) with the
# `gender` and plotting label `id` from master.Table joined on.
# local() keeps the two helper tables out of the global environment.
norm.data <- local({
  # Annotation for the master.Table rows that have a GenomeScan_ID.
  # Rows whose gender is blank after trimming are labelled
  # "Water <row number in the filtered table>" and get gender "water";
  # all other rows use sample.id as label, and an NA gender stays NA.
  sample.annotation <- master.Table %>%
    dplyr::filter(!is.na(GenomeScan_ID)) %>%
    dplyr::mutate(
      id = dplyr::case_when(
        stringr::str_trim(gender) == "" ~
          paste0("Water ", dplyr::row_number()),
        TRUE ~ sample.id
      ),
      gender = dplyr::case_when(
        stringr::str_trim(gender) == "" ~ "water",
        !is.na(gender) ~ as.character(gender)
      )
    ) %>%
    dplyr::select(GenomeScan_ID, gender, id)

  # Matrix -> data frame with a Gene column -> one row per gene and sample.
  long.expr <- norm.expr.data %>%
    as.data.frame() %>%
    tibble::rownames_to_column("Gene") %>%
    tidyr::gather(key = "sample.id", value = "expr.value", -Gene)

  # The gathered column names (sample.id) are matched against GenomeScan_ID.
  dplyr::left_join(
    long.expr,
    sample.annotation,
    by = c("sample.id" = "GenomeScan_ID")
  )
})

# Boxplot of the expression values in norm.data: one box per sample label
# (`id`), filled by `gender` (male = blue, female = red, water = green).
norm.plot <- ggplot2::ggplot(
  data = norm.data,
  mapping = ggplot2::aes(x = id, y = expr.value, fill = gender)
) +
  ggplot2::geom_boxplot() +
  ggplot2::scale_fill_manual(
    values = c("male" = "blue", "female" = "red", "water" = "green")
  ) +
  ggplot2::labs(
    title = "Normalized expression values distribution",
    y = "Normalized expression values (limma::voom)",
    x = "Sample",
    # labs() is keyed by aesthetic name. The legend belongs to the `fill`
    # aesthetic, so its label has to be given as `fill`; `gender` is a data
    # column, not an aesthetic, and a label under that name is never used.
    # NOTE(review): whether the legend title is drawn still depends on the
    # theme applied below -- confirm in the rendered plot.
    fill = "Gender"
  ) +
  ggprism::theme_prism() +
  # Rotate the sample labels so they do not overlap on the x axis.
  ggplot2::theme(
    axis.text.x = ggplot2::element_text(angle = 90)
  )

# Save norm.plot to results.dir as a 40 cm x 20 cm image; the file type follows
# from the .png extension.
ggplot2::ggsave(
  file.path(results.dir, "counts.per.sample.normalised.png"),
  norm.plot,
  units = "cm",
  width = 40,
  height = 20
)

# Long-format table of the raw expression values (one row per gene/sample
# pair, zero values removed) with the `gender` and plotting label `id` from
# master.Table joined on.
# local() keeps the two helper tables out of the global environment.
expr.data <- local({
  # Annotation for the master.Table rows that have a GenomeScan_ID.
  # Rows whose gender is blank after trimming are labelled
  # "Water <row number in the filtered table>" and get gender "water";
  # all other rows use sample.id as label, and an NA gender stays NA.
  sample.annotation <- master.Table %>%
    dplyr::filter(!is.na(GenomeScan_ID)) %>%
    dplyr::mutate(
      id = dplyr::case_when(
        stringr::str_trim(gender) == "" ~
          paste0("Water ", dplyr::row_number()),
        TRUE ~ sample.id
      ),
      gender = dplyr::case_when(
        stringr::str_trim(gender) == "" ~ "water",
        !is.na(gender) ~ as.character(gender)
      )
    ) %>%
    dplyr::select(GenomeScan_ID, gender, id)

  # One row per gene and sample; rows with an expression value of 0 are
  # dropped before the join.
  long.expr <- expression.data %>%
    tidyr::gather(key = "sample.id", value = "expr.value", -Gene) %>%
    dplyr::filter(expr.value != 0)

  # The gathered column names (sample.id) are matched against GenomeScan_ID.
  dplyr::left_join(
    long.expr,
    sample.annotation,
    by = c("sample.id" = "GenomeScan_ID")
  )
})

# Boxplot of the non-zero raw expression values in expr.data on a log2 y axis:
# one box per sample label (`id`), filled by `gender`
# (male = blue, female = red, water = green).
expr.plot <- ggplot2::ggplot(
  data = expr.data,
  mapping = ggplot2::aes(x = id, y = expr.value, fill = gender)
) +
  ggplot2::geom_boxplot() +
  ggplot2::scale_fill_manual(
    values = c("male" = "blue", "female" = "red", "water" = "green")
  ) +
  ggplot2::scale_y_continuous(trans = "log2") +
  ggplot2::labs(
    title = "Raw expression values distribution, without zero's",
    y = "Expression values",
    x = "Sample",
    # labs() is keyed by aesthetic name. The legend belongs to the `fill`
    # aesthetic, so its label has to be given as `fill`; `gender` is a data
    # column, not an aesthetic, and a label under that name is never used.
    # NOTE(review): whether the legend title is drawn still depends on the
    # theme applied below -- confirm in the rendered plot.
    fill = "Gender"
  ) +
  ggprism::theme_prism() +
  # Rotate the sample labels so they do not overlap on the x axis.
  ggplot2::theme(
    axis.text.x = ggplot2::element_text(angle = 90)
  )

# Save expr.plot to results.dir as a 40 cm x 20 cm image; the file type follows
# from the .png extension.
ggplot2::ggsave(
  file.path(results.dir, "counts.per.sample.raw.zeros.removed.png"),
  expr.plot,
  units = "cm",
  width = 40,
  height = 20
)