# Total counts per sample
# Normalized with limma::voom
#
# Outputs written to `results.dir`:
#   - total.counts.per.sample.csv
#   - counts.per.sample.normalised.png
#   - counts.per.sample.raw.zeros.removed.png

# NOTE(review): the preloader is expected to provide `expression.data`,
# `master.Table`, `results.dir` and the `%>%` pipe -- none of them are defined
# in this script. Confirm against "__ - Preloader.R".
source("__ - Preloader.R", verbose = TRUE)

# Helpers ----

# Build the per-sample annotation (GenomeScan_ID, gender, id) from the master
# table. Rows without a GenomeScan_ID are dropped. Rows whose gender is blank
# are labelled "water" and get a "Water <row number>" id; all other rows keep
# `sample.id` as their id.
build.sample.annotation <- function(sample.table) {
  sample.table %>%
    dplyr::filter(!is.na(GenomeScan_ID)) %>%
    dplyr::mutate(
      # `id` must be computed before `gender` is overwritten below, because it
      # tests the original (possibly blank) gender value.
      id = dplyr::case_when(
        stringr::str_trim(gender) == "" ~
          paste0("Water ", dplyr::row_number()),
        TRUE ~ sample.id
      ),
      gender = dplyr::case_when(
        stringr::str_trim(gender) == "" ~ "water",
        !is.na(gender) ~ as.character(gender)
      )
    ) %>%
    dplyr::select(GenomeScan_ID, gender, id)
}

# Reshape a wide gene-by-sample table (with a `Gene` column) to long format
# (`Gene`, `sample.id`, `expr.value`), optionally keep a subset of the long
# rows, and attach the sample annotation by GenomeScan_ID.
to.long.annotated <- function(wide.data, sample.table, keep = NULL) {
  long.data <- wide.data %>%
    tidyr::pivot_longer(
      cols = -Gene,
      names_to = "sample.id",
      values_to = "expr.value"
    )

  # Filter before joining so the join only handles the rows that are plotted.
  if (!is.null(keep)) {
    long.data <- long.data[which(keep(long.data$expr.value)), ]
  }

  long.data %>%
    dplyr::left_join(
      y = build.sample.annotation(sample.table),
      by = c("sample.id" = "GenomeScan_ID")
    )
}

# Per-sample boxplot of `expr.value`, filled by gender.
expr.boxplot <- function(long.data, title, y.label) {
  ggplot2::ggplot(
    data = long.data,
    mapping = ggplot2::aes(x = id, y = expr.value, fill = gender)
  ) +
    ggplot2::geom_boxplot() +
    ggplot2::scale_fill_manual(
      values = c("male" = "blue", "female" = "red", "water" = "green")
    ) +
    ggplot2::labs(
      title = title,
      y = y.label,
      x = "Sample",
      # The legend belongs to the `fill` aesthetic; labelling `gender` (the
      # column name) has no effect on the legend title.
      fill = "Gender"
    ) +
    ggprism::theme_prism() +
    ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 90))
}

# Save a plot into `out.dir` using the shared 40 x 20 cm layout.
save.sample.plot <- function(plot, out.dir, file.name) {
  ggplot2::ggsave(
    filename = file.path(out.dir, file.name),
    plot = plot,
    width = 40,
    height = 20,
    units = "cm"
  )
}

# The analysis ----

# Gene-by-sample counts with gene names as row names.
count.table <- expression.data %>%
  tibble::column_to_rownames("Gene")

# Keep genes with a total count of at least 10 across all samples, then
# normalize with limma::voom.
norm.expr.data <- count.table[rowSums(count.table) >= 10, ] %>%
  limma::voom() %>%
  as.matrix()

# Total counts per sample ----

total.count.per.sample <- colSums(count.table)

data.frame(
  sample = names(total.count.per.sample),
  counts = as.numeric(total.count.per.sample)
) %>%
  readr::write_csv(file.path(results.dir, "total.counts.per.sample.csv"))

# Normalized expression distribution ----

norm.data <- norm.expr.data %>%
  as.data.frame() %>%
  tibble::rownames_to_column("Gene") %>%
  to.long.annotated(master.Table)

norm.plot <- expr.boxplot(
  norm.data,
  title = "Normalized expression values distribution",
  y.label = "Normalized expression values (limma::voom)"
)

save.sample.plot(norm.plot, results.dir, "counts.per.sample.normalised.png")

# Raw expression distribution (zeros removed) ----

# Zeros are dropped because the y axis below is log2-transformed.
expr.data <- to.long.annotated(
  expression.data,
  master.Table,
  keep = function(value) value != 0
)

expr.plot <- expr.boxplot(
  expr.data,
  title = "Raw expression values distribution, without zero's",
  y.label = "Expression values"
) +
  ggplot2::scale_y_continuous(trans = "log2")

save.sample.plot(
  expr.plot,
  results.dir,
  "counts.per.sample.raw.zeros.removed.png"
)