Added recommended empty cutoff function
This commit is contained in:
parent
48dc9d81e6
commit
c457e087be
@ -7,7 +7,8 @@
|
|||||||
# empty droplets and removing per gene the highest value found in the ambient
|
# empty droplets and removing per gene the highest value found in the ambient
|
||||||
# RNA if it's found to contaminate more than a certain threshold of droplets
|
# RNA if it's found to contaminate more than a certain threshold of droplets
|
||||||
###############################################################################
|
###############################################################################
|
||||||
# This file contains base functions
|
# TODO
|
||||||
|
#
|
||||||
###############################################################################
|
###############################################################################
|
||||||
library(Matrix)
|
library(Matrix)
|
||||||
library(Seurat)
|
library(Seurat)
|
||||||
@ -22,6 +23,14 @@ library(qlcMatrix)
|
|||||||
# p: the where in 'i' and 'x' a new column starts
|
# p: the where in 'i' and 'x' a new column starts
|
||||||
# Dimnames: The names of the rows and columns
|
# Dimnames: The names of the rows and columns
|
||||||
remove.background = function(geneCellMatrix, ambientRNAprofile){
|
remove.background = function(geneCellMatrix, ambientRNAprofile){
|
||||||
|
# do some input checks first
|
||||||
|
if(!("dgCMatrix" %in% class(geneCellMatrix))){
|
||||||
|
cat("geneCellMatrix should be a sparse matrix of the \"dgCMatrix\" class")
|
||||||
|
stop()
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# Here is the actual functionality
|
||||||
for(gene in names(ambientProfile[ambientProfile > 0])){
|
for(gene in names(ambientProfile[ambientProfile > 0])){
|
||||||
# Determine the locations where the gene is not zero, therefore referenced in i
|
# Determine the locations where the gene is not zero, therefore referenced in i
|
||||||
iLocs = which(geneCellMatrix@Dimnames[[1]] == gene)
|
iLocs = which(geneCellMatrix@Dimnames[[1]] == gene)
|
||||||
@ -72,20 +81,23 @@ read.full.matrix = function(fullFolderLocation){
|
|||||||
}
|
}
|
||||||
|
|
||||||
##############
|
##############
|
||||||
|
# Turns out that cellranger output looks different from WriteMM output and Read10X can't read the latter
|
||||||
|
# TODO
|
||||||
|
# Commented out until I find a fix
|
||||||
|
|
||||||
write.corrected.matrix = function(correctedMatrix, folderLocation, correctedSignal){
|
# write.corrected.matrix = function(correctedMatrix, folderLocation, correctedSignal){
|
||||||
dir.create(folderLocation)
|
# dir.create(folderLocation)
|
||||||
|
#
|
||||||
writeMM(obj = correctedMatrix, file=paste(folderLocation, "/matrix.mtx", sep = ""))
|
# writeMM(obj = correctedMatrix, file=paste(folderLocation, "/matrix.mtx", sep = ""))
|
||||||
|
#
|
||||||
# save genes and cells names
|
# # save genes and cells names
|
||||||
write(x = rownames(correctedMatrix), file = paste(folderLocation, "/genes.tsv", sep = ""))
|
# write(x = rownames(correctedMatrix), file = paste(folderLocation, "/genes.tsv", sep = ""))
|
||||||
write(x = colnames(correctedMatrix), file = paste(folderLocation, "/barcodes.tsv", sep = ""))
|
# write(x = colnames(correctedMatrix), file = paste(folderLocation, "/barcodes.tsv", sep = ""))
|
||||||
|
#
|
||||||
correctedSignal = correctedSignal[correctedSignal > 0]
|
# correctedSignal = correctedSignal[correctedSignal > 0]
|
||||||
write.table(list(correctedSignal), file = paste(folderLocation, "/genesCorrectedFor.csv", sep = ""), row.names = TRUE, col.names = FALSE)
|
# write.table(list(correctedSignal), file = paste(folderLocation, "/genesCorrectedFor.csv", sep = ""), row.names = TRUE, col.names = FALSE)
|
||||||
|
#
|
||||||
}
|
# }
|
||||||
|
|
||||||
# describe the number of genes identified in the background
|
# describe the number of genes identified in the background
|
||||||
# and the number of genes failing the contaminiation chance threshold
|
# and the number of genes failing the contaminiation chance threshold
|
||||||
@ -132,7 +144,13 @@ plot.ambient.profile = function(ambientProfile){
|
|||||||
ylab = "Genes identified as contamination")
|
ylab = "Genes identified as contamination")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# I noticed that the number of genes removed tends to even out over time
|
||||||
|
# Test whether the point where this first happens is a good empty cutoff point
|
||||||
|
recommendedRemoval = function(ambientProfile){
|
||||||
|
highestNumberOfGenes = max(ambientProfile[,3])
|
||||||
|
firstOccurence = match(highestNumberOfGenes, ambientProfile[,3])
|
||||||
|
return(as.numeric(rownames(ambientProfile[firstOccurence,])))
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user