Compare commits
3 Commits
59f1b425b1
...
981a983dd5
Author | SHA1 | Date | |
---|---|---|---|
|
981a983dd5 | ||
|
c05d3f7868 | ||
|
a7651f6a21 |
60
rnaseq/umi/07_remove_dups.pl
Normal file
60
rnaseq/umi/07_remove_dups.pl
Normal file
@ -0,0 +1,60 @@
|
||||
#!/usr/bin/perl -w
|
||||
use strict;
|
||||
use Parallel::ForkManager;
|
||||
# this script creats one file with UMI unique reads and one with UMI duplicated reads
|
||||
my @torun = ();
|
||||
foreach my $file ( <*Aligned.sortedByCoord.out.bam> ) {
|
||||
push @torun, $file;
|
||||
}
|
||||
|
||||
my $pm = Parallel::ForkManager->new( 12 );
|
||||
foreach my $file ( @torun ) {
|
||||
my $sample = $file;
|
||||
$sample =~ s/Aligned\.sortedByCoord\.out\.bam$//;
|
||||
next if -s $sample.'_uniq.bam';
|
||||
warn "Parsing $sample\n";
|
||||
$pm->start and next;
|
||||
my %seen = ();
|
||||
my %duplicates = ();
|
||||
my ( $uniqs, $dups ) = ( 0, 0 );
|
||||
open F, 'samtools view -h '.$file.' |';
|
||||
open F1, '| samtools view -bS - >'.$sample.'_uniq.bam';
|
||||
open F2, '| samtools view -bS - >'.$sample.'_dups.bam';
|
||||
while ( <F> ) {
|
||||
if ( m/^\@/ ) {
|
||||
print F1;
|
||||
print F2;
|
||||
next;
|
||||
}
|
||||
my ( $id, $flag, $chr, $pos, $mapq, $cigar, $chr2, $pos2, $tlen ) = split /\t/;
|
||||
next if $flag & 256 or $flag & 512 or $flag & 1024; #skip if the read is not primary alignment/read fails platform/vendor quality checks/read is PCR or optical duplicate
|
||||
# foreach ( 256, 512, 1024 ) { $flag-=$_ if $flag&$_ }
|
||||
my ( $bc ) = $id =~ m/\:([GATCN\d]+)$/; #extract UMI barcode
|
||||
my $uniq = join( ':', $chr, $pos, $flag, $tlen, $bc );
|
||||
my $pos_ = $pos-1;
|
||||
while ( $cigar =~ m/(\d+)([SHMDIN=])/g ) {
|
||||
$pos_+=$1 if $2 eq 'M' or $2 eq '=' or $2 eq 'D' or $2 eq 'N';
|
||||
}
|
||||
my $uniq2 = join( ':', $chr, $pos_, $flag, $tlen, $bc );
|
||||
if ( exists($duplicates{$id}) or # already marked as duplicate
|
||||
( not($flag&16) and ++$seen{ $uniq } > 1 ) or # plus strand
|
||||
( $flag&16 and ++$seen{ $uniq2 } > 1 )
|
||||
) {
|
||||
print F2;
|
||||
$dups++;
|
||||
$duplicates{$id} = 1;
|
||||
}
|
||||
else {
|
||||
print F1;
|
||||
$uniqs++;
|
||||
}
|
||||
}
|
||||
close F;
|
||||
close F1;
|
||||
close F2;
|
||||
warn "Unique: $uniqs (", 100*$uniqs/($uniqs+$dups), ")\n";
|
||||
system( 'samtools', 'index', $sample.'_uniq.bam' );
|
||||
# last;
|
||||
$pm->finish;
|
||||
}
|
||||
$pm->wait_all_children;
|
Loading…
Reference in New Issue
Block a user