Comments to umi deduplication script #8
|
@ -2,6 +2,7 @@
|
||||||
use strict;
|
use strict;
|
||||||
use Parallel::ForkManager;
|
use Parallel::ForkManager;
|
||||||
# this script creats one file with UMI unique reads and one with UMI duplicated reads
|
# this script creats one file with UMI unique reads and one with UMI duplicated reads
|
||||||
|
# as input you need aligned sorted by coordinate bam file
|
||||||
my @torun = ();
|
my @torun = ();
|
||||||
foreach my $file ( <*Aligned.sortedByCoord.out.bam> ) {
|
foreach my $file ( <*Aligned.sortedByCoord.out.bam> ) {
|
||||||
push @torun, $file;
|
push @torun, $file;
|
||||||
|
@ -33,12 +34,12 @@ foreach my $file ( @torun ) {
|
||||||
my $uniq = join( ':', $chr, $pos, $flag, $tlen, $bc );
|
my $uniq = join( ':', $chr, $pos, $flag, $tlen, $bc );
|
||||||
my $pos_ = $pos-1;
|
my $pos_ = $pos-1;
|
||||||
while ( $cigar =~ m/(\d+)([SHMDIN=])/g ) {
|
while ( $cigar =~ m/(\d+)([SHMDIN=])/g ) {
|
||||||
$pos_+=$1 if $2 eq 'M' or $2 eq '=' or $2 eq 'D' or $2 eq 'N';
|
$pos_+=$1 if $2 eq 'M' or $2 eq '=' or $2 eq 'D' or $2 eq 'N'; # find position for minus strand
|
||||||
}
|
}
|
||||||
my $uniq2 = join( ':', $chr, $pos_, $flag, $tlen, $bc );
|
my $uniq2 = join( ':', $chr, $pos_, $flag, $tlen, $bc );
|
||||||
if ( exists($duplicates{$id}) or # already marked as duplicate
|
if ( exists($duplicates{$id}) or # already marked as duplicate
|
||||||
( not($flag&16) and ++$seen{ $uniq } > 1 ) or # plus strand
|
( not($flag&16) and ++$seen{ $uniq } > 1 ) or # plus strand
|
||||||
( $flag&16 and ++$seen{ $uniq2 } > 1 )
|
( $flag&16 and ++$seen{ $uniq2 } > 1 ) #minus strand
|
||||||
) {
|
) {
|
||||||
print F2;
|
print F2;
|
||||||
$dups++;
|
$dups++;
|
||||||
|
|
Loading…
Reference in New Issue