diff --git a/rnaseq/umi/07_remove_dups.pl b/rnaseq/umi/07_remove_dups.pl index 2aa2859..8834e38 100644 --- a/rnaseq/umi/07_remove_dups.pl +++ b/rnaseq/umi/07_remove_dups.pl @@ -2,6 +2,7 @@ use strict; use Parallel::ForkManager; # this script creats one file with UMI unique reads and one with UMI duplicated reads +# as input you need aligned sorted by coordinate bam file my @torun = (); foreach my $file ( <*Aligned.sortedByCoord.out.bam> ) { push @torun, $file; @@ -33,12 +34,12 @@ foreach my $file ( @torun ) { my $uniq = join( ':', $chr, $pos, $flag, $tlen, $bc ); my $pos_ = $pos-1; while ( $cigar =~ m/(\d+)([SHMDIN=])/g ) { - $pos_+=$1 if $2 eq 'M' or $2 eq '=' or $2 eq 'D' or $2 eq 'N'; + $pos_+=$1 if $2 eq 'M' or $2 eq '=' or $2 eq 'D' or $2 eq 'N'; # find position for minus strand } my $uniq2 = join( ':', $chr, $pos_, $flag, $tlen, $bc ); if ( exists($duplicates{$id}) or # already marked as duplicate ( not($flag&16) and ++$seen{ $uniq } > 1 ) or # plus strand - ( $flag&16 and ++$seen{ $uniq2 } > 1 ) + ( $flag&16 and ++$seen{ $uniq2 } > 1 ) #minus strand ) { print F2; $dups++;