Merge pull request 'Comments to umi deduplication script' (#8) from s3970582/system_genetics:master into master

Reviewed-on: #8
This commit is contained in:
T. Karp 2021-02-23 15:24:45 +01:00
commit 4443b3c2a9
1 changed files with 3 additions and 2 deletions

View File

@ -2,6 +2,7 @@
use strict;
use Parallel::ForkManager;
# this script creats one file with UMI unique reads and one with UMI duplicated reads
# as input you need aligned sorted by coordinate bam file
my @torun = ();
foreach my $file ( <*Aligned.sortedByCoord.out.bam> ) {
push @torun, $file;
@ -33,12 +34,12 @@ foreach my $file ( @torun ) {
my $uniq = join( ':', $chr, $pos, $flag, $tlen, $bc );
my $pos_ = $pos-1;
while ( $cigar =~ m/(\d+)([SHMDIN=])/g ) {
$pos_+=$1 if $2 eq 'M' or $2 eq '=' or $2 eq 'D' or $2 eq 'N';
$pos_+=$1 if $2 eq 'M' or $2 eq '=' or $2 eq 'D' or $2 eq 'N'; # find position for minus strand
}
my $uniq2 = join( ':', $chr, $pos_, $flag, $tlen, $bc );
if ( exists($duplicates{$id}) or # already marked as duplicate
( not($flag&16) and ++$seen{ $uniq } > 1 ) or # plus strand
( $flag&16 and ++$seen{ $uniq2 } > 1 )
( $flag&16 and ++$seen{ $uniq2 } > 1 ) #minus strand
) {
print F2;
$dups++;