diff --git a/rnaseq/umi/07_remove_dups.pl b/rnaseq/umi/07_remove_dups.pl index 94f3acc..2aa2859 100644 --- a/rnaseq/umi/07_remove_dups.pl +++ b/rnaseq/umi/07_remove_dups.pl @@ -1,7 +1,7 @@ #!/usr/bin/perl -w use strict; use Parallel::ForkManager; - +# this script creates one file with UMI unique reads and one with UMI duplicated reads my @torun = (); foreach my $file ( <*Aligned.sortedByCoord.out.bam> ) { push @torun, $file; @@ -27,9 +27,9 @@ foreach my $file ( @torun ) { next; } my ( $id, $flag, $chr, $pos, $mapq, $cigar, $chr2, $pos2, $tlen ) = split /\t/; - next if $flag & 256 or $flag & 512 or $flag & 1024; + next if $flag & 256 or $flag & 512 or $flag & 1024; #skip reads that are not the primary alignment (256), fail platform/vendor quality checks (512), or are PCR or optical duplicates (1024) # foreach ( 256, 512, 1024 ) { $flag-=$_ if $flag&$_ } - my ( $bc ) = $id =~ m/\:([GATCN\d]+)$/; + my ( $bc ) = $id =~ m/\:([GATCN\d]+)$/; #extract UMI barcode my $uniq = join( ':', $chr, $pos, $flag, $tlen, $bc ); my $pos_ = $pos-1; while ( $cigar =~ m/(\d+)([SHMDIN=])/g ) {