forked from GRIAC/system_genetics
Added description and some comments for UMI deduplication
This commit is contained in:
parent
a7651f6a21
commit
c05d3f7868
@ -1,7 +1,7 @@
|
|||||||
#!/usr/bin/perl -w
|
#!/usr/bin/perl -w
|
||||||
use strict;
|
use strict;
|
||||||
use Parallel::ForkManager;
|
use Parallel::ForkManager;
|
||||||
|
# This script creates one file with UMI-unique reads and one with UMI-duplicated reads.
|
||||||
my @torun = ();
|
my @torun = ();
|
||||||
foreach my $file ( <*Aligned.sortedByCoord.out.bam> ) {
|
foreach my $file ( <*Aligned.sortedByCoord.out.bam> ) {
|
||||||
push @torun, $file;
|
push @torun, $file;
|
||||||
@ -27,9 +27,9 @@ foreach my $file ( @torun ) {
|
|||||||
next;
|
next;
|
||||||
}
|
}
|
||||||
my ( $id, $flag, $chr, $pos, $mapq, $cigar, $chr2, $pos2, $tlen ) = split /\t/;
|
my ( $id, $flag, $chr, $pos, $mapq, $cigar, $chr2, $pos2, $tlen ) = split /\t/;
|
||||||
next if $flag & 256 or $flag & 512 or $flag & 1024;
|
next if $flag & 256 or $flag & 512 or $flag & 1024; # skip secondary alignments (256), reads failing platform/vendor quality checks (512), and PCR or optical duplicates (1024)
|
||||||
# foreach ( 256, 512, 1024 ) { $flag-=$_ if $flag&$_ }
|
# foreach ( 256, 512, 1024 ) { $flag-=$_ if $flag&$_ }
|
||||||
my ( $bc ) = $id =~ m/\:([GATCN\d]+)$/;
|
my ( $bc ) = $id =~ m/\:([GATCN\d]+)$/; #extract UMI barcode
|
||||||
my $uniq = join( ':', $chr, $pos, $flag, $tlen, $bc );
|
my $uniq = join( ':', $chr, $pos, $flag, $tlen, $bc );
|
||||||
my $pos_ = $pos-1;
|
my $pos_ = $pos-1;
|
||||||
while ( $cigar =~ m/(\d+)([SHMDIN=])/g ) {
|
while ( $cigar =~ m/(\d+)([SHMDIN=])/g ) {
|
||||||
|
Loading…
Reference in New Issue
Block a user