Compare commits
No commits in common. "981a983dd50548dff9633eedc9fbd42d99b6c633" and "59f1b425b169c2530fb073d739b15c0c9077cc76" have entirely different histories.
981a983dd5
...
59f1b425b1
@ -1,60 +0,0 @@
|
|||||||
#!/usr/bin/perl -w
|
|
||||||
use strict;
|
|
||||||
use Parallel::ForkManager;
|
|
||||||
# this script creats one file with UMI unique reads and one with UMI duplicated reads
|
|
||||||
my @torun = ();
|
|
||||||
foreach my $file ( <*Aligned.sortedByCoord.out.bam> ) {
|
|
||||||
push @torun, $file;
|
|
||||||
}
|
|
||||||
|
|
||||||
my $pm = Parallel::ForkManager->new( 12 );
|
|
||||||
foreach my $file ( @torun ) {
|
|
||||||
my $sample = $file;
|
|
||||||
$sample =~ s/Aligned\.sortedByCoord\.out\.bam$//;
|
|
||||||
next if -s $sample.'_uniq.bam';
|
|
||||||
warn "Parsing $sample\n";
|
|
||||||
$pm->start and next;
|
|
||||||
my %seen = ();
|
|
||||||
my %duplicates = ();
|
|
||||||
my ( $uniqs, $dups ) = ( 0, 0 );
|
|
||||||
open F, 'samtools view -h '.$file.' |';
|
|
||||||
open F1, '| samtools view -bS - >'.$sample.'_uniq.bam';
|
|
||||||
open F2, '| samtools view -bS - >'.$sample.'_dups.bam';
|
|
||||||
while ( <F> ) {
|
|
||||||
if ( m/^\@/ ) {
|
|
||||||
print F1;
|
|
||||||
print F2;
|
|
||||||
next;
|
|
||||||
}
|
|
||||||
my ( $id, $flag, $chr, $pos, $mapq, $cigar, $chr2, $pos2, $tlen ) = split /\t/;
|
|
||||||
next if $flag & 256 or $flag & 512 or $flag & 1024; #skip if the read is not primary alignment/read fails platform/vendor quality checks/read is PCR or optical duplicate
|
|
||||||
# foreach ( 256, 512, 1024 ) { $flag-=$_ if $flag&$_ }
|
|
||||||
my ( $bc ) = $id =~ m/\:([GATCN\d]+)$/; #extract UMI barcode
|
|
||||||
my $uniq = join( ':', $chr, $pos, $flag, $tlen, $bc );
|
|
||||||
my $pos_ = $pos-1;
|
|
||||||
while ( $cigar =~ m/(\d+)([SHMDIN=])/g ) {
|
|
||||||
$pos_+=$1 if $2 eq 'M' or $2 eq '=' or $2 eq 'D' or $2 eq 'N';
|
|
||||||
}
|
|
||||||
my $uniq2 = join( ':', $chr, $pos_, $flag, $tlen, $bc );
|
|
||||||
if ( exists($duplicates{$id}) or # already marked as duplicate
|
|
||||||
( not($flag&16) and ++$seen{ $uniq } > 1 ) or # plus strand
|
|
||||||
( $flag&16 and ++$seen{ $uniq2 } > 1 )
|
|
||||||
) {
|
|
||||||
print F2;
|
|
||||||
$dups++;
|
|
||||||
$duplicates{$id} = 1;
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
print F1;
|
|
||||||
$uniqs++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
close F;
|
|
||||||
close F1;
|
|
||||||
close F2;
|
|
||||||
warn "Unique: $uniqs (", 100*$uniqs/($uniqs+$dups), ")\n";
|
|
||||||
system( 'samtools', 'index', $sample.'_uniq.bam' );
|
|
||||||
# last;
|
|
||||||
$pm->finish;
|
|
||||||
}
|
|
||||||
$pm->wait_all_children;
|
|
0
rnaseq/umi/snippet.pl
Normal file
0
rnaseq/umi/snippet.pl
Normal file
Loading…
Reference in New Issue
Block a user