added fastQC code snippet (.pl and .sh scripts)

This commit is contained in:
Rene Bults 2021-02-15 15:54:21 +01:00
parent 4b61c099f6
commit 1110d00e8c
2 changed files with 50 additions and 0 deletions

View File

@ -0,0 +1,23 @@
#!/usr/bin/perl -w
use strict;
# This script was written with UMI deduplication in mind, which is why
# there are three .fastq files for each sample.
# The sequencing provider specifies which file contains which reads;
# in our case (GenomeScan, Leiden) R2 contains the UMI read, while
# R1 and R3 contain the reads from paired-end sequencing.
# Run FastQC on all three read files (R1, R2 and R3) of every sample found in
# the current directory. Samples are discovered through their *_R1.fastq.gz
# file; the R2 and R3 file names are derived from the sample name. Each report
# is written to its own output directory named <sample>_<read>.
foreach my $file1 ( <*_R1.fastq.gz> ) {
    # Sample name = R1 file name without the "_R1.fastq.gz" suffix.
    my $sample = $file1;
    $sample =~ s/_R1\.fastq\.gz$//;

    foreach my $read (qw(R1 R2 R3)) {
        # Build every file name from the sample name. Previously the R3 name
        # was produced by applying s/// to a freshly declared (undefined)
        # variable, so it stayed undefined and FastQC never received the R3
        # file.
        my $file   = $sample . '_' . $read . '.fastq.gz';
        my $outdir = $sample . '_' . $read;

        # Best effort: a missing read file is reported and skipped instead of
        # leaving an empty output directory behind.
        unless ( -e $file ) {
            warn "skipping $read for sample '$sample': '$file' not found\n";
            next;
        }

        # The output directory has to exist before FastQC runs; a directory
        # left over from an earlier run is reused.
        unless ( -d $outdir or mkdir( $outdir, 0700 ) ) {
            warn "cannot create output directory '$outdir': $!\n";
            next;
        }

        # The list form of system() bypasses the shell, so spaces or shell
        # metacharacters in file names cannot be misinterpreted.
        system( 'fastqc', '-o', $outdir, $file ) == 0
            or warn "fastqc failed for '$file' (wait status $?)\n";
    }
}

View File

@ -0,0 +1,27 @@
#!/bin/bash
#SBATCH --job-name=FastQC.for.alveolar_type_2
#SBATCH --comment=FastQC.for.alveolar_type_2
#SBATCH --time=48:00:00
#SBATCH --mincpus=2
#SBATCH --mem=20G
#SBATCH --qos=priority
# SLURM batch job: runs scripts/00_fastqc.pl inside the project's fastq
# directory and then collects the per-read FastQC output directories
# (*_R1, *_R2, *_R3) into rene_FastQC.results.
#
# For 173 samples, it will take about 24 hrs to run with about 15 GB of memory.
# Should probably parallelize the perl script/make it a bash/slurm script.
module purge
# NOTE(review): the BioPerl module name references Perl-5.22.0 while
# Perl/5.26.2 is loaded just above, and the FastQC module name references
# Java-1.8.0_144 while Java/11.0.2 is loaded just above - confirm that these
# combinations do not conflict on the cluster.
module load Perl/5.26.2-foss-2015b-bare
module load BioPerl/1.6.924-foss-2015b-Perl-5.22.0
module load Java/11.0.2
module load FastQC/0.11.7-Java-1.8.0_144-unlimited_JCE
# Please see
# https://www.youtube.com/watch?v=0Rj_xNuyOyQ
# Stop immediately if the data directory is unreachable; otherwise the perl
# script and the find/mv below would operate on whatever directory the job
# happened to start in.
cd /groups/umcg-griac/tmp04/projects/umcg-rbults/alveolar_type2_fastq/ || exit 1
# Best effort: report a failing run, but still collect whatever was produced.
perl scripts/00_fastqc.pl || echo "WARNING: scripts/00_fastqc.pl exited with status $?" >&2
# -p keeps a re-run from failing when the results directory already exists.
mkdir -p rene_FastQC.results
# Move the per-read output directories (top level only) into the results dir.
find . -maxdepth 1 -type d -iname "*_R[123]" -exec mv {} ./rene_FastQC.results/ \;
#find . -maxdepth 1 -type f -iname "*.htm*" -exec mv {} ./FastQC.results/ \;