diff --git a/rnaseq/step1_fastqc/00_fastqc.pl b/rnaseq/step1_fastqc/00_fastqc.pl
new file mode 100755
index 0000000..a6bf57c
--- /dev/null
+++ b/rnaseq/step1_fastqc/00_fastqc.pl
@@ -0,0 +1,40 @@
+#!/usr/bin/perl -w
+use strict;
+
+# Run FastQC on every read file of every sample in the current directory.
+#
+# This script was made with UMI deduplication in mind, which is why there
+# are three .fastq.gz files for each sample. Which file holds which read
+# depends on the sequencing provider; in our case (GenomeScan, Leiden) R2
+# contains the UMI read, while R1 and R3 contain the reads from paired-end
+# sequencing.
+#
+# For every <sample>_R1.fastq.gz in the current directory, FastQC is run on
+# the R1, R2 and R3 files and each report is written to its own directory:
+# <sample>_R1, <sample>_R2 and <sample>_R3.
+
+foreach my $file1 ( <*_R1.fastq.gz> ) {
+    (my $sample = $file1) =~ s/_R1\.fastq\.gz$//;
+
+    foreach my $read (qw(R1 R2 R3)) {
+        # Build each file name from the sample name, so R2 and R3 are always
+        # derived from the R1 file the glob matched.
+        my $file   = $sample.'_'.$read.'.fastq.gz';
+        my $outdir = $sample.'_'.$read;
+
+        unless ( -e $file ) {
+            warn "skipping $file: file not found\n";
+            next;
+        }
+        # An existing directory is fine: it allows the script to be re-run.
+        unless ( -d $outdir or mkdir($outdir, 0700) ) {
+            warn "skipping $file: cannot create $outdir: $!\n";
+            next;
+        }
+
+        # The list form of system() bypasses the shell, so spaces or other
+        # special characters in a file name cannot split or alter the command.
+        system('fastqc', '-o', $outdir, $file) == 0
+            or warn "fastqc failed for $file (wait status $?)\n";
+    }
+}
diff --git a/rnaseq/step1_fastqc/00_fastqc.sh b/rnaseq/step1_fastqc/00_fastqc.sh
new file mode 100644
index 0000000..c24cc86
--- /dev/null
+++ b/rnaseq/step1_fastqc/00_fastqc.sh
@@ -0,0 +1,34 @@
+#!/bin/bash
+#SBATCH --job-name=FastQC.for.alveolar_type_2
+#SBATCH --comment=FastQC.for.alveolar_type_2
+#SBATCH --time=48:00:00
+#SBATCH --mincpus=2
+#SBATCH --mem=20G
+#SBATCH --qos=priority
+
+# For 173 samples, it will take about 24 hrs to run with about 15Gb of memory.
+# Should probably parallelize the perl script/make it a bash/slurm script.
+
+# NOTE(review): Perl 5.26.2 is loaded next to a BioPerl built for Perl 5.22.0,
+# and Java 11.0.2 next to a FastQC built for Java 1.8.0_144 -- confirm these
+# module combinations are intended.
+module purge
+module load Perl/5.26.2-foss-2015b-bare
+module load BioPerl/1.6.924-foss-2015b-Perl-5.22.0
+module load Java/11.0.2
+module load FastQC/0.11.7-Java-1.8.0_144-unlimited_JCE
+
+# Please see
+# https://www.youtube.com/watch?v=0Rj_xNuyOyQ
+
+# Stop here if the data directory is unavailable; otherwise the commands
+# below would run (and move directories) wherever the job happened to start.
+cd /groups/umcg-griac/tmp04/projects/umcg-rbults/alveolar_type2_fastq/ || exit 1
+perl scripts/00_fastqc.pl
+
+# Collect the per-read FastQC output directories in one place. -p keeps a
+# re-run from failing when the results directory already exists.
+mkdir -p rene_FastQC.results
+find . -maxdepth 1 -type d -iname "*_R[123]" -exec mv {} ./rene_FastQC.results/ \;
+#find . -maxdepth 1 -type f -iname "*.htm*" -exec mv {} ./FastQC.results/ \;
+