added fastQC code snippet (.pl and .sh scripts)

2021-02-15 15:54:21 +01:00
parent 4b61c099f6
commit 1110d00e8c
2 changed files with 50 additions and 0 deletions
--- a/rnaseq/step1_fastqc/00_fastqc.pl
+++ b/rnaseq/step1_fastqc/00_fastqc.pl
@@ -0,0 +1,23 @@
+#!/usr/bin/perl -w
+use strict;
+
+# Runs FastQC on the R1, R2 and R3 FASTQ files of every sample found in the
+# current directory, writing each report to <sample>_R1/, _R2/ or _R3/.
+# Written with UMI deduplication in mind: there are three .fastq files per
+# sample. Which file holds what is stated by the provider; for our data, from
+# GenomeScan in Leiden, R2 contains the UMI read and R1/R3 the paired-end reads.
+
+foreach my $file1 ( <*_R1.fastq.gz> ) {
+    my $sample = $file1;
+    $sample =~ s/_R1\.fastq\.gz$//;
+    foreach my $read ( 'R1', 'R2', 'R3' ) {
+        my $file   = $sample.'_'.$read.'.fastq.gz';
+        my $outdir = $sample.'_'.$read;
+        # Warn and move on instead of aborting the whole batch.
+        unless ( -e $file ) { warn "missing $file, skipping\n"; next; }
+        unless ( -d $outdir or mkdir($outdir, 0700) ) { warn "cannot create $outdir: $!\n"; next; }
+        # List form bypasses the shell, so unusual file names are passed safely.
+        system('fastqc', '-o', $outdir, $file) == 0
+            or warn "fastqc failed for $file (wait status ", $?, ")\n";
+    }
+}
--- a/rnaseq/step1_fastqc/00_fastqc.sh
+++ b/rnaseq/step1_fastqc/00_fastqc.sh
@@ -0,0 +1,27 @@
+#!/bin/bash
+#SBATCH --job-name=FastQC.for.alveolar_type_2
+#SBATCH --comment=FastQC.for.alveolar_type_2
+#SBATCH --time=48:00:00
+#SBATCH --mincpus=2
+#SBATCH --mem=20G
+#SBATCH --qos=priority
+
+# For 173 samples, it will take about 24 hrs to run with about 15Gb of memory.
+# Should probably parallelize the perl script/make it a bash/slurm script.
+
+module purge
+module load Perl/5.26.2-foss-2015b-bare
+module load BioPerl/1.6.924-foss-2015b-Perl-5.22.0
+module load Java/11.0.2
+module load FastQC/0.11.7-Java-1.8.0_144-unlimited_JCE
+
+# Please see
+# https://www.youtube.com/watch?v=0Rj_xNuyOyQ
+
+cd /groups/umcg-griac/tmp04/projects/umcg-rbults/alveolar_type2_fastq/
+perl scripts/00_fastqc.pl
+
+mkdir rene_FastQC.results
+find . -maxdepth 1 -type d -iname "*_R[123]" -exec mv {} ./rene_FastQC.results/ \;
+#find . -maxdepth 1 -type f -iname "*.htm*" -exec mv {} ./FastQC.results/ \;
+