added description of general files

added fastQC code snippet (.pl and .sh scripts)
Update 'rnaseq/step2_trim/snippet.sh'
2021-02-22 15:20:41 +01:00 · 2021-02-15 15:54:21 +01:00 · 2021-02-13 19:13:13 +01:00
4 changed files with 86 additions and 0 deletions
--- a/rnaseq/general_files/describing_general_files.docx
+++ b/rnaseq/general_files/describing_general_files.docx
--- a/rnaseq/step1_fastqc/00_fastqc.pl
+++ b/rnaseq/step1_fastqc/00_fastqc.pl
@@ -0,0 +1,41 @@
+#!/usr/bin/perl -w
+use strict;
+
+# Run FastQC on every read file of each sample in the current directory.
+#
+# This script was made with consideration for UMI deduplication, which is
+# why there are three .fastq files for each sample. The provider states which
+# file contains which reads; in our case (GenomeScan in Leiden) R2 contains
+# the UMI read, and R1 and R3 contain the paired-end sequencing reads.
+#
+# Samples are discovered through their *_R1.fastq.gz file; the R2 and R3 file
+# names are built from the same sample name. Each read file gets its own
+# output directory: <sample>_R1, <sample>_R2 or <sample>_R3.
+
+foreach my $file1 ( <*_R1.fastq.gz> ) {
+    # Sample name = R1 file name without the "_R1.fastq.gz" suffix.
+    my $sample = $file1;
+    $sample =~ s/_R1\.fastq\.gz$//;
+
+    foreach my $read ( 'R1', 'R2', 'R3' ) {
+        my $file   = $sample . '_' . $read . '.fastq.gz';
+        my $outdir = $sample . '_' . $read;
+
+        # A missing R2/R3 file should not stop the remaining samples.
+        unless ( -e $file ) {
+            warn "skipping $file: file not found\n";
+            next;
+        }
+
+        # fastqc does not create the -o directory itself; tolerate reruns.
+        unless ( -d $outdir or mkdir( $outdir, 0700 ) ) {
+            warn "skipping $file: cannot create $outdir: $!\n";
+            next;
+        }
+
+        # List-form system() bypasses the shell, so file names containing
+        # spaces or shell metacharacters are passed to fastqc unchanged.
+        system( 'fastqc', '-o', $outdir, $file ) == 0
+            or warn "fastqc failed for $file (exit status $?)\n";
+    }
+}
--- a/rnaseq/step1_fastqc/00_fastqc.sh
+++ b/rnaseq/step1_fastqc/00_fastqc.sh
@@ -0,0 +1,27 @@
+#!/bin/bash
+#SBATCH --job-name=FastQC.for.alveolar_type_2
+#SBATCH --comment=FastQC.for.alveolar_type_2
+#SBATCH --time=48:00:00
+#SBATCH --mincpus=2
+#SBATCH --mem=20G
+#SBATCH --qos=priority
+
+# For 173 samples, it will take about 24 hrs to run with about 15Gb of memory.
+# Should probably parallelize the perl script/make it a bash/slurm script.
+
+module purge
+module load Perl/5.26.2-foss-2015b-bare
+module load BioPerl/1.6.924-foss-2015b-Perl-5.22.0
+module load Java/11.0.2
+module load FastQC/0.11.7-Java-1.8.0_144-unlimited_JCE
+
+# Please see
+# https://www.youtube.com/watch?v=0Rj_xNuyOyQ
+
+cd /groups/umcg-griac/tmp04/projects/umcg-rbults/alveolar_type2_fastq/
+perl scripts/00_fastqc.pl
+
+mkdir rene_FastQC.results
+find . -maxdepth 1 -type d -iname "*_R[123]" -exec mv {} ./rene_FastQC.results/ \;
+#find . -maxdepth 1 -type f -iname "*.htm*" -exec mv {} ./FastQC.results/ \;
+
--- a/rnaseq/step2_trim/snippet.sh
+++ b/rnaseq/step2_trim/snippet.sh
@@ -0,0 +1,51 @@
+#!/bin/bash
+## This script generates one Trimmomatic job script per sample for the cluster.
+## Please run this script first and then submit the generated
+## rnaseq.<sample>.sh job for each sample.
+## reference: http://www.usadellab.org/cms/?page=trimmomatic
+#
+# Before running, set the paths below:
+#   out             output folder; trimmed reads go to <out>/trimmomatic
+#   input           folder holding the paired-end files of all samples,
+#                   named <sample>_1.fq.gz and <sample>_2.fq.gz
+#   trimmomatic_jar location of the Trimmomatic jar
+# Please also prepare a sample.list in the current folder with one sample name
+# per line.
+
+out="/ * your output folder * /"
+input="/ * your input folder * /"
+trimmomatic_jar="/* your folder of software */trimmomatic-0.36.jar"
+cpus=6   # requested from SLURM and passed to Trimmomatic via -threads
+
+# "|| [ -n ... ]" keeps the last sample when sample.list lacks a final newline.
+while read -r sample || [ -n "$sample" ]
+do
+    # Skip blank lines so no "rnaseq..sh" job is generated.
+    [ -z "$sample" ] && continue
+
+    job="rnaseq.${sample}.sh"
+
+    # Unquoted heredoc: variables are expanded now, and each "\\" becomes a
+    # single line-continuation backslash in the generated job script.
+    cat > "$job" <<EOF
+#!/bin/bash
+#SBATCH --job-name=RNAseq.${sample}
+#SBATCH --error=RNAseq.${sample}.err
+#SBATCH --output=RNAseq.${sample}.out
+#SBATCH --mem=15gb
+#SBATCH --time=6:00:00
+#SBATCH --cpus-per-task=${cpus}
+
+ml Java
+
+mkdir -p "${out}/trimmomatic"
+
+java -jar "${trimmomatic_jar}" PE \\
+  -threads ${cpus} -phred33 \\
+  "${input}/${sample}_1.fq.gz" "${input}/${sample}_2.fq.gz" \\
+  "${out}/trimmomatic/${sample}_1_paired.fq" "${out}/trimmomatic/${sample}_1_unpaired.fq" \\
+  "${out}/trimmomatic/${sample}_2_paired.fq" "${out}/trimmomatic/${sample}_2_unpaired.fq" \\
+  ILLUMINACLIP:TruSeq3-PE.fa:2:30:10 \\
+  LEADING:3 TRAILING:3 SLIDINGWINDOW:4:25 HEADCROP:8 MINLEN:50
+EOF
+done < sample.list
Author	SHA1	Message	Date
Rene Bults	e3c88c059f	added description of general files	2021-02-22 15:20:41 +01:00
Rene Bults	1110d00e8c	added fastQC code snippet (.pl and .sh scripts)	2021-02-15 15:54:21 +01:00
C. Qi	4b61c099f6	Update 'rnaseq/step2_trim/snippet.sh'	2021-02-13 19:13:13 +01:00