#!/bin/bash
#SBATCH --job-name=trimming
#SBATCH --time=2-00:00:00
#SBATCH --ntasks=1
#SBATCH --mem=15G
#SBATCH --qos=regular
#
# Adapter-trim the raw FASTQ files of every sample listed in samples.csv with
# trim_galore, then gather the generated report files in one directory.
#
# Inputs (relative to the directory the job is started from):
#   samples.csv   comma-separated; the first field is the sample ID (GSID)
#   fastq/raw     raw reads, matched by "*<GSID>*.fastq.gz" (case-insensitive)
# Outputs:
#   fastq/trimmed           output directory passed to trim_galore
#   fastq/trimmed/reports   every *.zip, *.txt and *.html found in fastq/trimmed
#
# Exit status: non-zero if an input is missing or any trim_galore job failed.

set -e
set -u
set -x
set -o pipefail

module purge
module load TrimGalore

samples_csv="samples.csv"
dir_raw_fastq="$(pwd)/fastq/raw"
dir_trimmed_fastq="$(pwd)/fastq/trimmed"
dir_trimmed_fastq_reports="$(pwd)/fastq/trimmed/reports"

adapter_3p="TGGAATTCTCGG" # is _R1
adapter_5p="GATCGTCGGACT" # is _R2

# Fail early with a clear message instead of an obscure error further down.
if [[ ! -d "${dir_raw_fastq}" ]]; then
  echo ">>Raw FASTQ directory not found: ${dir_raw_fastq}" >&2
  exit 1
fi
if [[ ! -r "${samples_csv}" ]]; then
  echo ">>Sample sheet not readable: ${samples_csv}" >&2
  exit 1
fi

# -p creates missing parents and is a no-op when the directory already exists.
mkdir -p "${dir_trimmed_fastq_reports}"

# PIDs of the background trim_galore jobs, so each exit status can be checked.
pids=()

# Trim all adapters from the sequences.
# NOTE(review): every matching file is trimmed concurrently while the job
# requests a single task and 15G - confirm this fits the allocation.
# The "|| [[ -n ... ]]" part also processes a last line without a newline.
while IFS=, read -r GSID _ || [[ -n "${GSID}" ]]; do
  echo ">> Executing trimming of $GSID (${#GSID})"

  # check length - don't include sample pools
  if (( ${#GSID} != 18 )); then
    echo "Skipped."
    continue
  fi

  # NUL-delimited so paths containing spaces or glob characters stay intact.
  while IFS= read -r -d '' fqFile; do
    fq_name=${fqFile##*/}

    # The trimmed file is looked up in the output directory, not next to the
    # raw file. Assumes trim_galore names its output <name>_trimmed.fq.gz -
    # TODO confirm against the installed version.
    newFilename="${dir_trimmed_fastq}/${fq_name%.fastq.gz}_trimmed.fq.gz"
    if [[ -f "${newFilename}" ]]; then
      echo ">>File exists ${newFilename}. Skipping"
      continue
    fi

    echo ">>File: ${fqFile}"

    # Pick the adapter from the read tag in the file name. Files without a
    # tag are skipped rather than trimmed with an unset or stale adapter.
    case "${fq_name}" in
      *_R1*) adapter_seq=${adapter_3p} ;;
      *_R2*) adapter_seq=${adapter_5p} ;;
      *)
        echo ">>No _R1/_R2 in ${fq_name}; cannot choose an adapter. Skipping" >&2
        continue
        ;;
    esac

    echo ">>Trimming with sequence ${adapter_seq}"
    # --length is set to the adapter length, as in the original command.
    # stdin is detached so the job can never consume the loops' input.
    trim_galore \
      --adapter "${adapter_seq}" \
      --length "${#adapter_seq}" \
      --output_dir "${dir_trimmed_fastq}" \
      --fastqc_args "--noextract" \
      "${fqFile}" < /dev/null &
    pids+=("$!")
  done < <(find -L "${dir_raw_fastq}" -maxdepth 1 -type f \
    -iname "*${GSID}*.fastq.gz" -print0)
done < "${samples_csv}"

# Wait for every job individually; a bare "wait" would discard failures.
# The ${arr[@]+...} form keeps "set -u" happy when no job was started.
failed=0
for pid in ${pids[@]+"${pids[@]}"}; do
  wait "${pid}" || failed=$((failed + 1))
done

# Collect the report files. nullglob makes unmatched patterns expand to
# nothing, so a run that produced no reports does not abort on mv.
shopt -s nullglob
reports=(
  "${dir_trimmed_fastq}"/*.zip
  "${dir_trimmed_fastq}"/*.txt
  "${dir_trimmed_fastq}"/*.html
)
shopt -u nullglob

if (( ${#reports[@]} > 0 )); then
  mv -- "${reports[@]}" "${dir_trimmed_fastq_reports}"
fi

if (( failed > 0 )); then
  echo ">>${failed} trim_galore job(s) failed" >&2
  exit 1
fi