system_genetics/rnaseq/step2_trim/snippet.sh

64 lines
1.7 KiB
Bash

#!/bin/bash
#SBATCH --job-name=trimming
#SBATCH --time=2-00:00:00
#SBATCH --ntasks=1
#SBATCH --mem=15G
#SBATCH --qos=regular
# Strict mode: stop on the first failing command, on use of an unset variable
# and on a failing pipeline stage; trace every command as it is executed.
set -o errexit
set -o nounset
set -o xtrace
set -o pipefail

# Drop whatever modules are currently loaded, then load TrimGalore.
module purge
module load TrimGalore

# Input and output locations, all relative to the current working directory.
dir_raw_fastq="$(pwd)/fastq/raw"
dir_trimmed_fastq="$(pwd)/fastq/trimmed"
dir_trimmed_fastq_reports="${dir_trimmed_fastq}/reports"

# Adapter sequences; which one is used depends on the read tag in the file name.
adapter_3p="TGGAATTCTCGG" # used for files tagged _R1
adapter_5p="GATCGTCGGACT" # used for files tagged _R2

# Create the reports directory (and its parents) unless it already exists.
if [[ ! -d "${dir_trimmed_fastq_reports}" ]]; then
  mkdir -p "${dir_trimmed_fastq_reports}"
fi
# Trim the read-specific adapter from every raw FASTQ file of each sample in
# samples.csv (first column: GSID), one background trim_galore job per file,
# then move the generated reports into the reports directory.
# NOTE(review): all jobs are started at once although the job header requests
# a single task and 15G of memory - consider throttling for large batches.
trim_pids=()   # PIDs of the background trim_galore jobs
trim_inputs=() # input file of each job, same index as trim_pids

# `Sample` only absorbs the remaining CSV columns. The `|| [[ -n ... ]]` part
# keeps the last row when samples.csv does not end with a newline.
# shellcheck disable=SC2034
while IFS=, read -r GSID Sample || [[ -n "${GSID}" ]]; do
  echo ">> Executing trimming of $GSID (${#GSID})"

  # Only 18-character IDs are individual samples - don't include sample pools.
  if (( ${#GSID} != 18 )); then
    echo "Skipped."
    continue
  fi

  # NUL-delimited find output keeps paths with spaces or glob characters intact.
  while IFS= read -r -d '' fqFile; do
    fq_name=${fqFile##*/}

    # The existence check must look where trim_galore writes its result
    # (--output_dir), not next to the raw input file.
    newFilename="${dir_trimmed_fastq}/${fq_name%.fastq.gz}_trimmed.fq.gz"
    if [[ -f "${newFilename}" ]]; then
      echo ">>File exists ${newFilename}. Skipping"
      continue
    fi

    echo ">>File: ${fqFile}"

    # Choose the adapter from the read tag in the file name. A file without a
    # tag is skipped instead of reusing the previous file's adapter (or
    # aborting on an unbound variable for the very first file).
    case "${fq_name}" in
      *_R1*) adapter_seq=${adapter_3p} ;;
      *_R2*) adapter_seq=${adapter_5p} ;;
      *)
        echo ">>No _R1/_R2 tag in ${fqFile}. Skipping" >&2
        continue
        ;;
    esac

    echo ">>Trimming with sequence ${adapter_seq}"
    # The minimum read length equals the adapter length. stdin is detached so
    # the job can never consume the loops' input streams.
    trim_galore \
      --adapter "${adapter_seq}" \
      --length "${#adapter_seq}" \
      --output_dir "${dir_trimmed_fastq}" \
      --fastqc_args "--noextract" \
      "${fqFile}" < /dev/null &
    trim_pids+=("$!")
    trim_inputs+=("${fqFile}")
  done < <(find -L "$dir_raw_fastq" -maxdepth 1 -type f \
    -iname "*${GSID}*.fastq.gz" -print0)
done < samples.csv

# A bare `wait` always succeeds; wait per PID so failed jobs are noticed.
failed_jobs=0
for (( job = 0; job < ${#trim_pids[@]}; job++ )); do
  if ! wait "${trim_pids[job]}"; then
    echo ">>Trimming failed for ${trim_inputs[job]}" >&2
    failed_jobs=$((failed_jobs + 1))
  fi
done

# Collect the reports that exist; a pattern without matches stays literal and
# is filtered out, so a run that produced no reports does not abort here.
reports=()
for report in \
  "${dir_trimmed_fastq}"/*.zip \
  "${dir_trimmed_fastq}"/*.txt \
  "${dir_trimmed_fastq}"/*.html; do
  if [[ -e "${report}" ]]; then
    reports+=("${report}")
  fi
done
if (( ${#reports[@]} > 0 )); then
  mv -- "${reports[@]}" "${dir_trimmed_fastq_reports}/"
fi

# Reports of successful jobs are moved first; only then signal the failure.
if (( failed_jobs > 0 )); then
  echo ">>${failed_jobs} trimming job(s) failed" >&2
  exit 1
fi