#!/bin/bash
#SBATCH --job-name=trimming
#SBATCH --time=2-00:00:00
#SBATCH --ntasks=1
#SBATCH --mem=15G
#SBATCH --qos=regular
#
# Forked from GRIAC/system_genetics.
#
# Batch job that trims adapter sequences from raw FASTQ files with
# trim_galore. The interpreter line must be the first line of the file and
# the #SBATCH directives must come before any command, otherwise the
# scheduler does not pick them up.

# Strict mode:
#   -e           abort on the first command that fails
#   -u           treat expansion of an unset variable as an error
#   -x           trace every command into the job log
#   -o pipefail  a pipeline fails when any of its stages fails
set -euxo pipefail

# Unload every currently loaded environment module, then load only the
# TrimGalore module.
# NOTE(review): presumably this module is what puts trim_galore on the
# PATH - confirm on the target cluster.
module purge
module load TrimGalore

# Input and output locations, all relative to the directory the job is
# started from.
dir_raw_fastq="$(pwd)/fastq/raw"
dir_trimmed_fastq="$(pwd)/fastq/trimmed"
dir_trimmed_fastq_reports="$(pwd)/fastq/trimmed/reports"

# Adapter sequences, selected per file by the read tag in the file name.
adapter_3p="TGGAATTCTCGG" # used for files whose name contains _R1
adapter_5p="GATCGTCGGACT" # used for files whose name contains _R2

# None of these values is reassigned later in the script.
readonly dir_raw_fastq dir_trimmed_fastq dir_trimmed_fastq_reports
readonly adapter_3p adapter_5p

# mkdir -p succeeds when the directory already exists, so no separate
# existence test is needed. This also creates the trimmed output directory,
# which is the parent of the reports directory.
mkdir -p "${dir_trimmed_fastq_reports}"

# Trim all adapters from the sequences.
#
# For every sample ID in the first column of samples.csv, each matching
# *.fastq.gz file in the raw directory is trimmed by a background trim_galore
# job. Once all jobs have finished, the generated reports are moved to the
# reports directory and the script fails if any job failed.

#######################################
# Print the adapter sequence that belongs to a FASTQ file.
# Only the file name is inspected, never the directory part of the path.
# Globals:   adapter_3p, adapter_5p (read)
# Arguments: $1 - FASTQ file name or path
# Outputs:   the adapter sequence on stdout
# Returns:   0 if the name contains _R1 or _R2, 1 otherwise
#######################################
adapter_for_fastq() {
  local name=${1##*/}
  case "${name}" in
    *_R1*) printf '%s\n' "${adapter_3p}" ;;
    *_R2*) printf '%s\n' "${adapter_5p}" ;;
    *) return 1 ;;
  esac
}

#######################################
# Print the path of the trimmed file expected for a raw FASTQ file.
# The file lives in the trim_galore output directory, not next to the input.
# Globals:   dir_trimmed_fastq (read)
# Arguments: $1 - path of the raw FASTQ file
# Outputs:   the expected trimmed file path on stdout
#######################################
trimmed_path_for() {
  local name=${1##*/}
  printf '%s/%s\n' "${dir_trimmed_fastq}" "${name/.fastq.gz/_trimmed.fq.gz}"
}

# PIDs of all background trim_galore jobs, so each exit status can be checked.
trim_pids=()

# "|| [[ -n ... ]]" keeps the last line when the file lacks a final newline.
# Everything after the first comma is read into "_" and ignored.
while IFS=, read -r GSID _ || [[ -n "${GSID}" ]]; do
  echo ">> Executing trimming of $GSID (${#GSID})"

  # check length - don't include sample pools
  if (( ${#GSID} != 18 )); then
    echo "Skipped."
    continue
  fi

  # NUL-delimited find output keeps paths with whitespace in one piece.
  while IFS= read -r -d '' raw_fastq; do
    trimmed_fastq=$(trimmed_path_for "${raw_fastq}")
    if [[ -f "${trimmed_fastq}" ]]; then
      echo ">>File exists ${trimmed_fastq}. Skipping"
      continue
    fi

    echo ">>File: ${raw_fastq}"
    # Never reuse the adapter of a previous file: skip files whose read
    # direction cannot be derived from the name.
    if ! adapter_seq=$(adapter_for_fastq "${raw_fastq}"); then
      echo ">>No _R1/_R2 tag in ${raw_fastq}. Skipping" >&2
      continue
    fi
    echo ">>Trimming with sequence ${adapter_seq}"

    # --length is set to the adapter length, as in the original command.
    # NOTE(review): every job is started at once with no cap on concurrency;
    # confirm that the resource request covers this.
    trim_galore \
      --adapter "${adapter_seq}" \
      --length "${#adapter_seq}" \
      --output_dir "${dir_trimmed_fastq}" \
      --fastqc_args "--noextract" \
      "${raw_fastq}" &
    trim_pids+=("$!")
  done < <(find -L "${dir_raw_fastq}" -maxdepth 1 -type f \
    -iname "*${GSID}*.fastq.gz" -print0)
done < samples.csv

# Wait for each job by PID: a bare "wait" discards the exit statuses.
failed_jobs=0
if (( ${#trim_pids[@]} > 0 )); then
  for pid in "${trim_pids[@]}"; do
    if ! wait "${pid}"; then
      failed_jobs=$(( failed_jobs + 1 ))
    fi
  done
fi

# Collect the reports. With nullglob a pattern without matches expands to
# nothing, so a missing report type no longer aborts the script.
report_files=()
if [[ -d "${dir_trimmed_fastq}" ]]; then
  shopt -s nullglob
  report_files=(
    "${dir_trimmed_fastq}"/*.zip
    "${dir_trimmed_fastq}"/*.txt
    "${dir_trimmed_fastq}"/*.html
  )
  shopt -u nullglob
fi
if (( ${#report_files[@]} > 0 )); then
  mv -- "${report_files[@]}" "${dir_trimmed_fastq_reports}"
fi

if (( failed_jobs > 0 )); then
  echo ">>${failed_jobs} trimming job(s) failed" >&2
  exit 1
fi