#!/bin/bash
# Prepares the output directory for the htseq-count read counting step.
#
# Fail fast: abort on any failing command (including a failing `$(whoami)`
# inside an assignment), on use of an unset variable, and on a failing pipeline
# stage. Without this, a failed `whoami` would silently produce a project path
# with an empty user component.
set -euo pipefail

# Per-user project directory; the count table is written to its `step5`
# subdirectory, which is created here if it does not exist yet.
PROJECT_DIRECTORY="/groups/umcg-griac/tmp01/rawdata/$(whoami)/rnaseq"
COUNT_OUTPUT="${PROJECT_DIRECTORY}/step5"
mkdir -p "${COUNT_OUTPUT}"

# Alignment to be counted: the BAM file located under `step3/alignment` of the
# project directory.
BAM="$PROJECT_DIRECTORY/step3/alignment/sample1_Aligned.sortedByCoord.out.bam"

# Gene annotation (GTF, GRCh38 per its file name) from the shared reference
# directory on Gearshift.
REFERENCE_DATA='/groups/umcg-griac/prm03/rawdata/reference/genome'
GTF_FILE="$REFERENCE_DATA/Homo_sapiens.GRCh38.100.gtf"

# Compute counts using htseq-count; the count table is written to
# `counts.txt` in the output directory via stdout redirection.
#
# N.B.:
# - If you are processing multiple files, consider using the `--nprocesses` flag
#   to distribute computation of the files to different cores.
# - `--order pos` tells htseq-count that the BAM file is position sorted. If you
#   used STAR with `--outSAMtype BAM SortedByCoordinate` you should be okay. If
#   not, sort your BAM file using `samtools sort`.
# - By default, htseq-count assumes a strand-specific library
#   (`--stranded=yes`). If your library is unstranded, specify `--stranded=no`;
#   for reverse-stranded protocols, specify `--stranded=reverse`.
# - All expansions are quoted so that a path containing whitespace or glob
#   characters is passed as a single argument.
htseq-count \
    --order pos \
    "${BAM}" \
    "${GTF_FILE}" \
    > "${COUNT_OUTPUT}/counts.txt"