system_genetics/rnaseq/step5_count/snippet.sh

29 lines
1022 B
Bash

#!/bin/bash
# Count reads per feature with htseq-count (step 5 of the RNA-seq workflow).
#
# Input:  the coordinate-sorted BAM file stored under step3/alignment and the
#         Homo_sapiens.GRCh38.100 GTF annotation.
# Output: ${PROJECT_DIRECTORY}/step5/counts.txt

# Stop on the first failing command or unset variable instead of carrying on
# with a half-initialised environment.
set -euo pipefail

PROJECT_DIRECTORY="/groups/umcg-griac/tmp01/rawdata/$(whoami)/rnaseq"
COUNT_OUTPUT="${PROJECT_DIRECTORY}/step5"
mkdir -p "${COUNT_OUTPUT}"

# Storage location of annotation on Gearshift.
REFERENCE_DATA="/groups/umcg-griac/prm03/rawdata/reference/genome"
GTF_FILE="${REFERENCE_DATA}/Homo_sapiens.GRCh38.100.gtf"

# Where our alignment file was stored.
BAM="${PROJECT_DIRECTORY}/step3/alignment/sample1_Aligned.sortedByCoord.out.bam"

# Verify the inputs before the output redirection below truncates an existing
# counts.txt from an earlier run.
for input in "${BAM}" "${GTF_FILE}"; do
  if [[ ! -r "${input}" ]]; then
    printf 'error: cannot read input file: %s\n' "${input}" >&2
    exit 1
  fi
done

# Compute counts using htseq-count.
#
# N.B.:
# - If you are processing multiple files, consider using the `--nprocesses` flag
#   to distribute computation of the files to different cores.
# - The BAM file must be position sorted. If you used STAR with the
#   `SortedByCoordinate` option you should be okay. If not, sort your BAM file
#   using `samtools sort`.
# - By default, a strand-specific library preparation is assumed
#   (`--stranded=yes`). If your library is unstranded or reverse stranded,
#   pass `--stranded=no` or `--stranded=reverse` respectively.
htseq-count \
  --order pos \
  -f bam \
  "${BAM}" \
  "${GTF_FILE}" \
  > "${COUNT_OUTPUT}/counts.txt"