#!/bin/bash
#
# Count reads per feature for a single aligned sample using htseq-count.
#
# Inputs:
#   - BAM:      alignment produced in step3 (must be position sorted).
#   - GTF_FILE: genome annotation stored on Gearshift.
# Output:
#   - ${COUNT_OUTPUT}/counts.txt (htseq-count's stdout).

# Abort on the first failing command, on use of an unset variable, and on a
# failure in any stage of a pipeline.
set -euo pipefail

PROJECT_DIRECTORY="/groups/umcg-griac/tmp01/rawdata/$(whoami)/rnaseq"
COUNT_OUTPUT="${PROJECT_DIRECTORY}/step5"
mkdir -p "${COUNT_OUTPUT}"

# Storage location of annotation on Gearshift.
REFERENCE_DATA="/groups/umcg-griac/prm03/rawdata/reference/genome"
GTF_FILE="${REFERENCE_DATA}/Homo_sapiens.GRCh38.100.gtf"

# Where our alignment file was stored.
BAM="${PROJECT_DIRECTORY}/step3/alignment/sample1_Aligned.sortedByCoord.out.bam"

# Fail early with a clear message. Without these checks the output redirection
# below would still create an empty counts.txt before htseq-count fails.
if ! command -v htseq-count >/dev/null; then
  printf 'error: htseq-count not found on PATH\n' >&2
  exit 1
fi

for input_file in "${BAM}" "${GTF_FILE}"; do
  if [[ ! -r "${input_file}" ]]; then
    printf 'error: input file missing or not readable: %s\n' "${input_file}" >&2
    exit 1
  fi
done

# Compute counts using htseq-count.
#
# N.B.:
#   - If you are processing multiple files, consider using the `--nprocesses`
#     flag to distribute computation of the files to different cores.
#   - The BAM file must be position sorted. If you used STAR with the
#     `SortedByCoordinate` option you should be okay. If not, sort your BAM file
#     using `samtools sort`.
#   - By default htseq-count assumes a strand-specific library
#     (`--stranded=yes`). For an unstranded library pass `--stranded=no`; for a
#     reverse-stranded library pass `--stranded=reverse`.
htseq-count \
  --order pos \
  -f bam \
  "${BAM}" \
  "${GTF_FILE}" \
  > "${COUNT_OUTPUT}/counts.txt"