Move the step3 storage location from tmp01 to tmp04 and build the STAR genome
index from the gzip compressed reference data.

STAR needs plain text FASTA/GTF files for --runMode genomeGenerate, and
--readFilesCommand only applies to the reads passed via --readFilesIn. The
compressed reference is therefore decompressed into ${STORAGE} first instead
of passing the .gz files to STAR together with `--readFilesCommand zcat`.

Note: the hunks below were edited by hand, so the post-image hash on the index
line is stale; line breaks and indentation were reconstructed.

diff --git a/rnaseq/step3_align/snippet.sh b/rnaseq/step3_align/snippet.sh
index 61af8cd..d419a11 100644
--- a/rnaseq/step3_align/snippet.sh
+++ b/rnaseq/step3_align/snippet.sh
@@ -2,7 +2,7 @@
 #
 # Align reads against reference genome.
 
-STORAGE="/groups/umcg-griac/tmp01/rawdata/$(whoami)/step3"
+STORAGE="/groups/umcg-griac/tmp04/rawdata/$(whoami)/step3"
 # Store genome index in this location:.
 GENOME_INDEX="${STORAGE}/genome_index"
 mkdir -p "${GENOME_INDEX}"
@@ -16,19 +16,25 @@ mkdir -p "${OUTPUT}"
 # - We're assuming a read size of 100 bp (--sjdbOverhang 100). Refer back to the
 #   previous quality control steps if you are unsure about the size. In case of
 #   reads of varying length, the ideal value is max(ReadLength)-1.
+# - The reference data is stored gzip compressed (.gtf.gz and .fa.gz). STAR
+#   needs plain text FASTA/GTF files to generate the index, and
+#   --readFilesCommand only applies to the reads (--readFilesIn), so we
+#   decompress the reference data into ${STORAGE} first.
 
-# Where the reference data is stored.
+# Storage location of the reference data (in this case on calculon).
 REFERENCE_DATA="/groups/umcg-griac/prm02/rawdata/reference/genome"
-GTF_FILE="${REFERENCE_DATA}/Homo_sapiens.GRCh38.100.gtf"
-FASTA_FILE="${REFERENCE_DATA}/Homo_sapiens.GRCh38.dna.primary_assembly.fa"
+GTF_FILE="${STORAGE}/Homo_sapiens.GRCh38.100.gtf"
+FASTA_FILE="${STORAGE}/Homo_sapiens.GRCh38.dna.primary_assembly.fa"
+gzip -dc "${REFERENCE_DATA}/${GTF_FILE##*/}.gz" > "${GTF_FILE}"
+gzip -dc "${REFERENCE_DATA}/${FASTA_FILE##*/}.gz" > "${FASTA_FILE}"
 
 STAR \
   --runThreadN 8 \
   --runMode genomeGenerate \
   --sjdbOverhang 100 \
-  --genomeFastaFiles ${FASTA_FILE} \
-  --sjdbGTFfile ${GTF_FILE} \
-  --genomeDir ${GENOME_INDEX}
+  --genomeFastaFiles "${FASTA_FILE}" \
+  --sjdbGTFfile "${GTF_FILE}" \
+  --genomeDir "${GENOME_INDEX}"
 
 
 # 2) Do the actual alignment.