Tailored storage locations to calculon.

2021-02-11 10:47:21 +01:00
parent ef4a25f54b
commit d71460a7dd
1 changed files with 12 additions and 9 deletions
--- a/rnaseq/step3_align/snippet.sh
+++ b/rnaseq/step3_align/snippet.sh
@@ -2,7 +2,7 @@
 #
 # Align reads against reference genome.

-STORAGE="/groups/umcg-griac/tmp01/rawdata/$(whoami)/step3"
+STORAGE="/groups/umcg-griac/tmp04/rawdata/$(whoami)/step3"
 # Store genome index in this location:.
 GENOME_INDEX="${STORAGE}/genome_index"
 mkdir -p "${GENOME_INDEX}"
@@ -16,19 +16,22 @@ mkdir -p "${OUTPUT}"
 # - We're assuming a read size of 100 bp (--sjdbOverhang 100). Refer back to the
 #   previous quality control steps if you are unsure about the size. In case of
 #   reads of varying length, the ideal value is max(ReadLength)-1.
+# - Reference data is gzipped (.gtf.gz, .fa.gz), hence --readFilesCommand zcat;
+#   drop it for plain files. NOTE(review): verify genomeGenerate honors it.

-# Where the reference data is stored.
+# Storage location of the reference data (in this case on calculon).
 REFERENCE_DATA="/groups/umcg-griac/prm02/rawdata/reference/genome"
-GTF_FILE="${REFERENCE_DATA}/Homo_sapiens.GRCh38.100.gtf"
-FASTA_FILE="${REFERENCE_DATA}/Homo_sapiens.GRCh38.dna.primary_assembly.fa"
+GTF_FILE="${REFERENCE_DATA}/Homo_sapiens.GRCh38.100.gtf.gz"
+FASTA_FILE="${REFERENCE_DATA}/Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz"

 STAR \
    --runThreadN 8 \
-   --runMode genomeGenerate \
-   --sjdbOverhang 100 \
-   --genomeFastaFiles ${FASTA_FILE} \
-   --sjdbGTFfile ${GTF_FILE} \
-   --genomeDir ${GENOME_INDEX}
+    --runMode genomeGenerate \
+    --readFilesCommand zcat \
+    --sjdbOverhang 100 \
+    --genomeFastaFiles "${FASTA_FILE}" \
+    --sjdbGTFfile "${GTF_FILE}" \
+    --genomeDir "${GENOME_INDEX}"


 # 2) Do the actual alignment.