Add 'rnaseq/step5_count/snippet_counts'

2021-02-12 15:54:26 +01:00
2 changed files with 18 additions and 53 deletions
--- a/rnaseq/step3_align/snippet.sh
+++ b/rnaseq/step3_align/snippet.sh
@@ -1,53 +0,0 @@
 #!/bin/bash
 #
 # Align reads against reference genome.
 STORAGE="/groups/umcg-griac/tmp04/rawdata/$(whoami)/step3"
 # Store genome index in this location:.
 GENOME_INDEX="${STORAGE}/genome_index"
 mkdir -p "${GENOME_INDEX}"
 # Store the generated `Aligned.sortedByCoord.out.bam` in this dir.
 ALIGNMENT_OUTPUT="${STORAGE}/alignment"
 mkdir -p "${ALIGNMENT_OUTPUT}"
 # 1) Generate genome index.
 #
 # N.B.:
 # - We're assuming a read size of 100 bp (--sjdbOverhang 100). Refer back to the
 #   previous quality control steps if you are unsure about the size. In case of
 #   reads of varying length, the ideal value is max(ReadLength)-1.
 # - We're using gzip compressed reference data (--readFilesCommand zcat), i.e.,
 #   .gtf.gz and fa.gz. If not, you can remove the `zcat` flag.
 # Storage location reference data (in this case on calculon).
 REFERENCE_DATA="/groups/umcg-griac/prm02/rawdata/reference/genome"
 GTF_FILE="${REFERENCE_DATA}/Homo_sapiens.GRCh38.100.gtf.gz"
 FASTA_FILE="${REFERENCE_DATA}/Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz"
 STAR \
    --runThreadN 8 \
    --runMode genomeGenerate \
    --readFilesCommand zcat \
    --sjdbOverhang 100 \
    --genomeFastaFiles ${FASTA_FILE} \
    --sjdbGTFfile ${GTF_FILE} \
    --genomeDir ${GENOME_INDEX}
 # 2) Do the actual alignment.
 #
 # N.B.:
 # - We are assuming paired-end, gzip compressed (--readFilesCommand zcat)  FastQ
 #   files.
 # THe compressed paired-end FastQ's that we are aligning.
 R1="sample1_R1.fastq.gz"
 R2="sample1_R2.fastq.gz"
 STAR \
    --runThreadN 8 \
    --readFilesCommand zcat \
    --readFilesIn "${R1}" "${R2}" \
    --outSAMtype BAM SortedByCoordinate \
    --genomeDir ${GENOME_INDEX} \
    --outFileNamePrefix "${ALIGNMENT_OUTPUT}"
--- a/rnaseq/step5_count/snippet_counts
+++ b/rnaseq/step5_count/snippet_counts
@@ -0,0 +1,18 @@
 ## Generate Raw Counts using FeatureCounts/HTSeqCounts 
 ## Load package Subread (packge within which FeatureCounts is present)
 + ml Subread/1.6.4-foss-2015b FeatureCounts
 ## Features used in FeatureCounts : 
 ## -s : reversely stranded 
 ## -T : number of threads used 
 ## -a : reference genome 
 ## -o : output file 
 + featureCounts \
 + --a "path to reference genome" \
 + --T 6 \
 + --o "path to output file" \
 + --s 2