added description of general files

added fastQC code snippet (.pl and .sh scripts)
2021-02-22 15:20:41 +01:00 · 2021-02-15 15:54:21 +01:00
15 changed files with 80 additions and 453 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +0,0 @@
-
-.DS_Store
-*.nosync
-*.RData
--- a/rnaseq/general_files/describing_general_files.docx
+++ b/rnaseq/general_files/describing_general_files.docx
--- a/rnaseq/step1_fastqc/00_fastqc.pl
+++ b/rnaseq/step1_fastqc/00_fastqc.pl
@@ -0,0 +1,23 @@
+#!/usr/bin/perl -w
+use strict;
+
+# this script was made with consideration for UMI-deduplicating.
+# this is because there are three .fastq files for each sample. 
+# the provider states the info about which file contains which info,
+# but in our case, from GenomeScan in Leiden, R2 contains the UMI read.
+# R1 and R3 contain sequencing information from paired-end sequencing
+
+foreach my $file1 ( <*_R1.fastq.gz> ) {
+    my $file2 = $file1;
+    $file2 =~ s/\_R1\./_R2./;
+    my $file3 =~ s/\_R3\./_R2./;
+    die "file1==file2" if $file1 eq $file2;
+    my $sample = $file1;
+    $sample =~ s/\_R1\.fastq\.gz$//;
+    mkdir $sample.'_R1', 0700;
+    system join(' ', 'fastqc', '-o', $sample.'_R1', $file1);
+    mkdir $sample.'_R2', 0700;
+    system join(' ', 'fastqc', '-o', $sample.'_R2', $file2);
+    mkdir $sample.'_R3', 0700;
+    system join(' ', 'fastqc', '-o', $sample.'_R3', $file3)
+}
--- a/rnaseq/step1_fastqc/00_fastqc.sh
+++ b/rnaseq/step1_fastqc/00_fastqc.sh
@@ -0,0 +1,27 @@
+#!/bin/bash
+#SBATCH --job-name=FastQC.for.alveolar_type_2
+#SBATCH --comment=FastQC.for.alveolar_type_2
+#SBATCH --time=48:00:00
+#SBATCH --mincpus=2
+#SBATCH --mem=20G
+#SBATCH --qos=priority
+
+# For 173 samples, it will take about 24 hrs to run with about 15Gb of memory.
+# Should probably parallelize the perl script/make it a bash/slurm script.
+
+module purge
+module load Perl/5.26.2-foss-2015b-bare
+module load BioPerl/1.6.924-foss-2015b-Perl-5.22.0
+module load Java/11.0.2
+module load FastQC/0.11.7-Java-1.8.0_144-unlimited_JCE
+
+# Please see
+# https://www.youtube.com/watch?v=0Rj_xNuyOyQ
+
+cd /groups/umcg-griac/tmp04/projects/umcg-rbults/alveolar_type2_fastq/
+perl scripts/00_fastqc.pl
+
+mkdir rene_FastQC.results
+find . -maxdepth 1 -type d -iname "*_R[123]" -exec mv {} ./rene_FastQC.results/ \;
+#find . -maxdepth 1 -type f -iname "*.htm*" -exec mv {} ./FastQC.results/ \;
+
--- a/rnaseq/step1_fastqc/snippet.sh
+++ b/rnaseq/step1_fastqc/snippet.sh
@@ -1,12 +0,0 @@
-#!/bin/bash
-R1="sample1_R1.fastq.gz"
-R2="sample1_R2.fastq.gz"
-
-PROJECT_DIRECTORY="/groups/umcg-griac/tmp01/rawdata/$(whoami)/rnaseq"
-FASTQC_OUT="${PROJECT_DIRECTORY}/step1/"
-mkdir -p "${FASTQC_OUT}"
-
-# Run FastQC on paired-end data.
-fastqc \
-    -o "${FASTQC_OUT}" \
-    "${R1}" "${R2}"
--- a/rnaseq/step2_trim/snippet.sh
+++ b/rnaseq/step2_trim/snippet.sh
@@ -1,54 +1,36 @@
+## this script is to generate jobs of trimming for each samples on the cluster
+## please run this script first and then submit the jobs for each samples
+## reference: http://www.usadellab.org/cms/?page=trimmomatic
+
 #!/bin/bash
-#
-# Reference: http://www.usadellab.org/cms/?page=trimmomatic
+# $1 indicates the path of raw samples. 
+# In the input folder, one sample has one independent folder with two pair-end f
+astq files. 
+# The folder name should be the sample name. 
+# the fastq file should be sample_1.fastq and sample_2.fastq 
+# please prepare a sample.list that include file names for each sample

+out="/ * your output folder * /"
+input="/ * your input folder * /"
+cat sample.list | while read line

-module load Trimmomatic
-PROJECT_DIRECTORY="/groups/umcg-griac/tmp01/rawdata/$(whoami)/rnaseq"
-FASTQ_OUT="${PROJECT_DIRECTORY}/step2/"
-mkdir -p "${FASTQ_OUT}"
+do
+sample=$(echo $line)
+echo '#!/bin/bash'  > rnaseq.${sample}.sh
+echo "#SBATCH --job-name=RNAseq.${sample}" >> rnaseq.${sample}.sh
+echo "#SBATCH --error=RNAseq.${sample}.err" >> rnaseq.${sample}.sh
+echo "#SBATCH --output=RNAseq.${sample}.out" >> rnaseq.${sample}.sh
+echo "#SBATCH --mem=15gb" >> rnaseq.${sample}.sh
+echo "#SBATCH --time=6:00:00" >> rnaseq.${sample}.sh
+echo "#SBATCH --cpus-per-task=6" >> rnaseq.${sample}.sh

+echo "ml Java" >>rnaseq.${sample}.sh

-# Adapters can be found at
-# https://github.com/timflutre/trimmomatic/tree/master/adapters
-# But should be verified with FastQC, or in another way.
+echo "java -jar /* your folder of software */trimmomatic-0.36.jar PE \
+  -phred33 /$input/${sample}\_1.fq.gz /$input/${sample}\_2.fq.gz \
+   $out/trimmomatic/${sample}\_1_paired.fq $out/trimmomatic/${sample}\_1_unpaired.fq \
+   $out/trimmomatic/${sample}\_2_paired.fq $out/trimmomatic/${sample}\_2_unpaired.fq \
+   ILLUMINACLIP: TruSeq3-PE.fa:2:30:10 \
+   LEADING:3 TRAILING:3 SLIDINGWINDOW:4:25 HEADCROP:8 MINLEN:50" >> rnaseq.${sample}.sh

-
-# Trimmomatic example Paired end data.
-#
-# Flags:
-# - ILLUMINACLIP: Cut adapter and other illumina-specific sequences from the
-#                 read.
-# - SLIDINGWINDOW: Perform a sliding window trimming, cutting once the average
-#                  quality within the window falls below a threshold.
-# - LEADING: Cut bases off the start of a read, if below a threshold quality.
-# - TRAILING: Cut bases off the end of a read, if below a threshold quality.
-# - HEADCROP: Cut the specified number of bases from the start of the read.
-# - MINLEN: Drop the read if it is below a specified length.
-java -jar $EBROOTTRIMMOMATIC/trimmomatic.jar PE \
-  -phred33 \
-  sample1_R1.fastq.gz \
-  sample1_R2.fastq.gz \
-  "${FASTQ_OUT}/sample1_R1_paired.fastq.gz" \
-  "${FASTQ_OUT}/sample1_R1_unpaired.fastq.gz" \
-  "${FASTQ_OUT}/sample1_R2_paired.fastq.gz" \
-  "${FASTQ_OUT}/sample1_R2_unpaired.fastq.gz" \
-  ILLUMINACLIP:TruSeq3-PE.fa:2:30:10 \
-  LEADING:3 \
-  TRAILING:3 \
-  SLIDINGWINDOW:4:25 \
-  HEADCROP:8 \
-  MINLEN:50
-
-
-# Example single end data.
-java -jar $EBROOTTRIMMOMATIC/trimmomatic.jar SE \
-  -phred33 \
-  sample1.fastq.gz \
-  output.fastq.gz \
-  ILLUMINACLIP:TruSeq3-SE:2:30:10 \
-  LEADING:3 \
-  TRAILING:3 \
-  SLIDINGWINDOW:4:15 \
-  HEADCROP:8 \
-  MINLEN:50
+done
--- a/rnaseq/step3_align/snippet.sh
+++ b/rnaseq/step3_align/snippet.sh
@@ -1,63 +0,0 @@
-#!/bin/bash
-#
-# Align reads against reference genome.
-
-REFERENCE_DATA="/groups/umcg-griac/prm03/rawdata/reference/genome"
-
-PROJECT_DIRECTORY="/groups/umcg-griac/tmp01/rawdata/$(whoami)/rnaseq"
-# Store the generated `sample1_Aligned.sortedByCoord.out.bam` in this dir.
-ALIGNMENT_OUTPUT="${PROJECT_DIRECTORY}/step3/alignment"
-mkdir -p "${ALIGNMENT_OUTPUT}"
-
-# 1) Generate genome index (optional).
-#
-# Depending on your read size, reference genome and annotation, you may need to
-# generate a new genome index. In most cases, this is not necessary and you can
-# directly use the pre-build genome index from the cluster:
-#
-# GENOME_INDEX="${REFERENCE_DATA}/index_GRCh38_gtf100_overhang100"
-#
-# and ignore the first STAR command below.
-#
-# N.B.:
-# - We're assuming a read size of 100 bp (--sjdbOverhang 100). An alternative
-#   cut-off is 150, for low-input methods. In general, refer back to the
-#   previous quality control steps if you are unsure about the size. In case of
-#   reads of varying length, the ideal value is max(ReadLength)-1.
-# - If you're using gzip compressed reference data, i.e., .gtf.gz and fa.gz,
-#   pass the `--readFilesCommand zcat` flag.
-
-# Store created genome index in this location:.
-GENOME_INDEX="${PROJECT_DIRECTORY}/step3/genome_index"
-mkdir -p "${GENOME_INDEX}"
-
-# Storage location reference data on Gearshift.
-GTF_FILE="${REFERENCE_DATA}/Homo_sapiens.GRCh38.100.gtf"
-FASTA_FILE="${REFERENCE_DATA}/Homo_sapiens.GRCh38.dna.primary_assembly.fa"
-
-STAR \
-    --runThreadN 8 \
-    --runMode genomeGenerate \
-    --sjdbOverhang 100 \
-    --genomeFastaFiles ${FASTA_FILE} \
-    --sjdbGTFfile ${GTF_FILE} \
-    --genomeDir ${GENOME_INDEX}
-
-
-# 2) Do the actual alignment.
-#
-# N.B.:
-# - We are assuming paired-end, gzip compressed (--readFilesCommand zcat)  FastQ
-#   files.
-
-# The compressed, paired-end, FastQ's after trimming (step 2).
-R1="${PROJECT_DIRECTORY}/step2/sample1_R1_paired.fastq.gz"
-R2="${PROJECT_DIRECTORY}/step2/sample1_R2_paired.fastq.gz"
-
-STAR \
-    --runThreadN 8 \
-    --readFilesCommand zcat \
-    --readFilesIn "${R1}" "${R2}" \
-    --outSAMtype BAM SortedByCoordinate \
-    --genomeDir ${GENOME_INDEX} \
-    --outFileNamePrefix "${ALIGNMENT_OUTPUT}/sample1_"
--- a/rnaseq/step4_multiqc/snippet.sh
+++ b/rnaseq/step4_multiqc/snippet.sh
@@ -1,5 +0,0 @@
-#!/bin/bash
-module load multiqc
-
-PROJECT_DIRECTORY="/groups/umcg-griac/tmp01/rawdata/$(whoami)/rnaseq"
-multiqc "${PROJECT_DIRECTORY}"
--- a/rnaseq/step5_count/snippet.sh
+++ b/rnaseq/step5_count/snippet.sh
@@ -1,28 +0,0 @@
-#!/bin/bash
-PROJECT_DIRECTORY="/groups/umcg-griac/tmp01/rawdata/$(whoami)/rnaseq"
-COUNT_OUTPUT="${PROJECT_DIRECTORY}/step5"
-mkdir -p "${COUNT_OUTPUT}"
-
-# Storage location of annotation on Gearshift.
-REFERENCE_DATA="/groups/umcg-griac/prm03/rawdata/reference/genome"
-GTF_FILE="${REFERENCE_DATA}/Homo_sapiens.GRCh38.100.gtf"
-
-# Where our alignment file was stored.
-BAM="${PROJECT_DIRECTORY}/step3/alignment/sample1_Aligned.sortedByCoord.out.bam"
-
-# Compute counts using htseq-count.
-#
-# N.B.:
-# - If you are processing multiple files, consider using the `--nprocesses` flag
-#   to distribute computation of the files to different cores.
-# - The BAM file must be position sorted. If you used STAR with the
-#   `SortedByCoordinate` option you should be okay. If not, sort your BAM file
-#   using `samtools sort`.
-# - By default, strand aware library preparation is assumed. If not, specify the
-#   `--stranded` flag.
-htseq-count \
-    --order pos \
-    -f bam \
-    ${BAM} \
-    ${GTF_FILE} \
-    > ${COUNT_OUTPUT}/counts.txt
--- a/rnaseq/step6_overall_QC/01
+++ b/rnaseq/step6_overall_QC/01
@@ -1,81 +0,0 @@
-# Principle Component Analysis
-# Normalized with limma::voom
-library(limma)
-library(tidyverse)
-
-
-# expression.data. Has columns:
-# - Gene (gene identifier)
-# - [Sample identifiers]
-expression.data <- "mrna_counts_table.csv"
-# results.dir. The directory of where to put the resulting tables (and later your plots.)
-results.dir <- "results"
-
-
-# PCA variables
-do.center = TRUE
-do.scale = FALSE
-
-
-# The analysis
-# We use prcomp to calculate the PCAs. Afterwards you should plot the results.
-norm.expr.data <- expression.data %>%
-	tibble::column_to_rownames("Gene")
-norm.expr.data <- norm.expr.data[rowSums(norm.expr.data) >= 10,] %>%
-	limma::voom() %>%
-	as.matrix()
-
-# Principle Component analysis
-results.dir.pca <- file.path(results.dir, "principle.components")
-dir.create(results.dir.pca, recursive=TRUE)
-
-norm.expr.data.pcs <- norm.expr.data %>%
-  t() %>%
-  stats::prcomp(
-  	center = do.center,
-  	scale. = do.scale
-  )
-
-# Write summary of PCAs to files
-pcs.summary <- summary(norm.expr.data.pcs)
-pcs.summary$importance %>%
-	t() %>%
-	as.data.frame() %>%
-	tibble::rownames_to_column("PC.name") %>%
-	readr::write_csv(
-		file.path(results.dir.pca, "importance.csv")
-	)
-
-pcs.summary$x %>%
-	t() %>%
-	as.data.frame() %>%
-	tibble::rownames_to_column("ensembl.id") %>%
-	readr::write_csv(
-		file.path(results.dir.pca, "values.csv")
-	)
-
-pcs.summary$rotation %>%
-	t() %>%
-	as.data.frame() %>%
-	tibble::rownames_to_column("sample.id") %>%
-	readr::write_csv(
-		file.path(results.dir.pca, "rotation.csv")
-	)
-
-data.frame(
-		rownames = names(pcs.summary$center),
-		center = pcs.summary$center,
-		scale = pcs.summary$scale
-	) %>%
-	readr::write_csv(
-		file.path(results.dir.pca, "rest.csv")
-	)
-
-# Not saved: pcs.summary$sdev,
-
-
-# Next thing to do:
-# - (Optional) scree plot - to determine the optimal cutoff for PCA inclusion based on explaination of variance
-# - (Optional) eigencorplot - to correlate PCAs to clinical variables so that you know which PCA to include for which analysis
-# - (Optional) pairsplot - plot multiple PCAs against each other in a single figure
-# - Plot the first couple of PCAs against each other
--- a/rnaseq/step6_overall_QC/02
+++ b/rnaseq/step6_overall_QC/02
@@ -1,112 +0,0 @@
-# Gender QC
-# Normalized with limma::voom
-library(GSVA)
-library(limma)
-library(edgeR)
-library(tidyverse)
-
-
-# expression.data. Has columns:
-# - Gene (gene identifier)
-# - [Sample identifiers]
-expression.data <- "mrna_counts_table.csv"
-# master.Table. Has columns:
-# - GenomeScan_ID
-# - gender, levels = c("male", "female")
-# - age
-# - factor(smoking.status, levels = c("Ex-smoker", "Current smoker"))
-master.Table <- "patient_table.csv"
-# results.dir. The directory of where to put the resulting tables (and later your plots.)
-results.dir <- "results"
-
-
-# The analysis
-# We first do a differential expression analysis on gender using EdgeR.
-# Afterwards you should plot these results.
-x.genes <- gene.data %>%
-    dplyr::filter(chromosome_name == "X") %>%
-    dplyr::pull(ensembl_gene_id)
-
-y.genes <- gene.data %>%
-    dplyr::filter(chromosome_name == "Y") %>%
-    dplyr::pull(ensembl_gene_id)
-
-# Gender QC
-results.dir.gender <- file.path(results.dir, "gender.check")
-dir.create(results.dir.gender, recursive=TRUE)
-
-# Differential Expression on Gender
-gender.qc.patients <- master.Table %>%
-    dplyr::filter(
-        !is.na(GenomeScan_ID)
-    ) %>%
-    dplyr::mutate(
-        gender = factor(gender, levels = c("male", "female")),
-        age = as.numeric(age),
-        smoking.status = factor(smoking.status, levels = c("Ex-smoker", "Current smoker"))
-    ) %>%
-    dplyr::filter(
-        !is.na(gender)
-    )
-gender.qc.sample.order <- gender.qc.patients %>%
-    dplyr::pull(GenomeScan_ID)
-
-gender.qc.expression.data <- expression.data %>%
-    tibble::column_to_rownames("Gene") %>%
-    select.columns.in.order(gender.qc.sample.order) %>%
-    as.matrix()
-
-design <- model.matrix( ~0 + gender, data = gender.qc.patients)
-
-DGEL <- edgeR::DGEList(gender.qc.expression.data)
-keep <- edgeR::filterByExpr(DGEL)
-keep[names(keep) %in% x.genes] <- TRUE
-keep[names(keep) %in% y.genes] <- TRUE
-DGEL <- DGEL[keep, , keep.lib.sizes=FALSE]
-DGEL <- edgeR::calcNormFactors(DGEL, method = "TMM")
-
-DGEL <- edgeR::estimateDisp(DGEL, design)
-fit <- edgeR::glmQLFit(DGEL,design)
-
-contrasts <- limma::makeContrasts(
-    gender = gendermale - genderfemale,
-    levels = design
-)
-qlf   <- edgeR::glmQLFTest(fit, contrast = contrasts[,"gender"])
-
-gender.qc.results <- edgeR::topTags(
-        qlf,
-        n=nrow(DGEL)
-    )$table %>%
-    tibble::rownames_to_column("ensembl.id") %>%
-    dplyr::left_join(
-        y = gene.data,
-        by = c("ensembl.id" = "ensembl_gene_id")
-    ) %>%
-    readr::write_csv(
-        file.path(results.dir.gender, "differential.expression.on.gender.csv")
-    )
-
-# Plotting of gender expression
-results.dir.gender.plot <- file.path(results.dir.gender, "img")
-dir.create(results.dir.gender.plot, recursive=TRUE)
-
-gender.qc.genes.to.plot <- gender.qc.results %>%
-    dplyr::arrange(PValue) %>%
-    dplyr::group_by(chromosome_name) %>%
-    dplyr::filter(
-        (
-            chromosome_name %in% c("X", "Y") &
-            FDR < 0.05 &
-            dplyr::row_number() <= 5
-        )
-    )
-
-
-# Next thing to do:
-# - Plot the normalized expressino values for the genes in gender.qc.genes.to.plot in a boxplot, split and colored by gender.
-# - (Optional) Do a GSVA with as genesets the genes found in gender.qc.genes.to.plot. Plot the boxplots as per the previous point.
-# - (Optional) Plot the number of Y-chromosome reads devided by the number of X chromosome reads in a boxplot as per the first point.
-#     x=$(samtools view -q 20 -f 2 $bam_file X | wc -l)
-#     y=$(samtools view -q 20 -f 2 $bam_file Y | wc -l)
-# - (Optional) Plot the number of Y-chromosome SNPs devided by the number of X chromosome SNPs in a boxplot as per the first point.
--- a/rnaseq/step6_overall_QC/03
+++ b/rnaseq/step6_overall_QC/03
@@ -1,39 +0,0 @@
-# Total counts per sample
-# Normalized with limma::voom
-library(limma)
-library(tidyverse)
-
-
-# expression.data. Has columns:
-# - Gene (gene identifier)
-# - [Sample identifiers]
-expression.data <- "mrna_counts_table.csv"
-# master.Table. Has columns:
-# - GenomeScan_ID
-# - gender, levels = c("male", "female")
-# - age
-# - factor(smoking.status, levels = c("Ex-smoker", "Current smoker"))
-master.Table <- "patient_table.csv"
-# results.dir. The directory of where to put the resulting tables (and later your plots.)
-results.dir <- "results"
-
-
-# The analysis
-# We calculate the number of mapped reads per sample.
-total.count.per.sample <- expression.data %>%
-	tibble::column_to_rownames("Gene") %>%
-	colSums()
-
-data.frame(
-		sample = names(total.count.per.sample),
-		counts = as.numeric(total.count.per.sample)
-	) %>%
-	readr::write_csv(file.path(results.dir, "total.counts.per.sample.csv"))
-
-
-# Next thing to do:
-# - Check the number of reads per sample in total.counts.per.sample.csv
-# - Plot the reads distribution (all reads) per sample in a boxplot.
-# - (Optional) Calculate the number of unmapped, multimapped, unique mapped to
-#   feature and unique mapped to no feature and plot these in a stacked bar graph.
-
--- a/rnaseq/step6_overall_QC/snippet.R
+++ b/rnaseq/step6_overall_QC/snippet.R
--- a/rnaseq/umi/07_remove_dups.pl
+++ b/rnaseq/umi/07_remove_dups.pl
@@ -1,61 +0,0 @@
-#!/usr/bin/perl -w
-use strict;
-use Parallel::ForkManager;
-# this script creats one file with UMI unique reads and one with UMI duplicated reads
-# as input you need aligned sorted by coordinate bam file
-my @torun = ();
-foreach my $file ( <*Aligned.sortedByCoord.out.bam> ) {
-    push @torun, $file;
-}
-
-my $pm = Parallel::ForkManager->new( 12 );
-foreach my $file ( @torun ) {
-    my $sample = $file;
-    $sample =~ s/Aligned\.sortedByCoord\.out\.bam$//;
-    next if -s $sample.'_uniq.bam';
-    warn "Parsing $sample\n";
-    $pm->start and next;
-    my %seen = ();
-    my %duplicates = ();
-    my ( $uniqs, $dups ) = ( 0, 0 );
-    open F, 'samtools view -h '.$file.' |';
-    open F1, '| samtools view -bS - >'.$sample.'_uniq.bam';
-    open F2, '| samtools view -bS - >'.$sample.'_dups.bam';
-    while ( <F> ) {
-        if ( m/^\@/ ) {
-            print F1;
-            print F2;
-            next;
-        }
-        my ( $id, $flag, $chr, $pos, $mapq, $cigar, $chr2, $pos2, $tlen ) = split /\t/;
-        next if $flag & 256 or $flag & 512 or $flag & 1024; #skip if the read is not primary alignment/read fails platform/vendor quality checks/read is PCR or optical duplicate
-#        foreach ( 256, 512, 1024 ) { $flag-=$_ if $flag&$_ }
-        my ( $bc ) = $id =~ m/\:([GATCN\d]+)$/; #extract UMI barcode
-        my $uniq = join( ':', $chr, $pos, $flag, $tlen, $bc );
-        my $pos_ = $pos-1;
-        while ( $cigar =~ m/(\d+)([SHMDIN=])/g ) {
-            $pos_+=$1 if $2 eq 'M' or $2 eq '=' or $2 eq 'D' or $2 eq 'N'; # find position for minus strand
-        }
-        my $uniq2 = join( ':', $chr, $pos_, $flag, $tlen, $bc );
-        if ( exists($duplicates{$id}) or # already marked as duplicate
-            ( not($flag&16) and ++$seen{ $uniq } > 1 ) or # plus strand
-            ( $flag&16 and ++$seen{ $uniq2 } > 1 ) #minus strand
-            ) {
-            print F2;
-            $dups++;
-            $duplicates{$id} = 1;
-        }
-        else {
-            print F1;
-            $uniqs++;
-        }
-    }
-    close F;
-    close F1;
-    close F2;
-    warn "Unique: $uniqs (", 100*$uniqs/($uniqs+$dups), ")\n";
-    system( 'samtools', 'index', $sample.'_uniq.bam' );
-#    last;
-    $pm->finish;
-}
-$pm->wait_all_children;
--- a/rnaseq/umi/snippet.pl
+++ b/rnaseq/umi/snippet.pl
Author	SHA1	Message	Date
Rene Bults	e3c88c059f	added description of general files	2021-02-22 15:20:41 +01:00
Rene Bults	1110d00e8c	added fastQC code snippet (.pl and .sh scripts)	2021-02-15 15:54:21 +01:00