Cluster bwa mapping
Example of a mapping job script for a computing cluster.
The following shell script submits a mapping job. The input files are: zipped forward and reverse reads, and sequence(s) to map to. The output is a bam and its index of only the reads that map. Can be used to visualise how different individuals map to the same reference.
Submit through
bsub < script.sh
The whole script is below, paste it into a new text file script.sh
#/bin/bash
#BSUB -L /bin/bash
#BSUB -o run_interesting_sequence_individual_output.txt
#BSUB -e run_interesting_sequence_individual-error.txt
#BSUB -J run_interesting_sequence_individual
#BSUB -u [email protected]
#BSUB -N
#BSUB -n 4
#BSUB -R "span[ptile=4]"
#BSUB -R "rusage[mem=10000]"
#BSUB -M 10194304
module add UHTS/Aligner/bwa/0.7.13;
module add UHTS/Analysis/samtools/1.3;
# Variables to change
## Change once per project
INPUTDIR=/scratch/beegfs/monthly/user/Project/input
OUTPUTDIR=/scratch/beegfs/monthly/user/Project/output
## Never change, the files will be copied in a subfolder named after the reads and reference, inside a project folder inside the scratch folder
SCRATCHDIR=/scratch/beegfs/monthly/user/scratch
## Change for each combination of mapped reads to a reference. The reads can be `.fa` or `.fq`. Find and replace in this file `individual` and `interesting_sequence`.
READS1=individual_1.fq
READS2=individual_2.fq
REFERENCE=interesting_sequence.fasta
NAME=interesting_sequence_individual
cd $SCRATCHDIR || exit 1
mkdir $NAME || exit 2
cd $NAME
cp $INPUTDIR/$READS1.gz .
cp $INPUTDIR/$READS2.gz .
cp $INPUTDIR/$REFERENCE .
# unzip, delete ziped files
gzip -d *.gz
# index reference genome
bwa index -a bwtsw $REFERENCE || exit 3
touch progress_index_done
# bwa alignment
bwa aln $REFERENCE $READS1 > aln_sa1.sai
bwa aln $REFERENCE $READS2 > aln_sa2.sai
bwa sampe $REFERENCE aln_sa1.sai aln_sa2.sai $READS1 $READS2 > $NAME.sam || exit 5
rm $READS1
rm $READS2
touch progress_bwa_aln_done
# conversion between sam to bam
samtools view -Sb $NAME.sam > $NAME.bam || exit 6
rm $NAME.sam
touch progress_bam_done
# bam_sorting
samtools sort $NAME.bam -o $NAME.srt.bam || exit 7
rm $NAME.bam
touch progress_sorting_done
# remove_duplicates, count all reads, keep and index mapped reads only.
samtools rmdup -s $NAME.srt.bam $NAME.srt_rmdup.bam || exit 8
rm $NAME.srt.bam
samtools flagstat $NAME.srt_rmdup.bam > $NAME.flagstat.txt
samtools view -b -F 4 $NAME.srt_rmdup.bam > $NAME.mapped.bam
samtools index $NAME.mapped.bam
cp $NAME.mapped.bam $OUTPUTDIR
cp $NAME.mapped.bam.bai $OUTPUTDIR
cp $NAME.flagstat.txt $OUTPUTDIR
cd ..
rm -rf $SCRATCHDIR/$NAME || exit 10