star_driver_generic.sh

#!/bin/bash
#$ -V
#$ -cwd
#$ -S /bin/bash
#$ -N myoAus_map
#$ -o $JOB_NAME.o$JOB_ID
#$ -e $JOB_NAME.e$JOB_ID
#$ -q Yoda
#$ -pe fill 6 
#$ -P communitycluster

# I had trouble running this on any queue other than Yoda; the problem seems to be memory-related. I also had trouble when I tried to use more than 6 processors. With a reasonable amount of data it still finishes in a few hours.

# Root of the project tree; the read, draft, genome and output locations used
# by this script are all derived from it.
BASEDIR=/lustre/scratch/daray/Ray_low_cov_work/star-mAus
# Directory the job runs from. The mapping step uses a relative output prefix,
# so its result files end up here.
WORKDIR="$BASEDIR/output"

# Stop immediately if the output directory is unreachable; otherwise every
# later step would silently write into whatever directory the job started in.
cd "$WORKDIR" || {
	echo "ERROR: cannot change to working directory: $WORKDIR" >&2
	exit 1
}

# Thread count passed to STAR (--runThreadN / --outBAMsortingThreadN).
THREADS="5"

# Data locations, all derived from BASEDIR.
# Raw data. Not needed here, but kept as a legacy from other scripts.
RAW_READS_HOME="${BASEDIR}/reads"
# Paired and unpaired read files generated by Trimmomatic from the raw data.
PROCESSED_READS_HOME="${BASEDIR}/processed_reads"
# Genome draft(s). Should at least house the draft you are mapping to.
DRAFTS_HOME="${BASEDIR}/drafts"
# Genome indexes to be generated by STAR.
GENOME_HOME="${BASEDIR}/genome"

######
# Install locations of the major programs.
# Only STAR_HOME is referenced by the steps visible in this script; the other
# entries appear to be carried over from related pipelines.
######

# Aligners / mappers
STAR_HOME="/lustre/work/apps/STAR-2.4/bin"
BWA_HOME="/lustre/work/apps/bwa-0.7.12"
BOWTIE2_HOME="/lustre/work/apps/bowtie2-2.0.5"
TOPHAT_HOME="/lustre/work/apps/tophat-2.1.0.Linux_x86_64"
CUFFLINKS_HOME="/lustre/work/apps/cufflinks-2.2.1/bin"

# SAM/BAM/VCF/BED utilities
SAMTOOLS_HOME="/lustre/work/apps/samtools-1.2"
SAMTOOLS1_8_HOME="/lustre/work/apps/samtools-0.1.18"
BCFTOOLS_HOME="/lustre/work/apps/samtools-0.1.18/bcftools"
PICARD_HOME="/lustre/work/apps/picard-tools-1.91"
VCFTOOLS_HOME="/lustre/work/daray/software/vcftools_0.1.12b/bin"
BEDTOOLS_HOME="/lustre/work/apps/bedtools-2.17.0/bin"

# Read pre-processing
TRIM_HOME="/lustre/work/apps/Trimmomatic-0.27"
FASTX_HOME="/lustre/work/apps/fastx_toolkit-0.0.14/bin"

# Locally installed software
RAY_SOFTWARE="/lustre/work/daray/software"


################################################################################
# 1 Generate index for genome with STAR
#~~~~~~~~~~~
GENOME=M8132	# Identifier for your genome draft
DRAFT=myoAus	# Taxon designation for your genome draft
RNASEQ_TAXON=myoVel	# Taxon origin for your RNAseq data.  May or may not be the same as your draft taxon

# STAR 2.4 expects the --genomeDir directory to exist before genomeGenerate
# runs, so create it up front (no-op if it is already there).
mkdir -p "$GENOME_HOME" || {
	echo "ERROR: cannot create genome index directory: $GENOME_HOME" >&2
	exit 1
}

# Build the genome index from the draft FASTA. If STAR fails, stop the job:
# the "finished" mail must only be sent for a real index, and the mapping step
# must not run against a missing or partial one.
if ! "$STAR_HOME/STAR" \
	--runThreadN "$THREADS" \
	--runMode genomeGenerate \
	--genomeDir "$GENOME_HOME" \
	--genomeFastaFiles "$DRAFTS_HOME/${GENOME}_mem.fa"
then
	echo "ERROR: STAR genomeGenerate failed for $DRAFTS_HOME/${GENOME}_mem.fa" >&2
	exit 1
fi

# Notify by e-mail that the index step completed.
echo "${DRAFT}_index_finished" | mailx -s "${DRAFT}_index_finished" david.4.ray@gmail.com

################################################################################
# 2 Map RNA-Seq reads to genome with STAR
#~~~~~~~~~~~

SAMPLE1=11750X4	# Use these to designate your paired reads for --readFilesIn
SAMPLE2=11750X11
SAMPLE3=11750X12
SAMPLE4=11750X14

# Build the two comma-separated lists STAR wants for paired-end input: all R1
# mates first, then all R2 mates, with the samples in the same order in both.
# Generating them in a loop keeps the two lists in sync and avoids hand-editing
# one very long line when samples change.
R1_FILES=""
R2_FILES=""
for SAMPLE in "$SAMPLE1" "$SAMPLE2" "$SAMPLE3" "$SAMPLE4"; do
	R1="$PROCESSED_READS_HOME/${SAMPLE}_R1_paired.fastq.gz"
	R2="$PROCESSED_READS_HOME/${SAMPLE}_R2_paired.fastq.gz"
	# Fail early with a clear message instead of letting zcat/STAR trip over
	# a missing or unreadable input file part-way through the run.
	for READS in "$R1" "$R2"; do
		if [ ! -r "$READS" ]; then
			echo "ERROR: read file missing or unreadable: $READS" >&2
			exit 1
		fi
	done
	# ${VAR:+$VAR,} inserts the comma separator only once the list is non-empty.
	R1_FILES="${R1_FILES:+$R1_FILES,}$R1"
	R2_FILES="${R2_FILES:+$R2_FILES,}$R2"
done

# Map the reads against the index in GENOME_HOME and write a coordinate-sorted
# BAM. The output prefix is relative, so files are written to the current
# directory; STAR appends its own file names directly to the prefix.
# If STAR fails, exit non-zero rather than mailing a false "finished" notice.
if ! "$STAR_HOME/STAR" \
	--runThreadN "$THREADS" \
	--genomeDir "$GENOME_HOME" \
	--readFilesIn "$R1_FILES" "$R2_FILES" \
	--readFilesCommand zcat \
	--outFileNamePrefix "${DRAFT}_v_${RNASEQ_TAXON}" \
	--outSAMtype BAM SortedByCoordinate \
	--outBAMsortingThreadN "$THREADS"
then
	echo "ERROR: STAR mapping failed for ${DRAFT}_v_${RNASEQ_TAXON}" >&2
	exit 1
fi

# Notify by e-mail that the mapping step completed.
echo "${DRAFT}_v_${RNASEQ_TAXON}_mapping_finished" | mailx -s "${DRAFT}_v_${RNASEQ_TAXON}_mapping_finished" david.4.ray@gmail.com


# NOTE(review): presumably gives mailx a moment to hand the message off before
# the job ends - confirm whether this is still needed.
sleep 5