#!/bin/bash
## Job Name
#SBATCH --job-name=gac_2021_hisat2
## Allocation Definition
#SBATCH --account=srlab
#SBATCH --partition=srlab
## Nodes
#SBATCH --nodes=1
## Walltime (days-hours:minutes:seconds format)
#SBATCH --time=2-00:00:00
## Memory per node
#SBATCH --mem=120GB
##turn on e-mail notification
#SBATCH --mail-type=ALL
#SBATCH --mail-user=graceac9@uw.edu
## Specify the working directory for this job
#SBATCH --chdir=/gscratch/srlab/graceac9/analyses/20231025-hisat2

#Code modified from A. Huffmyer: https://github.com/AHuffmyer/EarlyLifeHistory_Energetics/blob/master/Mcap2020/Scripts/TagSeq/TagSeq_BioInf.md

#Abort the job as soon as any command returns a non-zero status
set -e

#Locations used throughout the script:
#  data_dir   - downloaded NCBI dataset holding the reference genome FASTA (the index is written here too)
#  hisat2_dir - directory containing the hisat2 and hisat2-build executables
#  reads_dir  - directory containing the trimmed reads (*fq.gz)
data_dir="/gscratch/scrubbed/graceac9/ncbi_dataset/data/GCA_032158295.1"
hisat2_dir="/gscratch/srlab/programs/hisat2-2.2.0"
reads_dir="/gscratch/srlab/graceac9/analyses/pycno/20220810_PSC2021_trimming"

#Load the lab-installed programs
#NOTE(review): the hisat2 module loaded here is 2.1.0 while hisat2_dir points at 2.2.0 -- confirm which version is intended
for module_path in /gscratch/srlab/programs/hisat2-2.1.0 /gscratch/srlab/programs/samtools-1.9; do
  module load "${module_path}"
done

#Build the HISAT2 index for the P. helianthoides reference genome (scaffolds).
#The genomic FASTA (-f) in data_dir is the input; Phelianthoides_ref in data_dir is the index name
#that the alignment step later passes to hisat2 -x.
"${hisat2_dir}"/hisat2-build \
  -f \
  "${data_dir}"/GCA_032158295.1_ASM3215829v1_genomic.fna \
  "${data_dir}"/Phelianthoides_ref
echo "Reference genome indexed. Starting alignment" $(date)

#Derive the sample name from the path of a trimmed reads file.
#Drops the directory, the ".fq.gz" extension, and the "_trimmed_trimmed_trimmed" suffix.
#(The previous awk pipeline split the full path on "." and then looked for the 6th
#"/"-separated field of a piece that contains no "/", so the name was always empty and
#every sample was written to the same ".sam"/".bam" files.)
#NOTE(review): assumes one reads file per sample, named <sample>[_trimmed_trimmed_trimmed].fq.gz -- confirm against reads_dir
#Arguments: $1 - path to a trimmed reads file
#Outputs:   the sample name on stdout
sample_name_from_path() {
  local name="${1##*/}"
  name="${name%.fq.gz}"
  printf '%s\n' "${name/_trimmed_trimmed_trimmed/}"
}

#Align every file in the array to the indexed genome and leave one sorted BAM per sample
#in the working directory.
#For each file:
#  Isolate the sample name
#  Specify the file for alignment (-U)
#  Specify number of threads (-p)
#  Align to the indexed genome (-x)
#  Report alignments tailored for transcript assemblers (--dta)
#  File for output SAM alignments (-S)
#  Sort the SAM and convert it to BAM, because Stringtie takes a sorted file for input
#  Delete the SAM file
#Returns non-zero when there are no reads to align.
align_all_reads() {
  local reads_file sample_name

  if [ "${#array[@]}" -eq 0 ]; then
    echo "No *fq.gz files found in ${reads_dir}" >&2
    return 1
  fi

  for reads_file in "${array[@]}"; do
    sample_name=$(sample_name_from_path "${reads_file}")
    "${hisat2_dir}"/hisat2 -p 8 --dta -x "${data_dir}"/Phelianthoides_ref -U "${reads_file}" -S "${sample_name}".sam
    samtools sort -@ 8 -o "${sample_name}".bam "${sample_name}".sam
    echo "${reads_file} bam-ified!"
    rm -- "${sample_name}".sam
  done
}

#Make an array of trimmed sequences.
#A glob is used instead of parsing ls output; nullglob makes the array empty when nothing matches.
shopt -s nullglob
array=("${reads_dir}"/*fq.gz)
shopt -u nullglob

#Called as a plain command (not in an if/||/&& context) on purpose: the script runs under
#set -e, so a failing hisat2/samtools call or an empty read list aborts the job.
align_all_reads