#!/bin/bash #SBATCH --job-name=pca-wgsassign #SBATCH --output=/home/lspencer/pcod-sex/pca-wgsassign-out.txt #SBATCH --mail-user=laura.spencer@noaa.gov #SBATCH --mail-type=ALL #SBATCH -t 2-0:0:0 # load pcangsd programs module unload bio/pcangsd/0.99 module load bio/pcangsd/0.99 source /opt/bioinformatics/venv/pcangsd-0.99/bin/activate # load wgsassign programs source ~/.bashrc mamba activate WGSassign base=/home/lspencer/pcod-sex beagle=${base}/pcod-sex.beagle.gz sites=${base}/sex-markers-logp4.txt ids=${base}/fish-ids-sex.txt sex_marks_beagle=${base}/pcod-sex-markers.beagle outname=${base}/sex-assign #sex_marks_afs=${assign}/snp-testing/top-1250.pop_af.npy # Filter beagle for sex markers (identified from GWAS) # combine header row with filtered sites zcat ${beagle} | head -n 1 > ${sex_marks_beagle} awk 'NR==FNR{c[$1]++;next};c[$1]' ${sites} <(zcat ${beagle}) >> ${sex_marks_beagle} gzip ${sex_marks_beagle} # IF BEAGLE FILE HAS NAMES OF SAMPLES IN HEADER ROW # Generate file listing samples in order they appear in beagle file using sample IDs in header row zcat ${sex_marks_beagle}.gz | cut --complement -f1-3 | head -n 1 | tr '\t' '\n' | \ sed -e 's/_AA//g' -e 's/_AB//g' -e 's/_BB//g' | uniq > ${base}/sex-sample-order.txt # Run PCAnagsd to generate covariance matrix of sex-assigned fish using only GWAS-identified sex markers pcangsd.py -threads 10 \ -beagle /home/lspencer/pcod-sex/pcod-sex-markers.beagle.gz \ -o /home/lspencer/pcod-sex/pcod-sex -pcadapt # Get allele frequencies by sex WGSassign --beagle ${sex_marks_beagle}.gz --pop_af_IDs ${ids} --get_reference_af --out ${base}/sex-marks --threads 10 ## Run assignment to identify source population of experimental fish #WGSassign --beagle ${sex_marks_beagle} --pop_af_file ${TBD} --get_pop_like --out ${outname} --threads 20