{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Mon Apr 18 07:41:44 PDT 2016\r\n" ] } ], "source": [ "!date" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Software:\n", "\n", " System Software Overview:\n", "\n", " System Version: OS X 10.9.5 (13F34)\n", " Kernel Version: Darwin 13.4.0\n", " Boot Volume: Hummingbird\n", " Boot Mode: Normal\n", " Computer Name: hummingbird\n", " User Name: Sam (Sam)\n", " Secure Virtual Memory: Enabled\n", " Time since boot: 5 days 23:15\n", "\n" ] } ], "source": [ "%%bash\n", "system_profiler SPSoftwareDataType" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "\n", " Model Name: Xserve\n", " Model Identifier: Xserve3,1\n", " Processor Name: Quad-Core Intel Xeon\n", " Processor Speed: 2.26 GHz\n", " Number of Processors: 2\n", " Total Number of Cores: 8\n", " L2 Cache (per Core): 256 KB\n", " L3 Cache (per Processor): 8 MB\n", " Memory: 24 GB\n", " Processor Interconnect Speed: 5.86 GT/s\n", " Boot ROM Version: XS31.0081.B06\n", " SMC Version (system): 1.43f4\n", " LOM Revision: 1.1.8\n", "\n" ] } ], "source": [ "%%bash\n", "#Excludes serial number and hardware UUID\n", "system_profiler SPHardwareDataType | grep -v [SH][ea]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Install pyrad and dependencies on Hummingbird" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Note: Due to permissions issues (i.e. requiring sudo and a password), most of the installation took place outside of the notebook. I've entered most of the commands here anyway, for demonstration purposes." ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "/usr/local/bioinformatics\n" ] } ], "source": [ "cd /usr/local/bioinformatics/" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": false }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "mkdir: pyrad: Permission denied\n" ] } ], "source": [ "%%bash\n", "mkdir pyrad" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "/usr/local/bioinformatics/pyrad\n" ] } ], "source": [ "cd pyrad" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": false }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "--2016-04-18 07:50:10-- https://github.com/dereneaton/pyrad/archive/3.0.66.tar.gz\n", "Resolving github.com... 192.30.252.128\n", "Connecting to github.com|192.30.252.128|:443... connected.\n", "HTTP request sent, awaiting response... 302 Found\n", "Location: https://codeload.github.com/dereneaton/pyrad/tar.gz/3.0.66 [following]\n", "--2016-04-18 07:50:11-- https://codeload.github.com/dereneaton/pyrad/tar.gz/3.0.66\n", "Resolving codeload.github.com... 192.30.252.163\n", "Connecting to codeload.github.com|192.30.252.163|:443... connected.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 76100 (74K) [application/x-gzip]\n", "3.0.66.tar.gz: Permission denied\n", "\n", "Cannot write to '3.0.66.tar.gz' (Success).\n" ] } ], "source": [ "%%bash\n", "wget https://github.com/dereneaton/pyrad/archive/3.0.66.tar.gz" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "3.0.66.tar.gz\r\n" ] } ], "source": [ "ls" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": false }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "x pyrad-3.0.66/: Can't create 'pyrad-3.0.66'\n", "x pyrad-3.0.66/.gitignore: Failed to create dir 'pyrad-3.0.66'Can't create 'pyrad-3.0.66/.gitignore'\n", "x pyrad-3.0.66/LICENSE: Failed to create dir 'pyrad-3.0.66'Can't create 'pyrad-3.0.66/LICENSE'\n", "x pyrad-3.0.66/README.rst: Failed to create dir 'pyrad-3.0.66'Can't create 'pyrad-3.0.66/README.rst'\n", "x pyrad-3.0.66/pyrad/: Failed to create dir 'pyrad-3.0.66'Can't create 'pyrad-3.0.66/pyrad'\n", "x pyrad-3.0.66/pyrad/Dtest.py: Failed to create dir 'pyrad-3.0.66'Can't create 'pyrad-3.0.66/pyrad/Dtest.py'\n", "x pyrad-3.0.66/pyrad/Dtest_5.py: Failed to create dir 'pyrad-3.0.66'Can't create 'pyrad-3.0.66/pyrad/Dtest_5.py'\n", "x pyrad-3.0.66/pyrad/Dtest_foil.py: Failed to create dir 'pyrad-3.0.66'Can't create 'pyrad-3.0.66/pyrad/Dtest_foil.py'\n", "x pyrad-3.0.66/pyrad/H_err_dp.py: Failed to create dir 'pyrad-3.0.66'Can't create 'pyrad-3.0.66/pyrad/H_err_dp.py'\n", "x pyrad-3.0.66/pyrad/__init__.py: Failed to create dir 'pyrad-3.0.66'Can't create 'pyrad-3.0.66/pyrad/__init__.py'\n", "x pyrad-3.0.66/pyrad/alignable.py: Failed to create dir 'pyrad-3.0.66'Can't create 'pyrad-3.0.66/pyrad/alignable.py'\n", "x pyrad-3.0.66/pyrad/cluster7dp.py: Failed to create dir 'pyrad-3.0.66'Can't create 'pyrad-3.0.66/pyrad/cluster7dp.py'\n", "x pyrad-3.0.66/pyrad/cluster_cons7_shuf.py: Failed to create dir 'pyrad-3.0.66'Can't create 'pyrad-3.0.66/pyrad/cluster_cons7_shuf.py'\n", "x pyrad-3.0.66/pyrad/consens_pairs.py: Failed to create dir 'pyrad-3.0.66'Can't create 'pyrad-3.0.66/pyrad/consens_pairs.py'\n", "x pyrad-3.0.66/pyrad/consensdp.py: Failed to create dir 'pyrad-3.0.66'Can't create 'pyrad-3.0.66/pyrad/consensdp.py'\n", "x pyrad-3.0.66/pyrad/createfile.py: Failed to create dir 'pyrad-3.0.66'Can't create 'pyrad-3.0.66/pyrad/createfile.py'\n", "x pyrad-3.0.66/pyrad/editraw_merges.py: Failed to create dir 'pyrad-3.0.66'Can't create 'pyrad-3.0.66/pyrad/editraw_merges.py'\n", "x pyrad-3.0.66/pyrad/editraw_pairs.py: Failed to create dir 'pyrad-3.0.66'Can't create 'pyrad-3.0.66/pyrad/editraw_pairs.py'\n", "x pyrad-3.0.66/pyrad/editraw_rads.py: Failed to create dir 'pyrad-3.0.66'Can't create 'pyrad-3.0.66/pyrad/editraw_rads.py'\n", "x pyrad-3.0.66/pyrad/loci2SNP.py: Failed to create dir 'pyrad-3.0.66'Can't create 'pyrad-3.0.66/pyrad/loci2SNP.py'\n", "x pyrad-3.0.66/pyrad/loci2gphocs.py: Failed to create dir 'pyrad-3.0.66'Can't create 'pyrad-3.0.66/pyrad/loci2gphocs.py'\n", "x pyrad-3.0.66/pyrad/loci2mig.py: Failed to create dir 'pyrad-3.0.66'Can't create 'pyrad-3.0.66/pyrad/loci2mig.py'\n", "x pyrad-3.0.66/pyrad/loci2phynex.py: Failed to create dir 'pyrad-3.0.66'Can't create 'pyrad-3.0.66/pyrad/loci2phynex.py'\n", "x pyrad-3.0.66/pyrad/loci2treemix.py: Failed to create dir 'pyrad-3.0.66'Can't create 'pyrad-3.0.66/pyrad/loci2treemix.py'\n", "x pyrad-3.0.66/pyrad/loci2vcf.py: Failed to create dir 'pyrad-3.0.66'Can't create 'pyrad-3.0.66/pyrad/loci2vcf.py'\n", "x pyrad-3.0.66/pyrad/overlapcheck.py: Failed to create dir 'pyrad-3.0.66'Can't create 'pyrad-3.0.66/pyrad/overlapcheck.py'\n", "x pyrad-3.0.66/pyrad/potpour.py: Failed to create dir 'pyrad-3.0.66'Can't create 'pyrad-3.0.66/pyrad/potpour.py'\n", "x pyrad-3.0.66/pyrad/pyRAD.py: Failed to create dir 'pyrad-3.0.66'Can't create 'pyrad-3.0.66/pyrad/pyRAD.py'\n", "x pyrad-3.0.66/pyrad/sortandcheck2.py: Failed to create dir 'pyrad-3.0.66'Can't create 'pyrad-3.0.66/pyrad/sortandcheck2.py'\n", "x pyrad-3.0.66/pyrad/tier2clust.py: Failed to create dir 'pyrad-3.0.66'Can't create 'pyrad-3.0.66/pyrad/tier2clust.py'\n", "x pyrad-3.0.66/setup.py: Failed to create dir 'pyrad-3.0.66'Can't create 'pyrad-3.0.66/setup.py'\n", "x pyrad-3.0.66/tox.ini: Failed to create dir 'pyrad-3.0.66'Can't create 'pyrad-3.0.66/tox.ini'\n", "tar: Error exit delayed from previous errors.\n" ] } ], "source": [ "%%bash\n", "tar -zxvf 3.0.66.tar.gz" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "/usr/local/bioinformatics/pyrad-3.0.66\n" ] } ], "source": [ "cd /usr/local/bioinformatics/pyrad-3.0.66/" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "LICENSE README.rst \u001b[34mpyrad\u001b[m\u001b[m/ setup.py tox.ini\r\n" ] } ], "source": [ "ls" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": false }, "outputs": [ { "ename": "SyntaxError", "evalue": "invalid syntax (, line 1)", "output_type": "error", "traceback": [ "\u001b[0;36m File \u001b[0;32m\"\"\u001b[0;36m, line \u001b[0;32m1\u001b[0m\n\u001b[0;31m setup.py install\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m invalid syntax\n" ] } ], "source": [ "setup.py install" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "LICENSE README.rst \u001b[34mbuild\u001b[m\u001b[m/ \u001b[34mdist\u001b[m\u001b[m/ \u001b[34mpyrad\u001b[m\u001b[m/ \u001b[34mpyrad.egg-info\u001b[m\u001b[m/ setup.py tox.ini\r\n" ] } ], "source": [ "ls" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "/usr/local/bioinformatics\n" ] } ], "source": [ "cd .." ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": false }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "--2016-04-18 08:04:19-- http://www.drive5.com/muscle/downloads3.8.31/muscle3.8.31_i86darwin64.tar.gz\n", "Resolving www.drive5.com... 205.196.220.130\n", "Connecting to www.drive5.com|205.196.220.130|:80... connected.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 166130 (162K) [application/x-tar]\n", "muscle3.8.31_i86darwin64.tar.gz: Permission denied\n", "\n", "Cannot write to 'muscle3.8.31_i86darwin64.tar.gz' (Success).\n" ] } ], "source": [ "%%bash\n", "wget http://www.drive5.com/muscle/downloads3.8.31/muscle3.8.31_i86darwin64.tar.gz" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "collapsed": false }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "x muscle3.8.31_i86darwin64: Can't create 'muscle3.8.31_i86darwin64'\n", "tar: Error exit delayed from previous errors.\n" ] } ], "source": [ "%%bash\n", "tar -zxvf muscle3.8.31_i86darwin64.tar.gz" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\u001b[34mFastQC\u001b[m\u001b[m/ \u001b[35mbowtie\u001b[m\u001b[m@ \u001b[35mhmmer\u001b[m\u001b[m@ \u001b[34mngsplot-2.08\u001b[m\u001b[m/ \u001b[34mstacks-1.13\u001b[m\u001b[m/\r\n", "\u001b[35mTrimmomatic\u001b[m\u001b[m@ \u001b[34mbowtie-1.0.0\u001b[m\u001b[m/ \u001b[34mhmmer-3.1b1\u001b[m\u001b[m/ \u001b[34mpyrad-3.0.66\u001b[m\u001b[m/ \u001b[34mstacks-1.37\u001b[m\u001b[m/\r\n", "\u001b[34mTrimmomatic-0.30\u001b[m\u001b[m/ \u001b[35mbowtie2\u001b[m\u001b[m@ \u001b[34mlibgtextutils-0.6.1\u001b[m\u001b[m/ \u001b[35mrsem\u001b[m\u001b[m@ stacks-1.37.tar.gz\r\n", "\u001b[35mTrinotate\u001b[m\u001b[m@ \u001b[34mbowtie2-2.1.0\u001b[m\u001b[m/ \u001b[31mmuscle3.8.31_i86darwin64\u001b[m\u001b[m* \u001b[34mrsem-1.2.10\u001b[m\u001b[m/ \u001b[34mtophat-2.0.13.OSX_x86_64\u001b[m\u001b[m/\r\n", "\u001b[34mTrinotate_r20140708\u001b[m\u001b[m/ \u001b[34mcufflinks-2.2.1.OSX_x86_64\u001b[m\u001b[m/ muscle3.8.31_i86darwin64.tar.gz \u001b[34msamtools-0.1.19\u001b[m\u001b[m/ \u001b[35mtrinity\u001b[m\u001b[m@\r\n", "\u001b[34manaconda\u001b[m\u001b[m/ \u001b[34mdbs\u001b[m\u001b[m/ \u001b[35mncbi-blast\u001b[m\u001b[m@ \u001b[34msignalp-4.1\u001b[m\u001b[m/ \u001b[34mtrinityrnaseq_r20131110\u001b[m\u001b[m/\r\n", "\u001b[34mbedtools-2.22.1\u001b[m\u001b[m/ \u001b[35mfastx_toolkit\u001b[m\u001b[m@ \u001b[34mncbi-blast-2.2.29+\u001b[m\u001b[m/ source.sh\r\n", "\u001b[34mbismark_v0.14.2\u001b[m\u001b[m/ \u001b[34mfastx_toolkit-0.0.13.2\u001b[m\u001b[m/ \u001b[35mngsplot\u001b[m\u001b[m@ \u001b[35mstacks\u001b[m\u001b[m@\r\n" ] } ], "source": [ "ls" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[Errno 20] Not a directory: 'muscle3.8.31_i86darwin64'\n", "/usr/local/bioinformatics\n" ] } ], "source": [ "cd muscle3.8.31_i86darwin64" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "# Variables to use the Bioinformatics directory\r\n", "\r\n", "export BIO=/usr/local/bioinformatics\r\n", "\r\n", "# Anaconda\r\n", "export PATH=${BIO}/anaconda/bin:${PATH}\r\n", "\r\n", "# PYTHONPATH for using R with iPython\r\n", "export PYTHONPATH=/opt/local/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages\r\n", "\r\n", "# BLAST\r\n", "export PATH=${BIO}/ncbi-blast/bin:${PATH}\r\n", "export BLASTDB=/Volumes/Data/blast_db\r\n", "\r\n", "# Trinity\r\n", "export PATH=${BIO}/trinity:${PATH}\r\n", "\r\n", "# HMMER\r\n", "export PATH=${BIO}/hmmer:${PATH}\r\n", "\r\n", "# Bowtie1\r\n", "export PATH=${BIO}/bowtie:${PATH}\r\n", "\r\n", "# Bowtie2\r\n", "export PATH=${BIO}/bowtie2:${PATH}\r\n", "\r\n", "# RSEM\r\n", "export PATH=${BIO}/rsem:${PATH}\r\n", "\r\n", "# Khmer tools\r\n", "export PATH=/opt/local/Library/Frameworks/Python.framework/Versions/2.7/bin:${PATH}\r\n", "\r\n", "# FASTX toolkit\r\n", "export PATH=${BIO}/fastx_toolkit/bin:${PATH}\r\n", "\r\n", "# Samtools (via RSEM)\r\n", "export PATH=${BIO}/rsem/sam:${PATH}\r\n", "\r\n", "# stacks\r\n", "export PATH=${BIO}/stacks/bin:${PATH}\r\n", "\r\n", "# Trimmomatic\r\n", "alias trimmomatic=\"java -jar ${BIO}/Trimmomatic/trimmomatic-0.30.jar\"\r\n", "\r\n", "# ngsplot\r\n", "export PATH=${BIO}/ngsplot/bin:${PATH}\r\n", "export NGSPLOT=${BIO}/ngsplot\r\n", "\r\n", "# Trinotate\r\n", "export PATH=${BIO}/Trinotate:${PATH}\r\n", "\r\n", "# signalp\r\n", "export PATH=${BIO}/signalp-4.1:${PATH}\r\n", "\r\n", "# Tophat\r\n", "export PATH=${BIO}/tophat-2.0.13.OSX_x86_64:${PATH}\r\n", "\r\n", "# FastQC\r\n", "export PATH=${BIO}/FastQC:${PATH}\r\n", "\r\n", "# Cufflinks\r\n", "export PATH=${BIO}/cufflinks-2.2.1.OSX_x86_64:${PATH}\r\n", "\r\n", "# bedtools\r\n", "export PATH=${BIO}/bedtools-2.22.1/bin:${PATH}\r\n", "\r\n", "# bismark\r\n", "export PATH=${BIO}/bismark_v0.14.2:${PATH}\r\n", "\r\n", "# muscle\r\n", "export PATH=${BIO}/muscle3.8.31_i86darwin64:${PATH}\r\n" ] } ], "source": [ "cat source.sh" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "--2016-04-18 08:15:13-- https://github.com/torognes/vsearch/releases/download/v1.11.1/vsearch-1.11.1-osx-x86_64.tar.gz\n", "Resolving github.com... 192.30.252.131\n", "Connecting to github.com|192.30.252.131|:443... connected.\n", "HTTP request sent, awaiting response... 302 Found\n", "Location: https://github-cloud.s3.amazonaws.com/releases/19848353/5a974bd0-018b-11e6-810c-daa981b4e335.gz?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAISTNZFOVBIJMK3TQ%2F20160418%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20160418T151513Z&X-Amz-Expires=300&X-Amz-Signature=e3f2e38959bdb1a13e4a665eae70c0e1326e67e05c899107eb91e0aff543e5e6&X-Amz-SignedHeaders=host&actor_id=0&response-content-disposition=attachment%3B%20filename%3Dvsearch-1.11.1-osx-x86_64.tar.gz&response-content-type=application%2Foctet-stream [following]\n", "--2016-04-18 08:15:13-- https://github-cloud.s3.amazonaws.com/releases/19848353/5a974bd0-018b-11e6-810c-daa981b4e335.gz?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAISTNZFOVBIJMK3TQ%2F20160418%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20160418T151513Z&X-Amz-Expires=300&X-Amz-Signature=e3f2e38959bdb1a13e4a665eae70c0e1326e67e05c899107eb91e0aff543e5e6&X-Amz-SignedHeaders=host&actor_id=0&response-content-disposition=attachment%3B%20filename%3Dvsearch-1.11.1-osx-x86_64.tar.gz&response-content-type=application%2Foctet-stream\n", "Resolving github-cloud.s3.amazonaws.com... 54.231.49.88\n", "Connecting to github-cloud.s3.amazonaws.com|54.231.49.88|:443... connected.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 274585 (268K) [application/octet-stream]\n", "vsearch-1.11.1-osx-x86_64.tar.gz: Permission denied\n", "\n", "Cannot write to 'vsearch-1.11.1-osx-x86_64.tar.gz' (Success).\n" ] } ], "source": [ "%%bash\n", "wget https://github.com/torognes/vsearch/releases/download/v1.11.1/vsearch-1.11.1-osx-x86_64.tar.gz" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "./._vsearch-1.11.1-osx-x86_64.rzY4Q7: Can't create '._vsearch-1.11.1-osx-x86_64.rzY4Q7'\n", "vsearch-1.11.1-osx-x86_64/: Can't update time for vsearch-1.11.1-osx-x86_64\n", "vsearch-1.11.1-osx-x86_64/._bin.D23mUv: Can't create 'vsearch-1.11.1-osx-x86_64/._bin.D23mUv'\n", "vsearch-1.11.1-osx-x86_64/bin/: Can't create 'vsearch-1.11.1-osx-x86_64/bin'\n", "vsearch-1.11.1-osx-x86_64/._doc.HzJsUU: Can't create 'vsearch-1.11.1-osx-x86_64/._doc.HzJsUU'\n", "vsearch-1.11.1-osx-x86_64/doc/: Can't create 'vsearch-1.11.1-osx-x86_64/doc'\n", "vsearch-1.11.1-osx-x86_64/._LICENSE.txt.eqYDGZ: Can't create 'vsearch-1.11.1-osx-x86_64/._LICENSE.txt.eqYDGZ'\n", "vsearch-1.11.1-osx-x86_64/LICENSE.txt: Can't create 'vsearch-1.11.1-osx-x86_64/LICENSE.txt'\n", "vsearch-1.11.1-osx-x86_64/._LICENSE_GNU_GPL3.txt.M9efN0: Can't create 'vsearch-1.11.1-osx-x86_64/._LICENSE_GNU_GPL3.txt.M9efN0'\n", "vsearch-1.11.1-osx-x86_64/LICENSE_GNU_GPL3.txt: Can't create 'vsearch-1.11.1-osx-x86_64/LICENSE_GNU_GPL3.txt'\n", "vsearch-1.11.1-osx-x86_64/._man.ilEZyS: Can't create 'vsearch-1.11.1-osx-x86_64/._man.ilEZyS'\n", "vsearch-1.11.1-osx-x86_64/man/: Can't create 'vsearch-1.11.1-osx-x86_64/man'\n", "vsearch-1.11.1-osx-x86_64/._README.md.QPpbcs: Can't create 'vsearch-1.11.1-osx-x86_64/._README.md.QPpbcs'\n", "vsearch-1.11.1-osx-x86_64/README.md: Can't create 'vsearch-1.11.1-osx-x86_64/README.md'\n", "vsearch-1.11.1-osx-x86_64/man/._vsearch.1.fEowYE: Can't create 'vsearch-1.11.1-osx-x86_64/man/._vsearch.1.fEowYE'\n", "vsearch-1.11.1-osx-x86_64/man/vsearch.1: Can't create 'vsearch-1.11.1-osx-x86_64/man/vsearch.1'\n", "vsearch-1.11.1-osx-x86_64/doc/._vsearch_manual.pdf.hQbbyY: Can't create 'vsearch-1.11.1-osx-x86_64/doc/._vsearch_manual.pdf.hQbbyY'\n", "vsearch-1.11.1-osx-x86_64/doc/vsearch_manual.pdf: Can't create 'vsearch-1.11.1-osx-x86_64/doc/vsearch_manual.pdf'\n", "vsearch-1.11.1-osx-x86_64/bin/._vsearch.q3PzBD: Can't create 'vsearch-1.11.1-osx-x86_64/bin/._vsearch.q3PzBD'\n", "vsearch-1.11.1-osx-x86_64/bin/vsearch: Can't create 'vsearch-1.11.1-osx-x86_64/bin/vsearch'\n", "tar: copyfile unpack (vsearch-1.11.1-osx-x86_64/bin/vsearch) failed: Permission denied\n", "tar: copyfile unpack (vsearch-1.11.1-osx-x86_64/doc/vsearch_manual.pdf) failed: Permission denied\n", "tar: copyfile unpack (vsearch-1.11.1-osx-x86_64/man/vsearch.1) failed: Permission denied\n", "tar: copyfile unpack (vsearch-1.11.1-osx-x86_64/README.md) failed: Permission denied\n", "tar: copyfile unpack (vsearch-1.11.1-osx-x86_64/man) failed: Permission denied\n", "tar: copyfile unpack (vsearch-1.11.1-osx-x86_64/LICENSE_GNU_GPL3.txt) failed: Permission denied\n", "tar: copyfile unpack (vsearch-1.11.1-osx-x86_64/LICENSE.txt) failed: Permission denied\n", "tar: copyfile unpack (vsearch-1.11.1-osx-x86_64/doc) failed: Permission denied\n", "tar: copyfile unpack (vsearch-1.11.1-osx-x86_64/bin) failed: Permission denied\n", "tar: copyfile unpack (./vsearch-1.11.1-osx-x86_64) failed: No such file or directory\n", "tar: Error exit delayed from previous errors.\n" ] } ], "source": [ "%%bash\n", "tar -xzf vsearch-1.11.1-osx-x86_64.tar.gz " ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\u001b[34mFastQC\u001b[m\u001b[m/ \u001b[35mbowtie2\u001b[m\u001b[m@ \u001b[35mncbi-blast\u001b[m\u001b[m@ \u001b[35mstacks\u001b[m\u001b[m@\r\n", "\u001b[35mTrimmomatic\u001b[m\u001b[m@ \u001b[34mbowtie2-2.1.0\u001b[m\u001b[m/ \u001b[34mncbi-blast-2.2.29+\u001b[m\u001b[m/ \u001b[34mstacks-1.13\u001b[m\u001b[m/\r\n", "\u001b[34mTrimmomatic-0.30\u001b[m\u001b[m/ \u001b[34mcufflinks-2.2.1.OSX_x86_64\u001b[m\u001b[m/ \u001b[35mngsplot\u001b[m\u001b[m@ \u001b[34mstacks-1.37\u001b[m\u001b[m/\r\n", "\u001b[35mTrinotate\u001b[m\u001b[m@ \u001b[34mdbs\u001b[m\u001b[m/ \u001b[34mngsplot-2.08\u001b[m\u001b[m/ stacks-1.37.tar.gz\r\n", "\u001b[34mTrinotate_r20140708\u001b[m\u001b[m/ \u001b[35mfastx_toolkit\u001b[m\u001b[m@ \u001b[34mpyrad-3.0.66\u001b[m\u001b[m/ \u001b[34mtophat-2.0.13.OSX_x86_64\u001b[m\u001b[m/\r\n", "\u001b[34manaconda\u001b[m\u001b[m/ \u001b[34mfastx_toolkit-0.0.13.2\u001b[m\u001b[m/ \u001b[35mrsem\u001b[m\u001b[m@ \u001b[35mtrinity\u001b[m\u001b[m@\r\n", "\u001b[34mbedtools-2.22.1\u001b[m\u001b[m/ \u001b[35mhmmer\u001b[m\u001b[m@ \u001b[34mrsem-1.2.10\u001b[m\u001b[m/ \u001b[34mtrinityrnaseq_r20131110\u001b[m\u001b[m/\r\n", "\u001b[34mbismark_v0.14.2\u001b[m\u001b[m/ \u001b[34mhmmer-3.1b1\u001b[m\u001b[m/ \u001b[34msamtools-0.1.19\u001b[m\u001b[m/ \u001b[34mvsearch-1.11.1-osx-x86_64\u001b[m\u001b[m/\r\n", "\u001b[35mbowtie\u001b[m\u001b[m@ \u001b[34mlibgtextutils-0.6.1\u001b[m\u001b[m/ \u001b[34msignalp-4.1\u001b[m\u001b[m/ vsearch-1.11.1-osx-x86_64.tar.gz\r\n", "\u001b[34mbowtie-1.0.0\u001b[m\u001b[m/ \u001b[31mmuscle3.8.31_i86darwin64\u001b[m\u001b[m* source.sh\r\n" ] } ], "source": [ "ls" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "# Variables to use the Bioinformatics directory\r\n", "\r\n", "export BIO=/usr/local/bioinformatics\r\n", "\r\n", "# Anaconda\r\n", "export PATH=${BIO}/anaconda/bin:${PATH}\r\n", "\r\n", "# PYTHONPATH for using R with iPython\r\n", "export PYTHONPATH=/opt/local/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages\r\n", "\r\n", "# BLAST\r\n", "export PATH=${BIO}/ncbi-blast/bin:${PATH}\r\n", "export BLASTDB=/Volumes/Data/blast_db\r\n", "\r\n", "# Trinity\r\n", "export PATH=${BIO}/trinity:${PATH}\r\n", "\r\n", "# HMMER\r\n", "export PATH=${BIO}/hmmer:${PATH}\r\n", "\r\n", "# Bowtie1\r\n", "export PATH=${BIO}/bowtie:${PATH}\r\n", "\r\n", "# Bowtie2\r\n", "export PATH=${BIO}/bowtie2:${PATH}\r\n", "\r\n", "# RSEM\r\n", "export PATH=${BIO}/rsem:${PATH}\r\n", "\r\n", "# Khmer tools\r\n", "export PATH=/opt/local/Library/Frameworks/Python.framework/Versions/2.7/bin:${PATH}\r\n", "\r\n", "# FASTX toolkit\r\n", "export PATH=${BIO}/fastx_toolkit/bin:${PATH}\r\n", "\r\n", "# Samtools (via RSEM)\r\n", "export PATH=${BIO}/rsem/sam:${PATH}\r\n", "\r\n", "# stacks\r\n", "export PATH=${BIO}/stacks/bin:${PATH}\r\n", "\r\n", "# Trimmomatic\r\n", "alias trimmomatic=\"java -jar ${BIO}/Trimmomatic/trimmomatic-0.30.jar\"\r\n", "\r\n", "# ngsplot\r\n", "export PATH=${BIO}/ngsplot/bin:${PATH}\r\n", "export NGSPLOT=${BIO}/ngsplot\r\n", "\r\n", "# Trinotate\r\n", "export PATH=${BIO}/Trinotate:${PATH}\r\n", "\r\n", "# signalp\r\n", "export PATH=${BIO}/signalp-4.1:${PATH}\r\n", "\r\n", "# Tophat\r\n", "export PATH=${BIO}/tophat-2.0.13.OSX_x86_64:${PATH}\r\n", "\r\n", "# FastQC\r\n", "export PATH=${BIO}/FastQC:${PATH}\r\n", "\r\n", "# Cufflinks\r\n", "export PATH=${BIO}/cufflinks-2.2.1.OSX_x86_64:${PATH}\r\n", "\r\n", "# bedtools\r\n", "export PATH=${BIO}/bedtools-2.22.1/bin:${PATH}\r\n", "\r\n", "# bismark\r\n", "export PATH=${BIO}/bismark_v0.14.2:${PATH}\r\n", "\r\n", "# muscle\r\n", "export PATH=${BIO}/muscle3.8.31_i86darwin64:${PATH}\r\n", "\r\n", "# vsearch\r\n", "export PATH=${BIO}/vsearch-1.11.1-osx-x86_64/bin:${PATH}\r\n" ] } ], "source": [ "cat source.sh" ] }, { "cell_type": "markdown", "metadata": { "collapsed": true }, "source": [ "# pyRAD analysis of Olympia oyster PE-GBS data" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## The remainder of the notebook is taken from the [pyRAD example](http://nbviewer.jupyter.org/gist/dereneaton/1f661bfb205b644086cc/PE-GBS_empirical.ipynb). I've made the appropriate changes to reflect file locations on my local system, as well as to the params.txt file.\n", "\n", "## Any text sections below this line are from the example notebook.\n", "---" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Example PE-GBS (w/ merged reads) notebook\n", "\n", "Paired-end GBS (or similarly, paired Ez-RAD) data may commonly contain first and second reads that overlap to some degree and therefore should be merged. This notebook will outline how to analyze such data in _pyRAD_ (v.3.03 or newer). I will demonstrate this example using an empirical data set of mine. To begin, we will create an empty directory within our current directory in which to perform the assembly." ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "/Volumes/owl/temp\n" ] } ], "source": [ "cd /Volumes/owl/temp/" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": true }, "outputs": [], "source": [ "%%bash\n", "mkdir oly_gbs_pyrad" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "/Volumes/owl/temp/oly_gbs_pyrad\n" ] } ], "source": [ "cd /Volumes/owl/temp/oly_gbs_pyrad/" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": false }, "outputs": [], "source": [ "%%bash\n", "mkdir -p analysis_pyrad" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\u001b[30m\u001b[43manalysis_pyrad\u001b[m\u001b[m/\r\n" ] } ], "source": [ "ls" ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "collapsed": false }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "\n", "real\t6m26.803s\n", "user\t0m0.028s\n", "sys\t0m42.577s\n" ] } ], "source": [ "%%bash\n", "time cp /Volumes/owl_web/nightingales/O_lurida/20160223_gbs/*_{1..10}A_*.fq.gz ." ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\u001b[30m\u001b[43manalysis_pyrad\u001b[m\u001b[m/ params.txt\r\n" ] } ], "source": [ "ls" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Next we create an empty params file by calling pyRAD with the -n option." ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "collapsed": false }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "\tnew params.txt file created\n" ] } ], "source": [ "%%bash\n", "pyrad -n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Enter params file arguments" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Then we enter some arguments into the params file, which can be done using any text editor. I use the script below to automate it here. Some important arguments for dealing with PE-GBS data in particular include the following: \n", "\n", "+ Set the correct datatype. In the case of this analysis, we will use 'pairgbs' for de-multiplexing in step 1, but then switch it to 'merged' for steps 2-7 after we merge the paired reads. \n", "\n", "+ The filter setting (param 21) is not needed in this case because we will be using an external read merging program that applies this filtering. \n", "\n", "+ We will, however, still probably want to trim the overhanging edges of reads (param 29), which helps to remove barcodes or adapters that were missed. \n", "\n", "+ It can also be helpful with this data type to use a low mindepth setting in conjunction with majority rule consensus base calls (param 31). This will tell pyrad to make statistical base calls for all sites with depth >mindepth, but to make majority rule base calls at sites with depth lower than mindepth, but higher than the majority rule minimum depth. I first run the analysis with the option turned off, and then at the end of the tutorial show the difference with it turned on. " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Let's view the params file" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "==** parameter inputs for pyRAD version 3.0.66 **======================== affected step ==\r\n", "./ ## 1. Working directory (all)\r\n", " \t ## 2. Loc. of non-demultiplexed files (if not line 18) (s1)\r\n", "\t ## 3. Loc. of barcode file (if not line 18) (s1)\r\n", "/usr/local/bioinformatics/vsearch-1.11.1-osx-x86_64/bin/vsearch ## 4. command (or path) to call vsearch (or usearch) (s3,s6)\r\n", "/usr/local/bioinformatics/muscle3.8.31_i86darwin64 ## 5. command (or path) to call muscle (s3,s7)\r\n", "CWGC ## 6. Restriction overhang (e.g., C|TGCAG -> TGCAG) (s1,s2)\r\n", "16 ## 7. N processors (parallel) (all)\r\n", "6 ## 8. Mindepth: min coverage for a cluster (s4,s5)\r\n", "4 ## 9. NQual: max # sites with qual < 20 (or see line 20)(s2)\r\n", ".88 ## 10. Wclust: clustering threshold as a decimal (s3,s6)\r\n", "pairgbs ## 11. Datatype: rad,gbs,pairgbs,pairddrad,(others:see docs)(all)\r\n", "4 ## 12. MinCov: min samples in a final locus (s7)\r\n", "3 ## 13. MaxSH: max inds with shared hetero site (s7)\r\n", "oly_gbs_pyrad ## 14. Prefix name for final output (no spaces) (s7)\r\n", "==== optional params below this line =================================== affected step ==\r\n", " ## 15.opt.: select subset (prefix* only selector) (s2-s7)\r\n", " ## 16.opt.: add-on (outgroup) taxa (list or prefix*) (s6,s7)\r\n", " ## 17.opt.: exclude taxa (list or prefix*) (s7)\r\n", "/Volumes/nightingales/O_lurida/20160223_gbs/*.fq.gz ## 18.opt.: loc. of de-multiplexed data (s2)\r\n", " ## 19.opt.: maxM: N mismatches in barcodes (def= 1) (s1)\r\n", " ## 20.opt.: phred Qscore offset (def= 33) (s2)\r\n", "2 ## 21.opt.: filter: def=0=NQual 1=NQual+adapters. 2=strict (s2)\r\n", " ## 22.opt.: a priori E,H (def= 0.001,0.01, if not estimated) (s5)\r\n", "4 ## 23.opt.: maxN: max Ns in a cons seq (def=5) (s5)\r\n", "8 ## 24.opt.: maxH: max heterozyg. sites in cons seq (def=5) (s5)\r\n", " ## 25.opt.: ploidy: max alleles in cons seq (def=2;see docs) (s4,s5)\r\n", " ## 26.opt.: maxSNPs: (def=100). Paired (def=100,100) (s7)\r\n", " ## 27.opt.: maxIndels: within-clust,across-clust (def. 3,99) (s3,s7)\r\n", " ## 28.opt.: random number seed (def. 112233) (s3,s6,s7)\r\n", " ## 29.opt.: trim overhang left,right on final loci, def(0,0) (s7)\r\n", "* ## 30.opt.: output formats: p,n,a,s,v,u,t,m,k,g,* (see docs) (s7)\r\n", " ## 31.opt.: maj. base call at depth>x> pear.log 2>&1\n" ] } ], "source": [ "%%bash\n", "for gfile in analysis_pyrad/fastq/*_1.fq;\n", " do pear -f $gfile \\\n", " -r ${gfile/_1.fq/_2.fq} \\\n", " -o ${gfile/_1.fq/} \\\n", " -n 33 \\\n", " -t 33 \\\n", " -q 10 \\\n", " -j 20 >> pear.log 2>&1;\n", "done" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Set the data location to the de-multiplexed & merged data " ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "/Volumes/owl/temp/oly_gbs_pyrad/analysis_pyrad/fastq\n" ] } ], "source": [ "cd ./analysis_pyrad/fastq/" ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1HL_10A.assembled.fastq 1HL_7A.unassembled.forward.fastq 1NF_5A.unassembled.reverse.fastq \u001b[31m1SN_2A_2.fq\u001b[m\u001b[m*\r\n", "1HL_10A.discarded.fastq 1HL_7A.unassembled.reverse.fastq \u001b[31m1NF_5A_1.fq\u001b[m\u001b[m* 1SN_3A.assembled.fastq\r\n", "1HL_10A.unassembled.forward.fastq 1HL_7A_1.fq \u001b[31m1NF_5A_2.fq\u001b[m\u001b[m* 1SN_3A.discarded.fastq\r\n", "1HL_10A.unassembled.reverse.fastq \u001b[31m1HL_7A_1.fq.gz\u001b[m\u001b[m* 1NF_6A.assembled.fastq 1SN_3A.unassembled.forward.fastq\r\n", "\u001b[31m1HL_10A_1.fq\u001b[m\u001b[m* \u001b[31m1HL_7A_2.fq\u001b[m\u001b[m* 1NF_6A.discarded.fastq 1SN_3A.unassembled.reverse.fastq\r\n", "\u001b[31m1HL_10A_2.fq\u001b[m\u001b[m* 1HL_8A.assembled.fastq 1NF_6A.unassembled.forward.fastq \u001b[31m1SN_3A_1.fq\u001b[m\u001b[m*\r\n", "1HL_1A.assembled.fastq 1HL_8A.discarded.fastq 1NF_6A.unassembled.reverse.fastq \u001b[31m1SN_3A_2.fq\u001b[m\u001b[m*\r\n", "1HL_1A.discarded.fastq 1HL_8A.unassembled.forward.fastq \u001b[31m1NF_6A_1.fq\u001b[m\u001b[m* 1SN_4A.assembled.fastq\r\n", "1HL_1A.unassembled.forward.fastq 1HL_8A.unassembled.reverse.fastq \u001b[31m1NF_6A_2.fq\u001b[m\u001b[m* 1SN_4A.discarded.fastq\r\n", "1HL_1A.unassembled.reverse.fastq \u001b[31m1HL_8A_1.fq\u001b[m\u001b[m* 1NF_7A.assembled.fastq 1SN_4A.unassembled.forward.fastq\r\n", "\u001b[31m1HL_1A_1.fq\u001b[m\u001b[m* \u001b[31m1HL_8A_2.fq\u001b[m\u001b[m* 1NF_7A.discarded.fastq 1SN_4A.unassembled.reverse.fastq\r\n", "\u001b[31m1HL_1A_2.fq\u001b[m\u001b[m* 1HL_9A.assembled.fastq 1NF_7A.unassembled.forward.fastq \u001b[31m1SN_4A_1.fq\u001b[m\u001b[m*\r\n", "1HL_2A.assembled.fastq 1HL_9A.discarded.fastq 1NF_7A.unassembled.reverse.fastq \u001b[31m1SN_4A_2.fq\u001b[m\u001b[m*\r\n", "1HL_2A.discarded.fastq 1HL_9A.unassembled.forward.fastq \u001b[31m1NF_7A_1.fq\u001b[m\u001b[m* 1SN_5A.assembled.fastq\r\n", "1HL_2A.unassembled.forward.fastq 1HL_9A.unassembled.reverse.fastq \u001b[31m1NF_7A_2.fq\u001b[m\u001b[m* 1SN_5A.discarded.fastq\r\n", "1HL_2A.unassembled.reverse.fastq \u001b[31m1HL_9A_1.fq\u001b[m\u001b[m* 1NF_8A.assembled.fastq 1SN_5A.unassembled.forward.fastq\r\n", "\u001b[31m1HL_2A_1.fq\u001b[m\u001b[m* \u001b[31m1HL_9A_2.fq\u001b[m\u001b[m* 1NF_8A.discarded.fastq 1SN_5A.unassembled.reverse.fastq\r\n", "\u001b[31m1HL_2A_2.fq\u001b[m\u001b[m* 1NF_10A.assembled.fastq 1NF_8A.unassembled.forward.fastq \u001b[31m1SN_5A_1.fq\u001b[m\u001b[m*\r\n", "1HL_3A.assembled.fastq 1NF_10A.discarded.fastq 1NF_8A.unassembled.reverse.fastq \u001b[31m1SN_5A_2.fq\u001b[m\u001b[m*\r\n", "1HL_3A.discarded.fastq 1NF_10A.unassembled.forward.fastq \u001b[31m1NF_8A_1.fq\u001b[m\u001b[m* 1SN_6A.assembled.fastq\r\n", "1HL_3A.unassembled.forward.fastq 1NF_10A.unassembled.reverse.fastq \u001b[31m1NF_8A_2.fq\u001b[m\u001b[m* 1SN_6A.discarded.fastq\r\n", "1HL_3A.unassembled.reverse.fastq \u001b[31m1NF_10A_1.fq\u001b[m\u001b[m* 1NF_9A.assembled.fastq 1SN_6A.unassembled.forward.fastq\r\n", "\u001b[31m1HL_3A_1.fq\u001b[m\u001b[m* \u001b[31m1NF_10A_2.fq\u001b[m\u001b[m* 1NF_9A.discarded.fastq 1SN_6A.unassembled.reverse.fastq\r\n", "\u001b[31m1HL_3A_2.fq\u001b[m\u001b[m* 1NF_1A.assembled.fastq 1NF_9A.unassembled.forward.fastq \u001b[31m1SN_6A_1.fq\u001b[m\u001b[m*\r\n", "1HL_4A.assembled.fastq 1NF_1A.discarded.fastq 1NF_9A.unassembled.reverse.fastq \u001b[31m1SN_6A_2.fq\u001b[m\u001b[m*\r\n", "1HL_4A.discarded.fastq 1NF_1A.unassembled.forward.fastq \u001b[31m1NF_9A_1.fq\u001b[m\u001b[m* 1SN_7A.assembled.fastq\r\n", "1HL_4A.unassembled.forward.fastq 1NF_1A.unassembled.reverse.fastq \u001b[31m1NF_9A_2.fq\u001b[m\u001b[m* 1SN_7A.discarded.fastq\r\n", "1HL_4A.unassembled.reverse.fastq \u001b[31m1NF_1A_1.fq\u001b[m\u001b[m* 1SN_10A.assembled.fastq 1SN_7A.unassembled.forward.fastq\r\n", "\u001b[31m1HL_4A_1.fq\u001b[m\u001b[m* \u001b[31m1NF_1A_2.fq\u001b[m\u001b[m* 1SN_10A.discarded.fastq 1SN_7A.unassembled.reverse.fastq\r\n", "\u001b[31m1HL_4A_2.fq\u001b[m\u001b[m* 1NF_2A.assembled.fastq 1SN_10A.unassembled.forward.fastq \u001b[31m1SN_7A_1.fq\u001b[m\u001b[m*\r\n", "1HL_5A.assembled.fastq 1NF_2A.discarded.fastq 1SN_10A.unassembled.reverse.fastq \u001b[31m1SN_7A_2.fq\u001b[m\u001b[m*\r\n", "1HL_5A.discarded.fastq 1NF_2A.unassembled.forward.fastq \u001b[31m1SN_10A_1.fq\u001b[m\u001b[m* 1SN_8A.assembled.fastq\r\n", "1HL_5A.unassembled.forward.fastq 1NF_2A.unassembled.reverse.fastq \u001b[31m1SN_10A_2.fq\u001b[m\u001b[m* 1SN_8A.discarded.fastq\r\n", "1HL_5A.unassembled.reverse.fastq \u001b[31m1NF_2A_1.fq\u001b[m\u001b[m* 1SN_1A.assembled.fastq 1SN_8A.unassembled.forward.fastq\r\n", "\u001b[31m1HL_5A_1.fq\u001b[m\u001b[m* \u001b[31m1NF_2A_2.fq\u001b[m\u001b[m* 1SN_1A.discarded.fastq 1SN_8A.unassembled.reverse.fastq\r\n", "\u001b[31m1HL_5A_2.fq\u001b[m\u001b[m* 1NF_4A.assembled.fastq 1SN_1A.unassembled.forward.fastq \u001b[31m1SN_8A_1.fq\u001b[m\u001b[m*\r\n", "1HL_6A.assembled.fastq 1NF_4A.discarded.fastq 1SN_1A.unassembled.reverse.fastq \u001b[31m1SN_8A_2.fq\u001b[m\u001b[m*\r\n", "1HL_6A.discarded.fastq 1NF_4A.unassembled.forward.fastq \u001b[31m1SN_1A_1.fq\u001b[m\u001b[m* 1SN_9A.assembled.fastq\r\n", "1HL_6A.unassembled.forward.fastq 1NF_4A.unassembled.reverse.fastq \u001b[31m1SN_1A_2.fq\u001b[m\u001b[m* 1SN_9A.discarded.fastq\r\n", "1HL_6A.unassembled.reverse.fastq \u001b[31m1NF_4A_1.fq\u001b[m\u001b[m* 1SN_2A.assembled.fastq 1SN_9A.unassembled.forward.fastq\r\n", "\u001b[31m1HL_6A_1.fq\u001b[m\u001b[m* \u001b[31m1NF_4A_2.fq\u001b[m\u001b[m* 1SN_2A.discarded.fastq 1SN_9A.unassembled.reverse.fastq\r\n", "\u001b[31m1HL_6A_2.fq\u001b[m\u001b[m* 1NF_5A.assembled.fastq 1SN_2A.unassembled.forward.fastq \u001b[31m1SN_9A_1.fq\u001b[m\u001b[m*\r\n", "1HL_7A.assembled.fastq 1NF_5A.discarded.fastq 1SN_2A.unassembled.reverse.fastq \u001b[31m1SN_9A_2.fq\u001b[m\u001b[m*\r\n", "1HL_7A.discarded.fastq 1NF_5A.unassembled.forward.fastq \u001b[31m1SN_2A_1.fq\u001b[m\u001b[m*\r\n" ] } ], "source": [ "ls" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": false }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "sed: 1: \"./params.txt\": invalid command code .\n" ] } ], "source": [ "%%bash\n", "## set location of demultiplexed data that are 'pear' filtered\n", "sed -i '/## 18./c\\analysis_pyrad/fastq/*.assembled.fastq ## 18. demulti data loc ' ./params.txt" ] }, { "cell_type": "code", "execution_count": 21, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "/Volumes/owl/temp/oly_gbs_pyrad\n" ] } ], "source": [ "cd /Volumes/owl/temp/oly_gbs_pyrad/" ] }, { "cell_type": "code", "execution_count": 22, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "==** parameter inputs for pyRAD version 3.0.66 **======================== affected step ==\n", "./ ## 1. Working directory (all)\n", " \t ## 2. Loc. of non-demultiplexed files (if not line 18) (s1)\n", "\t ## 3. Loc. of barcode file (if not line 18) (s1)\n", "/usr/local/bioinformatics/vsearch-1.11.1-osx-x86_64/bin/vsearch ## 4. command (or path) to call vsearch (or usearch) (s3,s6)\n", "/usr/local/bioinformatics/muscle3.8.31_i86darwin64 ## 5. command (or path) to call muscle (s3,s7)\n", "CWGC ## 6. Restriction overhang (e.g., C|TGCAG -> TGCAG) (s1,s2)\n", "16 ## 7. N processors (parallel) (all)\n", "6 ## 8. Mindepth: min coverage for a cluster (s4,s5)\n", "4 ## 9. NQual: max # sites with qual < 20 (or see line 20)(s2)\n", ".88 ## 10. Wclust: clustering threshold as a decimal (s3,s6)\n", "pairgbs ## 11. Datatype: rad,gbs,pairgbs,pairddrad,(others:see docs)(all)\n", "4 ## 12. MinCov: min samples in a final locus (s7)\n", "3 ## 13. MaxSH: max inds with shared hetero site (s7)\n", "oly_gbs_pyrad ## 14. Prefix name for final output (no spaces) (s7)\n", "==== optional params below this line =================================== affected step ==\n", " ## 15.opt.: select subset (prefix* only selector) (s2-s7)\n", " ## 16.opt.: add-on (outgroup) taxa (list or prefix*) (s6,s7)\n", " ## 17.opt.: exclude taxa (list or prefix*) (s7)\n", "/Volumes/owl/temp/oly_gbs_pyrad/analysis_pyrad/fastq/*.assembled.fastq ## 18.opt.: loc. of de-multiplexed data (s2)\n", " ## 19.opt.: maxM: N mismatches in barcodes (def= 1) (s1)\n", " ## 20.opt.: phred Qscore offset (def= 33) (s2)\n", "2 ## 21.opt.: filter: def=0=NQual 1=NQual+adapters. 2=strict (s2)\n", " ## 22.opt.: a priori E,H (def= 0.001,0.01, if not estimated) (s5)\n", "4 ## 23.opt.: maxN: max Ns in a cons seq (def=5) (s5)\n", "8 ## 24.opt.: maxH: max heterozyg. sites in cons seq (def=5) (s5)\n", " ## 25.opt.: ploidy: max alleles in cons seq (def=2;see docs) (s4,s5)\n", " ## 26.opt.: maxSNPs: (def=100). Paired (def=100,100) (s7)\n", " ## 27.opt.: maxIndels: within-clust,across-clust (def. 3,99) (s3,s7)\n", " ## 28.opt.: random number seed (def. 112233) (s3,s6,s7)\n", " ## 29.opt.: trim overhang left,right on final loci, def(0,0) (s7)\n", "* ## 30.opt.: output formats: p,n,a,s,v,u,t,m,k,g,* (see docs) (s7)\n", " ## 31.opt.: maj. base call at depth>x TGCAG) (s1,s2)\n", "16 ## 7. N processors (parallel) (all)\n", "6 ## 8. Mindepth: min coverage for a cluster (s4,s5)\n", "4 ## 9. NQual: max # sites with qual < 20 (or see line 20)(s2)\n", ".88 ## 10. Wclust: clustering threshold as a decimal (s3,s6)\n", "merged ## 11. Datatype: rad,gbs,pairgbs,pairddrad,(others:see docs)(all)\n", "4 ## 12. MinCov: min samples in a final locus (s7)\n", "3 ## 13. MaxSH: max inds with shared hetero site (s7)\n", "oly_gbs_pyrad ## 14. Prefix name for final output (no spaces) (s7)\n", "==== optional params below this line =================================== affected step ==\n", " ## 15.opt.: select subset (prefix* only selector) (s2-s7)\n", " ## 16.opt.: add-on (outgroup) taxa (list or prefix*) (s6,s7)\n", " ## 17.opt.: exclude taxa (list or prefix*) (s7)\n", "/Volumes/owl/temp/oly_gbs_pyrad/analysis_pyrad/fastq/*.assembled.fastq ## 18.opt.: loc. of de-multiplexed data (s2)\n", " ## 19.opt.: maxM: N mismatches in barcodes (def= 1) (s1)\n", " ## 20.opt.: phred Qscore offset (def= 33) (s2)\n", "2 ## 21.opt.: filter: def=0=NQual 1=NQual+adapters. 2=strict (s2)\n", " ## 22.opt.: a priori E,H (def= 0.001,0.01, if not estimated) (s5)\n", "4 ## 23.opt.: maxN: max Ns in a cons seq (def=5) (s5)\n", "8 ## 24.opt.: maxH: max heterozyg. sites in cons seq (def=5) (s5)\n", " ## 25.opt.: ploidy: max alleles in cons seq (def=2;see docs) (s4,s5)\n", " ## 26.opt.: maxSNPs: (def=100). Paired (def=100,100) (s7)\n", " ## 27.opt.: maxIndels: within-clust,across-clust (def. 3,99) (s3,s7)\n", " ## 28.opt.: random number seed (def. 112233) (s3,s6,s7)\n", " ## 29.opt.: trim overhang left,right on final loci, def(0,0) (s7)\n", "* ## 30.opt.: output formats: p,n,a,s,v,u,t,m,k,g,* (see docs) (s7)\n", " ## 31.opt.: maj. base call at depth>x5.tot\td>5.me\td>5.sd\tbadpairs\n", "1HL_10A.assembled\t929902\t1.324\t4.302\t14750\t14.742\t31.154\t0\n", "1HL_1A.assembled\t697155\t1.302\t3.598\t10665\t13.498\t26.092\t0\n", "1HL_2A.assembled\t806060\t1.324\t3.079\t13181\t14.275\t19.905\t0\n", "1HL_3A.assembled\t692692\t1.299\t3.247\t10830\t13.068\t22.795\t0\n", "1HL_4A.assembled\t758890\t1.325\t3.89\t12675\t13.885\t27.065\t0\n", "1HL_5A.assembled\t610261\t1.309\t3.885\t9593\t13.251\t28.295\t0\n", "1HL_6A.assembled\t645676\t1.302\t3.02\t9955\t13.321\t20.74\t0\n", "1HL_8A.assembled\t764053\t1.311\t4.378\t11485\t14.441\t32.949\t0\n", "1HL_9A.assembled\t843114\t1.318\t4.406\t13178\t14.572\t32.403\t0\n", "1NF_10A.assembled\t660691\t1.306\t3.262\t10014\t13.935\t22.931\t0\n", "1NF_1A.assembled\t432755\t1.295\t2.348\t6680\t12.746\t14.473\t0\n", "1NF_2A.assembled\t1002618\t1.321\t4.53\t15706\t14.885\t33.317\t0\n", "1NF_4A.assembled\t1052234\t1.321\t4.52\t16803\t14.694\t32.941\t0\n", "1NF_5A.assembled\t839831\t1.304\t3.072\t13013\t13.676\t20.971\t0\n", "1NF_6A.assembled\t734107\t1.313\t3.937\t11456\t13.916\t28.594\t0\n", "1NF_7A.assembled\t760816\t1.323\t3.174\t12256\t14.163\t21.068\t0\n", "1NF_8A.assembled\t834461\t1.305\t3.113\t12426\t14.3\t21.566\t0\n", "1NF_9A.assembled\t984792\t1.32\t3.915\t15378\t14.824\t27.984\t0\n", "1SN_10A.assembled\t778305\t1.314\t3.509\t12283\t13.869\t24.62\t0\n", "1SN_1A.assembled\t878975\t1.321\t3.324\t13747\t14.666\t22.631\t0\n", "1SN_2A.assembled\t897159\t1.323\t3.664\t13986\t14.72\t25.791\t0\n", "1SN_3A.assembled\t775704\t1.286\t2.946\t10947\t13.352\t21.257\t0\n", "1SN_4A.assembled\t961256\t1.328\t3.54\t15787\t14.49\t23.965\t0\n", "1SN_5A.assembled\t897389\t1.317\t3.624\t14585\t13.743\t25.251\t0\n", "1SN_6A.assembled\t981945\t1.324\t4.577\t15571\t14.758\t33.539\t0\n", "1SN_7A.assembled\t884633\t1.318\t3.903\t14175\t14.198\t27.725\t0\n", "1SN_8A.assembled\t961067\t1.325\t9.207\t15325\t14.778\t71.55\t0\n", "1SN_9A.assembled\t949018\t1.315\t3.67\t15109\t14.076\t25.837\t0\n", "\n", " ## total = total number of clusters, including singletons\n", " ## dpt.me = mean depth of clusters\n", " ## dpt.sd = standard deviation of cluster depth\n", " ## >N.tot = number of clusters with depth greater than N\n", " ## >N.me = mean depth of clusters with depth greater than N\n", " ## >N.sd = standard deviation of cluster depth for clusters with depth greater than N\n", " ## badpairs = mismatched 1st & 2nd reads (only for paired ddRAD data)\n", "\n", "HISTOGRAMS\n", "\n", " \n", "sample: 1HL_10A.assembled\n", "bins\tdepth_histogram\tcnts\n", " :\t0------------50-------------100%\n", "0 \t****************************** 911416\n", "5 \t* 11120\n", "10 \t* 3287\n", "15 \t* 1533\n", "20 \t* 808\n", "25 \t* 531\n", "30 \t* 380\n", "35 \t* 235\n", "40 \t* 262\n", "50 \t* 244\n", "100 \t* 65\n", "250 \t* 16\n", "500 \t* 5\n", "\n", "sample: 1HL_1A.assembled\n", "bins\tdepth_histogram\tcnts\n", " :\t0------------50-------------100%\n", "0 \t****************************** 683427\n", "5 \t* 8605\n", "10 \t* 2448\n", "15 \t* 1085\n", "20 \t* 651\n", "25 \t* 366\n", "30 \t* 209\n", "35 \t* 115\n", "40 \t* 90\n", "50 \t* 109\n", "100 \t* 37\n", "250 \t* 11\n", "500 \t* 2\n", "\n", "sample: 1HL_2A.assembled\n", "bins\tdepth_histogram\tcnts\n", " :\t0------------50-------------100%\n", "0 \t****************************** 789404\n", "5 \t* 10224\n", "10 \t* 2884\n", "15 \t* 1334\n", "20 \t* 712\n", "25 \t* 467\n", "30 \t* 331\n", "35 \t* 228\n", "40 \t* 214\n", "50 \t* 187\n", "100 \t* 58\n", "250 \t* 14\n", "500 \t* 3\n", "\n", "sample: 1HL_3A.assembled\n", "bins\tdepth_histogram\tcnts\n", " :\t0------------50-------------100%\n", "0 \t****************************** 678821\n", "5 \t* 8846\n", "10 \t* 2460\n", "15 \t* 1081\n", "20 \t* 584\n", "25 \t* 384\n", "30 \t* 181\n", "35 \t* 94\n", "40 \t* 88\n", "50 \t* 105\n", "100 \t* 39\n", "250 \t* 7\n", "500 \t* 2\n", "\n", "sample: 1HL_4A.assembled\n", "bins\tdepth_histogram\tcnts\n", " :\t0------------50-------------100%\n", "0 \t****************************** 742799\n", "5 \t* 9960\n", "10 \t* 2912\n", "15 \t* 1238\n", "20 \t* 724\n", "25 \t* 450\n", "30 \t* 261\n", "35 \t* 175\n", "40 \t* 151\n", "50 \t* 157\n", "100 \t* 50\n", "250 \t* 10\n", "500 \t* 3\n", "\n", "sample: 1HL_5A.assembled\n", "bins\tdepth_histogram\tcnts\n", " :\t0------------50-------------100%\n", "0 \t****************************** 597839\n", "5 \t* 8056\n", "10 \t* 2079\n", "15 \t* 988\n", "20 \t* 586\n", "25 \t* 270\n", "30 \t* 142\n", "35 \t* 78\n", "40 \t* 73\n", "50 \t* 101\n", "100 \t* 40\n", "250 \t* 6\n", "500 \t* 3\n", "\n", "sample: 1HL_6A.assembled\n", "bins\tdepth_histogram\tcnts\n", " :\t0------------50-------------100%\n", "0 \t****************************** 632762\n", "5 \t* 8156\n", "10 \t* 2166\n", "15 \t* 1093\n", "20 \t* 648\n", "25 \t* 354\n", "30 \t* 175\n", "35 \t* 97\n", "40 \t* 79\n", "50 \t* 103\n", "100 \t* 34\n", "250 \t* 8\n", "500 \t* 1\n", "\n", "sample: 1HL_8A.assembled\n", "bins\tdepth_histogram\tcnts\n", " :\t0------------50-------------100%\n", "0 \t****************************** 749305\n", "5 \t* 9138\n", "10 \t* 2492\n", "15 \t* 1202\n", "20 \t* 674\n", "25 \t* 401\n", "30 \t* 260\n", "35 \t* 179\n", "40 \t* 181\n", "50 \t* 149\n", "100 \t* 58\n", "250 \t* 9\n", "500 \t* 5\n", "\n", "sample: 1HL_9A.assembled\n", "bins\tdepth_histogram\tcnts\n", " :\t0------------50-------------100%\n", "0 \t****************************** 826401\n", "5 \t* 10113\n", "10 \t* 3014\n", "15 \t* 1300\n", "20 \t* 794\n", "25 \t* 488\n", "30 \t* 344\n", "35 \t* 204\n", "40 \t* 182\n", "50 \t* 197\n", "100 \t* 61\n", "250 \t* 12\n", "500 \t* 4\n", "\n", "sample: 1NF_10A.assembled\n", "bins\tdepth_histogram\tcnts\n", " :\t0------------50-------------100%\n", "0 \t****************************** 647784\n", "5 \t* 7984\n", "10 \t* 2252\n", "15 \t* 1080\n", "20 \t* 607\n", "25 \t* 362\n", "30 \t* 218\n", "35 \t* 122\n", "40 \t* 109\n", "50 \t* 114\n", "100 \t* 49\n", "250 \t* 8\n", "500 \t* 2\n", "\n", "sample: 1NF_1A.assembled\n", "bins\tdepth_histogram\tcnts\n", " :\t0------------50-------------100%\n", "0 \t****************************** 424162\n", "5 \t* 5310\n", "10 \t* 1653\n", "15 \t* 841\n", "20 \t* 378\n", "25 \t* 153\n", "30 \t* 77\n", "35 \t* 48\n", "40 \t* 46\n", "50 \t* 58\n", "100 \t* 26\n", "250 \t* 3\n", "500 \t 0\n", "\n", "sample: 1NF_2A.assembled\n", "bins\tdepth_histogram\tcnts\n", " :\t0------------50-------------100%\n", "0 \t****************************** 983061\n", "5 \t* 11679\n", "10 \t* 3601\n", "15 \t* 1550\n", "20 \t* 863\n", "25 \t* 509\n", "30 \t* 384\n", "35 \t* 273\n", "40 \t* 341\n", "50 \t* 265\n", "100 \t* 65\n", "250 \t* 20\n", "500 \t* 7\n", "\n", "sample: 1NF_4A.assembled\n", "bins\tdepth_histogram\tcnts\n", " :\t0------------50-------------100%\n", "0 \t****************************** 1031378\n", "5 \t* 12266\n", "10 \t* 3940\n", "15 \t* 1840\n", "20 \t* 924\n", "25 \t* 526\n", "30 \t* 418\n", "35 \t* 280\n", "40 \t* 314\n", "50 \t* 254\n", "100 \t* 72\n", "250 \t* 14\n", "500 \t* 8\n", "\n", "sample: 1NF_5A.assembled\n", "bins\tdepth_histogram\tcnts\n", " :\t0------------50-------------100%\n", "0 \t****************************** 823258\n", "5 \t* 10358\n", "10 \t* 2986\n", "15 \t* 1266\n", "20 \t* 723\n", "25 \t* 420\n", "30 \t* 280\n", "35 \t* 179\n", "40 \t* 141\n", "50 \t* 146\n", "100 \t* 59\n", "250 \t* 12\n", "500 \t* 3\n", "\n", "sample: 1NF_6A.assembled\n", "bins\tdepth_histogram\tcnts\n", " :\t0------------50-------------100%\n", "0 \t****************************** 719344\n", "5 \t* 9341\n", "10 \t* 2531\n", "15 \t* 1104\n", "20 \t* 630\n", "25 \t* 433\n", "30 \t* 229\n", "35 \t* 156\n", "40 \t* 156\n", "50 \t* 119\n", "100 \t* 48\n", "250 \t* 12\n", "500 \t* 4\n", "\n", "sample: 1NF_7A.assembled\n", "bins\tdepth_histogram\tcnts\n", " :\t0------------50-------------100%\n", "0 \t****************************** 745146\n", "5 \t* 9719\n", "10 \t* 2732\n", "15 \t* 1204\n", "20 \t* 689\n", "25 \t* 472\n", "30 \t* 279\n", "35 \t* 186\n", "40 \t* 142\n", "50 \t* 170\n", "100 \t* 62\n", "250 \t* 11\n", "500 \t* 4\n", "\n", "sample: 1NF_8A.assembled\n", "bins\tdepth_histogram\tcnts\n", " :\t0------------50-------------100%\n", "0 \t****************************** 818557\n", "5 \t* 9859\n", "10 \t* 2628\n", "15 \t* 1228\n", "20 \t* 741\n", "25 \t* 505\n", "30 \t* 312\n", "35 \t* 201\n", "40 \t* 196\n", "50 \t* 157\n", "100 \t* 64\n", "250 \t* 11\n", "500 \t* 2\n", "\n", "sample: 1NF_9A.assembled\n", "bins\tdepth_histogram\tcnts\n", " :\t0------------50-------------100%\n", "0 \t****************************** 965401\n", "5 \t* 11687\n", "10 \t* 3495\n", "15 \t* 1519\n", "20 \t* 845\n", "25 \t* 506\n", "30 \t* 391\n", "35 \t* 275\n", "40 \t* 341\n", "50 \t* 235\n", "100 \t* 73\n", "250 \t* 17\n", "500 \t* 7\n", "\n", "sample: 1SN_10A.assembled\n", "bins\tdepth_histogram\tcnts\n", " :\t0------------50-------------100%\n", "0 \t****************************** 762615\n", "5 \t* 9888\n", "10 \t* 2692\n", "15 \t* 1183\n", "20 \t* 679\n", "25 \t* 451\n", "30 \t* 276\n", "35 \t* 160\n", "40 \t* 148\n", "50 \t* 147\n", "100 \t* 50\n", "250 \t* 13\n", "500 \t* 3\n", "\n", "sample: 1SN_1A.assembled\n", "bins\tdepth_histogram\tcnts\n", " :\t0------------50-------------100%\n", "0 \t****************************** 861548\n", "5 \t* 10687\n", "10 \t* 2991\n", "15 \t* 1285\n", "20 \t* 780\n", "25 \t* 522\n", "30 \t* 343\n", "35 \t* 245\n", "40 \t* 281\n", "50 \t* 206\n", "100 \t* 66\n", "250 \t* 16\n", "500 \t* 5\n", "\n", "sample: 1SN_2A.assembled\n", "bins\tdepth_histogram\tcnts\n", " :\t0------------50-------------100%\n", "0 \t****************************** 879338\n", "5 \t* 10887\n", "10 \t* 3204\n", "15 \t* 1270\n", "20 \t* 765\n", "25 \t* 478\n", "30 \t* 347\n", "35 \t* 268\n", "40 \t* 278\n", "50 \t* 241\n", "100 \t* 65\n", "250 \t* 11\n", "500 \t* 7\n", "\n", "sample: 1SN_3A.assembled\n", "bins\tdepth_histogram\tcnts\n", " :\t0------------50-------------100%\n", "0 \t****************************** 761420\n", "5 \t* 9131\n", "10 \t* 2406\n", "15 \t* 1155\n", "20 \t* 626\n", "25 \t* 382\n", "30 \t* 209\n", "35 \t* 122\n", "40 \t* 84\n", "50 \t* 118\n", "100 \t* 37\n", "250 \t* 11\n", "500 \t* 3\n", "\n", "sample: 1SN_4A.assembled\n", "bins\tdepth_histogram\tcnts\n", " :\t0------------50-------------100%\n", "0 \t****************************** 941440\n", "5 \t* 11957\n", "10 \t* 3664\n", "15 \t* 1568\n", "20 \t* 882\n", "25 \t* 527\n", "30 \t* 361\n", "35 \t* 262\n", "40 \t* 263\n", "50 \t* 240\n", "100 \t* 66\n", "250 \t* 16\n", "500 \t* 10\n", "\n", "sample: 1SN_5A.assembled\n", "bins\tdepth_histogram\tcnts\n", " :\t0------------50-------------100%\n", "0 \t****************************** 878933\n", "5 \t* 11554\n", "10 \t* 3328\n", "15 \t* 1389\n", "20 \t* 786\n", "25 \t* 474\n", "30 \t* 313\n", "35 \t* 201\n", "40 \t* 161\n", "50 \t* 175\n", "100 \t* 54\n", "250 \t* 17\n", "500 \t* 4\n", "\n", "sample: 1SN_6A.assembled\n", "bins\tdepth_histogram\tcnts\n", " :\t0------------50-------------100%\n", "0 \t****************************** 962379\n", "5 \t* 11767\n", "10 \t* 3629\n", "15 \t* 1524\n", "20 \t* 831\n", "25 \t* 549\n", "30 \t* 376\n", "35 \t* 279\n", "40 \t* 308\n", "50 \t* 215\n", "100 \t* 63\n", "250 \t* 16\n", "500 \t* 9\n", "\n", "sample: 1SN_7A.assembled\n", "bins\tdepth_histogram\tcnts\n", " :\t0------------50-------------100%\n", "0 \t****************************** 866767\n", "5 \t* 10874\n", "10 \t* 3323\n", "15 \t* 1396\n", "20 \t* 758\n", "25 \t* 511\n", "30 \t* 355\n", "35 \t* 205\n", "40 \t* 195\n", "50 \t* 170\n", "100 \t* 59\n", "250 \t* 16\n", "500 \t* 4\n", "\n", "sample: 1SN_8A.assembled\n", "bins\tdepth_histogram\tcnts\n", " :\t0------------50-------------100%\n", "0 \t****************************** 941822\n", "5 \t* 11699\n", "10 \t* 3624\n", "15 \t* 1488\n", "20 \t* 840\n", "25 \t* 544\n", "30 \t* 338\n", "35 \t* 208\n", "40 \t* 227\n", "50 \t* 181\n", "100 \t* 73\n", "250 \t* 13\n", "500 \t* 10\n", "\n", "sample: 1SN_9A.assembled\n", "bins\tdepth_histogram\tcnts\n", " :\t0------------50-------------100%\n", "0 \t****************************** 930053\n", "5 \t* 11576\n", "10 \t* 3579\n", "15 \t* 1479\n", "20 \t* 807\n", "25 \t* 488\n", "30 \t* 312\n", "35 \t* 241\n", "40 \t* 194\n", "50 \t* 211\n", "100 \t* 53\n", "250 \t* 16\n", "500 \t* 9\n", "\n" ] } ], "source": [ "%%bash\n", "cat stats/s3.clusters.txt" ] }, { "cell_type": "code", "execution_count": 32, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\u001b[30m\u001b[43manalysis_pyrad\u001b[m\u001b[m/ \u001b[30m\u001b[43mclust.88\u001b[m\u001b[m/ \u001b[30m\u001b[43medits\u001b[m\u001b[m/ params.txt pear.log \u001b[30m\u001b[43mstats\u001b[m\u001b[m/\r\n" ] } ], "source": [ "ls" ] }, { "cell_type": "code", "execution_count": 33, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "/Volumes/owl/temp\n" ] } ], "source": [ "cd .." ] }, { "cell_type": "code", "execution_count": 38, "metadata": { "collapsed": false, "scrolled": true }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "\n", "real\t27m5.957s\n", "user\t0m0.141s\n", "sys\t2m48.164s\n" ] } ], "source": [ "%%bash\n", "time cp -r /Volumes/owl/temp/oly_gbs_pyrad/ /Volumes/Data/Sam/scratch/oly_gbs_pyrad/" ] }, { "cell_type": "code", "execution_count": 39, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "/Volumes/Data/Sam/scratch/oly_gbs_pyrad\n" ] } ], "source": [ "cd /Volumes/Data/Sam/scratch/oly_gbs_pyrad/" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### and let's look at an example cluster file" ] }, { "cell_type": "code", "execution_count": 57, "metadata": { "collapsed": false, "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ ">d35422.assembled_2120_r1;size=51;\n", "TGCAGCGCTACCTGCCCGATCTTTTCGAGAAGCGGACGAACTGAGAATGAACCTGCA\n", ">d35422.assembled_274175_r1;size=15;+\n", "TGCAGCGCTACCTGCCCGATCTGTTCGAGAAGCGGACGAACTGAGAATGAACCTGCA\n", ">d35422.assembled_2265219_r1;size=2;+\n", "TGCAGCGCTACCTGCCGGATCTGTTCGAGAAGCGGACGAACTGAGAATGAACCTGCA\n", ">d35422.assembled_216079_r1;size=1;+\n", "TGCAGCGCTACCTGCCCGATCTGCTCGAGAAGCGGACGAACTGAGAATGAACCTGCA\n", ">d35422.assembled_2495995_r1;size=1;+\n", "TGCAGCGCTACCTGCCCGGTCTTTTCGAGAAGCGGACGAACTGAGAATGAACCTGCA\n", ">d35422.assembled_590416_r1;size=1;+\n", "TGCAGCGCTATCTGCCCGATCTTTTCGAGAAGCGGACGAACTGAGAATGAACCTGCA\n", ">d35422.assembled_1947641_r1;size=1;+\n", "TGCAGCGCTACCTGCCCGATCTTTTCGAGAAGCGGACGAGCTGAGAATGAACCTGCA\n", ">d35422.assembled_1601389_r1;size=1;+\n", "TGCAGCGCTACCTGCCGGATCTGTTCGAGAAGCGGACGAACTGAGAATGACACTGCA\n", ">d35422.assembled_190367_r1;size=1;+\n", "TGCAGCGCTACCTGCCCGATCCTTTCGAGAAGCGGACGAACTGAGAATGAACCTGCA\n", ">d35422.assembled_2631365_r1;size=1;+\n", "TGCAGCGCTACCTGCCCGATCTTTTCGAGAAGCGGACGAACTGAGAATGAACCTGCG\n", ">d35422.assembled_126737_r1;size=1;+\n", "TGCAGCGCTACCTGCCCGATCTTTTCGAGAAGCGGACGAACTGAGAATGAACCTNCA\n", ">d35422.assembled_288692_r1;size=1;+\n", "TGCAGCGCTACCTGCCGGATCNGTTCGAGAAGCGGACGAACTGAGAATGACACTGCA\n", ">d35422.assembled_2194406_r1;size=1;+\n", "TGCAGCGCTANCTNCCCGATCTTTTCGAGAAGCGGANNANCTGAGAATGAACCTGCA\n", ">d35422.assembled_831862_r1;size=1;+\n", "TGCAGCGCTACCTGNCCGATCTTTTCGAGAAGCGGACGAACTGAGAATGAACCTGCA\n", "//\n", "//\n", ">d35422.assembled_61639_r1;size=24;\n", "TGCAGCGCCAGTACGAGCAGGCCGAGATGGGCCTGCGCCAGTTCATCCAGTCGCATCCCCGCGACGCCCTGGTGCCGGCC\n", ">d35422.assembled_825340_r1;size=8;+\n", "TGCAGCGCCAGTACGAGCAGGCCGAGATGGGCCTGCGTCAGTTCATCCAGTCGCATCCGCGCGACGCCCTGGTGCCGTCC\n", ">d35422.assembled_88436_r1;size=2;+\n", "TGCAGCGCCAGTACGAGCAGGCCGAGATGGGCCTGCGCCAGTTCATCCAGTCGCATCCCCGCGACGCCCTGGTGCCGGCC\n", ">d35422.assembled_354389_r1;size=2;+\n", "TGCAGCGCCAGTACGAGCAGGCCGAGATGGGCCTGCGCCAGTTCATCCAGTCGCATCCCCGCGACGCCCTGGTGCCGGCC\n", ">d35422.assembled_2229078_r1;size=1;+\n", "TGCAGCGCCAGTACGAGCAGGCCGAGATGGGCCTACGTCAGTTCATCCAGTCGCATCCGCGCGACTCCCTGGTGCCGTCC\n", ">d35422.assembled_1405977_r1;size=1;+\n", "TGCAGCGCCAGTACGAGCAGGCCGAGATGGGCCTGCGCCAGTTCATNCAGTCGCANCCCCGCGACGCCCTGGTGCCGNCC\n", ">d35422.assembled_2738651_r1;size=1;+\n", "TGCAGCGCCAGTACGAGCAGGCCGAGATGGGCCTGCGCCAGTTCATCCAGTCGCATCCCCGCGACGCCCTGGTGCCGGCC\n", ">d35422.assembled_2565401_r1;size=1;+\n", "TGCAGCGTCAGTACGAGCTGGCGGAGATGGGCCTGCGTCAGTTCATCCAGTCCCATCCGCGTGATGCACTGGTACCGGCC\n", ">d35422.assembled_2060896_r1;size=1;+\n", "TGCAGCGCCAGTACGAGCAGGCCGAGATGGGCCTGCGCCAGTTCATCCAGTCGCATCCGCGCGACGCCCTCGTGCCGGCC\n", ">d35422.assembled_27774_r1;size=1;+\n", "TGCAGCGTCAGTACGAGCTGGCGGAGATGGGCCTGCGTCAGTTCATCCAGTCCCATCCGCGTGATGCACTGGTACCGGCC\n", ">d35422.assembled_2776613_r1;size=1;+\n", "TGCAGCGCCAGTACGAGCAGGCCGAGATGGGCCTGCGCCAGTTCATCCAGTCGCATCCCCGCGACGCCCTGGTGCCGGCC\n", ">d35422.assembled_2566934_r1;size=1;+\n", "TGCAGCGCCAGTACGAGCAGGCCGGGATGGGCCTGCGTCAGTTCATCCAGTCGCATCCGCGCGACGCCCTGGTGCCGTCC\n", "//\n", "//\n", ">d35422.assembled_781655_r1;size=2;+\n", "TGCAGGACCAGGGCTTCGCCGTGACCGCGAACATGGTGACCAGCCGCATCGTCGTGGACGCCGTGGGCACCTTCGACGCC\n", ">d35422.assembled_1218064_r1;size=2;\n", "TGCAGGACCAGGGCTTCGCCGTGACCGCGAACATGGTGACCAGTCGTGCTGTGGTCGAAGCCGTCGGCGCCTTCGACGCC\n", ">d35422.assembled_81417_r1;size=1;+\n", "TGCAGGACCAGGGCTTCGCCGTGNCCGCGAACATGGTNACCACCCGCGCCGTTGTCGACGCCGTCGGCACCTTCGACGCC\n", ">d35422.assembled_1447392_r1;size=1;+\n", "TGCAGGACCAGGGCTTCGCNGTGACCNCGAACATGGTGACCAGCCGCATCGTCGTGGACGCCGTGGGCACCTTCGACGCC\n", ">d35422.assembled_1916059_r1;size=1;+\n", "TGCAGGACCAGGGCTTCGCCGTGACCGCGAACATGGTGACCAGCCGCGTCGTCGTGGACGCCGTGGGCACCTTCGACGCC\n", "//\n", "//\n", ">d35422.assembled_1086507_r1;size=1;\n", "TGCAGNNANC-AAAAAAAATATATG-TTTTTTTTTTTTGCTGCA\n", ">d35422.assembled_1935566_r1;size=1;+\n", "TGCAGNTAACNAAAAAAAATATATGTTTTTTTTTTTTTGCTGCA\n", "//\n", "//\n", ">d35422.assembled_105061_r1;size=2;\n", "TGCAGGTCTCCTCCGACGTCTACTACTACACGGTCGGCTCGAAGCTCTTCTCGAGGCCGAACAAGCCCCTGCA\n", ">d35422.assembled_405948_r1;size=2;+\n", "TGCAGGTCTCCTCCGACGTCTACTACTACACGGTCGGCGCGAACCTCTTCGAGAGGAAGAACAAGCCGCTGCA\n", ">d35422.assembled_1112236_r1;size=1;+\n", "TGCAGGTNNCCTCNGACGTCTACTACTACACGGTCGGCGCGAACCTCTTCTCCAAGCCGAACAAGCCGCTGCA\n", ">d35422.assembled_129605_r1;size=1;+\n", "TGCAGGTGTCCTCCGACGTCTACTACTACACCGTCGGGTCGAAGCTCTTCTCGAGGCCGAACAAGCCCCTGCA\n", ">d35422.assembled_108168_r1;size=1;+\n", "TGCAGGTGTCCTCCGACGTCTACTACTACACGGTCGGCGCGAACCTCTTCTCGAAGCCGAACAAGCCCCTGCA\n", ">d35422.assembled_1796775_r1;size=1;+\n", "TGCAGGTGTCCTCCGACGTCTACTACTACACGGTCGGCGCGAACCTCTTCTCCAAGCCGAACAAGCCGCTGCA\n", "//\n", "//\n", ">d35422.assembled_2015519_r1;size=1;+\n", "TGCAGGGGGNCGCGGGGAGCAACCCCAGCTCGGTGCACGTGAGACAGGTGTCGATCACGGGCGGGGACCTCCTGGGGCCT\n", ">d35422.assembled_1326334_r1;size=1;\n", "TGCAGGGGGTCGCGGGGAGCAACCCCAGCTCGGTGCACGTGAGACAGGTGTCGATCACGGGCGGGGACCTCCTGGGGCCT\n", "//\n", "//\n", ">d35422.assembled_735556_r1;size=2;\n", "TGCAGCGCCGCCAGCGGCTGCAGAACTGCCCCCGAGTTTCCAGCTTCTGCA\n", ">d35422.assembled_2203593_r1;size=1;+\n", "TGCAGCGCCTGGACGCCCTGCAGAACTGCCCCCGAGTTTCCAGCTTCTGCA\n", "//\n", "//\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n", "gzip: stdout: Broken pipe\n" ] } ], "source": [ "%%bash\n", "gzip -cd analysis_pyrad/clust.85/d35422.assembled.clustS.gz | head -n 100 | cut -b 1-80" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Step 4: Parameter estimation" ] }, { "cell_type": "code", "execution_count": 40, "metadata": { "collapsed": false }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "\n", "\n", " ------------------------------------------------------------\n", " pyRAD : RADseq for phylogenetics & introgression analyses\n", " ------------------------------------------------------------\n", "\n", "\n", "\tstep 4: estimating error rate and heterozygosity\n", "\t............................\n", "real\t106m53.916s\n", "user\t1403m36.534s\n", "sys\t1m17.698s\n" ] } ], "source": [ "%%bash\n", "time pyrad -p params.txt -s 4" ] }, { "cell_type": "code", "execution_count": 41, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "taxa\tH\tE\n", "1NF_1A.assembled\t0.01462331\t0.00325686\t\n", "1HL_5A.assembled\t0.01406749\t0.0032951\t\n", "1HL_6A.assembled\t0.01505022\t0.00348848\t\n", "1NF_10A.assembled\t0.01534361\t0.00308861\t\n", "1HL_3A.assembled\t0.01364489\t0.00319892\t\n", "1HL_1A.assembled\t0.01579058\t0.00339714\t\n", "1NF_6A.assembled\t0.01539856\t0.00312442\t\n", "1HL_4A.assembled\t0.01459177\t0.00315271\t\n", "1HL_8A.assembled\t0.01639449\t0.00322956\t\n", "1NF_7A.assembled\t0.01590034\t0.00303005\t\n", "1SN_3A.assembled\t0.01520282\t0.00335947\t\n", "1SN_10A.assembled\t0.01505682\t0.00330012\t\n", "1HL_2A.assembled\t0.01482491\t0.00313122\t\n", "1HL_9A.assembled\t0.01540366\t0.00296631\t\n", "1NF_8A.assembled\t0.01642511\t0.00323494\t\n", "1NF_5A.assembled\t0.01519121\t0.00314829\t\n", "1SN_1A.assembled\t0.01575731\t0.00306824\t\n", "1SN_7A.assembled\t0.01513415\t0.00310041\t\n", "1SN_5A.assembled\t0.01463804\t0.0031614\t\n", "1SN_2A.assembled\t0.01625897\t0.00318461\t\n", "1HL_10A.assembled\t0.01516635\t0.00324934\t\n", "1SN_9A.assembled\t0.01423233\t0.00315477\t\n", "1SN_8A.assembled\t0.01640971\t0.00328322\t\n", "1SN_4A.assembled\t0.01409185\t0.00304299\t\n", "1SN_6A.assembled\t0.01553508\t0.00320316\t\n", "1NF_9A.assembled\t0.01569505\t0.00318668\t\n", "1NF_2A.assembled\t0.01581381\t0.00301155\t\n", "1NF_4A.assembled\t0.01508062\t0.00300757\t\n" ] } ], "source": [ "%%bash\n", "cat stats/Pi_E_estimate.txt" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Step 5: Consenus base calling\n", "\n", "You may want to adjust:\n", " + max cluster size (param 33)\n", " + max number of Ns\n", " + max number of Hs" ] }, { "cell_type": "code", "execution_count": 42, "metadata": { "collapsed": false }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "\n", "\n", " ------------------------------------------------------------\n", " pyRAD : RADseq for phylogenetics & introgression analyses\n", " ------------------------------------------------------------\n", "\n", "\n", "\tstep 5: creating consensus seqs for 28 samples, using H=0.01524 E=0.00318\n", "\t............................\n", "real\t66m47.210s\n", "user\t827m35.100s\n", "sys\t0m46.871s\n" ] } ], "source": [ "%%bash\n", "time pyrad -p params.txt -s 5" ] }, { "cell_type": "code", "execution_count": 43, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "taxon \tnloci\tf1loci\tf2loci\tnsites\tnpoly\tpoly\n", "1NF_8A.assembled\t834461\t12424\t9070\t936467\t9052\t0.0096661\n", "1HL_2A.assembled\t806060\t13178\t10137\t1044548\t9526\t0.0091197\n", "1NF_5A.assembled\t839831\t13010\t10171\t1069632\t9293\t0.008688\n", "1HL_9A.assembled\t843114\t13174\t10343\t1071651\t10224\t0.0095404\n", "1SN_1A.assembled\t878975\t13742\t10736\t1134533\t9765\t0.0086071\n", "1SN_2A.assembled\t897159\t13979\t10742\t1118263\t10144\t0.0090712\n", "1SN_7A.assembled\t884633\t14171\t11005\t1147984\t10236\t0.0089165\n", "1SN_5A.assembled\t897389\t14581\t11416\t1184878\t10807\t0.0091208\n", "1HL_10A.assembled\t929902\t14745\t11554\t1198784\t10908\t0.0090992\n", "1SN_9A.assembled\t949018\t15100\t11927\t1244046\t11194\t0.0089981\n", "1SN_8A.assembled\t961067\t15315\t11873\t1242588\t10833\t0.0087181\n", "1SN_6A.assembled\t981945\t15562\t12013\t1252587\t11659\t0.0093079\n", "1NF_9A.assembled\t984792\t15371\t11942\t1268914\t10701\t0.0084332\n", "1SN_4A.assembled\t961256\t15777\t12499\t1300779\t11409\t0.0087709\n", "1NF_2A.assembled\t1002618\t15699\t12176\t1292382\t11082\t0.0085749\n", "1NF_4A.assembled\t1052234\t16795\t13192\t1398803\t12103\t0.0086524\n", "1NF_1A.assembled\t432755\t6680\t5139\t536894\t5451\t0.0101528\n", "1SN_3A.assembled\t775704\t10944\t8257\t850361\t8592\t0.0101039\n", "1HL_1A.assembled\t697155\t10663\t7979\t807288\t8288\t0.0102665\n", "1HL_8A.assembled\t764053\t11480\t8863\t921573\t8513\t0.0092375\n", "1HL_5A.assembled\t610261\t9590\t7466\t764440\t7345\t0.0096083\n", "1HL_6A.assembled\t645676\t9954\t7293\t732928\t8296\t0.011319\n", "1SN_10A.assembled\t778305\t12280\t9610\t1001472\t9488\t0.0094741\n", "1NF_10A.assembled\t660691\t10012\t7687\t808979\t7778\t0.0096146\n", "1HL_3A.assembled\t692692\t10828\t8461\t868384\t8396\t0.0096685\n", "1NF_6A.assembled\t734107\t11452\t8962\t944288\t8628\t0.009137\n", "1NF_7A.assembled\t760816\t12252\t9601\t1016859\t9056\t0.0089059\n", "1HL_4A.assembled\t758890\t12672\t9914\t1027933\t9402\t0.0091465\n", "\n", " ## nloci = number of loci\n", " ## f1loci = number of loci with >N depth coverage\n", " ## f2loci = number of loci with >N depth and passed paralog filter\n", " ## nsites = number of sites across f loci\n", " ## npoly = number of polymorphic sites in nsites\n", " ## poly = frequency of polymorphic sites\n" ] } ], "source": [ "%%bash\n", "cat stats/s5.consens.txt" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Cluster across samples" ] }, { "cell_type": "code", "execution_count": 44, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "vsearch v1.11.1_osx_x86_64, 24.0GB RAM, 16 cores\n", "https://github.com/torognes/vsearch\n", "\n", "\n", "\tfinished clustering\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n", "\n", " ------------------------------------------------------------\n", " pyRAD : RADseq for phylogenetics & introgression analyses\n", " ------------------------------------------------------------\n", "\n", "\n", "\tstep 6: clustering across 28 samples at '.88' similarity \n", "\n", "Reading file /Volumes/Data/Sam/scratch/oly_gbs_pyrad/clust.88/cat.haplos_ 100%\n", "30308350 nt in 280028 seqs, min 32, max 245, avg 108\n", "Counting unique k-mers 100%\n", "Clustering 100%\n", "Sorting clusters 100%\n", "Writing clusters 100%\n", "Clusters: 51756 Size min 1, max 68, avg 5.4\n", "Singletons: 16399, 5.9% of seqs, 31.7% of clusters\n", "\n", "real\t6m9.378s\n", "user\t7m42.856s\n", "sys\t0m6.083s\n" ] } ], "source": [ "%%bash\n", "time pyrad -p params.txt -s 6" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Assemble final data set" ] }, { "cell_type": "code", "execution_count": 45, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\tingroup 1HL_10A.assembled,1HL_1A.assembled,1HL_2A.assembled,1HL_3A.assembled,1HL_4A.assembled,1HL_5A.assembled,1HL_6A.assembled,1HL_8A.assembled,1HL_9A.assembled,1NF_10A.assembled,1NF_1A.assembled,1NF_2A.assembled,1NF_4A.assembled,1NF_5A.assembled,1NF_6A.assembled,1NF_7A.assembled,1NF_8A.assembled,1NF_9A.assembled,1SN_10A.assembled,1SN_1A.assembled,1SN_2A.assembled,1SN_3A.assembled,1SN_4A.assembled,1SN_5A.assembled,1SN_6A.assembled,1SN_7A.assembled,1SN_8A.assembled,1SN_9A.assembled\n", "\taddon \n", "\texclude \n", "\t\n", "\tfinal stats written to:\n", "\t /Volumes/Data/Sam/scratch/oly_gbs_pyrad/stats/oly_gbs_pyrad.stats\n", "\toutput files being written to:\n", "\t /Volumes/Data/Sam/scratch/oly_gbs_pyrad/outfiles/ directory\n", "\n", "\tfiltering & writing to phylip file\n", "\twriting nexus file\n", "\tWriting gphocs file\n", "\t + writing full SNPs file\n", "\t + writing unlinked bi-allelic SNPs file\n", "\t + writing STRUCTURE file\n", "\t + writing geno file\n", "\t ** must enter group/clade assignments for treemix output \n", "\twriting vcf file\n", "\t ** must enter group/clade assignments for migrate-n output \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n", "\n", " ------------------------------------------------------------\n", " pyRAD : RADseq for phylogenetics & introgression analyses\n", " ------------------------------------------------------------\n", "\n", "................\n", "real\t5m33.004s\n", "user\t22m21.544s\n", "sys\t4m54.310s\n" ] } ], "source": [ "%%bash\n", "time pyrad -p params.txt -s 7" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Examine results" ] }, { "cell_type": "code", "execution_count": 46, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "\n", "21017 ## loci with > minsp containing data\n", "18212 ## loci with > minsp containing data & paralogs removed\n", "18212 ## loci with > minsp containing data & paralogs removed & final filtering\n", "\n", "## number of loci recovered in final data set for each taxon.\n", "taxon\tnloci\n", "1HL_10A.assembled\t6563\n", "1HL_1A.assembled \t4706\n", "1HL_2A.assembled \t5971\n", "1HL_3A.assembled \t5037\n", "1HL_4A.assembled \t5910\n", "1HL_5A.assembled \t4373\n", "1HL_6A.assembled \t4299\n", "1HL_8A.assembled \t5148\n", "1HL_9A.assembled \t6075\n", "1NF_10A.assembled\t4560\n", "1NF_1A.assembled \t3123\n", "1NF_2A.assembled \t6960\n", "1NF_4A.assembled \t7381\n", "1NF_5A.assembled \t5920\n", "1NF_6A.assembled \t5270\n", "1NF_7A.assembled \t5690\n", "1NF_8A.assembled \t5287\n", "1NF_9A.assembled \t6863\n", "1SN_10A.assembled\t5651\n", "1SN_1A.assembled \t6293\n", "1SN_2A.assembled \t6287\n", "1SN_3A.assembled \t4786\n", "1SN_4A.assembled \t7230\n", "1SN_5A.assembled \t6577\n", "1SN_6A.assembled \t6770\n", "1SN_7A.assembled \t6365\n", "1SN_8A.assembled \t6794\n", "1SN_9A.assembled \t6873\n", "\n", "\n", "## nloci = number of loci with data for exactly ntaxa\n", "## ntotal = number of loci for which at least ntaxa have data\n", "ntaxa\tnloci\tsaved\tntotal\n", "1\t-\n", "2\t-\t\t-\n", "3\t-\t\t-\n", "4\t3551\t*\t18212\n", "5\t2701\t*\t14661\n", "6\t2051\t*\t11960\n", "7\t1655\t*\t9909\n", "8\t1386\t*\t8254\n", "9\t1089\t*\t6868\n", "10\t876\t*\t5779\n", "11\t732\t*\t4903\n", "12\t601\t*\t4171\n", "13\t516\t*\t3570\n", "14\t400\t*\t3054\n", "15\t328\t*\t2654\n", "16\t291\t*\t2326\n", "17\t271\t*\t2035\n", "18\t224\t*\t1764\n", "19\t193\t*\t1540\n", "20\t175\t*\t1347\n", "21\t175\t*\t1172\n", "22\t139\t*\t997\n", "23\t157\t*\t858\n", "24\t160\t*\t701\n", "25\t132\t*\t541\n", "26\t118\t*\t409\n", "27\t141\t*\t291\n", "28\t150\t*\t150\n", "\n", "\n", "## nvar = number of loci containing n variable sites (pis+autapomorphies).\n", "## sumvar = sum of variable sites (SNPs).\n", "## pis = number of loci containing n parsimony informative sites.\n", "## sumpis = sum of parsimony informative sites.\n", "\tnvar\tsumvar\tPIS\tsumPIS\n", "0\t7053\t0\t10716\t0\n", "1\t3045\t3045\t2795\t2795\n", "2\t1991\t7027\t1563\t5921\n", "3\t1369\t11134\t995\t8906\n", "4\t1000\t15134\t678\t11618\n", "5\t811\t19189\t489\t14063\n", "6\t639\t23023\t348\t16151\n", "7\t569\t27006\t227\t17740\n", "8\t411\t30294\t145\t18900\n", "9\t303\t33021\t87\t19683\n", "10\t211\t35131\t62\t20303\n", "11\t186\t37177\t26\t20589\n", "12\t129\t38725\t31\t20961\n", "13\t112\t40181\t11\t21104\n", "14\t79\t41287\t9\t21230\n", "15\t73\t42382\t9\t21365\n", "16\t48\t43150\t9\t21509\n", "17\t43\t43881\t3\t21560\n", "18\t35\t44511\t4\t21632\n", "19\t17\t44834\t1\t21651\n", "20\t15\t45134\t1\t21671\n", "21\t13\t45407\t0\t21671\n", "22\t11\t45649\t1\t21693\n", "23\t12\t45925\t0\t21693\n", "24\t6\t46069\t0\t21693\n", "25\t8\t46269\t0\t21693\n", "26\t3\t46347\t0\t21693\n", "27\t5\t46482\t0\t21693\n", "28\t1\t46510\t0\t21693\n", "29\t3\t46597\t0\t21693\n", "30\t1\t46627\t0\t21693\n", "31\t1\t46658\t0\t21693\n", "32\t1\t46690\t0\t21693\n", "33\t2\t46756\t0\t21693\n", "34\t1\t46790\t0\t21693\n", "35\t1\t46825\t0\t21693\n", "36\t0\t46825\t1\t21729\n", "37\t2\t46899\t0\t21729\n", "38\t0\t46899\t0\t21729\n", "39\t0\t46899\t0\t21729\n", "40\t0\t46899\t0\t21729\n", "41\t0\t46899\t0\t21729\n", "42\t1\t46941\t0\t21729\n", "43\t0\t46941\t1\t21772\n", "44\t0\t46941\t0\t21772\n", "45\t1\t46986\t0\t21772\n", "total var= 46986\n", "total pis= 21772\n", "sampled unlinked SNPs= 11159\n", "sampled unlinked bi-allelic SNPs= 11074\n" ] } ], "source": [ "%%bash\n", "cat stats/oly_gbs_pyrad.stats" ] }, { "cell_type": "code", "execution_count": 47, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ ">1HL_10A.assembled TATAAAATGAATGAAATATTG-ATTACTTTGAAGGCGCTTAATGTTACATCAACTTCCAGCCAGTG\n", ">1HL_1A.assembled TATAAAATGAATGAAATATTG-ATTACTTTGAAGGCGCTTAATGTTACATCAACTTCCAGCCAGTG\n", ">1HL_3A.assembled TATAAAATGAATGAAATATTG-ATTACTTTGAAGGCGCTTAATGTTACATCAACTTCCAGCCAGTG\n", ">1HL_5A.assembled TATAAAATGAATGAAATATTG-ATTACTTTGAAGGCGCTTAATGTTACATCAACTTCCAGCCAGTG\n", ">1HL_8A.assembled TATAAAATGAATGAAATATTGNATTACTTTGAAGGCGCTTAATGTTACATCAACTTCCAGCCAGTG\n", ">1NF_10A.assembled TATAAAATGAATGAAATATTG-ATTACTTTGAAGGCGCTTAATGTTACATCAACTTCCAGCCAGTG\n", ">1NF_4A.assembled -ATAAAATGAATGAAATATTG-ATTACTTTGAAGGCGCTTAATGTTACATCAACTTCCAGCCAGTG\n", ">1NF_6A.assembled TATAAAATGAATGAAATATTG-ATTACTTTGAAGGCGCTTAATGTTACATCAACTTCCAGCCAGTG\n", ">1NF_7A.assembled TATAAAATGAATGAAATATTG-ATTACTTTGAAGGCGCTTAATGTTACATCAACTTCCAGCCAGTG\n", ">1NF_8A.assembled TATAAAATGAATGAAATATTG-ATTACTTTGAAGGCGCTTAATGTTACATCAACTTCCAGCCAGTG\n", ">1SN_6A.assembled TATAAAATGAATGAAATATTG-ATTACTTTGAAGGCGCTTAATGTTACATCAACTTCCAGCCAGTG\n", ">1SN_8A.assembled TATAAAATGAATGAAATATTG-ATTACTTTGAAGGCGCTTAATGTTACATCAACTTCCAGCCAGTG\n", "// \n", ">1NF_5A.assembled CATTTTGCAAGATGAAAGTTATCGCTCTATTTGTCTTATTTGACATTTCCCACAATACCATCTGGT\n", ">1SN_1A.assembled CATTTTGCAAGATGAAAGTTATCGCTCTATTTGTCTTATTTGACATTTCCCACAATACCATCTGGT\n", ">1SN_3A.assembled CATTTTGCAAGATGAAAGTTATCGCTCTATTTGTCTTATTTGACATTTCCCACAATACCATCTGGT\n", ">1SN_4A.assembled -ATTTTGCAAGATGAAAGTTATCGCTCTATTTGTCTTATTTGACATTTCCCACAATACCATCTGGT\n", ">1SN_7A.assembled ------------------------------------------------------ATACCATCTGGT\n", ">1SN_8A.assembled CATTTTGCAAGATGAAAGTTATCGCTCTATTTGTCTTATTTGACATTTCCCACAATACCATCTGGT\n", "// \n", ">1HL_10A.assembled CCTCCCGCGCCCTAATTGGGAACCAGTTTTATATTATTCGATAACCTGTGTCATGGCTTTTCTTTT\n", ">1NF_6A.assembled CCTCCCGCGCCCTAATTGGGAACCAGTTTTATATTATTCGATAACCTGTGTCATGGCTTTTCTTTT\n", ">1NF_9A.assembled CCTCCCGCGCCCTAATTGNGNACCAGTTTTANA----NCGATAACCTGTGTCATGGCTTTTCTTTT\n", ">1SN_6A.assembled ----CCGCGCCCTAATTGGGAACCAGTTTTATATTATTCGATAACCTGTGTCATGGCTTTTCTTTT\n", ">1SN_7A.assembled CCTCCCGCGCCCTAATTGGGAACCAGTTTTATATTATTCGATAACCTGTGTCATGGCTTTTCTTTT\n", "// \n", ">1HL_9A.assembled CGACTAATAGCTTCTGTATATTCCGGAATGCCAATTTTGAATAGGTTTGAACTTTATCTGCGTTCT\n", ">1NF_8A.assembled CGACTAATAGCTTCTGTATATTCCGGAATGCCAATTTTGAATAGGTTTGAACTTTATCTGCGTTCT\n", ">1NF_9A.assembled CGACTAATAGCTTCTGTATATTCCGGAATGCCAATTTTGAATAGGTTTGAACTTTATCTGCGTTCT\n", ">1SN_7A.assembled CGACTAATAGCTTCTGTATATTCCGGAATGCCAATTTTGAATAGGTTTGAACTTTATCTGCGTTCT\n", "// \n", ">1HL_6A.assembled GCTCATAGTACCAC-GAATTTTCAATCCTGATATAGCGGGCTAATCAAGGTGTGCCGTTCATAAAG\n", ">1HL_9A.assembled GCTCATAGTACCAC-GAATTTTCAATCCTGATATAGCGGGCTAATCAAGGTGTGCCGTTCATAAAG\n", ">1NF_2A.assembled GCTCATAGTACCAC-GAATTTTCAATCCTGATATAGCGGGCTAATCAAGGTGTGCCGTTCATAAAG\n", ">1NF_5A.assembled GCTCATAGTACCACNGAATTTTCAATCCTGATATAGCGGGCTAATCAAGGTGTGCCGTTCATAAAG\n", ">1NF_6A.assembled GCTCATAGTACCAC-GAATTTTCAATCCTGATATAGCGGGCTAATCAAGGTGTGCCGTTCATAAAG\n", ">1NF_7A.assembled GCTCATAGTACCAC-GAATTTTCAATCCTGATATAGCGGGCTAATCAAGGTGTGCCGTTCATAAAG\n", ">1NF_8A.assembled GCTCATAGTACCAC-GAATTTTCAATCCTGATATAGCGGGCTAATCAAGGTGTGCCGTTCATAAAG\n", ">1SN_10A.assembled GCTCATAGTACCAC-GAATTTTCAATCCTGATATAGCGGGCTAATCAAGGTGTGCCGTTCATAAAG\n", ">1SN_5A.assembled GCTCATAGTACCACNGAATTTTCAATCCTGATATAGCGGGCTAATCAAGGTGTGCCGTTCATAAAG\n", ">1SN_6A.assembled GCTCATAGTACCAC-GAATTTTCAATCCTGATATAGCGGGCTAATCAAGGTGTGCCGTTCATAAAG\n", ">1SN_7A.assembled GCTCATAGTACCAC-GAATTTTCAATCCTGATATAGCGGGCTAATCAAGGTGTGCCGTTCATAAAG\n", ">1SN_8A.assembled GCTCATAGTACCAC-GAATTTTCAATCCTGATATAGCGGGCTAATCAAGGTGTGCCGTTCATAAAG\n", ">1SN_9A.assembled GCTCATAGTACCAC-GAATTTTCAATCCTGATATAGCGGGCTAATCAAGGTGTGCCGTTCATAAAG\n", "// \n", ">1NF_5A.assembled GACAACAACTAATTCGTAGTCTAGCGAAAGACCCCGTTGCTGTACATAAATCTGGAGTATTGGAAG\n", ">1SN_1A.assembled GACAACAACTAATTCGTAGTCTAGCGAAAGACCCCGTTGCTGTACATAAATCTGGAGTATTGGAAG\n", ">1SN_3A.assembled GACAACAACTAATTCGTAGTCTAGCGAAAGACCCCGTTGCTGTACATAAATCTGGAGTATTGGAAG\n", ">1SN_7A.assembled GACAACAACTAATTCGTAGTCTAGCGAAAGACCCCGTTGCTGTACATAAATCTGGAGTATTGGAAG\n", ">1SN_8A.assembled GACAACAACTAATTCGTAGTCTAGCGAAAGACCCCGTTGCTGTACATAAATCTGGAGTATTGGAAG\n", "// \n", ">1HL_1A.assembled ----ATAAGTACGAGGTGATGTGTAAGCCCGAGACCTGCACCATTCATCCTCTCACCTGCACGCCC\n", ">1SN_1A.assembled TGCGATAAGTACGAGGTGATGTGTAAGCCCGAGACCTGCACCATTCAKCCTCTCACCTGCACGCCC\n", ">1SN_2A.assembled TGCGATAAGTACGAGGTGATGTGTAAGCCCGAGACCTGCACCATTCAKCCTCTCACCTGCACGCCC\n", ">1SN_4A.assembled --------------------------------------------------TCTCACCTGCACGCCC\n", ">1SN_6A.assembled TGCGATAAGTACGAGGTGATGTGTAAGCCCGAGACCTGCACCATTCATCCTCTCACCTGCACGCCC\n", "// * \n", ">1HL_4A.assembled CAAGAGATCCAGTTGCAGTGGCTATTTCCATGTTGTTGCCAGGTAATCCAGTTGCATGGGCTGTTT\n", ">1HL_9A.assembled -AAGAGATCCAGTTGCAGTGGCTATTTCCATGTTGTTGCCAGGTAATCCAGTTGCATGGGCTGTTT\n", ">1NF_6A.assembled -AAGAGATCCAGTTGCAGTGGCTATTTCCATGTTGTTGCCAGGTAATCCAGTTGCATGGGCTGTTT\n", ">1NF_8A.assembled CAAGAGATCCAGTTGCAGTGGCTATTTCCATGTTGTTGCCAGGTAATCCAGTTGCATGGGCTGTTT\n", ">1NF_9A.assembled CAAGAGATCCAGTTGCAGTGGCTRTTTCCATGTTGTTGCNAGGTAATCCAGTTGCATGGGCTGTTT\n", ">1SN_3A.assembled ----------------AGTGGCTGTTTCCATGTTGTTGCAAGGTAATCCAGTTGCATGGNCTGTTT\n", ">1SN_4A.assembled CAAGAGATCCAGTTGCAGTGGCTATTTCCATGTTGTTGCMAGGTAATCCAGTTGCATGGRCTGTTT\n", ">1SN_8A.assembled CAAGAGATCCAGTTGCAGTGGCT-TTTCCATGTTGTTGCNAGGTAATCCAGTTGCATGGNCTGTTT\n", "// * * - \n", ">1HL_10A.assembled AATTTCTGCGGGACTGTACAGTCCCATATCGGCTACATGTAGGAGAGTC\n", ">1HL_8A.assembled AATTTCTGCGGGACTGTACNGTCCCATNTCGGCTACATNTAGGAGAGTC\n", ">1NF_2A.assembled AATTTCTGCGGGACTGTACAGTCCCATATCGGCTACATGTAGGAGAGTC\n", ">1NF_9A.assembled AATTTCTGCGGGACTGTACNGTCCCATATCGGCTANATGNAGGAGAGTC\n", ">1SN_3A.assembled AATTTCTGCGGGACTGTACAGTCCCATATCGGCTACATGTAGGAGAGTC\n", ">1SN_5A.assembled AATTTCTGCGGGACTGNACNGTCCCATNTCGGCTACAWRNAGGAGAGTC\n", ">1SN_7A.assembled AATTTCTGCGGGACTGWACTGTCCMATNTCGGC---AWRWAGGAGAGTC\n", "// - - - **- |9|\n", ">1HL_2A.assembled AACATTAATGTACAGAACAAGTAAGACAGCAACATTAATGTACAGAACAAGTAAGACAGCAACATT\n", ">1HL_9A.assembled AACATTAATGTACAGAACAAGTAAGACAGCAACATTAATGTACAGAACAAGTAAGACAGCAACATT\n", ">1NF_2A.assembled AACATTAATGTACAGAACAAGTAAGACAGCAACATTAATGTACAGAACAAGTAAGACAGCAACATT\n", ">1NF_6A.assembled AACATTAATGTACAGAACAAGTAAGACAGCAACATTAATGTACAGAACAAGTAAGACAGCAACATT\n", ">1NF_8A.assembled AACATTAATGTACAGAACAAGTAAGACAGCAACATTAATGTACAGAACAAGTAAGACAGCAACATT\n", ">1SN_7A.assembled AACATNAATGTACAGAACAAGTAAGACAGCAACATTAATGTACAGAACAAGTAAGACAGCAACATT\n", ">1SN_8A.assembled AACATTAATGTACAGAACAAGTAAGACAGCAACATTAATGTACAGAACAAGTAAGACAGCAACATT\n", "// \n", ">1HL_6A.assembled AAAAGTGATTTCTATTAGGGGCATATGGGGCTTAGTAAACATTATAACATGATTAGTATCTGACCT\n", ">1NF_10A.assembled AAAAGTGATTTCTATTAGGGGCATATGGGGCTTAGTAAACATTATAACATGATTAGTATCTGACCT\n", ">1NF_5A.assembled AAAAGTGATTTCTATTAGGGGCATATGGGGCTTAGTAAACATTATAACATGATTAGTATCTGACCT\n", ">1NF_9A.assembled AAAAGTGATTTCTATTAGGGGCATATGGGGCTTAGTAAACATTATAACATGATTAGTATCTGACCT\n", ">1SN_10A.assembled AAAAGTGATTTCTATTAGGGGCATATGGGGCTTAGTAAACATTATAACATGATTAGTATCTGACCT\n", ">1SN_1A.assembled -----------------------------------------------------TAGNNNCTGACCT\n", ">1SN_2A.assembled AAAAGTGATTTCTATTAGGGGCATATGGGGCTTAGTAAACATTATAACATGATTAGTATCTGACCT\n", ">1SN_4A.assembled AAAAGTGATTTCTATTAGGGGCATATGGGGCTTAGTAAACATTATAACATGATTAGTATCTGACCT\n", ">1SN_5A.assembled AAAAGTGATTTCTATTAGGGGCATATGGGGCTTAGTAAACATTATAACATGATTAGTATCTGACCT\n", ">1SN_6A.assembled AAAAGTGATTTCTATTAGGGGCATATGGGGCTTAGTAAACATTATAACATGATTAGTATCTGACCT\n", ">1SN_7A.assembled AAAAGTGATTTCTATTAGGGGCATATGGGGCTTAGTAAACATTATAACATGATTAGTATCTGACCT\n", ">1SN_8A.assembled AAAAGTGATTTCTATTAGGGGCATATGGGGCTTAGTAAACATTATAACATGATTAGTATCTGACCT\n", ">1SN_9A.assembled AAAAGTGATTTCTATTAGGGGCATATGGGGCTTAGTAAACATTATAACATGATTAGTATCTGACCT\n", "// \n", ">1HL_2A.assembled CGYAGAAAACATCTTTCCTACCAGAGCTAACACTGTCGTCAA\n", ">1NF_5A.assembled CGTAGAAAACATCTTTCCTACCAGAGCTAACACTGTCGTCAA\n", ">1NF_8A.assembled CGTAGAAAACATCTTTCCTACCAGAGCTAACACTGTCGTCAA\n", ">1NF_9A.assembled CGCAGAAAACATCTTTCCTACCAGAGCTAACACTGTCGTCAA\n" ] } ], "source": [ "%%bash\n", "head -n 100 outfiles/oly_gbs_pyrad.loci | cut -b 1-88" ] }, { "cell_type": "code", "execution_count": 48, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Pi_E_estimate.txt oly_gbs_pyrad.stats s2.rawedit.txt s3.clusters.txt s5.consens.txt\r\n" ] } ], "source": [ "ls stats/" ] }, { "cell_type": "code", "execution_count": 49, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "oly_gbs_pyrad.alleles oly_gbs_pyrad.loci oly_gbs_pyrad.phy.partitions oly_gbs_pyrad.str oly_gbs_pyrad.vcf\r\n", "oly_gbs_pyrad.excluded_loci oly_gbs_pyrad.nex oly_gbs_pyrad.snps oly_gbs_pyrad.unlinked_snps\r\n", "oly_gbs_pyrad.gphocs oly_gbs_pyrad.phy oly_gbs_pyrad.snps.geno oly_gbs_pyrad.usnps.geno\r\n" ] } ], "source": [ "ls outfiles/" ] }, { "cell_type": "code", "execution_count": 50, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "## 28 taxa, 36424 loci, 72251 snps\n" ] } ], "source": [ "%%bash\n", "head -1 outfiles/oly_gbs_pyrad.snps" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 2", "language": "python", "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.9" } }, "nbformat": 4, "nbformat_minor": 0 }