{ "metadata": { "name": "" }, "nbformat": 3, "nbformat_minor": 0, "worksheets": [ { "cells": [ { "cell_type": "heading", "level": 1, "metadata": {}, "source": [ "RAD Data" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We will explore how to handle RAD data - trying to use IPlant - Stacks" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Some clam data" ] }, { "cell_type": "code", "collapsed": false, "input": [ "cd /Volumes/web/whale/" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "/Volumes/web/whale\n" ] } ], "prompt_number": 1 }, { "cell_type": "code", "collapsed": false, "input": [ "!gunzip clam_RAD_s_1_sequence.txt.txt.gz" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "^C\r\n" ] } ], "prompt_number": 3 }, { "cell_type": "code", "collapsed": false, "input": [ "!head -100 clam_RAD_s_1_sequence.txt.txt" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "@HWI-ST700693_0075:1:1101:1225:2121#0/1\r\n", "GNCTACATGCTGTGTACTCTTGTGGCGAAACTGCTG\r\n", "+HWI-ST700693_0075:1:1101:1225:2121#0/1\r\n", "^BV^\\bcbbceeeeeeeeeeeeeeeeeeeedeeee_\r\n", "@HWI-ST700693_0075:1:1101:1224:2148#0/1\r\n", "AGTAGGTGCAGGCACAAATTGATAGTATTTACCAGT\r\n", "+HWI-ST700693_0075:1:1101:1224:2148#0/1\r\n", "dffafdffcfedd_eeddeedaedeefefffbdffe\r\n", "@HWI-ST700693_0075:1:1101:1199:2158#0/1\r\n", "AGGTGTTGCAGGACTGGATACTTAAAATATATTGCA\r\n", "+HWI-ST700693_0075:1:1101:1199:2158#0/1\r\n", "dddcdeffeffedffeeded_^eb\\ddd[d_]`KY_\r\n", "@HWI-ST700693_0075:1:1101:1114:2184#0/1\r\n", "AGTTAATGCAGGTTACAAATATAACAACTGACAATG\r\n", "+HWI-ST700693_0075:1:1101:1114:2184#0/1\r\n", "ggggggggfgggggggggggggggggggggfggggg\r\n", "@HWI-ST700693_0075:1:1101:1218:2203#0/1\r\n", "AGTTAATGCAGGACATGTTTCAAATATAGGGTCTAA\r\n", "+HWI-ST700693_0075:1:1101:1218:2203#0/1\r\n", "gggdgggggggggggggggggggggggggggggggf\r\n", 
"@HWI-ST700693_0075:1:1101:1191:2217#0/1\r\n", "GCCTTTTGTACATAACTTTTATAAATCATTTTGTAA\r\n", "+HWI-ST700693_0075:1:1101:1191:2217#0/1\r\n", "ff\\ffgggggggggggggggggggeggggggggggg\r\n", "@HWI-ST700693_0075:1:1101:1244:2228#0/1\r\n", "AGGGTCTGCAGGGCAGCGTTGACGGATGGGAGATAC\r\n", "+HWI-ST700693_0075:1:1101:1244:2228#0/1\r\n", "ggggggggfgggggggggggfgfgggggggegdgfg\r\n", "@HWI-ST700693_0075:1:1101:1210:2241#0/1\r\n", "ACTCTTTGCAGGGCGAGTGGTGTGAAAAAGAATTTT\r\n", "+HWI-ST700693_0075:1:1101:1210:2241#0/1\r\n", "gggggfggegggggggggfgddggggggggggfegf\r\n", "@HWI-ST700693_0075:1:1101:1160:2247#0/1\r\n", "AGGGTCTGCAGGGCGAGTGGTGTGAAAAAGAATTTT\r\n", "+HWI-ST700693_0075:1:1101:1160:2247#0/1\r\n", "ggggggggfggggggggggggggggggggggggggg\r\n", "@HWI-ST700693_0075:1:1101:1404:2116#0/1\r\n", "ANGACCTCGGCCAAGTTCGATAACTAGCCAAATCGG\r\n", "+HWI-ST700693_0075:1:1101:1404:2116#0/1\r\n", "^B]\\^cccccgggggggggggggfggggggfggggg\r\n", "@HWI-ST700693_0075:1:1101:1467:2124#0/1\r\n", "ANGGTCTGCAGGACATGAGCATTTTTCCCATAGAAA\r\n", "+HWI-ST700693_0075:1:1101:1467:2124#0/1\r\n", "bBbbbfdfdfgggggggggggggggggggggggggg\r\n", "@HWI-ST700693_0075:1:1101:1267:2139#0/1\r\n", "TGCCGCCACATGCAAAGGAATTTCCCTAAATAGTCA\r\n", "+HWI-ST700693_0075:1:1101:1267:2139#0/1\r\n", "gggggefccfggggggggggggggggggggfgfgfd\r\n", "@HWI-ST700693_0075:1:1101:1393:2140#0/1\r\n", "AGCCATTGCAGGGATGTGCAGGCTGATCTTGGTCTG\r\n", "+HWI-ST700693_0075:1:1101:1393:2140#0/1\r\n", "ggfgggggffggggdfgggffgggf_cdaccgeeef\r\n", "@HWI-ST700693_0075:1:1101:1430:2141#0/1\r\n", "TGCTGCTAGGATGGTCCTAGATGCCCAAGCACCAAT\r\n", "+HWI-ST700693_0075:1:1101:1430:2141#0/1\r\n", "ff_fffffefaeefcfffffffffffffffffffff\r\n", "@HWI-ST700693_0075:1:1101:1304:2180#0/1\r\n", "CGCCTCGACGCAGCTACTATAGAAATCGCATTACAA\r\n", "+HWI-ST700693_0075:1:1101:1304:2180#0/1\r\n", "ggaggggggagegggggggggeggggggdggggggf\r\n", "@HWI-ST700693_0075:1:1101:1497:2181#0/1\r\n", "CATGCAGGGTTCACGTTACAGGTCACCGATGCCCAG\r\n", "+HWI-ST700693_0075:1:1101:1497:2181#0/1\r\n", 
"gZggggggggggggggggggggeggggggggggggg\r\n", "@HWI-ST700693_0075:1:1101:1279:2186#0/1\r\n", "AACTTGAGCCAGAACCTGATATAAACGTGTGTATTG\r\n", "+HWI-ST700693_0075:1:1101:1279:2186#0/1\r\n", "cNYcadcdcccdcddddddddddddcdddddddddd\r\n", "@HWI-ST700693_0075:1:1101:1476:2202#0/1\r\n", "ACTCTTTGCAGGGCGATGAGATAAAAGGCAGTTTCT\r\n", "+HWI-ST700693_0075:1:1101:1476:2202#0/1\r\n", "ggegdgggcgggggggggggggdggggggggggggf\r\n", "@HWI-ST700693_0075:1:1101:1479:2223#0/1\r\n", "GCTCTTTGCAGGGCGATGAGATAAAAGGCAGTTTCT\r\n", "+HWI-ST700693_0075:1:1101:1479:2223#0/1\r\n", "fgggggggfggggggdggegcggdf_ggbbggggaf\r\n", "@HWI-ST700693_0075:1:1101:1336:2239#0/1\r\n", "AGGTGTTGCAGGAAGGTCGTTAATTCAATTTTAGTT\r\n", "+HWI-ST700693_0075:1:1101:1336:2239#0/1\r\n", "ggggggggegdeggefbfffffffffgegggfbgge\r\n", "@HWI-ST700693_0075:1:1101:1272:2241#0/1\r\n", "AGTTAATGCAGGCGTCAGACTTCATAGGATGGTCGT\r\n", "+HWI-ST700693_0075:1:1101:1272:2241#0/1\r\n", "ggggggggfgggggggggggggggggggegggcegb\r\n", "@HWI-ST700693_0075:1:1101:1723:2114#0/1\r\n", "ANCGCATGCAGGTCACCAACTGATCTCTTTCTCTTG\r\n", "+HWI-ST700693_0075:1:1101:1723:2114#0/1\r\n", "_B___dddddggggggggggggggggggggfggggg\r\n", "@HWI-ST700693_0075:1:1101:1515:2128#0/1\r\n", "GCCCAAAACCCTTCCACCATATGACCCAGTTTCAAA\r\n", "+HWI-ST700693_0075:1:1101:1515:2128#0/1\r\n", "gggggggg`ggggggggggggfgggggggggggggg\r\n", "@HWI-ST700693_0075:1:1101:1724:2139#0/1\r\n", "AGCCATTGCAGGGTTTCATTTAAACGCAATGTCAGT\r\n", "+HWI-ST700693_0075:1:1101:1724:2139#0/1\r\n", "ggggggggeggggggggggggggggeggeggggefg\r\n", "@HWI-ST700693_0075:1:1101:1621:2144#0/1\r\n", "GGCTACAAGAATGAAAACTTTGTCCGCTGCCATTTC\r\n", "+HWI-ST700693_0075:1:1101:1621:2144#0/1\r\n", "eeXecdeecccfeefffffffffffffffffffffe\r\n" ] } ], "prompt_number": 3 }, { "cell_type": "markdown", "metadata": {}, "source": [ "_Some text for perspective - methods:_\n", " \n", "Restriction site associated DNA (RAD) marker library preparation \n", "Restriction site associated DNA (RAD) marker libraries were constructed to identify diagnostic markers among 
cohorts. Genomic DNA was isolated separately from the gill tissue of BARN (n=4) and MASH (n=4) clams using DNAzol (Molecular Research Center) as per the manufacturer's recommendations. Libraries were prepared as described by Miller et al. (2007). Briefly, samples (n=8) were digested with Sbf-1 (New England Biolabs), then each hybridized with a unique barcode, and RAD adapters (P1 and P2) were ligated onto DNA fragments. Size selection of DNA fragments was achieved by running PCR on a 1% EZ gel (Invitrogen) with E-gel 1 kb Plus DNA ladder followed by purification using the MinElute gel purification protocol. Subsequent library construction and sequencing was carried out by the University of Washington High Throughput Genomics Unit (HTGU) using the Illumina HiSeq2000 system. \n", "\n", "---\n", " \n", "Restriction site associated DNA (RAD) marker library analysis \n", "Initial sequence read processing of RAD tags was carried out as previously described by Miller et al. (2012). Quality scores were used to remove raw sequencing reads with a probability of sequencing error greater than 10%. Using custom perl scripts (Miller et al. 2012) we then grouped raw sequence reads by individual and removed barcodes and restriction sites for a total sequence read length of 24 base pairs.\n", "\n", "Two types of SNP analyses were performed, including population-specific SNP variation characterization and the identification of SNPs that could potentially distinguish populations. In order to examine population-specific SNP variation, quality-trimmed reads from each cohort (BARN and MASH) were assembled independently using the following parameters: limit = 8 and mismatch cost = 2 (Genomics Workbench 4.0; CLC Bio). SNP detection was carried out using the following parameters: maximum gap and mismatch count = 2, minimum average quality = 15, minimum central quality = 20, minimum coverage = 10, minimum variant frequency = 35% (Genomics Workbench 4.0; CLC Bio). 
\n", "\n", "For the second form of SNP analysis, Novoindex and Novoalign (Novocraft Technologies) were used to assemble RAD-tags to identify RAD-tags within a cohort that were identical (lacked any polymorphisms). These \u201cisotigs\u201d from each cohort were then compared by assembling reads and carrying out SNP detection as described above. Any SNP that was identified indicated that the locus is fixed for the individuals in each cohort examined." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "https://www.evernote.com/shard/s10/sh/83c8adea-e40f-417b-a211-92b0a3a2b04f/1919679c5d1b0d3cdc66ca4273f1598b" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "_results_ \n", "\n", "RAD\n", "After quality trimming, 14.5 million reads remained from BARN clams (n=4) and 8.4 million reads from MASH clams (n=4), with a read length of 24bp. All but one individual (MASH) had between 2.4 and 4.7 million reads. To assess overall genetic diversity between the populations, reads were characterized in silico for each cohort. A de novo assembly of BARN reads resulted in 4,491 contigs containing 543 putative SNPs [Additional File 1](http://aquacul4.fish.washington.edu/~steven/armina/BARN_RAD_SNPtable.txt). This corresponds to approximately 5.0 SNPs / 1kb. Assembly of the MASH reads resulted in 9,824 contigs containing 1372 SNPs [Additional File 2](http://aquacul4.fish.washington.edu/~steven/armina/MASH_RAD_SNPtable.txt). For this MASH library this corresponds to approximately 5.8 SNPs / 1kb. \n", "\n", "In addition to characterizing putative SNPs within cohorts, we also set out to identify fixed loci that could be used to distinguish between cohorts. The number of identical contigs for each library was 8606 and 4845 for MASH and BARN, respectively. Comparing these two sets of isotigs revealed 2090 analogous sequences across cohorts with 1945 identical matches. 
The remaining 145 putative SNPs thus provide diagnostic markers to distinguish strains based on the samples used here. Of the 145 corresponding contigs only one mapped back to the transcriptome and this transcript was not annotated.\n", " " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**via Storer Evernote**\n", "\n", "Download sequence files in Illumina fastq format. \n", "This will not open in notepad. \n", "Need program such as \"Large Text File Viewer\". Link to this will be on catalyst site.\n", "These files are 16-20 GB\n", "\n", "First letters of seq are barcode. Fourth line of each record holds the quality scores for each sequence\n", "\n", "EXAMPLE\n", "@name (line also contains information on sample's location on slide)\n", "sequence\n", "+ name\n", "quality scores\n", "\n", "typically first sequences are low quality\n", "\n", "General notes:\n", "`>` (greater-than symbol) tells perl to write a file \n", "Unix format needs \n", "up key spits out previous command so that you can edit\n", "\n", "line 9 is telling it to read file line by line\n", "lines 11-14 are telling it to read the code line by line\n", "\n", "x+++ Note: What does it mean?\n", "\n", "\n", "Why do we want to count the number of sequences?\n", "We can filter sequences based on the number of counts (e.g. too low or too high such as 10,000) \n", "Cutoff based on distribution of read counts\n", "\n", "Novoalign: longest step (24hrs). This assembly program is good because it does not make any assumptions about trying to lengthen sequences, e.g. creates stacks\n", "\n", "Can only run on cluster; needs 64-bit unix system\n", "Installed on node 4\n", "Navigate to Novocraft folder on desktop\n", "open terminal, change directory (cd) into Novocraft \n", "\n", "Next step is to align sequences back to their index\n", "\n", "The higher the scores the worse the alignment. 
The score is the number of mismatched basepairs.\n", "\n", "To get the files on and off the cluster use DENALI\n", "\n", "NOVO file interpretation \n", "R-matched to other sequences\n", "U-only matched to itself\n", "Next is the score and then the sequences it has matched to\n", "Sometimes a score of 30 indicates a single nucleotide difference\n", "\n", "For next week...\n", "rerun scripts and try running the other Novo align script" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "\"_40__Discovery_Environment_18BCD1C5_png\"/" ] }, { "cell_type": "code", "collapsed": false, "input": [ "cd /Volumes/web/cnidarian/s_1_sequence_fastqc" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "/Volumes/web/cnidarian/s_1_sequence_fastqc\n" ] } ], "prompt_number": 8 }, { "cell_type": "code", "collapsed": false, "input": [ "http://eagle.fish.washington.edu/cnidarian/s_1_sequence_fastqc/fastqc_report.html" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [ "from IPython.display import HTML\n", "HTML('')" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "" ], "metadata": {}, "output_type": "pyout", "prompt_number": 9, "text": [ "" ] } ], "prompt_number": 9 }, { "cell_type": "heading", "level": 1, "metadata": {}, "source": [ "Git Diversion" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "\"sr320_fish546_18BD03A5_png\"/" ] }, { "cell_type": "code", "collapsed": false, "input": [ "cd /Volumes/Shoelace/Dropbox/Steven/" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "[Errno 2] No such file or directory: '/Volumes/Shoelace/Dropbox/Steven/'\n", "/Volumes/web/cnidarian/s_1_sequence_fastqc\n" ] } ], "prompt_number": 10 }, { "cell_type": "code", "collapsed": false, "input": [ "mkdir gt" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 15 }, { 
"cell_type": "code", "collapsed": false, "input": [ "cd gt" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "/Volumes/Shoelace/Dropbox/Steven/gt\n" ] } ], "prompt_number": 16 }, { "cell_type": "code", "collapsed": false, "input": [ "!git clone https://github.com/sr320/fish546.git " ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "Cloning into fish546...\r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "remote: Counting objects: 3, done.\u001b[K\r\n", "remote: Total 3 (delta 0), reused 0 (delta 0)\u001b[K\r\n", "Unpacking objects: 33% (1/3) \r", "Unpacking objects: 66% (2/3) \r", "Unpacking objects: 100% (3/3) \r", "Unpacking objects: 100% (3/3), done.\r\n" ] } ], "prompt_number": 17 }, { "cell_type": "code", "collapsed": false, "input": [ "ls" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "\u001b[34mfish546\u001b[m\u001b[m/\r\n" ] } ], "prompt_number": 18 }, { "cell_type": "code", "collapsed": false, "input": [ "cd fish546/" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "/Volumes/Shoelace/Dropbox/Steven/gt/fish546\n" ] } ], "prompt_number": 23 }, { "cell_type": "code", "collapsed": false, "input": [ "ls" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "README.md\r\n" ] } ], "prompt_number": 24 }, { "cell_type": "code", "collapsed": false, "input": [ "drag some files in ....." 
], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [ "!git status" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "# On branch master\r\n", "# Untracked files:\r\n", "# (use \"git add ...\" to include in what will be committed)\r\n", "#\r\n", "#\t.DS_Store\r\n", "#\tRad_perl_pipeline/\r\n", "nothing added to commit but untracked files present (use \"git add\" to track)\r\n" ] } ], "prompt_number": 26 }, { "cell_type": "code", "collapsed": false, "input": [ "!git add Rad_perl_pipeline/" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 27 }, { "cell_type": "code", "collapsed": false, "input": [ "!git commit -m \"adding perl scripts courtesy of Storer\"" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "[master a3ee1d3] adding perl scripts courtesy of Storer\r\n", " 10 files changed, 360 insertions(+), 0 deletions(-)\r\n", " create mode 100644 Rad_perl_pipeline/.DS_Store\r\n", " create mode 100644 Rad_perl_pipeline/perlpipelinefiles.zip\r\n", " create mode 100644 Rad_perl_pipeline/perlpipelinefiles/.DS_Store\r\n", " create mode 100755 Rad_perl_pipeline/perlpipelinefiles/BarcodeSplit.pl\r\n", " create mode 100755 Rad_perl_pipeline/perlpipelinefiles/FilterLoci.pl\r\n", " create mode 100755 Rad_perl_pipeline/perlpipelinefiles/HashSeqs.pl\r\n", " create mode 100755 Rad_perl_pipeline/perlpipelinefiles/IdentifyLoci.pl\r\n", " create mode 100755 Rad_perl_pipeline/perlpipelinefiles/QualityFilter.pl\r\n", " create mode 100755 Rad_perl_pipeline/perlpipelinefiles/SNPLocation.pl\r\n", " create mode 100755 Rad_perl_pipeline/perlpipelinefiles/pipeline.txt\r\n" ] } ], "prompt_number": 37 }, { "cell_type": "code", "collapsed": false, "input": [ "!git status" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "# On 
branch master\r\n", "# Your branch is ahead of 'origin/master' by 1 commit.\r\n", "#\r\n", "# Untracked files:\r\n", "# (use \"git add ...\" to include in what will be committed)\r\n", "#\r\n", "#\t.DS_Store\r\n", "nothing added to commit but untracked files present (use \"git add\" to track)\r\n" ] } ], "prompt_number": 38 }, { "cell_type": "code", "collapsed": false, "input": [ "!git commit" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "# On branch master\r\n", "# Your branch is ahead of 'origin/master' by 1 commit.\r\n", "#\r\n", "# Untracked files:\r\n", "# (use \"git add ...\" to include in what will be committed)\r\n", "#\r\n", "#\t.DS_Store\r\n", "nothing added to commit but untracked files present (use \"git add\" to track)\r\n" ] } ], "prompt_number": 40 }, { "cell_type": "code", "collapsed": false, "input": [ "!git add IMG_1566.jpg" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 41 }, { "cell_type": "code", "collapsed": false, "input": [ "!git commit -m \"adding image\"" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "[master e6ccb50] adding image\r\n", " 1 files changed, 0 insertions(+), 0 deletions(-)\r\n", " create mode 100644 IMG_1566.jpg\r\n" ] } ], "prompt_number": 42 }, { "cell_type": "code", "collapsed": false, "input": [ "cd /Users/sr320/Dropbox/Steven/gt/fish546" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "/Users/sr320/Dropbox/Steven/gt/fish546\n" ] } ], "prompt_number": 11 }, { "cell_type": "code", "collapsed": false, "input": [ "!git status" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "# On branch master\r\n", "nothing to commit, working directory clean\r\n" ] } ], "prompt_number": 12 }, { "cell_type": "code", "collapsed": false, "input": [ "!git remote" ], 
"language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "origin\r\n" ] } ], "prompt_number": 13 }, { "cell_type": "code", "collapsed": false, "input": [ "!git log" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "commit ae5b7b65fa618ea8f0acff5cfdb3298654ad83bd\r\n", "Author: sr320 \r\n", "Date: Tue Feb 25 11:02:22 2014 -0800\r\n", "\r\n", " Add DESeq\r\n", "\r\n", "commit dccf56f83a5e10e68ac0adecce50a17ed0ab6a2a\r\n", "Author: sr320 \r\n", "Date: Tue Feb 25 09:48:44 2014 -0800\r\n", "\r\n", " RAD perl scripts\r\n", " \r\n", " via Storer\r\n", "\r\n", "commit 35567a29285a0e7e59cb92720a4655d326c70d49\r\n", "Author: sr320 \r\n", "Date: Tue Feb 25 05:45:56 2014 -0800\r\n", "\r\n", " Initial commit\r\n" ] } ], "prompt_number": 14 }, { "cell_type": "code", "collapsed": false, "input": [ "!git push" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "warning: push.default is unset; its implicit value is changing in\r\n", "Git 2.0 from 'matching' to 'simple'. To squelch this message\r\n", "and maintain the current behavior after the default changes, use:\r\n", "\r\n", " git config --global push.default matching\r\n", "\r\n", "To squelch this message and adopt the new behavior now, use:\r\n", "\r\n", " git config --global push.default simple\r\n", "\r\n", "See 'git help config' and search for 'push.default' for further information.\r\n", "(the 'simple' mode was introduced in Git 1.7.11. 
Use the similar mode\r\n", "'current' instead of 'simple' if you sometimes use older versions of Git)\r\n", "\r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "Username for 'https://github.com': " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "^C\r\n" ] } ], "prompt_number": 15 }, { "cell_type": "heading", "level": 1, "metadata": {}, "source": [ "via IPlant" ] }, { "cell_type": "code", "collapsed": false, "input": [ "cd /Volumes/web/whale/fish546/" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "/Volumes/web/whale/fish546\n" ] } ], "prompt_number": 1 }, { "cell_type": "code", "collapsed": false, "input": [ "cd rad" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "/Volumes/web/whale/fish546/rad\n" ] } ], "prompt_number": 2 }, { "cell_type": "code", "collapsed": false, "input": [ "!icd /iplant/home/sr320/analyses/radtag_demultiplex_andy2-2014-02-27-10-42-44.518" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 3 }, { "cell_type": "code", "collapsed": false, "input": [ "!iget -r -f samples" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 6 }, { "cell_type": "code", "collapsed": false, "input": [ "cd samples/" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "/Volumes/web/whale/fish546/rad/samples\n" ] } ], "prompt_number": 8 }, { "cell_type": "code", "collapsed": false, "input": [ "ls" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "process_radtags.log sample_AATATC.fq sample_ACAGCG.fq sample_CCCGGT.fq\r\n", "sample_AAGACG.fq sample_AATGAG.fq sample_CACCTC.fq sample_CCCTAA.fq\r\n", "sample_AAGCTA.fq sample_ACAAGA.fq sample_CAGGCA.fq\r\n" ] } ], "prompt_number": 9 }, { "cell_type": "code", "collapsed": false, "input": [ "!head 
/Volumes/web/whale/fish546/rad/samples/sample_AAGCTA.fq" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "@1_1101_13406_2145_1\r\n", "TGCAGTCTCGCTCGCTCTTTGGCCACAGAGTCCAGGGTCTGGCGGGCGTCGGCCAGCTCGGTCTCGTAGGCGGCCTTCAGGCCCGACAGTTCCCGGCTGACCTCGGACTCCGACTCTGTGATGCGCAGTCGCAGGCCTGCATTCTCCGTCTCCAGAAAGCGCACCTTGTCGATGTAGACGGCCAGCCGGTCGTTGAGGTTGCACAGGTCCTCCTTCTCCCGGGGCCGGGGGATACGGTTGGGGGG\r\n", "+\r\n", "B1B33BAFF1AFF1EFGHHHHHHHHGHHHHHHHHHHGHHHHHHGGGGGGGEGCGGEGCGGGAE?GGFG/EFEGCGFHGHFFHGGGCGCCGGFHHCGCC?CFH0CBBAB99--:/9;9-/;:FFFBFB/99/9@--@;--@-@--9/9B9E-@?@@-\r\n", "@1_1101_18318_2145_1\r\n", "TGCAGGAAAGAGAAGCATGCTCAGATTCTTCTTCTCTGACAATTGATGACAGATATTTTGTAGAACGAGGGTTGTGTGGACAGAATTGTATTATTTTATACATTTATTATAATAAAATGATCTGTATCCGTGTACGCAGGGCCTCAGTCTTTCAGTAAGATAACGGGGTCAGAGTTTAACTCGGACTGAACAAATATAATCAGAGGATCGAACCTGTGACCTTCTTTCTAACCCTCAGGTCTCCA\r\n", "+\r\n", "B@BB331111111AEFFHHHHHHHHHHHHHHHHHHHHHHHHHHHGHHHHHHHHHHHHHHHHHHHHHGGGGGGHGGGHHGEGHGHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHGGGHHHHGGGCGGGGHHHGFHHHHHFHHHHHGHHHGGCGGGHHGHHHHHHGHHGGGCCHGGHHHGHHHFFGFHFBGEC/B?CGE@G:;0;0CGFGGGEBFFFG.A9..FF0;9BF0\r\n", "@1_1101_12102_2148_1\r\n", "TGCAGCAGATCGGAGACGAGCTGGATGGAAATATGGAGCTCCAAAGGTGAGCTTTCCAATATCGTTCCTGAAGAATTATTAATGAGTATAATTAGCCTGTCTGTAATCACTAAATCTTGTTTGTTCCCCCCCCACAGAATGATAAACAACTCTTCGCTCAGTCCCACAAAAGACATGTTTATGAGAGTTGCCATTGAGATCTTTTCAGATGGAAAGTTCAACTGGGGCCGGGGGGTCGCACTGTT\r\n" ] } ], "prompt_number": 2 }, { "cell_type": "code", "collapsed": false, "input": [ "!head -30 /Volumes/web/whale/fish546/rad/samples/process_radtags.log" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "/usr/local2/STACKS/stacks/bin/process_radtags -p andy_dir -b anofimbarcodes_102413.txt -o samples -e pstI --inline_null -c -q -r -i gzfastq\r\n", "process_radtags executed 2014-02-27 11:05:58\r\n", "\r\n", "File\tRetained Reads\tLow Quality\tAmbiguous Barcodes\tAmbiguous 
RAD-Tag\tTotal\r\n", "sablefishPstI1_S1_L001_R1_001.fastq.gz\t13754658\t5\t372693\t46987\t14174343\r\n", "\r\n", "Total Sequences\t14174343\r\n", "Ambiguous Barcodes\t372693\r\n", "Low Quality\t5\r\n", "Ambiguous RAD-Tag\t46987\r\n", "Retained Reads\t13754658\r\n", "\r\n", "Barcode\tTotal\tNo RadTag\tRetained\r\n", "AAGACG\t1544350\t4470\t1539880\r\n", "AAGCTA\t1412759\t3286\t1409471\r\n", "AATATC\t1352594\t4347\t1348246\r\n", "AATGAG\t2186604\t8541\t2178063\r\n", "ACAAGA\t960638\t4366\t956272\r\n", "ACAGCG\t1408640\t4526\t1404113\r\n", "CACCTC\t1185959\t7357\t1178602\r\n", "CAGGCA\t1376357\t3700\t1372657\r\n", "CCCGGT\t1110046\t2411\t1107634\r\n", "CCCTAA\t1263703\t3983\t1259720\r\n", "\r\n", "Sequences not recorded\r\n", "Barcode\tTotal\r\n", "CGTATG\t6253\r\n", "CCTCTC\t3893\r\n", "CACACA\t2889\r\n", "ATGAGT\t2606\r\n" ] } ], "prompt_number": 5 }, { "cell_type": "heading", "level": 3, "metadata": {}, "source": [ "ustacks" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Example code (run on hummingbird remotely)\n", "\n", "`!ustacks -t fastq -f /Volumes/web/whale/fish546/rad/samples/sample_AATGAG.fq -o /Volumes/web/whale/fish546/rad/samples -i 4 -p 10 -m 2 --model_type 'bounded' --alpha 0.05 --bound_low 0.001 --bound_high 0.01 -r -d`\n", "\n", "Ran 5 libraries " ] }, { "cell_type": "code", "collapsed": false, "input": [ "pwd" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 12, "text": [ "u'/Volumes/web/whale/fish546/rad/samples'" ] } ], "prompt_number": 12 }, { "cell_type": "code", "collapsed": false, "input": [ "ls" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "process_radtags.log sample_AATATC.fq\r\n", "sample_AAGACG.alleles.tsv sample_AATGAG.fq\r\n", "sample_AAGACG.fq sample_ACAAGA.fq\r\n", "sample_AAGACG.snps.tsv sample_ACAGCG.fq\r\n", "sample_AAGACG.tags.tsv sample_CACCTC.fq\r\n", "sample_AAGCTA.alleles.tsv 
sample_CAGGCA.fq\r\n", "sample_AAGCTA.fq sample_CCCGGT.fq\r\n", "sample_AAGCTA.snps.tsv sample_CCCTAA.fq\r\n", "sample_AAGCTA.tags.tsv\r\n" ] } ], "prompt_number": 8 }, { "cell_type": "raw", "metadata": {}, "source": [ "3 files are created with ustacks" ] }, { "cell_type": "code", "collapsed": false, "input": [ "!head sample_AAGACG.alleles.tsv" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "0\t1\t2\tAGACTTA\t60\t3\r\n", "0\t1\t2\tATTCCAA\t20\t1\r\n", "0\t1\t2\tCGATTTT\t20\t1\r\n", "0\t1\t3\tACA\t20\t1\r\n", "0\t1\t3\tTTA\t60\t3\r\n", "0\t1\t3\tTTG\t20\t1\r\n", "0\t1\t7\tAGCGC\t20\t1\r\n", "0\t1\t7\tCTGAA\t20\t1\r\n", "0\t1\t7\tCTGAC\t40\t2\r\n", "0\t1\t8\tCT\t66.6667\t2\r\n" ] } ], "prompt_number": 9 }, { "cell_type": "code", "collapsed": false, "input": [ "!head sample_AAGACG.snps.tsv" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "0\t1\t2\t159\t-5.10668\tA\tC\t\t\r\n", "0\t1\t2\t213\t-5.10668\tG\tT\t\t\r\n", "0\t1\t2\t220\t-5.10668\tA\tT\t\t\r\n", "0\t1\t2\t228\t-5.10668\tC\tT\t\t\r\n", "0\t1\t2\t231\t-5.10668\tT\tC\t\t\r\n", "0\t1\t2\t233\t-5.10668\tT\tA\t\t\r\n", "0\t1\t2\t239\t-5.10668\tA\tT\t\t\r\n", "0\t1\t3\t206\t-5.10668\tT\tA\t\t\r\n", "0\t1\t3\t220\t-5.10668\tT\tC\t\t\r\n", "0\t1\t3\t228\t-5.10668\tA\tG\t\t\r\n" ] } ], "prompt_number": 10 }, { "cell_type": "code", "collapsed": false, "input": [ "!head sample_AAGACG.tags.tsv" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "0\t1\t1\t\t0\t+\tconsensus\t\t\tTGCAGATGGATTCTGTTGGTGCACCACAAAGCACCTTCAAGTAATCACATCGCTTATAGATAATCTATATATAGACATATATATCTATATATGTCTATATATCTATATATGTATTTTGTATAAAATACAAAAGCCCTGCATATGTTAATTTGGAAGCAATACAGTCTGATTTGGGGGAGTTTTAATTGGAACCATGGGAAAGAAATGCTTATGAAGGCAAGGAGAGAAAACACACTCAGATGCTT\t0\t0\t0\r\n", 
"0\t1\t1\t\t\t\tmodel\t\t\tUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUU\t\t\t\r\n", "0\t1\t1\t\t\t\tprimary\t0\t1_1106_5151_10390_1\tTGCAGATGGATTCTGTTGGTGCACCACAAAGCACCTTCAAGTAATCACATCGCTTATAGATAATCTATATATAGACATATATATCTATATATGTCTATATATCTATATATGTATTTTGTATAAAATACAAAAGCCCTGCATATGTTAATTTGGAAGCAATACAGTCTGATTTGGGGGAGTTTTAATTGGAACCATGGGAAAGAAATGCTTATGAAGGCAAGGAGAGAAAACACACTCAGATGCTT\t\t\t\r\n", "0\t1\t1\t\t\t\tprimary\t0\t1_1110_4587_19590_1\tTGCAGATGGATTCTGTTGGTGCACCACAAAGCACCTTCAAGTAATCACATCGCTTATAGATAATCTATATATAGACATATATATCTATATATGTCTATATATCTATATATGTATTTTGTATAAAATACAAAAGCCCTGCATATGTTAATTTGGAAGCAATACAGTCTGATTTGGGGGAGTTTTAATTGGAACCATGGGAAAGAAATGCTTATGAAGGCAAGGAGAGAAAACACACTCAGATGCTT\t\t\t\r\n", "0\t1\t2\t\t0\t+\tconsensus\t\t\tTGCAGTGCAGCTTTAAACACAGGAGGGTCGGCAGAATGTCAAACAGAACCACCGTCACTGCTGTGACTCTGAAGCCACAGTCAGGATTTTTGGGGAAGAACCGCCAACAATGGAAGCCGTGCTGGCCGATACGGATCACCGATCATGGTAATAAAATCTAGAGATGAGAAAGTATCATGAATTGATAATTCTTTGTTTATGGGCAGCCAACATGTGCAACATATTCCACTCTCTCTCTTAGAGAC\t0\t0\t0\r\n", "0\t1\t2\t\t\t\tmodel\t\t\tOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOEOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOEOOOOOOEOOOOOOOEOOEOEOOOOOEOOOOO\t\t\t\r\n", "0\t1\t2\t\t\t\tprimary\t0\t1_1101_10914_9604_1\tTGCAGTGCAGCTTTAAACACAGGAGGGTCGGCAGAATGTCAAACAGAACCACCGTCACTGCTGTGACTCTGAAGCCACAGTCAGGATTTTTGGGGAAGAACCGCCAACAATGGAAGCCGTGCTGGCCGATACGGATCACCGATCATGGTAATAAAATCTAGAGATGAGAAAGTATCATGAATTGATAATTCTTTGTTTATGGGCAGCCAACATGTGCAACATATTCCACTCTCTCTCTTAGAGAC\t\t\t\r\n", 
"0\t1\t2\t\t\t\tprimary\t0\t1_1103_12944_26723_1\tTGCAGTGCAGCTTTAAACACAGGAGGGTCGGCAGAATGTCAAACAGAACCACCGTCACTGCTGTGACTCTGAAGCCACAGTCAGGATTTTTGGGGAAGAACCGCCAACAATGGAAGCCGTGCTGGCCGATACGGATCACCGATCATGGTAATAAAATCTAGAGATGAGAAAGTATCATGAATTGATAATTCTTTGTTTATGGGCAGCCAACATGTGCAACATATTCCACTCTCTCTCTTAGAGAC\t\t\t\r\n", "0\t1\t2\t\t\t\tprimary\t0\t1_1111_24429_15159_1\tTGCAGTGCAGCTTTAAACACAGGAGGGTCGGCAGAATGTCAAACAGAACCACCGTCACTGCTGTGACTCTGAAGCCACAGTCAGGATTTTTGGGGAAGAACCGCCAACAATGGAAGCCGTGCTGGCCGATACGGATCACCGATCATGGTAATAAAATCTAGAGATGAGAAAGTATCATGAATTGATAATTCTTTGTTTATGGGCAGCCAACATGTGCAACATATTCCACTCTCTCTCTTAGAGAC\t\t\t\r\n", "0\t1\t2\t\t\t\tsecondary\t\t1_2113_9036_27152_1\tTGCAGTGCAGCTTTAAACACAGGAGGGTCGGCAGAATGTCAAACAGAACCACCGTCACTGCTGTGACTCTGAAGCCACAGTCAGGATTTTTGGGGAAGAACCGCCAACAATGGAAGCCGTGCTGGCCGATACGGATCACCGATCATGGTAATAAAATCTCGAGATGAGAAAGTATCATGAATTGATAATTCTTTGTTTATGGGCAGCCAACATGTGCAACATATTCCATTCTCTCTCTTTGAGAC\t\t\t\r\n" ] } ], "prompt_number": 11 }, { "cell_type": "heading", "level": 3, "metadata": {}, "source": [ "cstacks" ] }, { "cell_type": "raw", "metadata": {}, "source": [ "cstacks -b 1 -o /Volumes/web/whale/fish546/rad/samples -s /Volumes/web/whale/fish546/rad/samples/sample_AAGACG -s /Volumes/web/whale/fish546/rad/samples/sample_AAGCTA -s /Volumes/web/whale/fish546/rad/samples/sample_AATATC -s /Volumes/web/whale/fish546/rad/samples/sample_AATGAG -p 12 -n 2\n", "Number of mismatches allowed between stacks: 2\n", "Loci matched based on sequence identity.\n", "Constructing catalog from 4 samples.\n", "Initializing new catalog...\n", " Parsing /Volumes/web/whale/fish546/rad/samples/sample_AAGACG.tags.tsv\n", "\"" ] }, { "cell_type": "code", "collapsed": false, "input": [ "!head /Volumes/web/whale/fish546/rad/samples/batch_1.catalog.alleles.tsv" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "0\t1\t2\tAGACTTA\t0\t0\r\n", "0\t1\t2\tATTCCAA\t0\t0\r\n", "0\t1\t2\tCGATTTT\t0\t0\r\n", "0\t1\t3\tACA\t0\t0\r\n", 
"0\t1\t3\tTTA\t0\t0\r\n", "0\t1\t3\tTTG\t0\t0\r\n", "0\t1\t4\tACCA\t0\t0\r\n", "0\t1\t4\tCATT\t0\t0\r\n", "0\t1\t6\tA\t0\t0\r\n", "0\t1\t6\tC\t0\t0\r\n" ] } ], "prompt_number": 1 }, { "cell_type": "code", "collapsed": false, "input": [ "!head /Volumes/web/whale/fish546/rad/samples/batch_1.catalog.snps.tsv" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "0\t1\t2\t159\t-5.10668\tA\tC\t\t\r\n", "0\t1\t2\t213\t-5.10668\tG\tT\t\t\r\n", "0\t1\t2\t220\t-5.10668\tA\tT\t\t\r\n", "0\t1\t2\t228\t-5.10668\tC\tT\t\t\r\n", "0\t1\t2\t231\t-5.10668\tT\tC\t\t\r\n", "0\t1\t2\t233\t-5.10668\tT\tA\t\t\r\n", "0\t1\t2\t239\t-5.10668\tA\tT\t\t\r\n", "0\t1\t3\t206\t-5.10668\tT\tA\t\t\r\n", "0\t1\t3\t220\t-5.10668\tT\tC\t\t\r\n", "0\t1\t3\t228\t-5.10668\tA\tG\t\t\r\n" ] } ], "prompt_number": 2 }, { "cell_type": "code", "collapsed": false, "input": [ "!head /Volumes/web/whale/fish546/rad/samples/batch_1.catalog.tags.tsv" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "0\t1\t1\t\t0\t+\tconsensus\t0\t1_1\tTGCAGATGGATTCTGTTGGTGCACCACAAAGCACCTTCAAGTAATCACATCGCTTATAGATAATCTATATATAGACATATATATCTATATATGTCTATATATCTATATATGTATTTTGTATAAAATACAAAAGCCCTGCATATGTTAATTTGGAAGCAATACAGTCTGATTTGGGGGAGTTTTAATTGGAACCATGGGAAAGAAATGCTTATGAAGGCAAGGAGAGAAAACACACTCAGATGCTT\t0\t0\t0\r\n", "0\t1\t2\t\t0\t+\tconsensus\t0\t1_2\tTGCAGTGCAGCTTTAAACACAGGAGGGTCGGCAGAATGTCAAACAGAACCACCGTCACTGCTGTGACTCTGAAGCCACAGTCAGGATTTTTGGGGAAGAACCGCCAACAATGGAAGCCGTGCTGGCCGATACGGATCACCGATCATGGTAATAAAATCTAGAGATGAGAAAGTATCATGAATTGATAATTCTTTGTTTATGGGCAGCCAACATGTGCAACATATTCCACTCTCTCTCTTAGAGAC\t0\t0\t0\r\n", "0\t1\t3\t\t0\t+\tconsensus\t0\t1_3\tTGCAGTAGCTAGCGTTAACTCCATGAGTTGGTTTAAAACAACCTCACCAGCTGTCATTGTTGTAGAACCTTGTTAAATACTGTAGCACCCAGCAATGGCCGAAGCTATGCATTGGTACCAACCATGTCATGCTAGCTTGTCGGGATCGGGTATAAAAGCCCTTCACATTTGGTTGAGGGAACATTTGGAATGATCATATTCAAAAATGTCACTTGAACTCTCACTTCAAGATATGTCAATGAAAA\t0\t0\t0\r\n", 
"0\t1\t4\t\t0\t+\tconsensus\t0\t1_4,4_112317\tTGCAGGGCCCCTGAGTCTCTCTGTGGCTGTTTGACAGTAATAGGACTGGTTTTATTCCTGTGAGACCTGCCTGCATAGATAACATCTCTCCTGTGCATACATAAATATTAGAACAGTCCATGGGACATTTCAAGTGGAGAGGATGAATTATTTATCGCGCCAAGTTGAGAGAGTTGAGTTTATACAGTGCCGACAGTGGCTCTTAATTGTTAGCTGACGGTTAGAAATCGTTTTTTTAAAAGCTT\t0\t0\t0\r\n", "0\t1\t5\t\t0\t+\tconsensus\t0\t1_5\tTGCAGTGGAGGACAGAGGACATGGCTATGAACAAAATGGAAGAAGAGACAGAGGCAGATTGTTTCTGTATCTGTAGCCTATTTAAACATTTTTATTTCACACATTTGCCATTTAAATGCTCCTCATCCTCTGAAGGTACATGAGGCTTCTTATGTTTCACCTGCAATGGTAGCTGTGACATCAGACGACTGATTCAACCGGAAGAGAGGAACTGTAAGCTGAGCAGAGGGCCACTGCATGTTCAC\t0\t0\t0\r\n", "0\t1\t6\t\t0\t+\tconsensus\t0\t1_6,4_59742\tTGCAGCTTAAATAAAGCTTCATTTTCGGATTCGAGGTCTTATTTGGATGGATTTCAGCAGCTATGGCTTTGTGTTTTGTTCCCACGAGTTTTGATTTTGCTACAGCTGTGATTAAAACCGCTGACTGGAATCTCAACAGCGGCTAAAGCTCATGTGTTTTTTGCAATATTTATAGCCCATCCACCACAACAGAAGTTATAGAAAAAATATCAGTAAAAGAAAAGTTTGAAAAAGCAATTGAAAAT\t0\t0\t0\r\n", "0\t1\t7\t\t0\t+\tconsensus\t0\t1_7,4_136809\tTGCAGGACACACATGAAAGTAAATGCAGCCTATCACAGTATGGCACGATACAATCATATCCTTTCTTTTTCTTGCTGATTATTTATAATTGGAGGTATCCATAGCAACAGCAGCTCTGTATATTTTAATGCAAAAGATACATTTTATGGCTTTAGGTTTAAACCGTGCTTTGTAGAGCTGATGGGTGTTTAACAGAGCAGCCTATCAGTGCATGGCCAGCTTGTGGCTGAGTGGAGCCCCTTTAC\t0\t0\t0\r\n", "0\t1\t8\t\t0\t+\tconsensus\t0\t1_8\tTGCAGATTTACTGGGCAGGTAGTATTAAGAGTCACTCCTTTGCTCAAGGTCTGGTTGACATGACTAAATTTGAAGCTTTTTGTTGAGCAGCTTGCAGAAATAGAAATGCTGCGATCTAATTAAACCATAGCTTGCACTTTCATGTCAAGCAAATGCATCCTCAATAGCCAGAAGCTGAATGTTCTTGAAAAATATGGAGGTGAGAGCCAACTAAGTCCAGTGGACCAGATCAAATCCTTAAATTA\t0\t0\t0\r\n", "0\t1\t9\t\t0\t+\tconsensus\t0\t1_9\tTGCAGCGGGGACTGAAGTAGATACGTGGGCCGAGGACGCATGGCTTCAAACGCCAGGCAGTGGTACAGACCAACGGGTGTCAGACGGTGAGATGAGCTCAGCCGCCGATGGAGTGACTCCTCGCAGCAGTTCTACCAAGTCCTCCCCTGTTTCCATATCTATTATTACAGCACAGGGTAAGACGGCAAATTATAGATTCCACATAGACTTAATGGTTAGGGATTTAACGTGCAATTGCATTTTCT\t0\t0\t0\r\n", 
"0\t1\t10\t\t0\t+\tconsensus\t0\t1_10,3_4904\tTGCAGTATTTTTACTAGTTATATTCTCTTTTAAAGTTGTTACTTTCTGGAAGCGATAGACCCTTCGCCTCAGTTTCAACCCTGAAGCGCCGTCTGGCTAATCAAAGCGTAGCATCAGTCAGACCAGGGTTCCTCAACTACTAGTCTCTTTCTCATAGACTCATATGAAACGGTTTCAGATGTTCTTCATCTTCAGGCTGGTTTTGGACTTGGCGCTAGTTATGGTTAAGCGTTGTGTAATAGTGA\t0\t0\t0\r\n" ] } ], "prompt_number": 3 }, { "cell_type": "heading", "level": 3, "metadata": {}, "source": [ "sstacks" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "```\n", "!sstacks -b 1 -c /Volumes/web/whale/fish546/rad/samples/batch_1 -s /Volumes/web/whale/fish546/rad/samples/sample_ACAAGA -o /Volumes/web/whale/fish546/rad/samples/ -p 12\n", "```" ] }, { "cell_type": "raw", "metadata": {}, "source": [ "!sstacks -b 1 -c /Volumes/web/whale/fish546/rad/samples/batch_1 -s /Volumes/web/whale/fish546/rad/samples/sample_AAGACG -o /Volumes/web/whale/fish546/rad/samples/ -p 12" ] }, { "cell_type": "raw", "metadata": {}, "source": [ "!sstacks -b 1 -c /Volumes/web/whale/fish546/rad/samples/batch_1 -s /Volumes/web/whale/fish546/rad/samples/sample_AAGCTA -o /Volumes/web/whale/fish546/rad/samples/ -p 12" ] }, { "cell_type": "raw", "metadata": {}, "source": [ "!sstacks -b 1 -c /Volumes/web/whale/fish546/rad/samples/batch_1 -s /Volumes/web/whale/fish546/rad/samples/sample_AATATC -o /Volumes/web/whale/fish546/rad/samples/ -p 12" ] }, { "cell_type": "raw", "metadata": {}, "source": [ "!sstacks -b 1 -c /Volumes/web/whale/fish546/rad/samples/batch_1 -s /Volumes/web/whale/fish546/rad/samples/sample_AATGAG -o /Volumes/web/whale/fish546/rad/samples/ -p 12" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "```\n", "output\n", "Parsing /Volumes/web/whale/fish546/rad/samples/batch_1.catalog.tags.tsv\n", " Parsing /Volumes/web/whale/fish546/rad/samples/batch_1.catalog.snps.tsv\n", " Parsing /Volumes/web/whale/fish546/rad/samples/batch_1.catalog.alleles.tsv\n", " Parsing /Volumes/web/whale/fish546/rad/samples/sample_ACAAGA.tags.tsv\n", " Parsing 
/Volumes/web/whale/fish546/rad/samples/sample_ACAAGA.snps.tsv\n", " Parsing /Volumes/web/whale/fish546/rad/samples/sample_ACAAGA.alleles.tsv\n", "Searching for sequence matches...\n", "40395 stacks compared against the catalog containing 289998 loci.\n", " 16987 matching loci, 8346 contained no verified haplotypes.\n", " 156 loci matched more than one catalog locus and were excluded.\n", " 8029 loci contained SNPs unaccounted for in the catalog and were excluded.\n", " 19339 total haplotypes examined from matching loci, 9781 verified.\n", "Outputing to file /Volumes/web/whale/fish546/rad/samples/sample_ACAAGA.matches.tsv\n", "```" ] }, { "cell_type": "code", "collapsed": false, "input": [ "!head /Volumes/web/whale/fish546/rad/samples/sample_ACAAGA.matches.tsv" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "0\t1\t17370\t5\t1\tC\t2\r\n", "0\t1\t133131\t5\t9\tconsensus\t3\r\n", "0\t1\t13913\t5\t34\tAAGAT\t2\r\n", "0\t1\t165533\t5\t50\tCA\t2\r\n", "0\t1\t48330\t5\t51\tT\t2\r\n", "0\t1\t43034\t5\t52\tTGG\t2\r\n", "0\t1\t177000\t5\t68\tconsensus\t3\r\n", "0\t1\t20735\t5\t75\tTAA\t2\r\n", "0\t1\t55930\t5\t79\tCCC\t2\r\n", "0\t1\t139304\t5\t92\tTCACC\t2\r\n" ] } ], "prompt_number": 1 }, { "cell_type": "raw", "metadata": {}, "source": [ "#pop.map\n", "\n", "sample_AAGACG 1\n", "sample_AAGCTA 1\n", "sample_AATATC 1\n", "sample_AATGAG 1\n", "\n" ] }, { "cell_type": "raw", "metadata": {}, "source": [ "!populations -b 1 -P /Volumes/web/whale/fish546/rad/samples/ -M /Volumes/web/cnidarian/pop.map -m 4 -t 12 --vcf" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "```\n", "Fst kernel smoothing: off\n", "Bootstrap resampling: off\n", "Percent samples limit per population: 0\n", "Locus Population limit: 1\n", "Minimum stack depth: 4\n", "Minor allele frequency cutoff: 0\n", "Applying Fst correction: none.\n", "Parsing population map.\n", "Found 4 input file(s).\n", " 1 population found\n", " 1: 
sample_AAGACG, sample_AAGCTA, sample_AATATC, sample_AATGAG\n", " 1 group of populations found\n", " 1: 1\n", " Parsing /Volumes/web/whale/fish546/rad/samples/batch_1.catalog.tags.tsv\n", " Parsing /Volumes/web/whale/fish546/rad/samples/batch_1.catalog.snps.tsv\n", " Parsing /Volumes/web/whale/fish546/rad/samples/batch_1.catalog.alleles.tsv\n", "Catalog is not reference aligned, arbitrarily ordering catalog loci.\n", " Parsing /Volumes/web/whale/fish546/rad/samples/sample_AAGACG.matches.tsv\n", " Parsing /Volumes/web/whale/fish546/rad/samples/sample_AAGCTA.matches.tsv\n", " Parsing /Volumes/web/whale/fish546/rad/samples/sample_AATATC.matches.tsv\n", " Parsing /Volumes/web/whale/fish546/rad/samples/sample_AATGAG.matches.tsv\n", "Populating observed haplotypes for 4 samples, 289998 loci.\n", "Removed 215394 samples from loci that are below the minimum stack depth of 4x\n", "Removing 154311 loci that did not pass sample/population constraints... retained 135687 loci.\n", "Loading model outputs for 4 samples, 135687 loci.\n", " Parsing /Volumes/web/whale/fish546/rad/samples/sample_AAGACG.tags.tsv\n", " Parsing /Volumes/web/whale/fish546/rad/samples/sample_AAGCTA.tags.tsv\n", " Parsing /Volumes/web/whale/fish546/rad/samples/sample_AATATC.tags.tsv\n", " Parsing /Volumes/web/whale/fish546/rad/samples/sample_AATGAG.tags.tsv\n", "Generating nucleotide-level summary statistics for population 1\n", "Population 1 contained 2093 incompatible loci.\n", "Tallying loci across populations...done.\n", "Writing 135687 loci to haplotype statistics file, '/Volumes/web/whale/fish546/rad/samples/batch_1.hapstats.tsv'\n", "Calculating haplotype F statistics... done.\n", "Writing haplotype F statistics... 
wrote 123147 loci to haplotype Phi_st file, '/Volumes/web/whale/fish546/rad/samples/batch_1.phistats.tsv'\n", "Writing 135687 loci to summary statistics file, '/Volumes/web/whale/fish546/rad/samples/batch_1.sumstats.tsv'\n", "Writing population data to VCF file '/Volumes/web/whale/fish546/rad/samples/batch_1.vcf'\n", "Loading SNP data for 4 samples.\n", " Parsing /Volumes/web/whale/fish546/rad/samples/sample_AAGACG.snps.tsv\n", " Parsing /Volumes/web/whale/fish546/rad/samples/sample_AAGCTA.snps.tsv\n", " Parsing /Volumes/web/whale/fish546/rad/samples/sample_AATATC.snps.tsv\n", " Parsing /Volumes/web/whale/fish546/rad/samples/sample_AATGAG.snps.tsv\n", "Writing 135687 loci to observed haplotype file, '/Volumes/web/whale/fish546/rad/samples/batch_1.haplotypes.tsv'\n", "```" ] }, { "cell_type": "code", "collapsed": false, "input": [ "!head /Volumes/web/whale/fish546/rad/samples/batch_1.haplotypes.tsv" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "Catalog ID\tCnt\tsample_AAGACG\tsample_AAGCTA\tsample_AATATC\tsample_AATGAG\r\n", "2\t1\tAGACTTA/ATTCCAA/CGATTTT\t-\t-\t-\r\n", "3\t1\tACA/TTA/TTG\t-\t-\t-\r\n", "4\t2\tCATT\t-\t-\tACCA/CATT\r\n", "7\t2\tCAGCGCGC/CCTCGGAA/CCTCGGAC\t-\t-\tCCTAGGAC/CCTCTGAC/TCTCGGAC\r\n", "9\t1\tAC/AT/CC\t-\t-\t-\r\n", "11\t2\tCGG/TGG\t-\t-\tCGG\r\n", "14\t2\tGG/TA\t-\tTA\t-\r\n", "15\t1\tconsensus\t-\t-\t-\r\n", "16\t1\tAA/GT\t-\t-\t-\r\n" ] } ], "prompt_number": 2 }, { "cell_type": "code", "collapsed": false, "input": [ "!ls /Volumes/web/whale/fish546/rad/samples/" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "batch_1.catalog.alleles.tsv sample_AATATC.alleles.tsv\r\n", "batch_1.catalog.snps.tsv sample_AATATC.fq\r\n", "batch_1.catalog.tags.tsv sample_AATATC.matches.tsv\r\n", "batch_1.haplotypes.tsv sample_AATATC.snps.tsv\r\n", "batch_1.hapstats.tsv sample_AATATC.tags.tsv\r\n", "batch_1.phistats.tsv 
sample_AATGAG.alleles.tsv\r\n", "batch_1.populations.log sample_AATGAG.fq\r\n", "batch_1.sumstats.tsv sample_AATGAG.matches.tsv\r\n", "batch_1.sumstats_summary.tsv sample_AATGAG.snps.tsv\r\n", "batch_1.vcf sample_AATGAG.tags.tsv\r\n", "process_radtags.log sample_ACAAGA.alleles.tsv\r\n", "sample_AAGACG.alleles.tsv sample_ACAAGA.fq\r\n", "sample_AAGACG.fq sample_ACAAGA.matches.tsv\r\n", "sample_AAGACG.matches.tsv sample_ACAAGA.snps.tsv\r\n", "sample_AAGACG.snps.tsv sample_ACAAGA.tags.tsv\r\n", "sample_AAGACG.tags.tsv sample_ACAGCG.fq\r\n", "sample_AAGCTA.alleles.tsv sample_CACCTC.fq\r\n", "sample_AAGCTA.fq sample_CAGGCA.fq\r\n", "sample_AAGCTA.matches.tsv sample_CCCGGT.fq\r\n", "sample_AAGCTA.snps.tsv sample_CCCTAA.fq\r\n", "sample_AAGCTA.tags.tsv\r\n" ] } ], "prompt_number": 6 }, { "cell_type": "code", "collapsed": false, "input": [], "language": "python", "metadata": {}, "outputs": [] } ], "metadata": {} } ] }