{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Analysis of Ficus RAD-seq data" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Table of contents\n", "[Software installation (conda)](#Required-software) \n", "[The assembled RAD data](#The-assembled-data-sets) \n", "[Phylogenetic analysis (raxml)](#Analysis-BPP) \n", "[Plot results](#Plots)\n", "\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Required software\n", "All required software can be installed locally using *conda*. I assume here that you already have `ipyrad` installed using conda. " ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": true }, "outputs": [], "source": [ "## conda install toytree -c eaton-lab\n", "## conda install ipyrad -c ipyrad \n", "## conda install bpp -c ipyrad " ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "ipyrad v.0.6.20\n" ] } ], "source": [ "## import packages\n", "import ipyrad as ip\n", "import ipyrad.analysis as ipa\n", "import numpy as np\n", "import toyplot\n", "import toytree\n", "import glob\n", "\n", "## print ipyrad info\n", "print \"ipyrad v.{}\".format(ip.__version__)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Cluster setup\n", "see more information on ipyparallel setup here. " ] }, { "cell_type": "code", "execution_count": 181, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "host compute node: [40 cores] on tinus\n" ] } ], "source": [ "## print ipyparallel cluster information\n", "import ipyparallel as ipp\n", "ipyclient = ipp.Client()\n", "print ip.cluster_info(ipyclient)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Assembled data sets" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " loading Assembly: pharma_dhi_s4\n", " from saved path: ~/Documents/Ficus/analysis-ipyrad/pharma_dhi_s4.json\n", " loading Assembly: america_dhi_s4\n", " from saved path: ~/Documents/Ficus/analysis-ipyrad/america_dhi_s4.json\n" ] } ], "source": [ "## create subsampled pharma-clade branch\n", "pharma = ip.load_json(\"analysis-ipyrad/pharma_dhi_s4.json\")\n", "america = ip.load_json(\"analysis-ipyrad/america_dhi_s4.json\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Analysis BPP" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Pharmacosyceae clade" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": true }, "outputs": [], "source": [ "## a tree hypothesis (guidetree) (here based on tetrad results)\n", "newick = \"((((glabrata, insipida), yoponensis), maxima), tonduzii);\"\n", "\n", "## a dictionary mapping sample names to 'species' names\n", "imap = {\n", " \"glabrata\": [\"B133_glabrata\", \"A97_glabrata\", \"B134_glabrata\", \"B131_glabrataXmaxima\"],\n", " \"insipida\": [\"A95_insipida\", \"B127_insipida\", \"C15_insipida\", \n", " \"B128_insipida\", \"B127_insipida\", \"A95_insipida\"],\n", " \"yoponensis\": [\"C45_yoponensis\", \"C47_yoponensis\", \"C46_yoponensis\"],\n", " \"maxima\": [\"A94_maxima\", \"C17_maxima\", \"B119_maxima\", \"B120_maxima\"],\n", " \"tonduzii\": [\"C48_tonduzii\"],\n", " }\n", "\n", "## loci must have data for at least N samples in each species.\n", "minmap = {\n", " \"glabrata\": 4,\n", " \"insipida\": 4,\n", " \"yoponensis\": 3,\n", " \"maxima\": 4, \n", " \"tonduzii\": 1,\n", " }" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
tonduziimaximayoponensisglabratainsipida
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "## check your (starting) tree hypothesis\n", "toytree.tree(newick).draw();" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": true }, "outputs": [], "source": [ "## create a bpp object to run algorithm 00\n", "boo = ipa.bpp(\n", " locifile=pharma.outfiles.loci,\n", " guidetree=newick, \n", " imap=imap, \n", " minmap=minmap, \n", " workdir=\"analysis-bpp/\",\n", " )" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "burnin 10000 \n", "cleandata 0 \n", "copied False \n", "delimit_alg (0, 5) \n", "finetune (0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01)\n", "infer_delimit 0 \n", "infer_sptree 0 \n", "nsample 50000 \n", "sampfreq 25 \n", "seed 12345 \n", "tauprior (2, 2000, 1) \n", "thetaprior (2, 2000) \n", "usedata 1 " ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "## set some optional params, leaving others at their defaults\n", "boo.params.burnin = 10000\n", "boo.params.nsample = 50000\n", "boo.params.sampfreq = 25\n", "boo.params\n" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "maxloci 500 \n", "minmap {'insipida': 4, 'tonduzii': 1, 'maxima': 4, 'glabrata': 4, 'yoponensis': 3}\n", "minsnps 4 " ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "## set some optional filters leaving others at their defaults\n", "boo.filters.maxloci=500\n", "boo.filters.minsnps=4\n", "\n", "## print filters\n", "boo.filters" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "input files created for job pharma-ltest (500 loci)\n" ] } ], "source": [ "## write files \n", "boo.write_bpp_files(prefix=\"pharma-ltest\")" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "submitted 3 bpp jobs [pharma-loci500] (500 loci)\n" ] } ], "source": [ "boo.submit_bpp_jobs(\n", " prefix=\"pharma-loci500\", \n", " nreps=3, \n", " ipyclient=ipyclient, \n", " seed=12345, \n", " randomize_order=True,\n", " )" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Tree inference" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "burnin 10000 \n", "cleandata 0 \n", "copied False \n", "delimit_alg (0, 5) \n", "finetune (0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01)\n", "infer_delimit 0 \n", "infer_sptree 1 \n", "nsample 50000 \n", "sampfreq 25 \n", "seed 285080861 \n", "tauprior (2, 2000, 1) \n", "thetaprior (2, 2000) \n", "usedata 1 " ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "b10 = boo.copy()\n", "b10.params.infer_sptree = 1\n", "b10.params" ] }, { "cell_type": "code", "execution_count": 133, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "maxloci 500 \n", "minmap {'insipida': 4, 'tonduzii': 1, 'maxima': 4, 'glabrata': 4, 'yoponensis': 3}\n", "minsnps 4 " ] }, "execution_count": 133, "metadata": {}, "output_type": "execute_result" } ], "source": [ "b10.filters.maxloci = 500\n", "b10.filters" ] }, { "cell_type": "code", "execution_count": 47, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "submitted 3 bpp jobs [pharma-loci500-b10] (500 loci)\n" ] } ], "source": [ "b10.submit_bpp_jobs(\n", " prefix=\"pharma-loci500-b10\", \n", " nreps=3, \n", " ipyclient=ipyclient, \n", " seed=12345, \n", " randomize_order=True,\n", " )" ] }, { "cell_type": "code", "execution_count": 182, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "submitted 10 bpp jobs [pharma-loci50-bb10] (50 loci)\n" ] } ], "source": [ "bb10 = b10.copy()\n", "bb10.filters.maxloci = 50\n", "bb10.filters\n", "\n", "bb10.submit_bpp_jobs(\n", " prefix=\"pharma-loci50-bb10\", \n", " nreps=10, \n", " ipyclient=ipyclient, \n", " seed=12345, \n", " randomize_order=True,\n", " )" ] }, { "cell_type": "markdown", "metadata": { "collapsed": true }, "source": [ "## Plot results" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### fixed tree" ] }, { "cell_type": "code", "execution_count": 50, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
countmeanstdmin25%50%75%max
theta_1glabrata760.00000.00110.00010.00080.00100.00100.00120.0016
theta_2insipida760.00000.00110.00010.00080.00100.00100.00110.0017
theta_3maxima760.00000.00380.00030.00300.00360.00380.00400.0046
theta_5yoponensis760.00000.00200.00020.00160.00190.00200.00210.0025
theta_6glabratainsipidayoponensismaximatonduzii760.00000.01570.00080.01350.01520.01570.01620.0181
theta_7glabratainsipidayoponensismaxima760.00000.00150.00090.00010.00080.00130.00200.0051
theta_8glabratainsipidayoponensis760.00000.00190.00120.00010.00100.00160.00240.0070
theta_9glabratainsipida760.00000.01320.00190.00790.01200.01320.01430.0191
tau_6glabratainsipidayoponensismaximatonduzii760.00000.00300.00020.00240.00290.00300.00310.0036
tau_7glabratainsipidayoponensismaxima760.00000.00300.00020.00240.00290.00300.00310.0036
tau_8glabratainsipidayoponensis760.00000.00290.00020.00240.00280.00300.00310.0036
tau_9glabratainsipida760.00000.00110.00020.00080.00100.00100.00110.0016
lnL760.0000-69160.902843.5274-69280.3200-69189.8015-69157.8950-69129.9325-68997.7510
\n", "
" ], "text/plain": [ " count mean std \\\n", "theta_1glabrata 760.0000 0.0011 0.0001 \n", "theta_2insipida 760.0000 0.0011 0.0001 \n", "theta_3maxima 760.0000 0.0038 0.0003 \n", "theta_5yoponensis 760.0000 0.0020 0.0002 \n", "theta_6glabratainsipidayoponensismaximatonduzii 760.0000 0.0157 0.0008 \n", "theta_7glabratainsipidayoponensismaxima 760.0000 0.0015 0.0009 \n", "theta_8glabratainsipidayoponensis 760.0000 0.0019 0.0012 \n", "theta_9glabratainsipida 760.0000 0.0132 0.0019 \n", "tau_6glabratainsipidayoponensismaximatonduzii 760.0000 0.0030 0.0002 \n", "tau_7glabratainsipidayoponensismaxima 760.0000 0.0030 0.0002 \n", "tau_8glabratainsipidayoponensis 760.0000 0.0029 0.0002 \n", "tau_9glabratainsipida 760.0000 0.0011 0.0002 \n", "lnL 760.0000 -69160.9028 43.5274 \n", "\n", " min 25% \\\n", "theta_1glabrata 0.0008 0.0010 \n", "theta_2insipida 0.0008 0.0010 \n", "theta_3maxima 0.0030 0.0036 \n", "theta_5yoponensis 0.0016 0.0019 \n", "theta_6glabratainsipidayoponensismaximatonduzii 0.0135 0.0152 \n", "theta_7glabratainsipidayoponensismaxima 0.0001 0.0008 \n", "theta_8glabratainsipidayoponensis 0.0001 0.0010 \n", "theta_9glabratainsipida 0.0079 0.0120 \n", "tau_6glabratainsipidayoponensismaximatonduzii 0.0024 0.0029 \n", "tau_7glabratainsipidayoponensismaxima 0.0024 0.0029 \n", "tau_8glabratainsipidayoponensis 0.0024 0.0028 \n", "tau_9glabratainsipida 0.0008 0.0010 \n", "lnL -69280.3200 -69189.8015 \n", "\n", " 50% 75% \\\n", "theta_1glabrata 0.0010 0.0012 \n", "theta_2insipida 0.0010 0.0011 \n", "theta_3maxima 0.0038 0.0040 \n", "theta_5yoponensis 0.0020 0.0021 \n", "theta_6glabratainsipidayoponensismaximatonduzii 0.0157 0.0162 \n", "theta_7glabratainsipidayoponensismaxima 0.0013 0.0020 \n", "theta_8glabratainsipidayoponensis 0.0016 0.0024 \n", "theta_9glabratainsipida 0.0132 0.0143 \n", "tau_6glabratainsipidayoponensismaximatonduzii 0.0030 0.0031 \n", "tau_7glabratainsipidayoponensismaxima 0.0030 0.0031 \n", "tau_8glabratainsipidayoponensis 0.0030 0.0031 \n", "tau_9glabratainsipida 0.0010 0.0011 \n", "lnL -69157.8950 -69129.9325 \n", "\n", " max \n", "theta_1glabrata 0.0016 \n", "theta_2insipida 0.0017 \n", "theta_3maxima 0.0046 \n", "theta_5yoponensis 0.0025 \n", "theta_6glabratainsipidayoponensismaximatonduzii 0.0181 \n", "theta_7glabratainsipidayoponensismaxima 0.0051 \n", "theta_8glabratainsipidayoponensis 0.0070 \n", "theta_9glabratainsipida 0.0191 \n", "tau_6glabratainsipidayoponensismaximatonduzii 0.0036 \n", "tau_7glabratainsipidayoponensismaxima 0.0036 \n", "tau_8glabratainsipidayoponensis 0.0036 \n", "tau_9glabratainsipida 0.0016 \n", "lnL -68997.7510 " ] }, "execution_count": 50, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import pandas as pd\n", "tab = pd.read_csv(\"analysis-bpp/pharma-loci500-r0.mcmc.txt\", sep=\"\\t\", index_col=0)\n", "pd.set_option('display.float_format', lambda x: '%.4f' % x)\n", "tab.describe().T" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### inferred tree" ] }, { "cell_type": "code", "execution_count": 190, "metadata": {}, "outputs": [], "source": [ "def plotit(mtre):\n", " ## set up axes\n", " canvas = toyplot.Canvas(width=450, height=350)\n", " axes = canvas.cartesian()\n", "\n", " ## plot the tree\n", " mtre.draw_cloudtree(\n", " axes=axes,\n", " edge_style={\"opacity\": 0.01},\n", " use_edge_lengths=True,\n", " orient='right',\n", " );\n", "\n", " ## style axes\n", " axes.y.show = False\n", " axes.x.show = True\n", " axes.x.ticks.show = True\n", " axes.x.ticks.locator = toyplot.locator.Explicit(\n", " locations=np.linspace(0, -3, 3) / 1000.,\n", " labels=np.linspace(0, 3, 3),\n", " )\n", " axes.x.label.text = \"Divergence time (substitutions/site x 10-3)\"\n", " return canvas, axes" ] }, { "cell_type": "code", "execution_count": 191, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(,\n", " )" ] }, "execution_count": 191, "metadata": {}, "output_type": "execute_result" }, { "data": { "text/html": [ "
glabratainsipidayoponensismaximatonduzii0.01.53.0Divergence time (substitutions/site x 10-3)
    \n", "
  • \n", "
  • \n", " Save as .csv\n", "
  • \n", "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "mtre = toytree.multitree(\n", " \"analysis-bpp/pharma-loci500-b10-r0.mcmc.txt\",\n", " fixed_order=boo.tree.get_leaf_names(),\n", " treeslice=(150, 1500, 5),\n", " )\n", "\n", "plotit(mtre)" ] }, { "cell_type": "code", "execution_count": 179, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(,\n", " )" ] }, "execution_count": 179, "metadata": {}, "output_type": "execute_result" }, { "data": { "text/html": [ "
glabratainsipidayoponensismaximatonduzii0.01.53.0Divergence time (substitutions/site x 10-3)
    \n", "
  • \n", "
  • \n", " Save as .csv\n", "
  • \n", "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "mtre = toytree.multitree(\n", " \"analysis-bpp/pharma-loci500-b10-r2.mcmc.txt\",\n", " fixed_order=boo.tree.get_leaf_names(),\n", " treeslice=(150, 1500, 5),\n", " )\n", "\n", "\n", "\n", "plotit(mtre)" ] }, { "cell_type": "code", "execution_count": 209, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
tonduziimaximayoponensisglabratainsipidaidx: 0\n", "name: 0\n", "dist: 0\n", "support: 100idx: 1\n", "name: 1\n", "dist: 68\n", "support: 68idx: 2\n", "name: 2\n", "dist: 105\n", "support: 105idx: 3\n", "name: tonduzii\n", "dist: 100\n", "support: 100idx: 4\n", "name: maxima\n", "dist: 100\n", "support: 100idx: 5\n", "name: yoponensis\n", "dist: 100\n", "support: 100idx: 6\n", "name: glabrata\n", "dist: 100\n", "support: 100idx: 7\n", "name: insipida\n", "dist: 100\n", "support: 100
    \n", "
  • \n", "
  • \n", " Save as .csv\n", "
  • \n", "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "ctre = mtre.get_consensus_tree()\n", "#ctre.root(['tonduzii', 'maxima'])\n", "ctre.draw(width=300, height=300, node_labels=True);" ] }, { "cell_type": "code", "execution_count": 201, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
glabratainsipidayoponensismaximatonduzii0.01.53.0Divergence time (substitutions/site x 10-3)
    \n", "
  • \n", "
  • \n", " Save as .csv\n", "
  • \n", "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "mtre = toytree.multitree(\n", " \"analysis-bpp/pharma-loci50-bb10-r0.mcmc.txt\",\n", " fixed_order=boo.tree.get_leaf_names(),\n", " treeslice=(500, 5000, 10),\n", " )\n", "\n", "plotit(mtre);" ] }, { "cell_type": "code", "execution_count": 210, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
glabratainsipidayoponensismaximatonduzii0.01.53.0Divergence time (substitutions/site x 10-3)
    \n", "
  • \n", "
  • \n", " Save as .csv\n", "
  • \n", "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "mtre = toytree.multitree(\n", " \"analysis-bpp/pharma-loci50-bb10-r9.mcmc.txt\",\n", " fixed_order=boo.tree.get_leaf_names(),\n", " treeslice=(1000, 5000, 10),\n", " )\n", "\n", "plotit(mtre);" ] } ], "metadata": { "kernelspec": { "display_name": "Python 2", "language": "python", "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.13" } }, "nbformat": 4, "nbformat_minor": 1 }