{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "### BPP parallelization without blocking" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import ipcoal\n", "import toytree\n", "import ipyrad.analysis as ipa\n", "import ipyparallel as ipp" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Start an ipcluster instance\n", "\n", "Here I assume that you have already started an ipcluster instance in a terminal using the command below, or by starting engines in the IPython Clusters tab in Jupyter. Remember that when you pull in new updates and restart your kernel, you also need to restart your cluster instance so that the engines load the updated code. \n", "\n", "\n", "```bash\n", "ipcluster start --n=4\n", "```" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[0, 1, 2, 3]" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# connect to the running cluster\n", "ipyclient = ipp.Client()\n", "\n", "# show the ids of the connected engines\n", "ipyclient.ids" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Simulate loci under a known scenario" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "wrote 100 loci to /tmp/test.seqs.hdf5\n" ] }, { "data": { "text/html": [ "
012345678r0r1r2r3r40250000500000
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# make a random tree\n", "tree = toytree.rtree.unittree(ntips=5, treeheight=5e5, seed=1243)\n", "tree.draw(ts='p');\n", "\n", "# simulate loci and write to HDF5\n", "model = ipcoal.Model(tree, Ne=1e5, nsamples=4)\n", "model.sim_loci(100, 500)\n", "model.write_loci_to_hdf5(name=\"test\", outdir=\"/tmp\", diploid=True)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Setup BPP" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'r0': ['r0-0', 'r0-1'],\n", " 'r1': ['r1-0', 'r1-1'],\n", " 'r2': ['r2-0', 'r2-1'],\n", " 'r3': ['r3-0', 'r3-1'],\n", " 'r4': ['r4-0', 'r4-1']}" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# create an IMAP \n", "IMAP = {\n", " 'r' + str(i): [j for j in model.alpha_ordered_names if int(j[1]) == i][:2] \n", " for i in range(5)\n", "}\n", "IMAP" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'binary': '/tmp/bpp-4.1.4-linux-x86_64/bin/bpp',\n", " 'infer_sptree': 0,\n", " 'infer_delimit': 0,\n", " 'infer_delimit_args': (0, 2),\n", " 'speciesmodelprior': 1,\n", " 'seed': 12345,\n", " 'burnin': 1000,\n", " 'nsample': 5000,\n", " 'sampfreq': 2,\n", " 'thetaprior': (3, 0.002),\n", " 'tauprior': (3, 0.002),\n", " 'phiprior': (1, 1),\n", " 'usedata': 1,\n", " 'cleandata': 0,\n", " 'finetune': (0.01, 0.02, 0.03, 0.04, 0.05, 0.01, 0.01),\n", " 'copied': False}" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# init bpp tool.\n", "bpp1 = ipa.bpp(\n", " data=\"/tmp/test.seqs.hdf5\",\n", " name=\"test1\", \n", " workdir=\"/tmp\",\n", " guidetree=tree,\n", " imap=IMAP,\n", " maxloci=100,\n", " burnin=1000,\n", " nsample=5000,\n", ")\n", "bpp1.kwargs" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Submit BPP jobs to run on cluster (using 
`._run()`)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[locus filter] full data: 100\n", "[locus filter] post filter: 100\n", "[ipa bpp] bpp v4.1.4\n", "[ipa.bpp] distributed 2 bpp jobs (name=test1, nloci=100)\n" ] } ], "source": [ "# submit 2 jobs to ipyclient without blocking (block=False)\n", "bpp1._run(nreps=2, ipyclient=ipyclient, force=True, block=False, dry_run=False)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Submit more jobs on the same ipyclient\n", "Here I use the `.copy()` method for convenience, but you could just as well create a new BPP object and call its `._run()` method with the same ipyclient object." ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[locus filter] full data: 100\n", "[locus filter] post filter: 100\n", "[ipa bpp] bpp v4.1.4\n", "[ipa.bpp] distributed 4 bpp jobs (name=test2, nloci=100)\n" ] } ], "source": [ "# submit 4 more jobs to the same ipyclient under a different job name (test2)\n", "bpp2 = bpp1.copy(\"test2\")\n", "bpp2._run(nreps=4, ipyclient=ipyclient, force=True, block=False, dry_run=False)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### The asynchronous job objects" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[, ]" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# see the async result objects of the submitted jobs\n", "bpp1.asyncs" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[,\n", " ,\n", " ,\n", " ]" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "bpp2.asyncs" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Block until jobs finish (or don't)" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { 
"text/plain": [ "{'ce917031-cd9a6264cd01a65d101b520d_1',\n", " 'ce917031-cd9a6264cd01a65d101b520d_2',\n", " 'ce917031-cd9a6264cd01a65d101b520d_3',\n", " 'ce917031-cd9a6264cd01a65d101b520d_4',\n", " 'ce917031-cd9a6264cd01a65d101b520d_5',\n", " 'ce917031-cd9a6264cd01a65d101b520d_6'}" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# see outstanding jobs (optional, this does NOT BLOCK)\n", "ipyclient.outstanding" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "True" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# BLOCK until all jobs on ipyclient are finished (returns True when done)\n", "ipyclient.wait()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Summarize results (WHEN FINISHED)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[ipa.bpp] found 2 existing result files\n", "[ipa.bpp] summarizing algorithm '00' results\n", "[ipa.bpp] combining mcmc files\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
theta_1___r0theta_2___r1theta_3___r2theta_4___r3theta_5___r4theta_6___r4___r3___r2___r1___r0theta_7___r3___r2___r1___r0theta_8___r2___r1___r0theta_9___r1___r0tau_6___r4___r3___r2___r1___r0tau_7___r3___r2___r1___r0tau_8___r2___r1___r0tau_9___r1___r0lnL
mean0.0056180.0051650.0051470.0050150.0056390.0037710.0034540.0032080.0042850.0060470.0049320.0037170.002147-85060.071771
median0.0054650.0050880.0050660.0049590.0055900.0037360.0032760.0029990.0042190.0060410.0049390.0037310.002151-85059.570500
S.D0.0010840.0009050.0007500.0006680.0007050.0006620.0017050.0014330.0012640.0003280.0003320.0002800.00023920.100881
min0.0031760.0028960.0032370.0031620.0031660.0021020.0004970.0007780.0013620.0050140.0038410.0028440.001302-85152.837000
max0.0120410.0096450.0095510.0087700.0091120.0064630.0102330.0113920.0095790.0071920.0059750.0045380.002916-84984.863000
2.5%0.0039140.0037100.0038920.0038420.0043950.0025780.0008410.0011610.0020800.0054610.0042910.0031640.001685-85099.356000
97.5%0.0080720.0072000.0067970.0065120.0071720.0051490.0073170.0064520.0069550.0066900.0055630.0042250.002611-85021.884000
2.5%HPD0.0038390.0035730.0037880.0036970.0043060.0025470.0006010.0010540.0019200.0054370.0042900.0031930.001697-85100.071000
97.5%HPD0.0079450.0069770.0066300.0063120.0070310.0050610.0066130.0060610.0067030.0066540.0055550.0042470.002619-85022.941000
ESS*1134.830056921.2958911705.2499061975.0462531918.43454243.49688929.77833443.885352130.54197133.53730133.27685894.383590212.132455643.462922
Eff*0.2269660.1842590.3410500.3950090.3836870.0086990.0059560.0087770.0261080.0067070.0066550.0188770.0424260.128693
\n", "
" ], "text/plain": [ " theta_1___r0 theta_2___r1 theta_3___r2 theta_4___r3 theta_5___r4 \\\n", "mean 0.005618 0.005165 0.005147 0.005015 0.005639 \n", "median 0.005465 0.005088 0.005066 0.004959 0.005590 \n", "S.D 0.001084 0.000905 0.000750 0.000668 0.000705 \n", "min 0.003176 0.002896 0.003237 0.003162 0.003166 \n", "max 0.012041 0.009645 0.009551 0.008770 0.009112 \n", "2.5% 0.003914 0.003710 0.003892 0.003842 0.004395 \n", "97.5% 0.008072 0.007200 0.006797 0.006512 0.007172 \n", "2.5%HPD 0.003839 0.003573 0.003788 0.003697 0.004306 \n", "97.5%HPD 0.007945 0.006977 0.006630 0.006312 0.007031 \n", "ESS* 1134.830056 921.295891 1705.249906 1975.046253 1918.434542 \n", "Eff* 0.226966 0.184259 0.341050 0.395009 0.383687 \n", "\n", " theta_6___r4___r3___r2___r1___r0 theta_7___r3___r2___r1___r0 \\\n", "mean 0.003771 0.003454 \n", "median 0.003736 0.003276 \n", "S.D 0.000662 0.001705 \n", "min 0.002102 0.000497 \n", "max 0.006463 0.010233 \n", "2.5% 0.002578 0.000841 \n", "97.5% 0.005149 0.007317 \n", "2.5%HPD 0.002547 0.000601 \n", "97.5%HPD 0.005061 0.006613 \n", "ESS* 43.496889 29.778334 \n", "Eff* 0.008699 0.005956 \n", "\n", " theta_8___r2___r1___r0 theta_9___r1___r0 \\\n", "mean 0.003208 0.004285 \n", "median 0.002999 0.004219 \n", "S.D 0.001433 0.001264 \n", "min 0.000778 0.001362 \n", "max 0.011392 0.009579 \n", "2.5% 0.001161 0.002080 \n", "97.5% 0.006452 0.006955 \n", "2.5%HPD 0.001054 0.001920 \n", "97.5%HPD 0.006061 0.006703 \n", "ESS* 43.885352 130.541971 \n", "Eff* 0.008777 0.026108 \n", "\n", " tau_6___r4___r3___r2___r1___r0 tau_7___r3___r2___r1___r0 \\\n", "mean 0.006047 0.004932 \n", "median 0.006041 0.004939 \n", "S.D 0.000328 0.000332 \n", "min 0.005014 0.003841 \n", "max 0.007192 0.005975 \n", "2.5% 0.005461 0.004291 \n", "97.5% 0.006690 0.005563 \n", "2.5%HPD 0.005437 0.004290 \n", "97.5%HPD 0.006654 0.005555 \n", "ESS* 33.537301 33.276858 \n", "Eff* 0.006707 0.006655 \n", "\n", " tau_8___r2___r1___r0 tau_9___r1___r0 lnL \n", "mean 0.003717 0.002147 
-85060.071771 \n", "median 0.003731 0.002151 -85059.570500 \n", "S.D 0.000280 0.000239 20.100881 \n", "min 0.002844 0.001302 -85152.837000 \n", "max 0.004538 0.002916 -84984.863000 \n", "2.5% 0.003164 0.001685 -85099.356000 \n", "97.5% 0.004225 0.002611 -85021.884000 \n", "2.5%HPD 0.003193 0.001697 -85100.071000 \n", "97.5%HPD 0.004247 0.002619 -85022.941000 \n", "ESS* 94.383590 212.132455 643.462922 \n", "Eff* 0.018877 0.042426 0.128693 " ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "res, mcmc = bpp1.summarize_results(\"00\")\n", "res" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[ipa.bpp] found 4 existing result files\n", "[ipa.bpp] summarizing algorithm '00' results\n", "[ipa.bpp] combining mcmc files\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
theta_1___r0theta_2___r1theta_3___r2theta_4___r3theta_5___r4theta_6___r4___r3___r2___r1___r0theta_7___r3___r2___r1___r0theta_8___r2___r1___r0theta_9___r1___r0tau_6___r4___r3___r2___r1___r0tau_7___r3___r2___r1___r0tau_8___r2___r1___r0tau_9___r1___r0lnL
mean0.0055950.0052680.0051710.0050000.0055980.0039880.0027840.0037880.0040570.0059310.0050480.0036320.002188-85057.458892
median0.0055190.0051580.0051130.0049640.0055550.0039640.0026130.0035790.0040090.0059250.0050570.0036380.002172-85056.968500
S.D0.0010290.0009810.0007570.0006650.0007100.0005970.0012270.0015410.0014740.0002740.0002820.0003100.00028019.979165
min0.0032660.0029040.0031780.0031190.0030630.0022300.0005270.0009850.0006460.0051030.0040630.0027050.001193-85140.649000
max0.0118130.0106690.0095790.0079170.0092200.0062960.0086540.0125070.0102660.0069330.0059040.0045480.003183-84988.645000
2.5%0.0038780.0036790.0038780.0038360.0043260.0028670.0009870.0014960.0011150.0054240.0044990.0030210.001684-85098.361000
97.5%0.0079020.0075950.0068400.0064480.0070990.0052020.0056780.0074430.0070820.0064860.0055660.0042250.002799-85018.670000
2.5%HPD0.0037340.0034720.0038460.0037570.0042430.0028580.0008660.0010560.0010180.0053940.0044990.0030370.001688-85099.726000
97.5%HPD0.0076970.0071630.0067830.0062880.0069770.0051810.0053410.0067250.0068350.0064350.0055660.0042350.002799-85020.784000
ESS*933.516203529.7791201462.0785741355.3232601658.165363199.46324386.21066863.84369580.629951132.112942106.77510767.380380103.6082941524.229702
Eff*0.1867030.1059560.2924160.2710650.3316330.0398930.0172420.0127690.0161260.0264230.0213550.0134760.0207220.304846
\n", "
" ], "text/plain": [ " theta_1___r0 theta_2___r1 theta_3___r2 theta_4___r3 theta_5___r4 \\\n", "mean 0.005595 0.005268 0.005171 0.005000 0.005598 \n", "median 0.005519 0.005158 0.005113 0.004964 0.005555 \n", "S.D 0.001029 0.000981 0.000757 0.000665 0.000710 \n", "min 0.003266 0.002904 0.003178 0.003119 0.003063 \n", "max 0.011813 0.010669 0.009579 0.007917 0.009220 \n", "2.5% 0.003878 0.003679 0.003878 0.003836 0.004326 \n", "97.5% 0.007902 0.007595 0.006840 0.006448 0.007099 \n", "2.5%HPD 0.003734 0.003472 0.003846 0.003757 0.004243 \n", "97.5%HPD 0.007697 0.007163 0.006783 0.006288 0.006977 \n", "ESS* 933.516203 529.779120 1462.078574 1355.323260 1658.165363 \n", "Eff* 0.186703 0.105956 0.292416 0.271065 0.331633 \n", "\n", " theta_6___r4___r3___r2___r1___r0 theta_7___r3___r2___r1___r0 \\\n", "mean 0.003988 0.002784 \n", "median 0.003964 0.002613 \n", "S.D 0.000597 0.001227 \n", "min 0.002230 0.000527 \n", "max 0.006296 0.008654 \n", "2.5% 0.002867 0.000987 \n", "97.5% 0.005202 0.005678 \n", "2.5%HPD 0.002858 0.000866 \n", "97.5%HPD 0.005181 0.005341 \n", "ESS* 199.463243 86.210668 \n", "Eff* 0.039893 0.017242 \n", "\n", " theta_8___r2___r1___r0 theta_9___r1___r0 \\\n", "mean 0.003788 0.004057 \n", "median 0.003579 0.004009 \n", "S.D 0.001541 0.001474 \n", "min 0.000985 0.000646 \n", "max 0.012507 0.010266 \n", "2.5% 0.001496 0.001115 \n", "97.5% 0.007443 0.007082 \n", "2.5%HPD 0.001056 0.001018 \n", "97.5%HPD 0.006725 0.006835 \n", "ESS* 63.843695 80.629951 \n", "Eff* 0.012769 0.016126 \n", "\n", " tau_6___r4___r3___r2___r1___r0 tau_7___r3___r2___r1___r0 \\\n", "mean 0.005931 0.005048 \n", "median 0.005925 0.005057 \n", "S.D 0.000274 0.000282 \n", "min 0.005103 0.004063 \n", "max 0.006933 0.005904 \n", "2.5% 0.005424 0.004499 \n", "97.5% 0.006486 0.005566 \n", "2.5%HPD 0.005394 0.004499 \n", "97.5%HPD 0.006435 0.005566 \n", "ESS* 132.112942 106.775107 \n", "Eff* 0.026423 0.021355 \n", "\n", " tau_8___r2___r1___r0 tau_9___r1___r0 lnL \n", "mean 0.003632 0.002188 
-85057.458892 \n", "median 0.003638 0.002172 -85056.968500 \n", "S.D 0.000310 0.000280 19.979165 \n", "min 0.002705 0.001193 -85140.649000 \n", "max 0.004548 0.003183 -84988.645000 \n", "2.5% 0.003021 0.001684 -85098.361000 \n", "97.5% 0.004225 0.002799 -85018.670000 \n", "2.5%HPD 0.003037 0.001688 -85099.726000 \n", "97.5%HPD 0.004235 0.002799 -85020.784000 \n", "ESS* 67.380380 103.608294 1524.229702 \n", "Eff* 0.013476 0.020722 0.304846 " ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "res, mcmc = bpp2.summarize_results(\"00\")\n", "res" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.9" } }, "nbformat": 4, "nbformat_minor": 4 }