{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# ipyrad-analysis toolkit: Popgen summary statistics\n",
    "\n",
    "Calculate population-genetic summary statistics such as pi, Watterson's theta, Tajima's D, Dxy, and Fst."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "12\n"
     ]
    }
   ],
   "source": [
    "import ipyrad\n",
    "import ipyrad.analysis as ipa\n",
    "import ipyparallel as ipp\n",
    "from ipyrad.analysis.popgen import Popgen\n",
    "\n",
    "# Start the ipcluster instance externally before running this cell.\n",
    "# Run this on the command line:\n",
    "# `ipcluster start --cluster-id=popgen --n=12 --daemonize`\n",
    "#\n",
    "# And remember to stop the cluster when you're done:\n",
    "# `ipcluster stop --cluster-id=popgen`\n",
    "\n",
    "# Connect to the running cluster; the cluster_id must match the one used above.\n",
    "ipyclient = ipp.Client(cluster_id=\"popgen\")\n",
    "\n",
    "# Number of engines the client can see (12 if started with --n=12).\n",
    "print(len(ipyclient))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The necessary data file is the HDF5-formatted sequence file (in the assembly's output directory).\n",
    "data = \"/tmp/ipyrad-test/wat_outfiles/wat.seqs.hdf5\"\n",
    "\n",
    "# popgen tools can also accept an ipyrad assembly:\n",
    "#data = ipyrad.load_json(\"/tmp/ipyrad-test/wat.json\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Dictionary mapping each population name to the list of its sample names\n",
    "imap = {\n",
    "    \"pop1\": [\"1A_0\", \"1B_0\", \"1C_0\", \"1D_0\"],\n",
    "    \"pop2\": [\"2E_0\", \"2F_0\", \"2G_0\", \"2H_0\"],\n",
    "    \"pop3\": [\"3I_0\", \"3J_0\", \"3K_0\", \"3L_0\"],\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Parallel connection | bobolink: 12 cores\n",
      "[locus filter] full data: 1000\n",
      "[locus filter] post filter: 998\n",
      "[####################] 100% 0:00:05 | Calculating sumstats for nloci 998 \n",
      "[####################] 100% 0:00:01 | Collating sumstats for npops 3 \n"
     ]
    }
   ],
   "source": [
    "# Create the popgen object from the data file and population map, then run it\n",
    "# on the engines of the ipcluster client created above.\n",
    "popgen = Popgen(data=data, imap=imap)\n",
    "popgen.run(ipyclient=ipyclient)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 74,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "between {'Dxy': pop1 pop2 pop3\n",
       "pop1 0.0 0.025 0.025\n",
       "pop2 0.0 0.000 0.024\n",
       "pop3 0.0 0.000 0.000, 'Fst': pop1 pop2 pop3\n",
       "pop1 0.0 0.372 0.416\n",
       "pop2 0.0 0.000 0.416\n",
       "pop3 0.0 0.000 0.000, 'Fst_adj': pop1 pop2 pop3\n",
       "pop1 0.0 0.299 0.343\n",
       "pop2 0.0 0.000 0.344\n",
       "pop3 0.0 0.000 0.000}\n",
       "within mean_pi mean_Watterson mean_TajimasD\n",
       "pop1 0.006 0.005 0.312\n",
       "pop2 0.006 0.005 0.279\n",
       "pop3 0.006 0.005 0.293"
      ]
     },
     "execution_count": 74,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Check the results: pairwise between-population stats (Dxy, Fst, Fst_adj)\n",
    "# and per-population within stats (mean pi, Watterson, Tajima's D).\n",
    "popgen.results"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}