{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import ipyrad as ip" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "New Assembly: test\n" ] } ], "source": [ "data = ip.Assembly(\"test\")" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "data.set_params(\"project_dir\", \"sixseven\")\n", "data.set_params(\"raw_fastq_path\", \"./ipsimdata/rad_example_R1_.fastq.gz\")\n", "data.set_params(\"barcodes_path\", \"./ipsimdata/rad_example_barcodes.txt\")" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Assembly: test\n", "[####################] 100% sorting reads | 0:00:02 | s1 | \n", "[####################] 100% writing/compressing | 0:00:00 | s1 | \n", "[####################] 100% processing reads | 0:00:02 | s2 | \n", "[####################] 100% dereplicating | 0:00:00 | s3 | \n", "[####################] 100% clustering | 0:00:01 | s3 | \n", "[####################] 100% building clusters | 0:00:00 | s3 | \n", "[####################] 100% chunking | 0:00:00 | s3 | \n", "[####################] 100% aligning | 0:00:10 | s3 | \n", "[####################] 100% concatenating | 0:00:00 | s3 | \n", "[####################] 100% inferring [H, E] | 0:00:03 | s4 | \n", "[####################] 100% calculating depths | 0:00:00 | s5 | \n", "[####################] 100% chunking clusters | 0:00:00 | s5 | \n", "[####################] 100% consens calling | 0:00:13 | s5 | \n" ] } ], "source": [ "data.run(\"12345\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Set populations" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'1': (3, ['1A_0', '1B_0', '1C_0', '1D_0']),\n", " '2': (3, ['2G_0', '2E_0', '2F_0', '2H_0']),\n", " '3': (3, ['3K_0', 
'3J_0', '3I_0', '3L_0'])}" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# define the groups\n", "popdata = {\n", " \"1\": [i for i in data.samples if \"1\" in i], \n", " \"2\": [i for i in data.samples if \"2\" in i],\n", " \"3\": [i for i in data.samples if \"3\" in i],\n", " }\n", "\n", "# mincov values for the groups\n", "popmins = {\"1\": 3, \"2\": 3, \"3\": 3}\n", "\n", "# link the two dictionaries into a populations attribute\n", "data._link_populations(popdict=popdata, popmins=popmins)\n", "\n", "# view populations\n", "data.populations" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Run steps 6 and 7 with populations" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Assembly: test\n", "[####################] 100% concat/shuffle input | 0:00:00 | s6 | \n", "[####################] 100% clustering across | 0:00:01 | s6 | \n", "[####################] 100% building clusters | 0:00:00 | s6 | \n", "[####################] 100% aligning clusters | 0:00:04 | s6 | \n", "[####################] 100% database indels | 0:00:00 | s6 | \n", "[####################] 100% indexing clusters | 0:00:01 | s6 | \n", "[####################] 100% building database | 0:00:00 | s6 | \n" ] } ], "source": [ "data.run(\"6\")" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Assembly: test\n", "[####################] 100% filtering loci | 0:00:06 | s7 | \n", "[####################] 100% building loci/stats | 0:00:00 | s7 | \n", "[####################] 100% building vcf file | 0:00:03 | s7 | \n", "[####################] 100% writing vcf file | 0:00:00 | s7 | \n", "[####################] 100% building arrays | 0:00:00 | s7 | \n", "[####################] 100% writing outfiles | 0:00:00 | s7 | \n", "Outfiles written to: 
~/scratch/ipyrad-hot/tests/sixseven/test_outfiles\n", "\n" ] } ], "source": [ "data.run(\"7\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### But now, if we branch, step 7 breaks" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Assembly: d2\n", "[####################] 100% filtering loci | 0:00:00 | s7 | \n", "\n", " Encountered an unexpected error (see ./ipyrad_log.txt)\n", " Error message is below -------------------------------\n", "error in filter_stacks on chunk 0: IndexError(index 11 is out of bounds for axis 1 with size 11)\n" ] } ], "source": [ "# branch assembly\n", "d2 = data.branch(\"d2\", subsamples=[i for i in data.samples if i != \"1A_0\"])\n", "\n", "# update pops to match new samples \n", "d2.populations['1'] = (3, ['1B_0', '1C_0', '1D_0'])\n", "\n", "# run 7\n", "d2.run(\"7\", force=True)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Same as above, retried after fixing the code" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import ipyrad as ip" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "loading Assembly: d2\n", "from saved path: ~/scratch/ipyrad-hot/tests/sixseven/d2.json\n", "Assembly: d2\n", "[####################] 100% filtering loci | 0:00:07 | s7 | \n", "[####################] 100% building loci/stats | 0:00:00 | s7 | \n", "[####################] 100% building vcf file | 0:00:02 | s7 | \n", "[####################] 100% writing vcf file | 0:00:00 | s7 | \n", "[####################] 100% building arrays | 0:00:00 | s7 | \n", "[####################] 100% writing outfiles | 0:00:00 | s7 | \n", "Outfiles written to: ~/scratch/ipyrad-hot/tests/sixseven/d2_outfiles\n", "\n" ] } ], "source": [ "d2 = ip.load_json(\"sixseven/d2.json\")\n", "d2.run(\"7\", force=True)" ] }, { "cell_type": "markdown", 
"metadata": {}, "source": [ "### Modify populations again" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Assembly: d2\n", "[####################] 100% filtering loci | 0:00:00 | s7 | \n", "[####################] 100% building loci/stats | 0:00:00 | s7 | \n", "[####################] 100% building vcf file | 0:00:01 | s7 | \n", "[####################] 100% writing vcf file | 0:00:00 | s7 | \n", "[####################] 100% building arrays | 0:00:00 | s7 | \n", "[####################] 100% writing outfiles | 0:00:00 | s7 | \n", "Outfiles written to: ~/scratch/ipyrad-hot/tests/sixseven/d2_outfiles\n", "\n" ] } ], "source": [ "# update pops so they differ from the samples\n", "d2.populations['1'] = (2, ['1C_0', '1D_0'])\n", "\n", "# run 7\n", "d2.run(\"7\", force=True)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Assembly: d3\n", "[####################] 100% filtering loci | 0:00:00 | s7 | \n", "[####################] 100% building loci/stats | 0:00:00 | s7 | \n", "[####################] 100% building vcf file | 0:00:01 | s7 | \n", "[####################] 100% writing vcf file | 0:00:00 | s7 | \n", "[####################] 100% building arrays | 0:00:00 | s7 | \n", "[####################] 100% writing outfiles | 0:00:00 | s7 | \n", "Outfiles written to: ~/scratch/ipyrad-hot/tests/sixseven/d3_outfiles\n", "\n" ] } ], "source": [ "# branch again without 1B_0, then set pop '1' to 1C_0 and 1D_0\n", "d3 = d2.branch(\"d3\", subsamples=[i for i in d2.samples if i != \"1B_0\"])\n", "d3.populations['1'] = (2, ['1C_0', '1D_0'])\n", "\n", "# run 7\n", "d3.run(\"7\", force=True)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Py2 (hot)", "language": "python", "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", 
"version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.15" } }, "nbformat": 4, "nbformat_minor": 1 }