{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import ipyrad as ip"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "New Assembly: test\n"
     ]
    }
   ],
   "source": [
    "data = ip.Assembly(\"test\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "data.set_params(\"project_dir\", \"sixseven\")\n",
    "data.set_params(\"raw_fastq_path\", \"./ipsimdata/rad_example_R1_.fastq.gz\")\n",
    "data.set_params(\"barcodes_path\", \"./ipsimdata/rad_example_barcodes.txt\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Assembly: test\n",
      "[####################] 100%  sorting reads         | 0:00:02 | s1 | \n",
      "[####################] 100%  writing/compressing   | 0:00:00 | s1 | \n",
      "[####################] 100%  processing reads      | 0:00:02 | s2 | \n",
      "[####################] 100%  dereplicating         | 0:00:00 | s3 | \n",
      "[####################] 100%  clustering            | 0:00:01 | s3 | \n",
      "[####################] 100%  building clusters     | 0:00:00 | s3 | \n",
      "[####################] 100%  chunking              | 0:00:00 | s3 | \n",
      "[####################] 100%  aligning              | 0:00:10 | s3 | \n",
      "[####################] 100%  concatenating         | 0:00:00 | s3 | \n",
      "[####################] 100%  inferring [H, E]      | 0:00:03 | s4 | \n",
      "[####################] 100%  calculating depths    | 0:00:00 | s5 | \n",
      "[####################] 100%  chunking clusters     | 0:00:00 | s5 | \n",
      "[####################] 100%  consens calling       | 0:00:13 | s5 | \n"
     ]
    }
   ],
   "source": [
    "data.run(\"12345\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Set populations"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'1': (3, ['1A_0', '1B_0', '1C_0', '1D_0']),\n",
       " '2': (3, ['2G_0', '2E_0', '2F_0', '2H_0']),\n",
       " '3': (3, ['3K_0', '3J_0', '3I_0', '3L_0'])}"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# define the groups\n",
    "popdata = {\n",
    "    \"1\": [i for i in data.samples if \"1\" in i], \n",
    "    \"2\": [i for i in data.samples if \"2\" in i],\n",
    "    \"3\": [i for i in data.samples if \"3\" in i],\n",
    "    }\n",
    "\n",
    "# mincov values for the groups\n",
    "popmins = {\"1\": 3, \"2\": 3, \"3\": 3}\n",
    "\n",
    "# link the two dictionaries into a populations attribute\n",
    "data._link_populations(popdict=popdata, popmins=popmins)\n",
    "\n",
    "# view populations\n",
    "data.populations"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### run six and seven with pops"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Assembly: test\n",
      "[####################] 100%  concat/shuffle input  | 0:00:00 | s6 | \n",
      "[####################] 100%  clustering across     | 0:00:01 | s6 | \n",
      "[####################] 100%  building clusters     | 0:00:00 | s6 | \n",
      "[####################] 100%  aligning clusters     | 0:00:04 | s6 | \n",
      "[####################] 100%  database indels       | 0:00:00 | s6 | \n",
      "[####################] 100%  indexing clusters     | 0:00:01 | s6 | \n",
      "[####################] 100%  building database     | 0:00:00 | s6 | \n"
     ]
    }
   ],
   "source": [
    "data.run(\"6\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Assembly: test\n",
      "[####################] 100%  filtering loci        | 0:00:06 | s7 | \n",
      "[####################] 100%  building loci/stats   | 0:00:00 | s7 | \n",
      "[####################] 100%  building vcf file     | 0:00:03 | s7 | \n",
      "[####################] 100%  writing vcf file      | 0:00:00 | s7 | \n",
      "[####################] 100%  building arrays       | 0:00:00 | s7 | \n",
      "[####################] 100%  writing outfiles      | 0:00:00 | s7 | \n",
      "Outfiles written to: ~/scratch/ipyrad-hot/tests/sixseven/test_outfiles\n",
      "\n"
     ]
    }
   ],
   "source": [
    "data.run(\"7\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### But, now if we branch it breaks"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Assembly: d2\n",
      "[####################] 100%  filtering loci        | 0:00:00 | s7 | \n",
      "\n",
      "  Encountered an unexpected error (see ./ipyrad_log.txt)\n",
      "  Error message is below -------------------------------\n",
      "error in filter_stacks on chunk 0: IndexError(index 11 is out of bounds for axis 1 with size 11)\n"
     ]
    }
   ],
   "source": [
    "# branch assembly\n",
    "d2 = data.branch(\"d2\", subsamples=[i for i in data.samples if i != \"1A_0\"])\n",
    "\n",
    "# update pops to match new samples \n",
    "d2.populations['1'] = (3, ['1B_0', '1C_0', '1D_0'])\n",
    "\n",
    "# run 7\n",
    "d2.run(\"7\", force=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### The same above retried after fix to code"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import ipyrad as ip"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "loading Assembly: d2\n",
      "from saved path: ~/scratch/ipyrad-hot/tests/sixseven/d2.json\n",
      "Assembly: d2\n",
      "[####################] 100%  filtering loci        | 0:00:07 | s7 | \n",
      "[####################] 100%  building loci/stats   | 0:00:00 | s7 | \n",
      "[####################] 100%  building vcf file     | 0:00:02 | s7 | \n",
      "[####################] 100%  writing vcf file      | 0:00:00 | s7 | \n",
      "[####################] 100%  building arrays       | 0:00:00 | s7 | \n",
      "[####################] 100%  writing outfiles      | 0:00:00 | s7 | \n",
      "Outfiles written to: ~/scratch/ipyrad-hot/tests/sixseven/d2_outfiles\n",
      "\n"
     ]
    }
   ],
   "source": [
    "d2 = ip.load_json(\"sixseven/d2.json\")\n",
    "d2.run(\"7\", force=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### modify pops again"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Assembly: d2\n",
      "[####################] 100%  filtering loci        | 0:00:00 | s7 | \n",
      "[####################] 100%  building loci/stats   | 0:00:00 | s7 | \n",
      "[####################] 100%  building vcf file     | 0:00:01 | s7 | \n",
      "[####################] 100%  writing vcf file      | 0:00:00 | s7 | \n",
      "[####################] 100%  building arrays       | 0:00:00 | s7 | \n",
      "[####################] 100%  writing outfiles      | 0:00:00 | s7 | \n",
      "Outfiles written to: ~/scratch/ipyrad-hot/tests/sixseven/d2_outfiles\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# update pops to be diff from samples\n",
    "d2.populations['1'] = (2, ['1C_0', '1D_0'])\n",
    "\n",
    "# run 7\n",
    "d2.run(\"7\", force=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Assembly: d3\n",
      "[####################] 100%  filtering loci        | 0:00:00 | s7 | \n",
      "[####################] 100%  building loci/stats   | 0:00:00 | s7 | \n",
      "[####################] 100%  building vcf file     | 0:00:01 | s7 | \n",
      "[####################] 100%  writing vcf file      | 0:00:00 | s7 | \n",
      "[####################] 100%  building arrays       | 0:00:00 | s7 | \n",
      "[####################] 100%  writing outfiles      | 0:00:00 | s7 | \n",
      "Outfiles written to: ~/scratch/ipyrad-hot/tests/sixseven/d3_outfiles\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# update pops to be diff from samples\n",
    "d3 = d2.branch(\"d3\", subsamples=[i for i in d2.samples if i != \"1B_0\"])\n",
    "d3.populations['1'] = (2, ['1C_0', '1D_0'])\n",
    "\n",
    "# run 7\n",
    "d3.run(\"7\", force=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Py2 (hot)",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.15"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}