{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "toc": true
   },
   "source": [
    "<h1>Table of Contents<span class=\"tocSkip\"></span></h1>\n",
    "<div class=\"toc\" style=\"margin-top: 1em;\"><ul class=\"toc-item\"><li><span><a href=\"#Test-Targets.align_and_parse\" data-toc-modified-id=\"Test-Targets.align_and_parse-1\">Test <code>Targets.align_and_parse</code></a></span><ul class=\"toc-item\"><li><span><a href=\"#Setup\" data-toc-modified-id=\"Setup-1.1\">Setup</a></span></li><li><span><a href=\"#Align-and-parse-separately\" data-toc-modified-id=\"Align-and-parse-separately-1.2\">Align and parse separately</a></span></li><li><span><a href=\"#Align-and-parse-together\" data-toc-modified-id=\"Align-and-parse-together-1.3\">Align and parse together</a></span></li></ul></li></ul></div>"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Test `Targets.align_and_parse`\n",
    "This notebook makes sure that running `Targets.align_and_parse` returns the same output as running `Targets.align` and `Targets.parse_alignment` separately."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Setup\n",
    "Import Python modules:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import warnings\n",
    "\n",
    "import pandas as pd\n",
    "\n",
    "import alignparse.ccs\n",
    "import alignparse.minimap2\n",
    "import alignparse.targets"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Suppress warnings that clutter output:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "warnings.simplefilter('ignore')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Directory for output:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "outdir_base = '_temp_check_Targets_align_and_parse'\n",
    "outdir_separate = os.path.join(outdir_base, 'separate/')\n",
    "outdir_together = os.path.join(outdir_base, 'together/')\n",
    "os.makedirs(outdir_separate, exist_ok=True)\n",
    "os.makedirs(outdir_together, exist_ok=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Get Target:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "recA_targetfile = '../notebooks/input_files/recA_amplicon.gb'\n",
    "recA_parse_specs_file = '../notebooks/input_files/recA_feature_parse_specs.yaml'\n",
    "targets = alignparse.targets.Targets(\n",
    "                seqsfile=recA_targetfile,\n",
    "                feature_parse_specs=recA_parse_specs_file)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Get the PacBio runs:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>name</th>\n",
       "      <th>report</th>\n",
       "      <th>fastq</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>recA_lib-1</td>\n",
       "      <td>../notebooks/input_files/recA_lib-1_report.txt</td>\n",
       "      <td>../notebooks/input_files/recA_lib-1_ccs.fastq</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>recA_lib-2</td>\n",
       "      <td>../notebooks/input_files/recA_lib-2_report.txt</td>\n",
       "      <td>../notebooks/input_files/recA_lib-2_ccs.fastq</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         name                                          report  \\\n",
       "0  recA_lib-1  ../notebooks/input_files/recA_lib-1_report.txt   \n",
       "1  recA_lib-2  ../notebooks/input_files/recA_lib-2_report.txt   \n",
       "\n",
       "                                           fastq  \n",
       "0  ../notebooks/input_files/recA_lib-1_ccs.fastq  \n",
       "1  ../notebooks/input_files/recA_lib-2_ccs.fastq  "
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "run_names = ['recA_lib-1', 'recA_lib-2']\n",
    "ccs_dir = 'input_files'\n",
    "\n",
    "pacbio_runs = pd.DataFrame(\n",
    "            {'name': run_names,\n",
    "             'report': [f\"../notebooks/{ccs_dir}/{name}_report.txt\"\n",
    "                        for name in run_names],\n",
    "             'fastq': [f\"../notebooks/{ccs_dir}/{name}_ccs.fastq\"\n",
    "                       for name in run_names]\n",
    "             })\n",
    "\n",
    "pacbio_runs"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Create a `Mapper`:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "mapper = alignparse.minimap2.Mapper(alignparse.minimap2.OPTIONS_CODON_DMS)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Align and parse separately\n",
    "First, add the names of the desired alignment files to our data frame:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>name</th>\n",
       "      <th>report</th>\n",
       "      <th>fastq</th>\n",
       "      <th>alignments</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>recA_lib-1</td>\n",
       "      <td>../notebooks/input_files/recA_lib-1_report.txt</td>\n",
       "      <td>../notebooks/input_files/recA_lib-1_ccs.fastq</td>\n",
       "      <td>_temp_check_Targets_align_and_parse/separate/r...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>recA_lib-2</td>\n",
       "      <td>../notebooks/input_files/recA_lib-2_report.txt</td>\n",
       "      <td>../notebooks/input_files/recA_lib-2_ccs.fastq</td>\n",
       "      <td>_temp_check_Targets_align_and_parse/separate/r...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         name                                          report  \\\n",
       "0  recA_lib-1  ../notebooks/input_files/recA_lib-1_report.txt   \n",
       "1  recA_lib-2  ../notebooks/input_files/recA_lib-2_report.txt   \n",
       "\n",
       "                                           fastq  \\\n",
       "0  ../notebooks/input_files/recA_lib-1_ccs.fastq   \n",
       "1  ../notebooks/input_files/recA_lib-2_ccs.fastq   \n",
       "\n",
       "                                          alignments  \n",
       "0  _temp_check_Targets_align_and_parse/separate/r...  \n",
       "1  _temp_check_Targets_align_and_parse/separate/r...  "
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pacbio_runs = pacbio_runs.assign(alignments=lambda x: outdir_separate +\n",
    "                                 x['name'] + '_alignments.sam')\n",
    "\n",
    "pacbio_runs"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Now use the mapper to actually align the FASTQ queries to the target:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "for tup in pacbio_runs.itertuples(index=False):\n",
    "    targets.align(queryfile=tup.fastq,\n",
    "                  alignmentfile=tup.alignments,\n",
    "                  mapper=mapper)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Parse the alignments:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "readstats = []\n",
    "aligned = {targetname: [] for targetname in targets.target_names}\n",
    "filtered = {targetname: [] for targetname in targets.target_names}\n",
    "\n",
    "for run in pacbio_runs.itertuples():\n",
    "    \n",
    "    run_readstats, run_aligned, run_filtered = targets.parse_alignment(run.alignments)\n",
    "    \n",
    "    # when concatenating add the run name to keep track of runs for results\n",
    "    readstats.append(run_readstats\n",
    "                     .assign(name=run.name)\n",
    "                     )\n",
    "    for targetname in targets.target_names:\n",
    "        aligned[targetname].append(run_aligned[targetname]\n",
    "                                   .assign(name=run.name)\n",
    "                                   )\n",
    "        filtered[targetname].append(run_filtered[targetname]\n",
    "                                    .assign(name=run.name)\n",
    "                                    )\n",
    "        \n",
    "# now concatenate the data frames for each run\n",
    "readstats = pd.concat(readstats, ignore_index=True, sort=False)\n",
    "for targetname in targets.target_names:\n",
    "    aligned[targetname] = pd.concat(aligned[targetname], ignore_index=True, sort=False)\n",
    "    filtered[targetname] = pd.concat(filtered[targetname], ignore_index=True, sort=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "First lets look at the read stats:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>category</th>\n",
       "      <th>count</th>\n",
       "      <th>name</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>aligned RecA_PacBio_amplicon</td>\n",
       "      <td>123</td>\n",
       "      <td>recA_lib-1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>filtered RecA_PacBio_amplicon</td>\n",
       "      <td>16</td>\n",
       "      <td>recA_lib-1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>unmapped</td>\n",
       "      <td>0</td>\n",
       "      <td>recA_lib-1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>aligned RecA_PacBio_amplicon</td>\n",
       "      <td>112</td>\n",
       "      <td>recA_lib-2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>filtered RecA_PacBio_amplicon</td>\n",
       "      <td>12</td>\n",
       "      <td>recA_lib-2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>unmapped</td>\n",
       "      <td>0</td>\n",
       "      <td>recA_lib-2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                        category  count        name\n",
       "0   aligned RecA_PacBio_amplicon    123  recA_lib-1\n",
       "1  filtered RecA_PacBio_amplicon     16  recA_lib-1\n",
       "2                       unmapped      0  recA_lib-1\n",
       "3   aligned RecA_PacBio_amplicon    112  recA_lib-2\n",
       "4  filtered RecA_PacBio_amplicon     12  recA_lib-2\n",
       "5                       unmapped      0  recA_lib-2"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "readstats"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Now look at the information on the filtered reads.\n",
    "This is a bigger data frame, so we just look at the first few lines for the first target (of which there is only one anyway):"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>query_name</th>\n",
       "      <th>filter_reason</th>\n",
       "      <th>name</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>m54228_180801_171631/4194459/ccs</td>\n",
       "      <td>spacer mutation_nt_count</td>\n",
       "      <td>recA_lib-1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>m54228_180801_171631/4325806/ccs</td>\n",
       "      <td>barcode mutation_nt_count</td>\n",
       "      <td>recA_lib-1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>m54228_180801_171631/4391313/ccs</td>\n",
       "      <td>termini3 mutation_nt_count</td>\n",
       "      <td>recA_lib-1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>m54228_180801_171631/4391375/ccs</td>\n",
       "      <td>gene clip3</td>\n",
       "      <td>recA_lib-1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>m54228_180801_171631/4391467/ccs</td>\n",
       "      <td>gene clip3</td>\n",
       "      <td>recA_lib-1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                         query_name               filter_reason        name\n",
       "0  m54228_180801_171631/4194459/ccs    spacer mutation_nt_count  recA_lib-1\n",
       "1  m54228_180801_171631/4325806/ccs   barcode mutation_nt_count  recA_lib-1\n",
       "2  m54228_180801_171631/4391313/ccs  termini3 mutation_nt_count  recA_lib-1\n",
       "3  m54228_180801_171631/4391375/ccs                  gene clip3  recA_lib-1\n",
       "4  m54228_180801_171631/4391467/ccs                  gene clip3  recA_lib-1"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "filtered[targets.target_names[0]].head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Finally, we can look at the information for the validly aligned (not filtered) reads.\n",
    "First just look at the first few entries in the data frame for the first target:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>query_name</th>\n",
       "      <th>query_clip5</th>\n",
       "      <th>query_clip3</th>\n",
       "      <th>gene_mutations</th>\n",
       "      <th>gene_accuracy</th>\n",
       "      <th>barcode_sequence</th>\n",
       "      <th>barcode_accuracy</th>\n",
       "      <th>variant_tag5_sequence</th>\n",
       "      <th>variant_tag3_sequence</th>\n",
       "      <th>name</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>m54228_180801_171631/4391577/ccs</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>C100A T102A G658C C659T del840to840</td>\n",
       "      <td>0.999455</td>\n",
       "      <td>AAGATACACTCGAAATCT</td>\n",
       "      <td>1.0</td>\n",
       "      <td>G</td>\n",
       "      <td>C</td>\n",
       "      <td>recA_lib-1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>m54228_180801_171631/4915465/ccs</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>C142G G144T T329A A738G A946T C947A</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>AAATATCATCGCGGCCAG</td>\n",
       "      <td>1.0</td>\n",
       "      <td>T</td>\n",
       "      <td>T</td>\n",
       "      <td>recA_lib-1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>m54228_180801_171631/4981392/ccs</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>C142G G144T T329A A738G A946T C947A</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>AAATATCATCGCGGCCAG</td>\n",
       "      <td>1.0</td>\n",
       "      <td>G</td>\n",
       "      <td>C</td>\n",
       "      <td>recA_lib-1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>m54228_180801_171631/6029553/ccs</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>T83A G84A A106T T107A G108A ins693G G862T C863...</td>\n",
       "      <td>0.999940</td>\n",
       "      <td>CTAATAGTAGTTTTCCAG</td>\n",
       "      <td>1.0</td>\n",
       "      <td>G</td>\n",
       "      <td>C</td>\n",
       "      <td>recA_lib-1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>m54228_180801_171631/6488565/ccs</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>A254C G255T A466T T467G C468T C940G G942A</td>\n",
       "      <td>0.999967</td>\n",
       "      <td>TATTTATACCCATGAGTG</td>\n",
       "      <td>1.0</td>\n",
       "      <td>A</td>\n",
       "      <td>T</td>\n",
       "      <td>recA_lib-1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                         query_name  query_clip5  query_clip3  \\\n",
       "0  m54228_180801_171631/4391577/ccs            1            0   \n",
       "1  m54228_180801_171631/4915465/ccs            0            0   \n",
       "2  m54228_180801_171631/4981392/ccs            0            0   \n",
       "3  m54228_180801_171631/6029553/ccs            0            0   \n",
       "4  m54228_180801_171631/6488565/ccs            0            0   \n",
       "\n",
       "                                      gene_mutations  gene_accuracy  \\\n",
       "0                C100A T102A G658C C659T del840to840       0.999455   \n",
       "1                C142G G144T T329A A738G A946T C947A       1.000000   \n",
       "2                C142G G144T T329A A738G A946T C947A       1.000000   \n",
       "3  T83A G84A A106T T107A G108A ins693G G862T C863...       0.999940   \n",
       "4          A254C G255T A466T T467G C468T C940G G942A       0.999967   \n",
       "\n",
       "     barcode_sequence  barcode_accuracy variant_tag5_sequence  \\\n",
       "0  AAGATACACTCGAAATCT               1.0                     G   \n",
       "1  AAATATCATCGCGGCCAG               1.0                     T   \n",
       "2  AAATATCATCGCGGCCAG               1.0                     G   \n",
       "3  CTAATAGTAGTTTTCCAG               1.0                     G   \n",
       "4  TATTTATACCCATGAGTG               1.0                     A   \n",
       "\n",
       "  variant_tag3_sequence        name  \n",
       "0                     C  recA_lib-1  \n",
       "1                     T  recA_lib-1  \n",
       "2                     C  recA_lib-1  \n",
       "3                     C  recA_lib-1  \n",
       "4                     T  recA_lib-1  "
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "aligned[targets.target_names[0]].head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Align and parse together\n",
    "Use `Targets.align_and_parse`:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "readstats2, aligned2, filtered2 = targets.align_and_parse(\n",
    "        df=pacbio_runs,\n",
    "        mapper=mapper,\n",
    "        outdir=outdir_together,\n",
    "        name_col='name',\n",
    "        queryfile_col='fastq',\n",
    "        overwrite=True,\n",
    "        ncpus=2,\n",
    "        )"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Make sure the data frames are the same:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "pd.testing.assert_frame_equal(readstats, readstats2, check_like=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "pd.testing.assert_frame_equal(aligned[targets.target_names[0]],\n",
    "                              aligned2[targets.target_names[0]],\n",
    "                              check_like=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "pd.testing.assert_frame_equal(filtered[targets.target_names[0]],\n",
    "                              filtered2[targets.target_names[0]],\n",
    "                              check_like=True)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.7"
  },
  "toc": {
   "nav_menu": {},
   "number_sections": false,
   "sideBar": true,
   "skip_h1_title": false,
   "toc_cell": true,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": true
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}