{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Test Burrows Wheeler Transform (BWT)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "This is a simple notebook to test the implementation of the BWT."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from Burrows_Wheeler_Transform import transform\n",
    "from Burrows_Wheeler_Transform import inverse\n",
    "import time"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Corectness of the implementation "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "First we test the correctness of the implementation for the transform using some known BWT. We can have as input a string with or without '$'\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The BWT of the string Banana is: a$nnBaa\n",
      "The BWT of the string mississippi is: ipssm$pissii\n"
     ]
    }
   ],
   "source": [
    "finalString = transform('Banana')\n",
    "if finalString == 'a$nnBaa':\n",
    "    print('The BWT of the string Banana is: %s' %(finalString))\n",
    "\n",
    "\n",
    "finalString = transform('mississippi$')\n",
    "\n",
    "if finalString == 'ipssm$pissii':\n",
    "    print('The BWT of the string mississippi is: %s' %(finalString))\n",
    "    \n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Then we test  the correctness of the implementation for the inverse of BWT"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The original string is: Banana\n",
      "The original string is: mississippi\n"
     ]
    }
   ],
   "source": [
    "originalString = inverse('a$nnBaa')\n",
    "if originalString == 'Banana':\n",
    "    print('The original string is: %s' %(originalString))\n",
    "    \n",
    "\n",
    "originalString = inverse('ipssm$pissii')\n",
    "if originalString == 'mississippi':\n",
    "    print('The original string is: %s' %(originalString))\n",
    "       "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Common error\n",
    "This is a list of common error that an user could make "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "ename": "invalid_string_error",
     "evalue": "The input mississippi$$ cointains more than one $",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31minvalid_string_error\u001b[0m                      Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-4-b7d159470ec6>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[0;31m# transform with more then one $\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mfinalString\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtransform\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'mississippi$$'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      3\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      4\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mfinalString\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m'ipssm$pissii'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      5\u001b[0m     \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'The BWT of the string mississippi is: %s'\u001b[0m \u001b[0;34m%\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfinalString\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/Documents/Università/Magistrale/Primo anno/Secondo semestre/scientific programming/ProgrammingPython/Burrows_Wheeler_Transform/functions.py\u001b[0m in \u001b[0;36mtransform\u001b[0;34m(string)\u001b[0m\n\u001b[1;32m     18\u001b[0m     \u001b[0;31m# the string must contains only one termination char\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     19\u001b[0m     \u001b[0;32melif\u001b[0m \u001b[0mstring\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcount\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'$'\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 20\u001b[0;31m         \u001b[0;32mraise\u001b[0m \u001b[0minvalid_string_error\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'The input '\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mstring\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34m' cointains more than one $'\u001b[0m \u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     21\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     22\u001b[0m     \u001b[0;31m# check if the last char contains the '$' otherwhise add it\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31minvalid_string_error\u001b[0m: The input mississippi$$ cointains more than one $"
     ]
    }
   ],
   "source": [
    "# transform with more then one $\n",
    "finalString = transform('mississippi$$')\n",
    "\n",
    "if finalString == 'ipssm$pissii':\n",
    "    print('The BWT of the string mississippi is: %s' %(finalString))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "ename": "invalid_string_error",
     "evalue": "The input annBaa does not contain exactly one $ character",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31minvalid_string_error\u001b[0m                      Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-5-163491cab8ac>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[0;31m# inverse string without the $ character\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0moriginalString\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0minverse\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'annBaa'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      3\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0moriginalString\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m'Banana'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      4\u001b[0m     \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'The original string is: %s'\u001b[0m \u001b[0;34m%\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0moriginalString\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/Documents/Università/Magistrale/Primo anno/Secondo semestre/scientific programming/ProgrammingPython/Burrows_Wheeler_Transform/functions.py\u001b[0m in \u001b[0;36minverse\u001b[0;34m(bwt)\u001b[0m\n\u001b[1;32m     44\u001b[0m         \u001b[0;32mraise\u001b[0m \u001b[0mnot_a_string_error\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'The input '\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mbwt\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34m' is not a string'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     45\u001b[0m     \u001b[0;32melif\u001b[0m \u001b[0mbwt\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcount\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'$'\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 46\u001b[0;31m         \u001b[0;32mraise\u001b[0m \u001b[0minvalid_string_error\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'The input '\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mbwt\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34m' does not contain exactly one $ character'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     47\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     48\u001b[0m     \u001b[0mn\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mbwt\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31minvalid_string_error\u001b[0m: The input annBaa does not contain exactly one $ character"
     ]
    }
   ],
   "source": [
    "# inverse string without the $ character \n",
    "originalString = inverse('annBaa')\n",
    "if originalString == 'Banana':\n",
    "    print('The original string is: %s' %(originalString))    "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Performance test "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Now that we have test the correctness of the implementation we will test the speed of this implementation, in particular we are interesting in smoothly handle string with 1000 characters."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "with open('TestSample/oneThousandChar.txt') as file:\n",
    "    oneChar = file.read().replace('\\n', '')\n",
    "\n",
    " \n",
    "  \n",
    "with open('TestSample/twoThousandChar.txt') as file:\n",
    "    twoChar = file.read().replace('\\n', '')\n",
    "\n",
    "\n",
    "with open('TestSample/twelveThousandChar.txt') as file:\n",
    "    twelveChar = file.read().replace('\\n', '')\n",
    "    \n",
    "with open('TestSample/twentyFourChar.txt') as file:\n",
    "    twentyFourChar = file.read().replace('\\n', '')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The time for the transform is: \n",
      "CPU times: user 11.5 ms, sys: 4.61 ms, total: 16.1 ms\n",
      "Wall time: 14.4 ms\n",
      "The time for the inverse is: \n",
      "CPU times: user 23.5 ms, sys: 2.77 ms, total: 26.3 ms\n",
      "Wall time: 24.9 ms\n"
     ]
    }
   ],
   "source": [
    "# compute the time for the transform of one thousand characters \n",
    "print('The time for the transform is: ')\n",
    "%time finalString = transform(oneChar)\n",
    "\n",
    "# compute the time for the inverse of one thousand characters \n",
    "print('The time for the inverse is: ')\n",
    "%time inverseTransf = inverse(finalString)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The time for the transform is: \n",
      "CPU times: user 36 ms, sys: 15.9 ms, total: 51.9 ms\n",
      "Wall time: 51 ms\n",
      "The time for the inverse is: \n"
     ]
    }
   ],
   "source": [
    "# compute the time for the transform of two thousand characters \n",
    "print('The time for the transform is: ')\n",
    "%time finalString = transform(twoChar)\n",
    "\n",
    "# compute the time for the inverse of two thousand characters \n",
    "print('The time for the inverse is: ')\n",
    "inverseTransf = inverse(finalString)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The time for the transform is: \n",
      "CPU times: user 1.19 s, sys: 470 ms, total: 1.66 s\n",
      "Wall time: 1.66 s\n",
      "The time for the inverse is: \n",
      "CPU times: user 1.61 s, sys: 10.8 ms, total: 1.63 s\n",
      "Wall time: 1.64 s\n"
     ]
    }
   ],
   "source": [
    "# compute the time for the transform of twelve thousand characters\n",
    "print('The time for the transform is: ')\n",
    "%time finalString = transform(twelveChar)\n",
    "\n",
    "# compute the time for the inverse of twelve thousand characters \n",
    "print('The time for the inverse is: ')\n",
    "%time inverseTransf = inverse(finalString)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The time for the transform is: \n",
      "CPU times: user 1.21 s, sys: 451 ms, total: 1.66 s\n",
      "Wall time: 1.66 s\n",
      "The time for the inverse is: \n",
      "CPU times: user 1.6 s, sys: 7.79 ms, total: 1.61 s\n",
      "Wall time: 1.62 s\n"
     ]
    }
   ],
   "source": [
    "# compute the time for the transform of twenty-four thousand characters \n",
    "print('The time for the transform is: ')\n",
    "%time finalString = transform(twentyFourChar)\n",
    "\n",
    "\n",
    "# compute the time for the inverse of twenty-four thousand characters \n",
    "print('The time for the inverse is: ')\n",
    "%time inverseTransf = inverse(finalString)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}