{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Test Burrows Wheeler Transform (BWT)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "This is a simple notebook to test the implementation of the BWT."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from Burrows_Wheeler_Transform import transform\n",
    "from Burrows_Wheeler_Transform import inverse\n",
    "import time"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "First, we test the correctness of the implementation using some known BWTs.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The BWT of the string Banana is: a$nnBaa\n",
      "The BWT of the string mississippi is: ipssm$pissii\n"
     ]
    }
   ],
   "source": [
    "# Known (input, expected BWT) pairs used as regression checks.\n",
    "# NOTE(review): the expected strings contain a '$' that is not in the input,\n",
    "# presumably the end-of-string marker added by transform - confirm.\n",
    "known_transforms = [\n",
    "    ('Banana', 'a$nnBaa'),\n",
    "    ('mississippi', 'ipssm$pissii'),\n",
    "]\n",
    "\n",
    "for text, expected in known_transforms:\n",
    "    finalString = transform(text)\n",
    "    # Fail loudly on a mismatch instead of silently printing nothing.\n",
    "    assert finalString == expected, (\n",
    "        'BWT of %r is %r, expected %r' % (text, finalString, expected)\n",
    "    )\n",
    "    print('The BWT of the string %s is: %s' % (text, finalString))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Now that we have tested the correctness of the implementation, we will test its speed. In particular, we are interested in smoothly handling strings with 1000 characters."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def read_sample(path):\n",
    "    \"\"\"Return the text stored at `path` with all newline characters removed.\"\"\"\n",
    "    with open(path) as file:\n",
    "        return file.read().replace('\\n', '')\n",
    "\n",
    "\n",
    "def time_round_trip(text, label):\n",
    "    \"\"\"Time `transform` on `text` and `inverse` on its result, printing both.\n",
    "\n",
    "    `label` is the human-readable description of the sample that is inserted\n",
    "    into the printed messages.\n",
    "\n",
    "    Returns the pair (output of transform, output of inverse).\n",
    "    \"\"\"\n",
    "    # perf_counter is a monotonic, high-resolution clock intended for timing\n",
    "    # short durations; time.time() can jump when the system clock is adjusted.\n",
    "    start_time = time.perf_counter()\n",
    "    finalString = transform(text)\n",
    "    end_time = time.perf_counter()\n",
    "    print(\"Execution time BWT of the %s: %s seconds\" % (label, end_time - start_time))\n",
    "\n",
    "    start_time = time.perf_counter()\n",
    "    inverseTransf = inverse(finalString)\n",
    "    end_time = time.perf_counter()\n",
    "    print(\"Execution time IBWT of the %s: %s seconds\" % (label, end_time - start_time))\n",
    "\n",
    "    return finalString, inverseTransf"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Sample inputs of increasing size, loaded once and reused by the timing cells.\n",
    "oneChar = read_sample('TestSample/oneThousandChar.txt')\n",
    "twoChar = read_sample('TestSample/twoThousandChar.txt')\n",
    "twelveChar = read_sample('TestSample/twelveThousandChar.txt')\n",
    "twentyFourChar = read_sample('TestSample/twentyFourChar.txt')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "finalString, inverseTransf = time_round_trip(oneChar, \"one thousand characters string\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "finalString, inverseTransf = time_round_trip(twoChar, \"two thousand characters\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "finalString, inverseTransf = time_round_trip(twelveChar, \"twelve thousand characters\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "finalString, inverseTransf = time_round_trip(twentyFourChar, \"twenty-four thousand characters\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}