{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "f = open('input/blosum62_affine.txt', 'r')\n",
    "score = [line.strip().split() for line in f]\n",
    "alphabet = score[0]\n",
    "blosum62 = [i[1:] for i in score[1:]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'5'"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "blosum62[alphabet.index('R')][alphabet.index('R')]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "#!/usr/bin/env python\n",
    "'''A Bioinformatics Algorithms script containing scoring matrices.'''\n",
    "\n",
    "import os\n",
    "\n",
    "\n",
    "class BLOSUM62(object):\n",
    "    \"\"\"The BLOSUM62 scoring matrix class.\"\"\"\n",
    "\n",
    "    def __init__(self):\n",
    "        \"\"\"Initialize the scoring matrix.\"\"\"\n",
    "        with open(os.path.join(os.path.dirname(__file__), 'input/BLOSUM62.txt')) as input_data:\n",
    "            items = [line.strip().split() for line in input_data.readlines()]\n",
    "            self.scoring_matrix = {(item[0], item[1]):int(item[2]) for item in items}\n",
    "\n",
    "    def __getitem__(self, pair):\n",
    "        \"\"\"Returns the score of the given pair of protein.\"\"\"\n",
    "        return self.scoring_matrix[pair[0], pair[1]]\n",
    "\n",
    "\n",
    "class PAM250(object):\n",
    "    \"\"\"The PAM250 scoring matrix class.\"\"\"\n",
    "\n",
    "    def __init__(self):\n",
    "        \"\"\"Initialize the scoring matrix.\"\"\"\n",
    "        with open(os.path.join(os.path.dirname(__file__), 'data/PAM250.txt')) as input_data:\n",
    "            items = [line.strip().split() for line in input_data.readlines()]\n",
    "            self.scoring_matrix = {(item[0], item[1]):int(item[2]) for item in items}\n",
    "\n",
    "    def __getitem__(self, pair):\n",
    "        \"\"\"Returns the score of the given pair of protein.\"\"\"\n",
    "        return self.scoring_matrix[pair[0], pair[1]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "ename": "NameError",
     "evalue": "global name '__file__' is not defined",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-13-63b2c8c90850>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mBLOSUM62\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[0;32m<ipython-input-11-bc2495e10efd>\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m     10\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0m__init__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     11\u001b[0m         \u001b[0;34m\"\"\"Initialize the scoring matrix.\"\"\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 12\u001b[0;31m         \u001b[0;32mwith\u001b[0m \u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mos\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mjoin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mos\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdirname\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0m__file__\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'data/BLOSUM62.txt'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0minput_data\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     13\u001b[0m             \u001b[0mitems\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mline\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstrip\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msplit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mline\u001b[0m \u001b[0;32min\u001b[0m \u001b[0minput_data\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreadlines\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     14\u001b[0m             \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mscoring_matrix\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m{\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mitem\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mitem\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0mint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mitem\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mitem\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mitems\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mNameError\u001b[0m: global name '__file__' is not defined"
     ]
    }
   ],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def global_alignment_affine_gap_penalty(v, w, scoring_matrix, sigma, epsilon):\n",
    "    '''Returns the global alignment score of v and w with constant gap peantaly sigma subject to the scoring_matrix.'''\n",
    "    # Initialize the matrices.\n",
    "    S = [[[0 for j in xrange(len(w)+1)] for i in xrange(len(v)+1)] for k in xrange(3)]\n",
    "    backtrack = [[[0 for j in xrange(len(w)+1)] for i in xrange(len(v)+1)] for k in xrange(3)]\n",
    "\n",
    "    # Initialize the edges with the given penalties.\n",
    "    for i in xrange(1, len(v)+1):\n",
    "        S[0][i][0] = -sigma - (i-1)*epsilon\n",
    "        S[1][i][0] = -sigma - (i-1)*epsilon\n",
    "        S[2][i][0] = -10*sigma\n",
    "    for j in xrange(1, len(w)+1):\n",
    "        S[2][0][j] = -sigma - (j-1)*epsilon\n",
    "        S[1][0][j] = -sigma - (j-1)*epsilon\n",
    "        S[0][0][j] = -10*sigma\n",
    "\n",
    "    # Fill in the scores for the lower, middle, upper, and backtrack matrices.\n",
    "    for i in xrange(1, len(v)+1):\n",
    "        for j in xrange(1, len(w)+1):\n",
    "            lower_scores = [S[0][i-1][j] - epsilon, S[1][i-1][j] - sigma]\n",
    "            S[0][i][j] = max(lower_scores)\n",
    "            backtrack[0][i][j] = lower_scores.index(S[0][i][j])\n",
    "\n",
    "            upper_scores = [S[2][i][j-1] - epsilon, S[1][i][j-1] - sigma]\n",
    "            S[2][i][j] = max(upper_scores)\n",
    "            backtrack[2][i][j] = upper_scores.index(S[2][i][j])\n",
    "\n",
    "            middle_scores = [S[0][i][j], S[1][i-1][j-1] + scoring_matrix[v[i-1], w[j-1]], S[2][i][j]]\n",
    "            S[1][i][j] = max(middle_scores)\n",
    "            backtrack[1][i][j] = middle_scores.index(S[1][i][j])\n",
    "\n",
    "   # Initialize the values of i, j and the aligned sequences.\n",
    "    i,j = len(v), len(w)\n",
    "    v_aligned, w_aligned = v, w\n",
    "\n",
    "    # Get the maximum score, and the corresponding backtrack starting position.\n",
    "    matrix_scores = [S[0][i][j], S[1][i][j], S[2][i][j]]\n",
    "    max_score = max(matrix_scores)\n",
    "    backtrack_matrix = matrix_scores.index(max_score)\n",
    "\n",
    "    # Quick lambda function to insert indels.\n",
    "    insert_indel = lambda word, i: word[:i] + '-' + word[i:]\n",
    "\n",
    "    # Backtrack to the edge of the matrix starting bottom right.\n",
    "    while i*j != 0:\n",
    "        if backtrack_matrix == 0:  # Lower backtrack matrix conditions.\n",
    "            if backtrack[0][i][j] == 1:\n",
    "                backtrack_matrix = 1\n",
    "            i -= 1\n",
    "            w_aligned = insert_indel(w_aligned, j)\n",
    "\n",
    "        elif backtrack_matrix == 1:  # Middle backtrack matrix conditions.\n",
    "            if backtrack[1][i][j] == 0:\n",
    "                backtrack_matrix = 0\n",
    "            elif backtrack[1][i][j] == 2:\n",
    "                backtrack_matrix = 2\n",
    "            else:\n",
    "                i -= 1\n",
    "                j -= 1\n",
    "\n",
    "        else:  # Upper backtrack matrix conditions.\n",
    "            if backtrack[2][i][j] == 1:\n",
    "                backtrack_matrix = 1\n",
    "            j -= 1\n",
    "            v_aligned = insert_indel(v_aligned, i)\n",
    "\n",
    "    # Prepend the necessary preceeding indels to get to (0,0).\n",
    "    for _ in xrange(i):\n",
    "        w_aligned = insert_indel(w_aligned, 0)\n",
    "    for _ in xrange(j):\n",
    "        v_aligned = insert_indel(v_aligned, 0)\n",
    "\n",
    "    return str(max_score), v_aligned, w_aligned"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "ename": "TypeError",
     "evalue": "'type' object has no attribute '__getitem__'",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-23-1b8f717a7bbe>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[0mv\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m'PRTEINS'\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      2\u001b[0m \u001b[0mw\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m'PRTWPSEIN'\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0mglobal_alignment_affine_gap_penalty\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mv\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mw\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mBLOSUM62\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m11\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[0;32m<ipython-input-22-7e38bfcb39f2>\u001b[0m in \u001b[0;36mglobal_alignment_affine_gap_penalty\u001b[0;34m(v, w, scoring_matrix, sigma, epsilon)\u001b[0m\n\u001b[1;32m     26\u001b[0m             \u001b[0mbacktrack\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mj\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mupper_scores\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mS\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mj\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     27\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 28\u001b[0;31m             \u001b[0mmiddle_scores\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mS\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mj\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mS\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mj\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mscoring_matrix\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mv\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mw\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mj\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mS\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mj\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     29\u001b[0m             \u001b[0mS\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mj\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmax\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmiddle_scores\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     30\u001b[0m             \u001b[0mbacktrack\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mj\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmiddle_scores\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mS\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mj\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mTypeError\u001b[0m: 'type' object has no attribute '__getitem__'"
     ]
    }
   ],
   "source": [
    "v = 'PRTEINS'\n",
    "w = 'PRTWPSEIN'\n",
    "global_alignment_affine_gap_penalty(v, w, BLOSUM62, 11, 1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}