{ "cells": [ { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": false }, "outputs": [], "source": [ "f = open('input/blosum62_affine.txt', 'r')\n", "score = [line.strip().split() for line in f]\n", "alphabet = score[0]\n", "blosum62 = [i[1:] for i in score[1:]]" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "'5'" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "blosum62[alphabet.index('R')][alphabet.index('R')]" ] }, { "cell_type": "code", "execution_count": 21, "metadata": { "collapsed": true }, "outputs": [], "source": [ "#!/usr/bin/env python\n", "'''A Bioinformatics Algorithms script containing scoring matrices.'''\n", "\n", "import os\n", "\n", "\n", "class BLOSUM62(object):\n", " \"\"\"The BLOSUM62 scoring matrix class.\"\"\"\n", "\n", " def __init__(self):\n", " \"\"\"Initialize the scoring matrix.\"\"\"\n", " with open(os.path.join(os.path.dirname(__file__), 'input/BLOSUM62.txt')) as input_data:\n", " items = [line.strip().split() for line in input_data.readlines()]\n", " self.scoring_matrix = {(item[0], item[1]):int(item[2]) for item in items}\n", "\n", " def __getitem__(self, pair):\n", " \"\"\"Returns the score of the given pair of protein.\"\"\"\n", " return self.scoring_matrix[pair[0], pair[1]]\n", "\n", "\n", "class PAM250(object):\n", " \"\"\"The PAM250 scoring matrix class.\"\"\"\n", "\n", " def __init__(self):\n", " \"\"\"Initialize the scoring matrix.\"\"\"\n", " with open(os.path.join(os.path.dirname(__file__), 'data/PAM250.txt')) as input_data:\n", " items = [line.strip().split() for line in input_data.readlines()]\n", " self.scoring_matrix = {(item[0], item[1]):int(item[2]) for item in items}\n", "\n", " def __getitem__(self, pair):\n", " \"\"\"Returns the score of the given pair of protein.\"\"\"\n", " return self.scoring_matrix[pair[0], pair[1]]" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "collapsed": false }, "outputs": [ { "ename": "NameError", "evalue": "global name '__file__' is not defined", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mBLOSUM62\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[0;32m\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 10\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__init__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[0;34m\"\"\"Initialize the scoring matrix.\"\"\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 12\u001b[0;31m \u001b[0;32mwith\u001b[0m \u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mos\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mjoin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mos\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdirname\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0m__file__\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'data/BLOSUM62.txt'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0minput_data\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 13\u001b[0m \u001b[0mitems\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mline\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstrip\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msplit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mline\u001b[0m \u001b[0;32min\u001b[0m \u001b[0minput_data\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreadlines\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 14\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mscoring_matrix\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m{\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mitem\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mitem\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0mint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mitem\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mitem\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mitems\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mNameError\u001b[0m: global name '__file__' is not defined" ] } ], "source": [] }, { "cell_type": "code", "execution_count": 22, "metadata": { "collapsed": true }, "outputs": [], "source": [ "def global_alignment_affine_gap_penalty(v, w, scoring_matrix, sigma, epsilon):\n", " '''Returns the global alignment score of v and w with constant gap peantaly sigma subject to the scoring_matrix.'''\n", " # Initialize the matrices.\n", " S = [[[0 for j in xrange(len(w)+1)] for i in xrange(len(v)+1)] for k in xrange(3)]\n", " backtrack = [[[0 for j in xrange(len(w)+1)] for i in xrange(len(v)+1)] for k in xrange(3)]\n", "\n", " # Initialize the edges with the given penalties.\n", " for i in xrange(1, len(v)+1):\n", " S[0][i][0] = -sigma - (i-1)*epsilon\n", " S[1][i][0] = -sigma - (i-1)*epsilon\n", " S[2][i][0] = -10*sigma\n", " for j in xrange(1, len(w)+1):\n", " S[2][0][j] = -sigma - (j-1)*epsilon\n", " S[1][0][j] = -sigma - (j-1)*epsilon\n", " S[0][0][j] = -10*sigma\n", "\n", " # Fill in the scores for the lower, middle, upper, and backtrack matrices.\n", " for i in xrange(1, len(v)+1):\n", " for j in xrange(1, len(w)+1):\n", " lower_scores = [S[0][i-1][j] - epsilon, S[1][i-1][j] - sigma]\n", " S[0][i][j] = max(lower_scores)\n", " backtrack[0][i][j] = lower_scores.index(S[0][i][j])\n", "\n", " upper_scores = [S[2][i][j-1] - epsilon, S[1][i][j-1] - sigma]\n", " S[2][i][j] = max(upper_scores)\n", " backtrack[2][i][j] = upper_scores.index(S[2][i][j])\n", "\n", " middle_scores = [S[0][i][j], S[1][i-1][j-1] + scoring_matrix[v[i-1], w[j-1]], S[2][i][j]]\n", " S[1][i][j] = max(middle_scores)\n", " backtrack[1][i][j] = middle_scores.index(S[1][i][j])\n", "\n", " # Initialize the values of i, j and the aligned sequences.\n", " i,j = len(v), len(w)\n", " v_aligned, w_aligned = v, w\n", "\n", " # Get the maximum score, and the corresponding backtrack starting position.\n", " matrix_scores = [S[0][i][j], S[1][i][j], S[2][i][j]]\n", " max_score = max(matrix_scores)\n", " backtrack_matrix = matrix_scores.index(max_score)\n", "\n", " # Quick lambda function to insert indels.\n", " insert_indel = lambda word, i: word[:i] + '-' + word[i:]\n", "\n", " # Backtrack to the edge of the matrix starting bottom right.\n", " while i*j != 0:\n", " if backtrack_matrix == 0: # Lower backtrack matrix conditions.\n", " if backtrack[0][i][j] == 1:\n", " backtrack_matrix = 1\n", " i -= 1\n", " w_aligned = insert_indel(w_aligned, j)\n", "\n", " elif backtrack_matrix == 1: # Middle backtrack matrix conditions.\n", " if backtrack[1][i][j] == 0:\n", " backtrack_matrix = 0\n", " elif backtrack[1][i][j] == 2:\n", " backtrack_matrix = 2\n", " else:\n", " i -= 1\n", " j -= 1\n", "\n", " else: # Upper backtrack matrix conditions.\n", " if backtrack[2][i][j] == 1:\n", " backtrack_matrix = 1\n", " j -= 1\n", " v_aligned = insert_indel(v_aligned, i)\n", "\n", " # Prepend the necessary preceeding indels to get to (0,0).\n", " for _ in xrange(i):\n", " w_aligned = insert_indel(w_aligned, 0)\n", " for _ in xrange(j):\n", " v_aligned = insert_indel(v_aligned, 0)\n", "\n", " return str(max_score), v_aligned, w_aligned" ] }, { "cell_type": "code", "execution_count": 23, "metadata": { "collapsed": false }, "outputs": [ { "ename": "TypeError", "evalue": "'type' object has no attribute '__getitem__'", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0mv\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m'PRTEINS'\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0mw\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m'PRTWPSEIN'\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0mglobal_alignment_affine_gap_penalty\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mv\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mw\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mBLOSUM62\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m11\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[0;32m\u001b[0m in \u001b[0;36mglobal_alignment_affine_gap_penalty\u001b[0;34m(v, w, scoring_matrix, sigma, epsilon)\u001b[0m\n\u001b[1;32m 26\u001b[0m \u001b[0mbacktrack\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mj\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mupper_scores\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mS\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mj\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 27\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 28\u001b[0;31m \u001b[0mmiddle_scores\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mS\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mj\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mS\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mj\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mscoring_matrix\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mv\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mw\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mj\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mS\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mj\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 29\u001b[0m \u001b[0mS\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mj\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmax\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmiddle_scores\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 30\u001b[0m \u001b[0mbacktrack\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mj\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmiddle_scores\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mS\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mj\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mTypeError\u001b[0m: 'type' object has no attribute '__getitem__'" ] } ], "source": [ "v = 'PRTEINS'\n", "w = 'PRTWPSEIN'\n", "global_alignment_affine_gap_penalty(v, w, BLOSUM62, 11, 1)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 2", "language": "python", "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.5" } }, "nbformat": 4, "nbformat_minor": 0 }