{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Leaderboard Cyclopeptide Sequencing (Rosalind BA4G)\n",
    "\n",
    "Given an integer `N` and a spectrum (a collection of integer masses), run leaderboard cyclopeptide sequencing and report the leader peptide as its amino acid masses joined by `-`.\n",
    "\n",
    "The input file `input/rosalind_ba4g.txt` holds `N` on the first line and the space-separated spectrum on the second line."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "from collections import Counter\n",
    "\n",
    "# The 18 amino acid letters whose integer masses are pairwise distinct.\n",
    "aminoacid = ['G', 'A', 'S', 'P', 'V', 'T', 'C', 'L', 'N', 'D', 'K', 'E', 'M', 'H', 'F', 'R', 'Y', 'W']\n",
    "aminoacidMass = {'G':57, 'A':71, 'S':87, 'P':97, 'V':99, 'T':101, 'C':103, 'L':113, 'N':114, 'D':115, 'K':128, 'E':129, 'M':131, 'H':137, 'F':147, 'R':156, 'Y':163, 'W':186}\n",
    "\n",
    "def expand(leaderboard):\n",
    "    \"\"\"Expands each peptide/aminoacid in leaderboard by all 18 aminoacids with distinct masses.\n",
    "    Input: A collection of peptide strings Leaderboard.\n",
    "    Output: A new list with every peptide extended by every amino acid; Leaderboard is not modified.\"\"\"\n",
    "    # Iterating the dictionary itself visits the keys in the same order as aminoacidMass.keys().\n",
    "    return [peptide + residue for peptide in leaderboard for residue in aminoacidMass]\n",
    "\n",
    "def mass(peptide):\n",
    "    \"\"\"Calculates the mass of peptide using the aminoacidMass dictionary.\n",
    "    Input: An amino acid string Peptide.\n",
    "    Output: The sum of the masses of its amino acids (0 for the empty peptide).\"\"\"\n",
    "    return sum(aminoacidMass[residue] for residue in peptide)\n",
    "\n",
    "def mass_string(peptide):\n",
    "    \"\"\"Formats peptide as its amino acid masses joined by '-', e.g. 'GA' -> '57-71'.\"\"\"\n",
    "    return '-'.join(str(aminoacidMass[residue]) for residue in peptide)\n",
    "\n",
    "def _prefix_masses(peptide):\n",
    "    \"\"\"Returns the list whose k-th entry is the mass of the first k amino acids of peptide.\"\"\"\n",
    "    prefixMass = [0]\n",
    "    for residue in peptide:\n",
    "        prefixMass.append(prefixMass[-1] + aminoacidMass[residue])\n",
    "    return prefixMass\n",
    "\n",
    "def _shared_mass_count(theoretical, spectrum):\n",
    "    \"\"\"Counts the masses shared by two spectra, respecting multiplicity.\n",
    "    spectrum may be any iterable of masses or an already-built Counter (reused as is).\"\"\"\n",
    "    experimental = spectrum if isinstance(spectrum, Counter) else Counter(spectrum)\n",
    "    return sum(min(count, experimental[m]) for m, count in Counter(theoretical).items())\n",
    "\n",
    "def cyclicSpectrum(peptide):\n",
    "    \"\"\"Input: An amino acid string Peptide.\n",
    "    Output: The cyclic spectrum of Peptide (sorted list of masses, starting with 0).\"\"\"\n",
    "    prefixMass = _prefix_masses(peptide)\n",
    "    peptideMass = prefixMass[-1]\n",
    "    n = len(peptide)\n",
    "    cyclic_spectrum = [0]\n",
    "    for i in range(n):\n",
    "        for j in range(i+1, n+1):\n",
    "            fragment = prefixMass[j] - prefixMass[i]\n",
    "            cyclic_spectrum.append(fragment)\n",
    "            # An interior fragment also yields the complementary fragment that wraps around the cycle.\n",
    "            if i > 0 and j < n:\n",
    "                cyclic_spectrum.append(peptideMass - fragment)\n",
    "    return sorted(cyclic_spectrum)\n",
    "\n",
    "def score_peptide(peptide, spectrum):\n",
    "    \"\"\"Cyclopeptide Scoring Problem: Compute the score of a cyclic peptide against a spectrum.\n",
    "    Input: An amino acid string Peptide and a collection of integers Spectrum (a Counter of masses is also accepted).\n",
    "    Output: The score of Peptide against Spectrum, Score(Peptide, Spectrum).\"\"\"\n",
    "    return _shared_mass_count(cyclicSpectrum(peptide), spectrum)\n",
    "\n",
    "def linearSpectrum(peptide):\n",
    "    \"\"\"Input: An amino acid string Peptide.\n",
    "    Output: The linear spectrum of Peptide (sorted list of masses, starting with 0).\"\"\"\n",
    "    prefixMass = _prefix_masses(peptide)\n",
    "    n = len(peptide)\n",
    "    linear_spectrum = [0]\n",
    "    for i in range(n):\n",
    "        for j in range(i+1, n+1):\n",
    "            linear_spectrum.append(prefixMass[j] - prefixMass[i])\n",
    "    return sorted(linear_spectrum)\n",
    "\n",
    "def score_linear_peptide(peptide, spectrum):\n",
    "    \"\"\"Compute the score of a linear peptide with respect to a spectrum.\n",
    "    Input: An amino acid string Peptide and a collection of integers Spectrum (a Counter of masses is also accepted).\n",
    "    Output: The linear score of Peptide with respect to Spectrum, LinearScore(Peptide, Spectrum).\"\"\"\n",
    "    return _shared_mass_count(linearSpectrum(peptide), spectrum)\n",
    "\n",
    "def trim_leaderboard(leaderboard, spectrum, N):\n",
    "    \"\"\"Input: A collection of peptides Leaderboard, a collection of integers Spectrum, and an integer N.\n",
    "    Output: The N highest-scoring linear peptides on Leaderboard with respect to Spectrum,\n",
    "    including every peptide tied with the N-th one. Returns [] when N <= 0.\"\"\"\n",
    "    N = int(N)\n",
    "    if N <= 0:\n",
    "        # Without this guard the index N-1 would be negative and count from the end of the ranking.\n",
    "        return []\n",
    "    ranked = sorted(((score_linear_peptide(peptide, spectrum), peptide) for peptide in leaderboard), reverse=True)\n",
    "    if len(ranked) <= N:\n",
    "        return [peptide for score, peptide in ranked]\n",
    "    cutoff = ranked[N-1][0]\n",
    "    return [peptide for score, peptide in ranked if score >= cutoff]\n",
    "\n",
    "def leaderboard_cyclopeptide_sequencing(spectrum, N):\n",
    "    \"\"\"Input: An integer N and a collection of integers Spectrum.\n",
    "    Output: LeaderPeptide after running LEADERBOARDCYCLOPEPTIDESEQUENCING(Spectrum, N)\n",
    "    ('' when no peptide with the parent mass is found).\n",
    "    Raises: ValueError if Spectrum is empty.\"\"\"\n",
    "    spectrum = list(spectrum)  # accept one-shot iterables such as map() on Python 3\n",
    "    if not spectrum:\n",
    "        raise ValueError('spectrum must contain at least one mass')\n",
    "    spectrum_counts = Counter(spectrum)  # built once and reused by every scoring call\n",
    "    parentmass = max(spectrum)\n",
    "    leaderboard = list(aminoacid)\n",
    "    leaderpeptide = ''\n",
    "    leaderscore = score_peptide(leaderpeptide, spectrum_counts)\n",
    "    while leaderboard:\n",
    "        survivors = []\n",
    "        for peptide in expand(leaderboard):\n",
    "            peptidemass = mass(peptide)\n",
    "            if peptidemass > parentmass:\n",
    "                continue  # too heavy: drop it from the leaderboard\n",
    "            if peptidemass == parentmass:\n",
    "                candidatescore = score_peptide(peptide, spectrum_counts)\n",
    "                if candidatescore > leaderscore:\n",
    "                    leaderpeptide, leaderscore = peptide, candidatescore\n",
    "            survivors.append(peptide)\n",
    "        leaderboard = trim_leaderboard(survivors, spectrum_counts, N)\n",
    "    return leaderpeptide"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "LFAE\n",
      "113-147-71-129\n"
     ]
    }
   ],
   "source": [
    "#Example problem\n",
    "example_ans = leaderboard_cyclopeptide_sequencing([0, 71, 113, 129, 147, 200, 218, 260, 313, 331, 347, 389, 460], 10)\n",
    "print(example_ans)\n",
    "print(mass_string(example_ans))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "#Read input data: first line is N, second line is the space-separated spectrum\n",
    "INPUT_PATH = 'input/rosalind_ba4g.txt'\n",
    "with open(INPUT_PATH) as f:\n",
    "    N = int(f.readline().strip())\n",
    "    spectrum = [int(m) for m in f.readline().split()]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "#Call function\n",
    "ans = leaderboard_cyclopeptide_sequencing(spectrum, N)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'186-87-115-115-147-87-113-113-113-71-114-115'"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#Return answer in desired format\n",
    "mass_string(ans)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}