{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": false }, "outputs": [], "source": [ "from __future__ import print_function\n", "\n", "import math\n", "import timeit\n", "\n", "from Bio import PDB" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Structure exists: './pdb1tup.ent' \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/home/tra/Dropbox/soft/biopython/Bio/PDB/StructureBuilder.py:87: PDBConstructionWarning: WARNING: Chain A is discontinuous at line 6146.\n", " PDBConstructionWarning)\n", "/home/tra/Dropbox/soft/biopython/Bio/PDB/StructureBuilder.py:87: PDBConstructionWarning: WARNING: Chain B is discontinuous at line 6147.\n", " PDBConstructionWarning)\n", "/home/tra/Dropbox/soft/biopython/Bio/PDB/StructureBuilder.py:87: PDBConstructionWarning: WARNING: Chain C is discontinuous at line 6148.\n", " PDBConstructionWarning)\n", "/home/tra/Dropbox/soft/biopython/Bio/PDB/StructureBuilder.py:87: PDBConstructionWarning: WARNING: Chain E is discontinuous at line 6149.\n", " PDBConstructionWarning)\n", "/home/tra/Dropbox/soft/biopython/Bio/PDB/StructureBuilder.py:87: PDBConstructionWarning: WARNING: Chain F is discontinuous at line 6171.\n", " PDBConstructionWarning)\n", "/home/tra/Dropbox/soft/biopython/Bio/PDB/StructureBuilder.py:87: PDBConstructionWarning: WARNING: Chain A is discontinuous at line 6185.\n", " PDBConstructionWarning)\n", "/home/tra/Dropbox/soft/biopython/Bio/PDB/StructureBuilder.py:87: PDBConstructionWarning: WARNING: Chain B is discontinuous at line 6383.\n", " PDBConstructionWarning)\n", "/home/tra/Dropbox/soft/biopython/Bio/PDB/StructureBuilder.py:87: PDBConstructionWarning: WARNING: Chain C is discontinuous at line 6453.\n", " PDBConstructionWarning)\n" ] } ], "source": [ "repository = PDB.PDBList()\n", "parser = PDB.PDBParser()\n", "repository.retrieve_pdb_file('1TUP', pdir='.')\n", "p53_1tup = parser.get_structure('P 53', 'pdb1tup.ent')" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " [ 58.10800171 23.24200058 57.42399979]\n", " [ 60.10800171 17.9810009 75.93099976]\n", " [ 33.65299988 0.403 74.11499786]\n" ] } ], "source": [ "zns = []\n", "for atom in p53_1tup.get_atoms():\n", " if atom.element == 'ZN':\n", " #print(atom, dir(atom), atom.mass, atom.element, atom.coord[0])\n", " zns.append(atom)\n", "for zn in zns:\n", " print(zn, zn.coord)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": false }, "outputs": [], "source": [ "#Suggest a pymol viewing" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": false }, "outputs": [], "source": [ "#Try this in numba?\n", "def get_closest_atoms(pdb_struct, ref_atom, distance):\n", " atoms = {}\n", " rx, ry, rz = ref_atom.coord\n", " for atom in pdb_struct.get_atoms():\n", " if atom == ref_atom:\n", " continue\n", " x, y, z = atom.coord\n", " my_dist = math.sqrt((x - rx)**2 + (y - ry)**2 + (z - rz)**2) \n", " if my_dist < distance:\n", " atoms[atom] = my_dist\n", " return atoms" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "[ 58.10800171 23.24200058 57.42399979]\n", "S 2.27892096249 [ 60.31700134 23.31800079 57.97900009]\n", "C 2.92437196013 [ 56.99300003 23.94300079 54.81299973]\n", "C 3.40801176963 [ 57.77000046 21.2140007 60.14199829]\n", "S 2.32622437996 [ 57.06499863 21.45199966 58.48199844]\n", "C 3.62725094648 [ 61.61000061 24.08699989 57.00099945]\n", "C 3.85772919812 [ 61.14799881 25.06100082 55.89699936]\n", "C 3.45665374923 [ 58.88600159 20.86700058 55.0359993 ]\n", "C 3.08721447067 [ 57.20500183 25.09900093 59.71900177]\n", "C 3.06412055976 [ 58.04700089 22.03800011 54.60699844]\n", "S 2.22531584465 [ 56.91400146 25.05400085 57.91699982]\n", "N 1.99182735373 [ 57.75500107 23.07299995 55.47100067]\n", "\n", "[ 60.10800171 17.9810009 75.93099976]\n", "C 2.98523321742 [ 61.33200073 20.38199997 74.64700317]\n", "C 3.80512639027 [ 62.57300186 18.26300049 78.81600189]\n", "S 2.24232090692 [ 58.97800064 19.40200043 77.24700165]\n", "C 3.41769274437 [ 57.5929985 15.78299999 75.20700073]\n", "C 3.18032005125 [ 61.52099991 17.13599968 78.65200043]\n", "S 2.32547215821 [ 58.58599854 17.08200073 74.41999817]\n", "C 3.46720709671 [ 62.27199936 17.17399979 73.34500122]\n", "C 3.2038921042 [ 57.62400055 18.41699982 77.90699768]\n", "C 3.11391347252 [ 62.06100082 18.61499977 73.58999634]\n", "N 2.05645999722 [ 61.36600113 19.05599976 74.70999908]\n", "S 2.20704048852 [ 61.28699875 16.4470005 76.99299622]\n", "\n", "[ 33.65299988 0.403 74.11499786]\n", "C 3.40762621232 [ 34.44200134 -0.639 77.26200104]\n", "C 3.203789888 [ 35.56900024 2.89400005 73.49199677]\n", "C 3.42690033588 [ 31.43499947 -1.55700004 72.38800049]\n", "N 3.8418381161 [ 32.61999893 -3.26699996 73.64199829]\n", "S 2.37880142799 [ 32.94200134 -0.60699999 72.08200073]\n", "S 2.13150441609 [ 32.39099884 1.93900001 74.88400269]\n", "C 3.15756814671 [ 36.18299866 -0.46900001 72.43900299]\n", "C 3.07685585317 [ 30.81999969 1.08200002 75.10500336]\n", "C 2.98511466119 [ 36.22499847 0.98000002 72.71399689]\n", "S 2.32042002279 [ 34.45299911 -1.23300004 75.5530014 ]\n", "N 2.09591300499 [ 35.24000168 1.60300004 73.45600128]\n", "C 3.94050478004 [ 35.47399902 0.46200001 77.60900116]\n" ] } ], "source": [ "for zn in zns:\n", " print()\n", " print(zn.coord)\n", " atoms = get_closest_atoms(p53_1tup, zn, 4)\n", " for atom, distance in atoms.items():\n", " print(atom.element, distance, atom.coord)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1 [0, 0, 0]\n", "2 [1, 0, 0]\n", "4 [11, 11, 12]\n", "8 [109, 113, 106]\n", "16 [523, 721, 487]\n", "32 [2381, 3493, 2053]\n", "64 [5800, 5827, 5501]\n", "128 [5827, 5827, 5827]\n" ] } ], "source": [ "for distance in [1, 2, 4, 8, 16, 32, 64, 128]:\n", " my_atoms = []\n", " for zn in zns:\n", " atoms = get_closest_atoms(p53_1tup, zn, distance)\n", " my_atoms.append(len(atoms))\n", " print(distance, my_atoms)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "88.2678031921\n" ] } ], "source": [ "nexecs = 10\n", "print(timeit.timeit('get_closest_atoms(p53_1tup, zns[0], 4.0)',\n", " 'from __main__ import get_closest_atoms, p53_1tup, zns',\n", " number=nexecs) / nexecs * 1000)" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": false }, "outputs": [], "source": [ "def get_closest_alternative(pdb_struct, ref_atom, distance):\n", " atoms = {}\n", " rx, ry, rz = ref_atom.coord\n", " for atom in pdb_struct.get_atoms():\n", " if atom == ref_atom:\n", " continue\n", " x, y, z = atom.coord\n", " if abs(x - rx) > distance or abs(y - ry) > distance or abs(z - rz) > distance:\n", " continue\n", " my_dist = math.sqrt((x - rx)**2 + (y - ry)**2 + (z - rz)**2) \n", " if my_dist < distance:\n", " atoms[atom] = my_dist\n", " return atoms" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "38.8225793839\n" ] } ], "source": [ "print(timeit.timeit('get_closest_alternative(p53_1tup, zns[0], 4.0)',\n", " 'from __main__ import get_closest_alternative, p53_1tup, zns',\n", " number=nexecs) / nexecs * 1000)" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Standard\n", "85.9097957611\n", "85.0714921951\n", "84.3131065369\n", "85.9227895737\n", "85.7550859451\n", "Optimized\n", "35.1483106613\n", "37.5775098801\n", "58.0827951431\n", "135.914206505\n", "133.298301697\n" ] } ], "source": [ "print('Standard')\n", "for distance in [1, 4, 16, 64, 128]:\n", " print(timeit.timeit('get_closest_atoms(p53_1tup, zns[0], distance)',\n", " 'from __main__ import get_closest_atoms, p53_1tup, zns, distance',\n", " number=nexecs) / nexecs * 1000)\n", "print('Optimized')\n", "for distance in [1, 4, 16, 64, 128]:\n", " print(timeit.timeit('get_closest_alternative(p53_1tup, zns[0], distance)',\n", " 'from __main__ import get_closest_alternative, p53_1tup, zns, distance',\n", " number=nexecs) / nexecs * 1000)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": false }, "outputs": [], "source": [ "#for interesting distances" ] } ], "metadata": { "kernelspec": { "display_name": "Python 2", "language": "python", "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.9" } }, "nbformat": 4, "nbformat_minor": 0 }