{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": false }, "outputs": [], "source": [ "from __future__ import print_function\n", "\n", "\n", "import numpy as np\n", "import pandas as pd\n", "\n", "from Bio import PDB" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false }, "outputs": [], "source": [ "#!rm -f 1tup.cif 2>/dev/null\n", "#!wget \"http://www.rcsb.org/pdb/download/downloadFile.do?fileFormat=cif&compression=NO&structureId=1TUP\" -O 1tup.cif\n", "#parser = PDB.MMCIFParser()\n", "#p53_1tup = parser.get_structure('P53', '1tup.cif')" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Structure exists: './pdb1tup.ent' \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/home/tra/Dropbox/soft/biopython/Bio/PDB/StructureBuilder.py:87: PDBConstructionWarning: WARNING: Chain A is discontinuous at line 6146.\n", " PDBConstructionWarning)\n", "/home/tra/Dropbox/soft/biopython/Bio/PDB/StructureBuilder.py:87: PDBConstructionWarning: WARNING: Chain B is discontinuous at line 6147.\n", " PDBConstructionWarning)\n", "/home/tra/Dropbox/soft/biopython/Bio/PDB/StructureBuilder.py:87: PDBConstructionWarning: WARNING: Chain C is discontinuous at line 6148.\n", " PDBConstructionWarning)\n", "/home/tra/Dropbox/soft/biopython/Bio/PDB/StructureBuilder.py:87: PDBConstructionWarning: WARNING: Chain E is discontinuous at line 6149.\n", " PDBConstructionWarning)\n", "/home/tra/Dropbox/soft/biopython/Bio/PDB/StructureBuilder.py:87: PDBConstructionWarning: WARNING: Chain F is discontinuous at line 6171.\n", " PDBConstructionWarning)\n", "/home/tra/Dropbox/soft/biopython/Bio/PDB/StructureBuilder.py:87: PDBConstructionWarning: WARNING: Chain A is discontinuous at line 6185.\n", " PDBConstructionWarning)\n", "/home/tra/Dropbox/soft/biopython/Bio/PDB/StructureBuilder.py:87: PDBConstructionWarning: WARNING: Chain B is discontinuous at line 6383.\n", " PDBConstructionWarning)\n", "/home/tra/Dropbox/soft/biopython/Bio/PDB/StructureBuilder.py:87: PDBConstructionWarning: WARNING: Chain C is discontinuous at line 6453.\n", " PDBConstructionWarning)\n" ] } ], "source": [ "repository = PDB.PDBList()\n", "parser = PDB.PDBParser()\n", "repository.retrieve_pdb_file('1TUP', pdir='.')\n", "p53_1tup = parser.get_structure('P 53', 'pdb1tup.ent')" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "set([' ', 'H_ ZN', 'W'])\n" ] } ], "source": [ "my_residues = set()\n", "for residue in p53_1tup.get_residues():\n", " my_residues.add(residue.id[0])\n", "print(my_residues)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
No WaterZincsWater
E6068.044120.00351.9868
F6258.204420.00223.9916
A20548.2630065.393167.8812
B20368.1884065.391119.9580
C20466.2254065.391279.9520
\n", "
" ], "text/plain": [ " No Water Zincs Water\n", "E 6068.04412 0.00 351.9868\n", "F 6258.20442 0.00 223.9916\n", "A 20548.26300 65.39 3167.8812\n", "B 20368.18840 65.39 1119.9580\n", "C 20466.22540 65.39 1279.9520" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "def get_mass(atoms, accept_fun=lambda atom: atom.parent.id[0] != 'W'):\n", " return sum([atom.mass for atom in atoms if accept_fun(atom)])\n", "\n", "chain_names = [chain.id for chain in p53_1tup.get_chains()]\n", "my_mass = np.ndarray((len(chain_names), 3))\n", "for i, chain in enumerate(p53_1tup.get_chains()):\n", " my_mass[i, 0] = get_mass(chain.get_atoms())\n", " my_mass[i, 1] = get_mass(chain.get_atoms(), accept_fun=lambda atom: atom.parent.id[0] not in [' ', 'W'])\n", " my_mass[i, 2] = get_mass(chain.get_atoms(), accept_fun=lambda atom: atom.parent.id[0] == 'W')\n", "masses = pd.DataFrame(my_mass, index=chain_names, columns=['No Water', 'Zincs', 'Water'])\n", "masses" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": false }, "outputs": [], "source": [ "def get_center(atoms, weight_fun=lambda atom: 1 if atom.parent.id[0] != 'W' else 0):\n", " xsum = ysum = zsum = 0.0\n", " acum = 0.0\n", " for atom in atoms:\n", " x, y, z = atom.coord\n", " weight = weight_fun(atom)\n", " acum += weight\n", " xsum += weight * x\n", " ysum += weight * y\n", " zsum += weight * z\n", " return xsum / acum, ysum / acum, zsum / acum" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(58.296333984624454, 15.48705585839803, 78.405295027957777)\n", "(58.122611380926472, 15.751286500376127, 78.375370368859649)\n" ] } ], "source": [ "print(get_center(p53_1tup.get_atoms()))\n", "print(get_center(p53_1tup.get_atoms(),\n", " weight_fun=lambda atom: atom.mass if atom.parent.id[0] != 'W' else 0))" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
XYZX (Mass)Y (Mass)Z (Mass)
E49.72723132.74487981.25341749.70851332.75972581.207395
F51.98236833.84337081.57879552.00222333.82006481.624394
A72.99076328.82542956.71401272.82266828.81032756.716117
B67.81002612.62443588.65659067.72910012.72413088.545659
C38.221565-5.01049488.29314138.169364-4.91539588.166711
\n", "
" ], "text/plain": [ " X Y Z X (Mass) Y (Mass) Z (Mass)\n", "E 49.727231 32.744879 81.253417 49.708513 32.759725 81.207395\n", "F 51.982368 33.843370 81.578795 52.002223 33.820064 81.624394\n", "A 72.990763 28.825429 56.714012 72.822668 28.810327 56.716117\n", "B 67.810026 12.624435 88.656590 67.729100 12.724130 88.545659\n", "C 38.221565 -5.010494 88.293141 38.169364 -4.915395 88.166711" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "my_center = np.ndarray((len(chain_names), 6))\n", "for i, chain in enumerate(p53_1tup.get_chains()):\n", " x, y, z = get_center(chain.get_atoms())\n", " my_center[i, 0] = x\n", " my_center[i, 1] = y\n", " my_center[i, 2] = z\n", " x, y, z = get_center(chain.get_atoms(), weight_fun=lambda atom: atom.mass if atom.parent.id[0] != 'W' else 0)\n", " my_center[i, 3] = x\n", " my_center[i, 4] = y\n", " my_center[i, 5] = z\n", "weights = pd.DataFrame(my_center, index=chain_names, columns=['X', 'Y', 'Z', 'X (Mass)', 'Y (Mass)', 'Z (Mass)'])\n", "weights" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": false }, "outputs": [], "source": [ "#Pymol viz" ] } ], "metadata": { "kernelspec": { "display_name": "Python 2", "language": "python", "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.9" } }, "nbformat": 4, "nbformat_minor": 0 }