{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"from __future__ import print_function\n",
"\n",
"\n",
"import numpy as np\n",
"import pandas as pd\n",
"\n",
"from Bio import PDB"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Disabled alternative: fetch 1TUP as mmCIF and parse it with MMCIFParser.\n",
"#!rm -f 1tup.cif 2>/dev/null\n",
"#!wget \"http://www.rcsb.org/pdb/download/downloadFile.do?fileFormat=cif&compression=NO&structureId=1TUP\" -O 1tup.cif\n",
"#parser = PDB.MMCIFParser()\n",
"#p53_1tup = parser.get_structure('P53', '1tup.cif')"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Structure exists: './pdb1tup.ent' \n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/tra/Dropbox/soft/biopython/Bio/PDB/StructureBuilder.py:87: PDBConstructionWarning: WARNING: Chain A is discontinuous at line 6146.\n",
" PDBConstructionWarning)\n",
"/home/tra/Dropbox/soft/biopython/Bio/PDB/StructureBuilder.py:87: PDBConstructionWarning: WARNING: Chain B is discontinuous at line 6147.\n",
" PDBConstructionWarning)\n",
"/home/tra/Dropbox/soft/biopython/Bio/PDB/StructureBuilder.py:87: PDBConstructionWarning: WARNING: Chain C is discontinuous at line 6148.\n",
" PDBConstructionWarning)\n",
"/home/tra/Dropbox/soft/biopython/Bio/PDB/StructureBuilder.py:87: PDBConstructionWarning: WARNING: Chain E is discontinuous at line 6149.\n",
" PDBConstructionWarning)\n",
"/home/tra/Dropbox/soft/biopython/Bio/PDB/StructureBuilder.py:87: PDBConstructionWarning: WARNING: Chain F is discontinuous at line 6171.\n",
" PDBConstructionWarning)\n",
"/home/tra/Dropbox/soft/biopython/Bio/PDB/StructureBuilder.py:87: PDBConstructionWarning: WARNING: Chain A is discontinuous at line 6185.\n",
" PDBConstructionWarning)\n",
"/home/tra/Dropbox/soft/biopython/Bio/PDB/StructureBuilder.py:87: PDBConstructionWarning: WARNING: Chain B is discontinuous at line 6383.\n",
" PDBConstructionWarning)\n",
"/home/tra/Dropbox/soft/biopython/Bio/PDB/StructureBuilder.py:87: PDBConstructionWarning: WARNING: Chain C is discontinuous at line 6453.\n",
" PDBConstructionWarning)\n"
]
}
],
"source": [
"# Download PDB entry 1TUP into the current directory (an existing copy is reused).\n",
"repository = PDB.PDBList()\n",
"pdb_path = repository.retrieve_pdb_file('1TUP', pdir='.')\n",
"\n",
"# Parse the file that retrieve_pdb_file reported instead of re-typing its name,\n",
"# so the path cannot drift from the pdir used above.\n",
"# NOTE(review): PDBParser expects a PDB-format file; if the installed Biopython\n",
"# lets retrieve_pdb_file pick a format, confirm it still fetches PDB here.\n",
"parser = PDB.PDBParser()\n",
"p53_1tup = parser.get_structure('P 53', pdb_path)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"set([' ', 'H_ ZN', 'W'])\n"
]
}
],
"source": [
"# Distinct values found in the first field of every residue id in the structure.\n",
"my_residues = {residue.id[0] for residue in p53_1tup.get_residues()}\n",
"print(my_residues)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"
\n",
" \n",
" \n",
" | \n",
" No Water | \n",
" Zincs | \n",
" Water | \n",
"
\n",
" \n",
" \n",
" \n",
" E | \n",
" 6068.04412 | \n",
" 0.00 | \n",
" 351.9868 | \n",
"
\n",
" \n",
" F | \n",
" 6258.20442 | \n",
" 0.00 | \n",
" 223.9916 | \n",
"
\n",
" \n",
" A | \n",
" 20548.26300 | \n",
" 65.39 | \n",
" 3167.8812 | \n",
"
\n",
" \n",
" B | \n",
" 20368.18840 | \n",
" 65.39 | \n",
" 1119.9580 | \n",
"
\n",
" \n",
" C | \n",
" 20466.22540 | \n",
" 65.39 | \n",
" 1279.9520 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" No Water Zincs Water\n",
"E 6068.04412 0.00 351.9868\n",
"F 6258.20442 0.00 223.9916\n",
"A 20548.26300 65.39 3167.8812\n",
"B 20368.18840 65.39 1119.9580\n",
"C 20466.22540 65.39 1279.9520"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"def get_mass(atoms, accept_fun=lambda atom: atom.parent.id[0] != 'W'):\n",
"    \"\"\"Add up ``atom.mass`` over the atoms that ``accept_fun`` approves.\n",
"\n",
"    atoms: iterable of objects exposing ``mass``.\n",
"    accept_fun: predicate called with each atom; by default an atom counts\n",
"        unless the first field of its parent's id is 'W'.\n",
"    \"\"\"\n",
"    accepted = filter(accept_fun, atoms)\n",
"    return sum(atom.mass for atom in accepted)\n",
"\n",
"chain_names = [chain.id for chain in p53_1tup.get_chains()]\n",
"\n",
"# Column label -> atom predicate on the first field of the parent's id.\n",
"# Built once here instead of re-creating the lambdas for every chain.\n",
"mass_columns = [\n",
"    ('No Water', lambda atom: atom.parent.id[0] != 'W'),\n",
"    ('Zincs', lambda atom: atom.parent.id[0] not in [' ', 'W']),\n",
"    ('Water', lambda atom: atom.parent.id[0] == 'W'),\n",
"]\n",
"\n",
"# np.zeros yields a fully initialised table; np.ndarray(shape) is the low-level\n",
"# constructor and returns whatever happens to be in memory.\n",
"my_mass = np.zeros((len(chain_names), len(mass_columns)))\n",
"for i, chain in enumerate(p53_1tup.get_chains()):\n",
"    for j, (label, accept) in enumerate(mass_columns):\n",
"        # get_atoms() is called again per column so each sum sees a fresh iterator.\n",
"        my_mass[i, j] = get_mass(chain.get_atoms(), accept_fun=accept)\n",
"\n",
"masses = pd.DataFrame(my_mass, index=chain_names,\n",
"                      columns=[label for label, accept in mass_columns])\n",
"masses"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"def get_center(atoms, weight_fun=lambda atom: 1 if atom.parent.id[0] != 'W' else 0):\n",
"    \"\"\"Return the weighted mean position of ``atoms`` as an (x, y, z) tuple.\n",
"\n",
"    atoms: iterable of objects whose ``coord`` unpacks into x, y, z.\n",
"    weight_fun: callable giving the weight of each atom. The default gives\n",
"        weight 1 to every atom unless the first field of its parent's id is\n",
"        'W', which gets weight 0.\n",
"\n",
"    Raises ZeroDivisionError when the weights add up to zero (no atoms, or\n",
"    every atom weighted 0), since the center is undefined in that case.\n",
"    \"\"\"\n",
"    xsum = ysum = zsum = 0.0\n",
"    acum = 0.0\n",
"    for atom in atoms:\n",
"        x, y, z = atom.coord\n",
"        weight = weight_fun(atom)\n",
"        acum += weight\n",
"        xsum += weight * x\n",
"        ysum += weight * y\n",
"        zsum += weight * z\n",
"    # Fail the same way for every coordinate type: plain floats would raise a\n",
"    # bare 'float division by zero' below, while NumPy scalars would yield NaNs.\n",
"    if acum == 0:\n",
"        raise ZeroDivisionError('total weight is zero; center is undefined')\n",
"    return xsum / acum, ysum / acum, zsum / acum"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(58.296333984624454, 15.48705585839803, 78.405295027957777)\n",
"(58.122611380926472, 15.751286500376127, 78.375370368859649)\n"
]
}
],
"source": [
"# Center using get_center's default weighting.\n",
"center_unweighted = get_center(p53_1tup.get_atoms())\n",
"print(center_unweighted)\n",
"\n",
"# Same computation with each atom weighted by its mass ('W' parents weigh 0).\n",
"center_by_mass = get_center(\n",
"    p53_1tup.get_atoms(),\n",
"    weight_fun=lambda atom: atom.mass if atom.parent.id[0] != 'W' else 0\n",
")\n",
"print(center_by_mass)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" X | \n",
" Y | \n",
" Z | \n",
" X (Mass) | \n",
" Y (Mass) | \n",
" Z (Mass) | \n",
"
\n",
" \n",
" \n",
" \n",
" E | \n",
" 49.727231 | \n",
" 32.744879 | \n",
" 81.253417 | \n",
" 49.708513 | \n",
" 32.759725 | \n",
" 81.207395 | \n",
"
\n",
" \n",
" F | \n",
" 51.982368 | \n",
" 33.843370 | \n",
" 81.578795 | \n",
" 52.002223 | \n",
" 33.820064 | \n",
" 81.624394 | \n",
"
\n",
" \n",
" A | \n",
" 72.990763 | \n",
" 28.825429 | \n",
" 56.714012 | \n",
" 72.822668 | \n",
" 28.810327 | \n",
" 56.716117 | \n",
"
\n",
" \n",
" B | \n",
" 67.810026 | \n",
" 12.624435 | \n",
" 88.656590 | \n",
" 67.729100 | \n",
" 12.724130 | \n",
" 88.545659 | \n",
"
\n",
" \n",
" C | \n",
" 38.221565 | \n",
" -5.010494 | \n",
" 88.293141 | \n",
" 38.169364 | \n",
" -4.915395 | \n",
" 88.166711 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" X Y Z X (Mass) Y (Mass) Z (Mass)\n",
"E 49.727231 32.744879 81.253417 49.708513 32.759725 81.207395\n",
"F 51.982368 33.843370 81.578795 52.002223 33.820064 81.624394\n",
"A 72.990763 28.825429 56.714012 72.822668 28.810327 56.716117\n",
"B 67.810026 12.624435 88.656590 67.729100 12.724130 88.545659\n",
"C 38.221565 -5.010494 88.293141 38.169364 -4.915395 88.166711"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"def mass_weight(atom):\n",
"    \"\"\"Return the atom's mass, or 0 when the first field of its parent's id is 'W'.\"\"\"\n",
"    return atom.mass if atom.parent.id[0] != 'W' else 0\n",
"\n",
"\n",
"# Columns 0-2: center with get_center's default weighting.\n",
"# Columns 3-5: center with every atom weighted by mass_weight.\n",
"# np.zeros is used so the table never starts out as uninitialised memory,\n",
"# which is what the low-level np.ndarray(shape) constructor returns.\n",
"my_center = np.zeros((len(chain_names), 6))\n",
"for i, chain in enumerate(p53_1tup.get_chains()):\n",
"    my_center[i, 0:3] = get_center(chain.get_atoms())\n",
"    my_center[i, 3:6] = get_center(chain.get_atoms(), weight_fun=mass_weight)\n",
"\n",
"weights = pd.DataFrame(my_center, index=chain_names,\n",
"                       columns=['X', 'Y', 'Z', 'X (Mass)', 'Y (Mass)', 'Z (Mass)'])\n",
"weights"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# PyMOL visualization (placeholder: no code in this cell yet)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.9"
}
},
"nbformat": 4,
"nbformat_minor": 0
}