{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "from __future__ import print_function\n",
    "\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "\n",
    "from Bio import PDB"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
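    "# Disabled alternative: fetch the mmCIF file and parse it with MMCIFParser.\n",
    "# The next cell loads the PDB-format file through PDBList/PDBParser instead.\n",
    "#!rm -f 1tup.cif 2>/dev/null\n",
    "#!wget \"http://www.rcsb.org/pdb/download/downloadFile.do?fileFormat=cif&compression=NO&structureId=1TUP\" -O 1tup.cif\n",
    "#parser = PDB.MMCIFParser()\n",
    "#p53_1tup = parser.get_structure('P53', '1tup.cif')"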
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Structure exists: './pdb1tup.ent' \n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/tra/Dropbox/soft/biopython/Bio/PDB/StructureBuilder.py:87: PDBConstructionWarning: WARNING: Chain A is discontinuous at line 6146.\n",
      "  PDBConstructionWarning)\n",
      "/home/tra/Dropbox/soft/biopython/Bio/PDB/StructureBuilder.py:87: PDBConstructionWarning: WARNING: Chain B is discontinuous at line 6147.\n",
      "  PDBConstructionWarning)\n",
      "/home/tra/Dropbox/soft/biopython/Bio/PDB/StructureBuilder.py:87: PDBConstructionWarning: WARNING: Chain C is discontinuous at line 6148.\n",
      "  PDBConstructionWarning)\n",
      "/home/tra/Dropbox/soft/biopython/Bio/PDB/StructureBuilder.py:87: PDBConstructionWarning: WARNING: Chain E is discontinuous at line 6149.\n",
      "  PDBConstructionWarning)\n",
      "/home/tra/Dropbox/soft/biopython/Bio/PDB/StructureBuilder.py:87: PDBConstructionWarning: WARNING: Chain F is discontinuous at line 6171.\n",
      "  PDBConstructionWarning)\n",
      "/home/tra/Dropbox/soft/biopython/Bio/PDB/StructureBuilder.py:87: PDBConstructionWarning: WARNING: Chain A is discontinuous at line 6185.\n",
      "  PDBConstructionWarning)\n",
      "/home/tra/Dropbox/soft/biopython/Bio/PDB/StructureBuilder.py:87: PDBConstructionWarning: WARNING: Chain B is discontinuous at line 6383.\n",
      "  PDBConstructionWarning)\n",
      "/home/tra/Dropbox/soft/biopython/Bio/PDB/StructureBuilder.py:87: PDBConstructionWarning: WARNING: Chain C is discontinuous at line 6453.\n",
      "  PDBConstructionWarning)\n"
     ]
    }
   ],
   "source": [
    "# Fetch entry 1TUP into the current directory; the parse step below reads\n",
    "# the file back under the name 'pdb1tup.ent'.\n",
    "repository = PDB.PDBList()\n",
    "repository.retrieve_pdb_file('1TUP', pdir='.')\n",
    "\n",
    "# Build the structure object that the remaining cells query.\n",
    "parser = PDB.PDBParser()\n",
    "p53_1tup = parser.get_structure('P 53', 'pdb1tup.ent')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "set([' ', 'H_ ZN', 'W'])\n"
     ]
    }
   ],
   "source": [
    "# Distinct values of the first field of every residue id in the structure\n",
    "# (later cells treat the value 'W' as water).\n",
    "my_residues = {residue.id[0] for residue in p53_1tup.get_residues()}\n",
    "print(my_residues)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>No Water</th>\n",
       "      <th>Zincs</th>\n",
       "      <th>Water</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>E</th>\n",
       "      <td>6068.04412</td>\n",
       "      <td>0.00</td>\n",
       "      <td>351.9868</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>F</th>\n",
       "      <td>6258.20442</td>\n",
       "      <td>0.00</td>\n",
       "      <td>223.9916</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>A</th>\n",
       "      <td>20548.26300</td>\n",
       "      <td>65.39</td>\n",
       "      <td>3167.8812</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>B</th>\n",
       "      <td>20368.18840</td>\n",
       "      <td>65.39</td>\n",
       "      <td>1119.9580</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>C</th>\n",
       "      <td>20466.22540</td>\n",
       "      <td>65.39</td>\n",
       "      <td>1279.9520</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      No Water  Zincs      Water\n",
       "E   6068.04412   0.00   351.9868\n",
       "F   6258.20442   0.00   223.9916\n",
       "A  20548.26300  65.39  3167.8812\n",
       "B  20368.18840  65.39  1119.9580\n",
       "C  20466.22540  65.39  1279.9520"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def get_mass(atoms, accept_fun=lambda atom: atom.parent.id[0] != 'W'):\n",
    "    \"\"\"Add up ``atom.mass`` over the atoms that ``accept_fun`` accepts.\n",
    "\n",
    "    By default an atom is skipped when the first field of its parent's id\n",
    "    equals 'W'. Returns 0 when no atom is accepted.\n",
    "    \"\"\"\n",
    "    total = 0\n",
    "    for atom in atoms:\n",
    "        if accept_fun(atom):\n",
    "            total += atom.mass\n",
    "    return total\n",
    "\n",
    "chain_names = [chain.id for chain in p53_1tup.get_chains()]\n",
    "# One row per chain, one column per mass category. np.zeros gives every cell\n",
    "# a defined starting value; the bare np.ndarray constructor used before\n",
    "# returns uninitialised memory.\n",
    "my_mass = np.zeros((len(chain_names), 3))\n",
    "for i, chain in enumerate(p53_1tup.get_chains()):\n",
    "    # Walk the chain once and reuse the atom list for all three sums.\n",
    "    chain_atoms = list(chain.get_atoms())\n",
    "    # Column 0: get_mass default filter (every atom whose parent id does not start with 'W').\n",
    "    my_mass[i, 0] = get_mass(chain_atoms)\n",
    "    # Column 1: atoms whose parent id starts with neither ' ' nor 'W'.\n",
    "    my_mass[i, 1] = get_mass(chain_atoms, accept_fun=lambda atom: atom.parent.id[0] not in [' ', 'W'])\n",
    "    # Column 2: only atoms whose parent id starts with 'W'.\n",
    "    my_mass[i, 2] = get_mass(chain_atoms, accept_fun=lambda atom: atom.parent.id[0] == 'W')\n",
    "masses = pd.DataFrame(my_mass, index=chain_names, columns=['No Water', 'Zincs', 'Water'])\n",
    "masses"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "def get_center(atoms, weight_fun=lambda atom: 1 if atom.parent.id[0] != 'W' else 0):\n",
    "    \"\"\"Return the weighted mean position of ``atoms`` as an (x, y, z) tuple.\n",
    "\n",
    "    Each atom contributes ``atom.coord`` scaled by ``weight_fun(atom)``; the\n",
    "    default gives weight 1, or 0 when the first field of the parent's id is\n",
    "    'W'. The weighted sums are divided by the total weight, so that total\n",
    "    must be non-zero.\n",
    "    \"\"\"\n",
    "    total_weight = 0.0\n",
    "    weighted = [0.0, 0.0, 0.0]\n",
    "    for atom in atoms:\n",
    "        x, y, z = atom.coord\n",
    "        w = weight_fun(atom)\n",
    "        total_weight += w\n",
    "        weighted[0] += w * x\n",
    "        weighted[1] += w * y\n",
    "        weighted[2] += w * z\n",
    "    return tuple(component / total_weight for component in weighted)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(58.296333984624454, 15.48705585839803, 78.405295027957777)\n",
      "(58.122611380926472, 15.751286500376127, 78.375370368859649)\n"
     ]
    }
   ],
   "source": [
    "# Centre of the whole structure using get_center's default weighting.\n",
    "geometric_center = get_center(p53_1tup.get_atoms())\n",
    "print(geometric_center)\n",
    "\n",
    "\n",
    "def mass_weight(atom):\n",
    "    \"\"\"Weight an atom by its mass, or 0 when the first field of its parent's id is 'W'.\"\"\"\n",
    "    return atom.mass if atom.parent.id[0] != 'W' else 0\n",
    "\n",
    "\n",
    "# Same structure, with each atom weighted by mass_weight instead.\n",
    "mass_center = get_center(p53_1tup.get_atoms(), weight_fun=mass_weight)\n",
    "print(mass_center)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>X</th>\n",
       "      <th>Y</th>\n",
       "      <th>Z</th>\n",
       "      <th>X (Mass)</th>\n",
       "      <th>Y (Mass)</th>\n",
       "      <th>Z (Mass)</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>E</th>\n",
       "      <td>49.727231</td>\n",
       "      <td>32.744879</td>\n",
       "      <td>81.253417</td>\n",
       "      <td>49.708513</td>\n",
       "      <td>32.759725</td>\n",
       "      <td>81.207395</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>F</th>\n",
       "      <td>51.982368</td>\n",
       "      <td>33.843370</td>\n",
       "      <td>81.578795</td>\n",
       "      <td>52.002223</td>\n",
       "      <td>33.820064</td>\n",
       "      <td>81.624394</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>A</th>\n",
       "      <td>72.990763</td>\n",
       "      <td>28.825429</td>\n",
       "      <td>56.714012</td>\n",
       "      <td>72.822668</td>\n",
       "      <td>28.810327</td>\n",
       "      <td>56.716117</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>B</th>\n",
       "      <td>67.810026</td>\n",
       "      <td>12.624435</td>\n",
       "      <td>88.656590</td>\n",
       "      <td>67.729100</td>\n",
       "      <td>12.724130</td>\n",
       "      <td>88.545659</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>C</th>\n",
       "      <td>38.221565</td>\n",
       "      <td>-5.010494</td>\n",
       "      <td>88.293141</td>\n",
       "      <td>38.169364</td>\n",
       "      <td>-4.915395</td>\n",
       "      <td>88.166711</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "           X          Y          Z   X (Mass)   Y (Mass)   Z (Mass)\n",
       "E  49.727231  32.744879  81.253417  49.708513  32.759725  81.207395\n",
       "F  51.982368  33.843370  81.578795  52.002223  33.820064  81.624394\n",
       "A  72.990763  28.825429  56.714012  72.822668  28.810327  56.716117\n",
       "B  67.810026  12.624435  88.656590  67.729100  12.724130  88.545659\n",
       "C  38.221565  -5.010494  88.293141  38.169364  -4.915395  88.166711"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# One row per chain. Columns 0-2 hold the centre from get_center's default\n",
    "# weighting; columns 3-5 hold the centre weighted by atom.mass.\n",
    "# np.zeros gives every cell a defined starting value; the bare np.ndarray\n",
    "# constructor used before returns uninitialised memory.\n",
    "my_center = np.zeros((len(chain_names), 6))\n",
    "\n",
    "\n",
    "def mass_if_not_water(atom):\n",
    "    \"\"\"Weight an atom by its mass, or 0 when the first field of its parent's id is 'W'.\"\"\"\n",
    "    return atom.mass if atom.parent.id[0] != 'W' else 0\n",
    "\n",
    "\n",
    "for i, chain in enumerate(p53_1tup.get_chains()):\n",
    "    # Walk the chain once and reuse the atom list for both centres.\n",
    "    chain_atoms = list(chain.get_atoms())\n",
    "    my_center[i, 0:3] = get_center(chain_atoms)\n",
    "    my_center[i, 3:6] = get_center(chain_atoms, weight_fun=mass_if_not_water)\n",
    "weights = pd.DataFrame(my_center, index=chain_names, columns=['X', 'Y', 'Z', 'X (Mass)', 'Y (Mass)', 'Z (Mass)'])\n",
    "weights"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# PyMOL visualization (placeholder: nothing is implemented in this cell)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}