{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": false }, "outputs": [], "source": [ "from __future__ import print_function\n", "from Bio import PDB" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Downloading PDB structure '1TUP'...\n", "Downloading PDB structure '1OLG'...\n", "Downloading PDB structure '1YCQ'...\n" ] }, { "data": { "text/plain": [ "'./pdb1ycq.ent'" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "repository = PDB.PDBList()\n", "repository.retrieve_pdb_file('1TUP', pdir='.')\n", "repository.retrieve_pdb_file('1OLG', pdir='.')\n", "repository.retrieve_pdb_file('1YCQ', pdir='.')" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/home/tra/Dropbox/soft/biopython/Bio/PDB/StructureBuilder.py:87: PDBConstructionWarning: WARNING: Chain A is discontinuous at line 6146.\n", " PDBConstructionWarning)\n", "/home/tra/Dropbox/soft/biopython/Bio/PDB/StructureBuilder.py:87: PDBConstructionWarning: WARNING: Chain B is discontinuous at line 6147.\n", " PDBConstructionWarning)\n", "/home/tra/Dropbox/soft/biopython/Bio/PDB/StructureBuilder.py:87: PDBConstructionWarning: WARNING: Chain C is discontinuous at line 6148.\n", " PDBConstructionWarning)\n", "/home/tra/Dropbox/soft/biopython/Bio/PDB/StructureBuilder.py:87: PDBConstructionWarning: WARNING: Chain E is discontinuous at line 6149.\n", " PDBConstructionWarning)\n", "/home/tra/Dropbox/soft/biopython/Bio/PDB/StructureBuilder.py:87: PDBConstructionWarning: WARNING: Chain F is discontinuous at line 6171.\n", " PDBConstructionWarning)\n", "/home/tra/Dropbox/soft/biopython/Bio/PDB/StructureBuilder.py:87: PDBConstructionWarning: WARNING: Chain A is discontinuous at line 6185.\n", " PDBConstructionWarning)\n", "/home/tra/Dropbox/soft/biopython/Bio/PDB/StructureBuilder.py:87: PDBConstructionWarning: WARNING: Chain B is discontinuous at line 6383.\n", " PDBConstructionWarning)\n", "/home/tra/Dropbox/soft/biopython/Bio/PDB/StructureBuilder.py:87: PDBConstructionWarning: WARNING: Chain C is discontinuous at line 6453.\n", " PDBConstructionWarning)\n", "/home/tra/Dropbox/soft/biopython/Bio/PDB/StructureBuilder.py:87: PDBConstructionWarning: WARNING: Chain A is discontinuous at line 1125.\n", " PDBConstructionWarning)\n", "/home/tra/Dropbox/soft/biopython/Bio/PDB/StructureBuilder.py:87: PDBConstructionWarning: WARNING: Chain B is discontinuous at line 1160.\n", " PDBConstructionWarning)\n" ] } ], "source": [ "parser = PDB.PDBParser()\n", "p53_1tup = parser.get_structure('P 53 - DNA Binding', 'pdb1tup.ent')\n", "p53_1olg = parser.get_structure('P 53 - Tetramerization', 'pdb1olg.ent')\n", "p53_1ycq = parser.get_structure('P 53 - Transactivation', 'pdb1ycq.ent')" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " structure_method: x-ray diffraction\n", " head: antitumor protein/dna\n", " journal: AUTH Y.CHO,S.GORINA,P.D.JEFFREY,N.P.PAVLETICHTITL CRYSTAL STRUCTURE OF A P53 TUMOR SUPPRESSOR-DNATITL 2 COMPLEX: UNDERSTANDING TUMORIGENIC MUTATIONS.REF SCIENCE V. 265 346 1994REFN ISSN 0036-8075PMID 8023157\n", " journal_reference: y.cho,s.gorina,p.d.jeffrey,n.p.pavletich crystal structure of a p53 tumor suppressor-dna complex: understanding tumorigenic mutations. science v. 265 346 1994 issn 0036-8075 8023157 \n", "\n", " compound:\n", "\n", " 1:\n", " molecule: dna (5'-d(*tp*tp*tp*cp*cp*tp*ap*gp*ap*cp*tp*tp*gp*cp*cp*cp*a p*ap*tp*tp*a)-3') \n", " misc: \n", " engineered: yes\n", " chain: e\n", "\n", "\n", " 3:\n", " molecule: protein (p53 tumor suppressor )\n", " misc: \n", " engineered: yes\n", " chain: a, b, c\n", "\n", "\n", " 2:\n", " molecule: dna (5'-d(*ap*tp*ap*ap*tp*tp*gp*gp*gp*cp*ap*ap*gp*tp*cp*tp*a p*gp*gp*ap*a)-3') \n", " misc: \n", " engineered: yes\n", " chain: f\n", "\n", "\n", " keywords: antigen p53, antitumor protein/dna complex\n", " name: tumor suppressor p53 complexed with dna\n", " author: Y.Cho,S.Gorina,P.D.Jeffrey,N.P.Pavletich\n", " deposition_date: 1995-07-11\n", " release_date: 1995-07-11\n", "\n", " source:\n", "\n", " 1:\n", " synthetic: yes\n", " misc: \n", "\n", "\n", " 3:\n", " expression_system: escherichia coli\n", " expression_system_taxid: 562\n", " organism_scientific: homo sapiens\n", " misc: \n", " cell: human vulva carcinoma\n", " expression_system_plasmid: pet3d\n", " cell_line: a431\n", " organism_taxid: 9606\n", " organism_common: human\n", "\n", "\n", " 2:\n", " synthetic: yes\n", " misc: \n", "\n", "\n", " resolution: 2.2\n", " structure_reference:\n", " -> n.p.pavletich,k.a.chambers,c.o.pabo the dna-binding domain of p53 contains the four conserved regions and the major mutation hot spots genes dev. v. 7 2556 1993 issn 0890-9369 \n", " -> b.vogelstein,k.w.kinzler p53 function and dysfunction cell(cambridge,mass.) v. 70 523 1992 issn 0092-8674 \n" ] } ], "source": [ "def print_pdb_headers(headers, indent=0):\n", " ind_text = ' ' * indent\n", " for header, content in headers.items():\n", " if type(content) == dict:\n", " print('\\n%s%20s:' % (ind_text, header))\n", " print_pdb_headers(content, indent + 4)\n", " print()\n", " elif type(content) == list:\n", " print('%s%20s:' % (ind_text, header))\n", " for elem in content:\n", " print('%s%21s %s' % (ind_text, '->', elem))\n", " else:\n", " print('%s%20s: %s' % (ind_text, header, content))\n", "\n", "print_pdb_headers(p53_1tup.header)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{'1': {'molecule': \"dna (5'-d(*tp*tp*tp*cp*cp*tp*ap*gp*ap*cp*tp*tp*gp*cp*cp*cp*a p*ap*tp*tp*a)-3') \", 'misc': '', 'engineered': 'yes', 'chain': 'e'}, '3': {'molecule': 'protein (p53 tumor suppressor )', 'misc': '', 'engineered': 'yes', 'chain': 'a, b, c'}, '2': {'molecule': \"dna (5'-d(*ap*tp*ap*ap*tp*tp*gp*gp*gp*cp*ap*ap*gp*tp*cp*tp*a p*gp*gp*ap*a)-3') \", 'misc': '', 'engineered': 'yes', 'chain': 'f'}}\n", "{'1': {'molecule': 'tumor suppressor p53 (oligomerization domain)', 'misc': '', 'engineered': 'yes', 'chain': 'a, b, c, d'}}\n", "{'1': {'molecule': 'mdm2', 'engineered': 'yes', 'misc': '', 'synonym': 'mdm2', 'chain': 'a'}, '2': {'fragment': 'residues 13 - 29', 'molecule': 'p53', 'misc': '', 'engineered': 'yes', 'chain': 'b'}}\n" ] } ], "source": [ "print(p53_1tup.header['compound'])\n", "print(p53_1olg.header['compound'])\n", "print(p53_1ycq.header['compound'])" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "1TUP - Chain: E. Number of residues: 43. Number of atoms: 442.\n", "1TUP - Chain: F. Number of residues: 35. Number of atoms: 449.\n", "1TUP - Chain: A. Number of residues: 395. Number of atoms: 1734.\n", "1TUP - Chain: B. Number of residues: 265. Number of atoms: 1593.\n", "1TUP - Chain: C. Number of residues: 276. Number of atoms: 1610.\n", "\n", "1OLG - Chain: A. Number of residues: 42. Number of atoms: 698.\n", "1OLG - Chain: B. Number of residues: 42. Number of atoms: 698.\n", "1OLG - Chain: C. Number of residues: 42. Number of atoms: 698.\n", "1OLG - Chain: D. Number of residues: 42. Number of atoms: 698.\n", "\n", "1YCQ - Chain: A. Number of residues: 123. Number of atoms: 741.\n", "1YCQ - Chain: B. Number of residues: 16. Number of atoms: 100.\n" ] } ], "source": [ "def describe_model(name, pdb):\n", " print()\n", " for model in pdb:\n", " for chain in model:\n", " print('%s - Chain: %s. Number of residues: %d. Number of atoms: %d.' %\n", " (name, chain.id, len(chain), len(list(chain.get_atoms()))))\n", "describe_model('1TUP', p53_1tup)\n", "describe_model('1OLG', p53_1olg)\n", "describe_model('1YCQ', p53_1ycq)\n", "#will go deep in a next recipe (bottom up)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "('H_ ZN', 951, ' ')\n", "('H_ ZN', 952, ' ')\n", "('H_ ZN', 953, ' ')\n" ] } ], "source": [ "for residue in p53_1tup.get_residues():\n", " if residue.id[0] in [' ', 'W']:\n", " continue\n", " print(residue.id)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", " 858 N\n", " 859 C\n", " 860 C\n", " 861 O\n", " 862 C\n", " 863 O\n", "\n" ] } ], "source": [ "res = next(p53_1tup[0]['A'].get_residues())\n", "print(res)\n", "for atom in res:\n", " print(atom, atom.serial_number, atom.element)\n", "print(p53_1tup[0]['A'][94]['CA'])" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1TUP:B SSSVPSQKTYQGSYGFRLGFLHSGTAKSVTCTYSPALNKMFCQLAKTCPVQLWVDSTPPPGTRVRAMAIYKQSQHMTEVVRRCPHHERCSDSDGLAPPQHLIRVEGNLRVEYLDDRNTFRHSVVVPYEPPEVGSDCTTIHYNYMCNSSCMGGMNRRPILTIITLEDSSGNLLGRNSFEVRVCACPGRDRRTEEENLRKKGEPHHELPPGSTKRALPNNT 219\n", "1OLG:B KKKPLDGEYFTLQIRGRERFEMFRELNEALELKDAQAGKEPG 42\n", "1YCQ:B PLSQETFSDLWKLLPEN 17\n" ] } ], "source": [ "from Bio.SeqIO import PdbIO, FastaIO\n", "\n", "def get_fasta(pdb_file, fasta_file, transfer_ids=None):\n", " fasta_writer = FastaIO.FastaWriter(fasta_file)\n", " fasta_writer.write_header()\n", " for rec in PdbIO.PdbSeqresIterator(pdb_file):\n", " if len(rec.seq) == 0:\n", " continue\n", " if transfer_ids is not None and rec.id not in transfer_ids:\n", " continue\n", " print(rec.id, rec.seq, len(rec.seq))\n", " fasta_writer.write_record(rec)\n", " \n", "get_fasta(open('pdb1tup.ent'), open('1tup.fasta', 'w'), transfer_ids=['1TUP:B'])\n", "get_fasta(open('pdb1olg.ent'), open('1olg.fasta', 'w'), transfer_ids=['1OLG:B'])\n", "get_fasta(open('pdb1ycq.ent'), open('1ycq.fasta', 'w'), transfer_ids=['1YCQ:B'])" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": false }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 2", "language": "python", "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.9" } }, "nbformat": 4, "nbformat_minor": 0 }