{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": false }, "outputs": [], "source": [ "from collections import defaultdict\n", "\n", "import requests\n", "\n", "from Bio import ExPASy, SwissProt" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false }, "outputs": [], "source": [ "# TODO: explain why Biopython is not used for this request.\n", "# NOTE(review): this looks like the legacy UniProt query endpoint; confirm it\n", "# still answers 'format=tab' requests before relying on a fresh run.\n", "server = 'http://www.uniprot.org/uniprot'\n", "\n", "\n", "def do_request(server, ID='', **kwargs):\n", "    \"\"\"Send a GET request to ``server``/``ID`` and return the response.\n", "\n", "    Parameters\n", "    ----------\n", "    server : str\n", "        Base URL of the service to query.\n", "    ID : str, optional\n", "        Path component appended after the base URL (empty by default).\n", "    **kwargs\n", "        Forwarded to ``requests.get`` as URL query parameters.\n", "\n", "    Returns\n", "    -------\n", "    requests.Response\n", "        The response of a request that did not fail with an HTTP error.\n", "\n", "    Raises\n", "    ------\n", "    requests.HTTPError\n", "        If the server answers with a 4xx or 5xx status code.\n", "    \"\"\"\n", "    req = requests.get('%s/%s' % (server, ID), params=kwargs)\n", "    # raise_for_status() does nothing for successful responses, so no\n", "    # separate 'req.ok' check is needed.\n", "    req.raise_for_status()\n", "    return req" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false }, "outputs": [], "source": [ "# Every keyword argument after 'server' is sent as a URL query parameter.\n", "req = do_request(\n", "    server,\n", "    query='gene:p53 AND reviewed:yes',  # AND organism:Human',\n", "    format='tab',\n", "    columns='id,entry name,length,organism,organism-id,database(PDB),database(HGNC)',\n", "    limit='50')\n", "# We might revisit this for KEGG" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
EntryEntry nameLengthOrganismIDCross-reference (PDB)Cross-reference (HGNC)
0 Q9W678 P53_BARBU 369 Barbus barbus (Barbel) (Cyprinus barbus) 40830 NaN NaN
1 Q29537 P53_CANFA 381 Canis familiaris (Dog) (Canis lupus familiaris) 9615 NaN NaN
2 O09185 P53_CRIGR 393 Cricetulus griseus (Chinese hamster) (Cricetul... 10029 NaN NaN
3 Q8SPZ3 P53_DELLE 387 Delphinapterus leucas (Beluga whale) 9749 NaN NaN
4 P79892 P53_HORSE 280 Equus caballus (Horse) 9796 NaN NaN
5 P04637 P53_HUMAN 393 Homo sapiens (Human) 9606 1A1U;1AIE;1C26;1DT7;1GZH;1H26;1HS5;1JSP;1KZY;1... 11998;
6 O93379 P53_ICTPU 376 Ictalurus punctatus (Channel catfish) (Silurus... 7998 NaN NaN
7 P56423 P53_MACFA 393 Macaca fascicularis (Crab-eating macaque) (Cyn... 9541 NaN NaN
8 P61260 P53_MACFU 393 Macaca fuscata fuscata (Japanese macaque) 9543 NaN NaN
9 P56424 P53_MACMU 393 Macaca mulatta (Rhesus macaque) 9544 NaN NaN
10 P02340 P53_MOUSE 387 Mus musculus (Mouse) 10090 1HU8;2GEQ;2IOI;2IOM;2IOO;2P52;3EXJ;3EXL; NaN
11 P25035 P53_ONCMY 396 Oncorhynchus mykiss (Rainbow trout) (Salmo gai... 8022 NaN NaN
12 P79820 P53_ORYLA 352 Oryzias latipes (Medaka fish) (Japanese ricefish) 8090 NaN NaN
13 Q9TUB2 P53_PIG 386 Sus scrofa (Pig) 9823 NaN NaN
14 O12946 P53_PLAFE 366 Platichthys flesus (European flounder) (Pleuro... 8260 NaN NaN
15 P10361 P53_RAT 391 Rattus norvegicus (Rat) 10116 NaN NaN
16 Q9W679 P53_TETMU 367 Tetraodon miurus (Congo puffer) 94908 NaN NaN
17 Q9TTA1 P53_TUPBE 393 Tupaia belangeri (Common tree shrew) (Tupaia g... 37347 NaN NaN
18 O57538 P53_XIPHE 342 Xiphophorus helleri (Green swordtail) 8084 NaN NaN
19 Q92143 P53_XIPMA 342 Xiphophorus maculatus (Southern platyfish) (Pl... 8083 NaN NaN
20 Q42578 PER53_ARATH 335 Arabidopsis thaliana (Mouse-ear cress) 3702 1PA2;1QO4; NaN
\n", "
" ], "text/plain": [ " Entry Entry name Length \\\n", "0 Q9W678 P53_BARBU 369 \n", "1 Q29537 P53_CANFA 381 \n", "2 O09185 P53_CRIGR 393 \n", "3 Q8SPZ3 P53_DELLE 387 \n", "4 P79892 P53_HORSE 280 \n", "5 P04637 P53_HUMAN 393 \n", "6 O93379 P53_ICTPU 376 \n", "7 P56423 P53_MACFA 393 \n", "8 P61260 P53_MACFU 393 \n", "9 P56424 P53_MACMU 393 \n", "10 P02340 P53_MOUSE 387 \n", "11 P25035 P53_ONCMY 396 \n", "12 P79820 P53_ORYLA 352 \n", "13 Q9TUB2 P53_PIG 386 \n", "14 O12946 P53_PLAFE 366 \n", "15 P10361 P53_RAT 391 \n", "16 Q9W679 P53_TETMU 367 \n", "17 Q9TTA1 P53_TUPBE 393 \n", "18 O57538 P53_XIPHE 342 \n", "19 Q92143 P53_XIPMA 342 \n", "20 Q42578 PER53_ARATH 335 \n", "\n", " Organism ID \\\n", "0 Barbus barbus (Barbel) (Cyprinus barbus) 40830 \n", "1 Canis familiaris (Dog) (Canis lupus familiaris) 9615 \n", "2 Cricetulus griseus (Chinese hamster) (Cricetul... 10029 \n", "3 Delphinapterus leucas (Beluga whale) 9749 \n", "4 Equus caballus (Horse) 9796 \n", "5 Homo sapiens (Human) 9606 \n", "6 Ictalurus punctatus (Channel catfish) (Silurus... 7998 \n", "7 Macaca fascicularis (Crab-eating macaque) (Cyn... 9541 \n", "8 Macaca fuscata fuscata (Japanese macaque) 9543 \n", "9 Macaca mulatta (Rhesus macaque) 9544 \n", "10 Mus musculus (Mouse) 10090 \n", "11 Oncorhynchus mykiss (Rainbow trout) (Salmo gai... 8022 \n", "12 Oryzias latipes (Medaka fish) (Japanese ricefish) 8090 \n", "13 Sus scrofa (Pig) 9823 \n", "14 Platichthys flesus (European flounder) (Pleuro... 8260 \n", "15 Rattus norvegicus (Rat) 10116 \n", "16 Tetraodon miurus (Congo puffer) 94908 \n", "17 Tupaia belangeri (Common tree shrew) (Tupaia g... 37347 \n", "18 Xiphophorus helleri (Green swordtail) 8084 \n", "19 Xiphophorus maculatus (Southern platyfish) (Pl... 8083 \n", "20 Arabidopsis thaliana (Mouse-ear cress) 3702 \n", "\n", " Cross-reference (PDB) Cross-reference (HGNC) \n", "0 NaN NaN \n", "1 NaN NaN \n", "2 NaN NaN \n", "3 NaN NaN \n", "4 NaN NaN \n", "5 1A1U;1AIE;1C26;1DT7;1GZH;1H26;1HS5;1JSP;1KZY;1... 
11998; \n", "6 NaN NaN \n", "7 NaN NaN \n", "8 NaN NaN \n", "9 NaN NaN \n", "10 1HU8;2GEQ;2IOI;2IOM;2IOO;2P52;3EXJ;3EXL; NaN \n", "11 NaN NaN \n", "12 NaN NaN \n", "13 NaN NaN \n", "14 NaN NaN \n", "15 NaN NaN \n", "16 NaN NaN \n", "17 NaN NaN \n", "18 NaN NaN \n", "19 NaN NaN \n", "20 1PA2;1QO4; NaN " ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import io\n", "\n", "import pandas as pd\n", "\n", "# io.StringIO accepts the unicode response text on both Python 2 and 3; the\n", "# standalone StringIO module only exists on Python 2.\n", "# rename() returns a new frame, so re-running this cell gives the same result.\n", "uniprot_list = pd.read_table(io.StringIO(req.text)).rename(\n", "    columns={'Organism ID': 'ID'})\n", "uniprot_list" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": false }, "outputs": [], "source": [ "# 9606 is the organism ID listed for 'Homo sapiens (Human)' in the table above.\n", "p53_human = uniprot_list.loc[uniprot_list.ID == 9606, 'Entry'].iloc[0]" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": false }, "outputs": [], "source": [ "# Open a handle on the raw Swiss-Prot entry for the selected accession.\n", "handle = ExPASy.get_sprot_raw(p53_human)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": false }, "outputs": [], "source": [ "sp_rec = SwissProt.read(handle)\n", "# The record has been parsed, so the network handle can be released.\n", "handle.close()" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "('P53_HUMAN', 393, 'Name=TP53; Synonyms=P53;')\n", "RecName: Full=Cellular tumor antigen p53; AltName: Full=Antigen NY-CO-13; AltName: Full=Phosphoprotein p53; AltName: Full=Tumor suppressor p53;\n", "('Homo sapiens (Human).', (393, 43653, 'AD5C149FD8106131'))\n", "MEEPQSDPSVEPPLSQETFSDLWKLLPENNVLSPLPSQAMDDLMLSPDDIEQWFTEDPGPDEAPRMPEAAPPVAPAPAAPTPAAPAPAPSWPLSSSVPSQKTYQGSYGFRLGFLHSGTAKSVTCTYSPALNKMFCQLAKTCPVQLWVDSTPPPGTRVRAMAIYKQSQHMTEVVRRCPHHERCSDSDGLAPPQHLIRVEGNLRVEYLDDRNTFRHSVVVPYEPPEVGSDCTTIHYNYMCNSSCMGGMNRRPILTIITLEDSSGNLLGRNSFEVRVCACPGRDRRTEEENLRKKGEPHHELPPGSTKRALPNNTSSSPQPKKKPLDGEYFTLQIRGRERFEMFRELNEALELKDAQAGKEPGGSRAHSSHLKSKKGQSTSRHKKLMFKTEGPDSD\n" ] } ], "source": [ "print(sp_rec.entry_name, sp_rec.sequence_length, sp_rec.gene_name)\n", 
"print(sp_rec.description)\n", "print(sp_rec.organism, sp_rec.seqinfo)\n", "print(sp_rec.sequence)" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['FUNCTION: Acts as a tumor suppressor in many tumor types; induces growth arrest or apoptosis depending on the physiological circumstances and cell type. Involved in cell cycle regulation as a trans-activator that acts to negatively regulate cell division by controlling a set of genes required for this process. One of the activated genes is an inhibitor of cyclin-dependent kinases. Apoptosis induction seems to be mediated either by stimulation of BAX and FAS antigen expression, or by repression of Bcl-2 expression. In cooperation with mitochondrial PPIF is involved in activating oxidative stress-induced necrosis; the function is largely independent of transcription. Induces the transcription of long intergenic non-coding RNA p21 (lincRNA-p21) and lincRNA- Mkln1. LincRNA-p21 participates in TP53-dependent transcriptional repression leading to apoptosis and seem to have to effect on cell-cycle regulation. Implicated in Notch signaling cross-over. Prevents CDK7 kinase activity when associated to CAK complex in response to DNA damage, thus stopping cell cycle progression. Isoform 2 enhances the transactivation activity of isoform 1 from some but not all TP53-inducible promoters. Isoform 4 suppresses transactivation activity and impairs growth suppression mediated by isoform 1. Isoform 7 inhibits isoform 1-mediated apoptosis. 
{ECO:0000269|PubMed:11025664, ECO:0000269|PubMed:12810724, ECO:0000269|PubMed:15186775, ECO:0000269|PubMed:15340061, ECO:0000269|PubMed:17317671, ECO:0000269|PubMed:17349958, ECO:0000269|PubMed:19556538, ECO:0000269|PubMed:20673990, ECO:0000269|PubMed:20959462, ECO:0000269|PubMed:22726440, ECO:0000269|PubMed:9840937}.', 'COFACTOR: Name=Zn(2+); Xref=ChEBI:CHEBI:29105; Note=Binds 1 zinc ion per subunit.;', \"SUBUNIT: Interacts with AXIN1. Probably part of a complex consisting of TP53, HIPK2 and AXIN1 (By similarity). Binds DNA as a homotetramer. Interacts with histone acetyltransferases EP300 and methyltransferases HRMT1L2 and CARM1, and recruits them to promoters. In vitro, the interaction of TP53 with cancer- associated/HPV (E6) viral proteins leads to ubiquitination and degradation of TP53 giving a possible model for cell growth regulation. This complex formation requires an additional factor, E6-AP, which stably associates with TP53 in the presence of E6. Interacts (via C-terminus) with TAF1; when TAF1 is part of the TFIID complex. Interacts with ING4; this interaction may be indirect. Found in a complex with CABLES1 and TP73. Interacts with HIPK1, HIPK2, and TP53INP1. Interacts with WWOX. May interact with HCV core protein. Interacts with USP7 and SYVN1. Interacts with HSP90AB1. Interacts with CHD8; leading to recruit histone H1 and prevent transactivation activity (By similarity). Interacts with ARMC10, BANP, CDKN2AIP, NUAK1, STK11/LKB1, UHRF2 and E4F1. Interacts with YWHAZ; the interaction enhances TP53 transcriptional activity. Phosphorylation of YWHAZ on 'Ser-58' inhibits this interaction. Interacts (via DNA-binding domain) with MAML1 (via N-terminus). Interacts with MKRN1. Interacts with PML (via C-terminus). Interacts with MDM2; leading to ubiquitination and proteasomal degradation of TP53. Directly interacts with FBXO42; leading to ubiquitination and degradation of TP53. 
Interacts (phosphorylated at Ser-15 by ATM) with the phosphatase PP2A-PPP2R5C holoenzyme; regulates stress-induced TP53-dependent inhibition of cell proliferation. Interacts with PPP2R2A. Interacts with AURKA, DAXX, BRD7 and TRIM24. Interacts (when monomethylated at Lys-382) with L3MBTL1. Isoform 1 interacts with isoform 2 and with isoform 4. Interacts with GRK5. Binds to the CAK complex (CDK7, cyclin H and MAT1) in response to DNA damage. Interacts with CDK5 in neurons. Interacts with AURKB, SETD2, UHRF2 and NOC2L. Interacts (via N-terminus) with PTK2/FAK1; this promotes ubiquitination by MDM2. Interacts with PTK2B/PYK2; this promotes ubiquitination by MDM2. Interacts with PRKCG. Interacts with PPIF; the association implicates preferentially tetrameric TP53, is induced by oxidative stress and is impaired by cyclosporin A (CsA). Interacts with human cytomegalovirus/HHV-5 protein UL123. Interacts with SNAI1; the interaction induces SNAI1 degradation via MDM2-mediated ubiquitination and inhibits SNAI1- induced cell invasion. Interacts with KAT6A. Interacts with UBC9. Interacts with ZNF385B; the interaction is direct. Interacts (via DNA-binding domain) with ZNF385A; the interaction is direct and enhances p53/TP53 transactivation functions on cell-cycle arrest target genes, resulting in growth arrest. Interacts with ANKRD2. Interacts with RFFL and RNF34; involved in p53/TP53 ubiquitination. Interacts with MTA1 and RFWD2. 
{ECO:0000250, ECO:0000269|PubMed:10570149, ECO:0000269|PubMed:10644996, ECO:0000269|PubMed:11025664, ECO:0000269|PubMed:11058590, ECO:0000269|PubMed:11706030, ECO:0000269|PubMed:11740489, ECO:0000269|PubMed:11780126, ECO:0000269|PubMed:11925430, ECO:0000269|PubMed:12507430, ECO:0000269|PubMed:12702766, ECO:0000269|PubMed:12750254, ECO:0000269|PubMed:12810724, ECO:0000269|PubMed:12851404, ECO:0000269|PubMed:14534297, ECO:0000269|PubMed:14702041, ECO:0000269|PubMed:15053879, ECO:0000269|PubMed:15109303, ECO:0000269|PubMed:15136035, ECO:0000269|PubMed:15186775, ECO:0000269|PubMed:15701641, ECO:0000269|PubMed:15855171, ECO:0000269|PubMed:16219768, ECO:0000269|PubMed:16322561, ECO:0000269|PubMed:16376338, ECO:0000269|PubMed:16377624, ECO:0000269|PubMed:16402859, ECO:0000269|PubMed:16474402, ECO:0000269|PubMed:16793544, ECO:0000269|PubMed:16845383, ECO:0000269|PubMed:17015838, ECO:0000269|PubMed:17108107, ECO:0000269|PubMed:17121812, ECO:0000269|PubMed:17170702, ECO:0000269|PubMed:17245430, ECO:0000269|PubMed:17317671, ECO:0000269|PubMed:17591690, ECO:0000269|PubMed:17719541, ECO:0000269|PubMed:17904127, ECO:0000269|PubMed:17967874, ECO:0000269|PubMed:18585004, ECO:0000269|PubMed:18650397, ECO:0000269|PubMed:18996393, ECO:0000269|PubMed:19509332, ECO:0000269|PubMed:19515728, ECO:0000269|PubMed:19536131, ECO:0000269|PubMed:19556538, ECO:0000269|PubMed:19776115, ECO:0000269|PubMed:19837670, ECO:0000269|PubMed:19880522, ECO:0000269|PubMed:20124405, ECO:0000269|PubMed:20142040, ECO:0000269|PubMed:20228809, ECO:0000269|PubMed:20364130, ECO:0000269|PubMed:20385133, ECO:0000269|PubMed:20660729, ECO:0000269|PubMed:20870725, ECO:0000269|PubMed:20959462, ECO:0000269|PubMed:21317932, ECO:0000269|PubMed:21952639, ECO:0000269|PubMed:22214662, ECO:0000269|PubMed:22726440, ECO:0000269|PubMed:22945289, ECO:0000269|PubMed:23431171, ECO:0000269|PubMed:8875926, ECO:0000269|PubMed:8875929, ECO:0000269|PubMed:9840937}.\", 'INTERACTION: Self; NbExp=7; IntAct=EBI-366083, EBI-366083; P03070:- 
(xeno); NbExp=18; IntAct=EBI-366083, EBI-617698; P26663:- (xeno); NbExp=9; IntAct=EBI-366083, EBI-6838571; Q7L7W2:-; NbExp=2; IntAct=EBI-366083, EBI-7210801; Q8QW27:- (xeno); NbExp=2; IntAct=EBI-366083, EBI-6863726; O95376:ARIH2; NbExp=5; IntAct=EBI-366083, EBI-711158; Q9UBL3:ASH2L; NbExp=5; IntAct=EBI-366083, EBI-540797; O15169:AXIN1; NbExp=4; IntAct=EBI-366083, EBI-710484; Q8N9N5:BANP; NbExp=3; IntAct=EBI-366083, EBI-744695; P10415:BCL2; NbExp=5; IntAct=EBI-366083, EBI-77694; Q07817-1:BCL2L1; NbExp=18; IntAct=EBI-366083, EBI-287195; P11274:BCR; NbExp=2; IntAct=EBI-366083, EBI-712838; O14503:BHLHE40; NbExp=5; IntAct=EBI-366083, EBI-711810; Q9NPI1:BRD7; NbExp=8; IntAct=EBI-366083, EBI-711221; Q9BX70:BTBD2; NbExp=2; IntAct=EBI-366083, EBI-710091; Q9ESJ1:Cables1 (xeno); NbExp=3; IntAct=EBI-366083, EBI-604411; Q9BWC9:CCDC106; NbExp=3; IntAct=EBI-366083, EBI-711501; P38936:CDKN1A; NbExp=3; IntAct=EBI-366083, EBI-375077; P17676:CEBPB; NbExp=4; IntAct=EBI-366083, EBI-969696; Q92793:CREBBP; NbExp=8; IntAct=EBI-366083, EBI-81215; P45481:Crebbp (xeno); NbExp=6; IntAct=EBI-366083, EBI-296306; P55060:CSE1L; NbExp=5; IntAct=EBI-366083, EBI-286709; P68400:CSNK2A1; NbExp=2; IntAct=EBI-366083, EBI-347804; Q14999:CUL7; NbExp=4; IntAct=EBI-366083, EBI-308606; Q8IWT3:CUL9; NbExp=3; IntAct=EBI-366083, EBI-311123; Q9P0U4:CXXC1; NbExp=7; IntAct=EBI-366083, EBI-949911; Q9UER7:DAXX; NbExp=12; IntAct=EBI-366083, EBI-77321; Q92841:DDX17; NbExp=3; IntAct=EBI-366083, EBI-746012; P17844:DDX5; NbExp=6; IntAct=EBI-366083, EBI-351962; Q9BV47:DUSP26; NbExp=9; IntAct=EBI-366083, EBI-2924519; P03126:E6 (xeno); NbExp=3; IntAct=EBI-366083, EBI-1177242; P06463:E6 (xeno); NbExp=2; IntAct=EBI-366083, EBI-1186926; Q09472:EP300; NbExp=10; IntAct=EBI-366083, EBI-447295; Q86XK2:FBXO11; NbExp=4; IntAct=EBI-366083, EBI-1047804; O43524:FOXO3; NbExp=2; IntAct=EBI-366083, EBI-1644164; P49841:GSK3B; NbExp=3; IntAct=EBI-366083, EBI-373586; P32780:GTF2H1; NbExp=5; IntAct=EBI-366083, EBI-715539; Q13547:HDAC1; 
NbExp=7; IntAct=EBI-366083, EBI-301834; Q86Z02:HIPK1; NbExp=2; IntAct=EBI-366083, EBI-692891; P61978:HNRNPK; NbExp=2; IntAct=EBI-366083, EBI-304185; P61978-2:HNRNPK; NbExp=2; IntAct=EBI-366083, EBI-7060731; P34931:HSPA1L; NbExp=2; IntAct=EBI-366083, EBI-354912; P38646:HSPA9; NbExp=6; IntAct=EBI-366083, EBI-354932; P04792:HSPB1; NbExp=3; IntAct=EBI-366083, EBI-352682; P42858:HTT; NbExp=4; IntAct=EBI-366083, EBI-466029; Q7Z6Z7:HUWE1; NbExp=3; IntAct=EBI-366083, EBI-625934; Q16666-2:IFI16; NbExp=3; IntAct=EBI-366083, EBI-6273540; Q08619:Ifi205b (xeno); NbExp=2; IntAct=EBI-366083, EBI-8064290; Q92993:KAT5; NbExp=3; IntAct=EBI-366083, EBI-399080; Q9H7Z6:KAT8; NbExp=2; IntAct=EBI-366083, EBI-896414; Q8IZD2:KMT2E; NbExp=4; IntAct=EBI-366083, EBI-2689959; Q16363:LAMA4; NbExp=2; IntAct=EBI-366083, EBI-711505; P43356:MAGEA2B; NbExp=6; IntAct=EBI-366083, EBI-5650739; Q9UBF1:MAGEC2; NbExp=3; IntAct=EBI-366083, EBI-5651487; P46821:MAP1B; NbExp=6; IntAct=EBI-366083, EBI-764611; Q15759:MAPK11; NbExp=2; IntAct=EBI-366083, EBI-298304; Q8IW41:MAPKAPK5; NbExp=2; IntAct=EBI-366083, EBI-1201460; Q00987:MDM2; NbExp=63; IntAct=EBI-366083, EBI-389668; O15151:MDM4; NbExp=13; IntAct=EBI-366083, EBI-398437; Q9UHC7:MKRN1; NbExp=8; IntAct=EBI-366083, EBI-373524; O75970:MPDZ; NbExp=3; IntAct=EBI-366083, EBI-821405; P04731:MT1A; NbExp=3; IntAct=EBI-366083, EBI-8045030; P19338:NCL; NbExp=2; IntAct=EBI-366083, EBI-346967; P23511:NFYA; NbExp=11; IntAct=EBI-366083, EBI-389739; P25208:NFYB; NbExp=6; IntAct=EBI-366083, EBI-389728; Q9Y3T9:NOC2L; NbExp=8; IntAct=EBI-366083, EBI-751547; P06748:NPM1; NbExp=6; IntAct=EBI-366083, EBI-78579; P06748-1:NPM1; NbExp=3; IntAct=EBI-366083, EBI-354150; Q15466:NR0B2; NbExp=3; IntAct=EBI-366083, EBI-3910729; P22736:NR4A1; NbExp=6; IntAct=EBI-366083, EBI-721550; O43847:NRD1; NbExp=6; IntAct=EBI-366083, EBI-2371631; O60285:NUAK1; NbExp=5; IntAct=EBI-366083, EBI-1046789; Q96FW1:OTUB1; NbExp=8; IntAct=EBI-366083, EBI-1058491; Q8TEW0:PARD3; NbExp=3; IntAct=EBI-366083, 
EBI-81968; P09874:PARP1; NbExp=3; IntAct=EBI-366083, EBI-355676; Q96KB5:PBK; NbExp=7; IntAct=EBI-366083, EBI-536853; O75925:PIAS1; NbExp=4; IntAct=EBI-366083, EBI-629434; O75928:PIAS2; NbExp=2; IntAct=EBI-366083, EBI-348555; Q8N2W9:PIAS4; NbExp=2; IntAct=EBI-366083, EBI-473160; Q13526:PIN1; NbExp=12; IntAct=EBI-366083, EBI-714158; P53350:PLK1; NbExp=6; IntAct=EBI-366083, EBI-476768; P29590:PML; NbExp=4; IntAct=EBI-366083, EBI-295890; P30405:PPIF; NbExp=4; IntAct=EBI-366083, EBI-5544229; P36873-1:PPP1CC; NbExp=2; IntAct=EBI-366083, EBI-356289; Q8WUF5:PPP1R13L; NbExp=11; IntAct=EBI-366083, EBI-5550163; P30153:PPP2R1A; NbExp=3; IntAct=EBI-366083, EBI-302388; Q13362:PPP2R5C; NbExp=4; IntAct=EBI-366083, EBI-1266156; P61289:PSME3; NbExp=7; IntAct=EBI-366083, EBI-355546; Q05397:PTK2; NbExp=13; IntAct=EBI-366083, EBI-702142; Q06609:RAD51; NbExp=2; IntAct=EBI-366083, EBI-297202; Q96PM5:RCHY1; NbExp=7; IntAct=EBI-366083, EBI-947779; Q8N488:RYBP; NbExp=3; IntAct=EBI-366083, EBI-752324; P23297:S100A1; NbExp=2; IntAct=EBI-366083, EBI-743686; P29034:S100A2; NbExp=2; IntAct=EBI-366083, EBI-752230; P26447:S100A4; NbExp=7; IntAct=EBI-366083, EBI-717058; P04271:S100B; NbExp=2; IntAct=EBI-366083, EBI-458391; Q15424:SAFB; NbExp=5; IntAct=EBI-366083, EBI-348298; Q8WTS6:SETD7; NbExp=6; IntAct=EBI-366083, EBI-1268586; P31947:SFN; NbExp=4; IntAct=EBI-366083, EBI-476295; Q96ST3:SIN3A; NbExp=2; IntAct=EBI-366083, EBI-347218; Q96EB6:SIRT1; NbExp=13; IntAct=EBI-366083, EBI-1802965; Q923E4:Sirt1 (xeno); NbExp=4; IntAct=EBI-366083, EBI-1802585; Q15796:SMAD2; NbExp=4; IntAct=EBI-366083, EBI-1040141; O95863:SNAI1; NbExp=2; IntAct=EBI-366083, EBI-1045459; Q12772:SREBF2; NbExp=3; IntAct=EBI-366083, EBI-465059; Q96SB4:SRPK1; NbExp=3; IntAct=EBI-366083, EBI-539478; Q86TM6:SYVN1; NbExp=5; IntAct=EBI-366083, EBI-947849; P20226:TBP; NbExp=2; IntAct=EBI-366083, EBI-355371; Q96GM8:TOE1; NbExp=3; IntAct=EBI-366083, EBI-717460; Q12888:TP53BP1; NbExp=2; IntAct=EBI-366083, EBI-396540; Q13625:TP53BP2; NbExp=2; 
IntAct=EBI-366083, EBI-77642; Q9H3D4:TP63; NbExp=5; IntAct=EBI-366083, EBI-2337775; O88898:Tp63 (xeno); NbExp=2; IntAct=EBI-366083, EBI-2338025; P13693:TPT1; NbExp=5; IntAct=EBI-366083, EBI-1783169; Q15672:TWIST1; NbExp=9; IntAct=EBI-366083, EBI-1797287; P26687:Twist1 (xeno); NbExp=4; IntAct=EBI-366083, EBI-6123119; P0CG48:UBC; NbExp=15; IntAct=EBI-366083, EBI-3390054; Q05086:UBE3A; NbExp=3; IntAct=EBI-366083, EBI-954357; Q96PU4:UHRF2; NbExp=3; IntAct=EBI-366083, EBI-625304; Q9H9J4:USP42; NbExp=2; IntAct=EBI-366083, EBI-2513638; Q9H9J4-2:USP42; NbExp=2; IntAct=EBI-366083, EBI-9118105; Q93009:USP7; NbExp=17; IntAct=EBI-366083, EBI-302474; P11473:VDR; NbExp=6; IntAct=EBI-366083, EBI-286357; Q99986:VRK1; NbExp=9; IntAct=EBI-366083, EBI-1769146; Q14191:WRN; NbExp=5; IntAct=EBI-366083, EBI-368417; P12956:XRCC6; NbExp=2; IntAct=EBI-366083, EBI-353208; P61981:YWHAG; NbExp=5; IntAct=EBI-366083, EBI-359832; P63104:YWHAZ; NbExp=2; IntAct=EBI-366083, EBI-347088; Q8TAQ5:ZNF420; NbExp=4; IntAct=EBI-366083, EBI-3923307; Q9PST7:znf585b (xeno); NbExp=3; IntAct=EBI-366083, EBI-1782562;', 'SUBCELLULAR LOCATION: Cytoplasm. Nucleus. Nucleus, PML body. Endoplasmic reticulum. Mitochondrion matrix. Note=Interaction with BANP promotes nuclear localization. Recruited into PML bodies together with CHEK2. Translocates to mitochondria upon oxidative stress.', 'SUBCELLULAR LOCATION: Isoform 1: Nucleus. Cytoplasm. Note=Predominantly nuclear but localizes to the cytoplasm when expressed with isoform 4.', 'SUBCELLULAR LOCATION: Isoform 2: Nucleus. Cytoplasm. Note=Localized mainly in the nucleus with minor staining in the cytoplasm.', 'SUBCELLULAR LOCATION: Isoform 3: Nucleus. Cytoplasm. Note=Localized in the nucleus in most cells but found in the cytoplasm in some cells.', 'SUBCELLULAR LOCATION: Isoform 4: Nucleus. Cytoplasm. Note=Predominantly nuclear but translocates to the cytoplasm following cell stress.', 'SUBCELLULAR LOCATION: Isoform 7: Nucleus. Cytoplasm. 
Note=Localized mainly in the nucleus with minor staining in the cytoplasm.', 'SUBCELLULAR LOCATION: Isoform 8: Nucleus. Cytoplasm. Note=Localized in both nucleus and cytoplasm in most cells. In some cells, forms foci in the nucleus that are different from nucleoli.', 'SUBCELLULAR LOCATION: Isoform 9: Cytoplasm.', 'ALTERNATIVE PRODUCTS: Event=Alternative promoter usage, Alternative splicing; Named isoforms=9; Name=1; Synonyms=p53, p53alpha; IsoId=P04637-1; Sequence=Displayed; Name=2; Synonyms=I9RET, p53beta; IsoId=P04637-2; Sequence=VSP_006535, VSP_006536; Note=Expressed in quiescent lymphocytes. Seems to be non-functional. May be produced at very low levels due to a premature stop codon in the mRNA, leading to nonsense-mediated mRNA decay.; Name=3; Synonyms=p53gamma; IsoId=P04637-3; Sequence=VSP_040560, VSP_040561; Note=Expressed in quiescent lymphocytes. Seems to be non-functional. May be produced at very low levels due to a premature stop codon in the mRNA, leading to nonsense-mediated mRNA decay.; Name=4; Synonyms=Del40-p53, Del40-p53alpha, p47; IsoId=P04637-4; Sequence=VSP_040832; Name=5; Synonyms=Del40-p53beta; IsoId=P04637-5; Sequence=VSP_040832, VSP_006535, VSP_006536; Name=6; Synonyms=Del40-p53gamma; IsoId=P04637-6; Sequence=VSP_040832, VSP_040560, VSP_040561; Name=7; Synonyms=Del133-p53, Del133-p53alpha; IsoId=P04637-7; Sequence=VSP_040833; Note=Produced by alternative promoter usage.; Name=8; Synonyms=Del133-p53beta; IsoId=P04637-8; Sequence=VSP_040833, VSP_006535, VSP_006536; Note=Produced by alternative promoter usage and alternative splicing.; Name=9; Synonyms=Del133-p53gamma; IsoId=P04637-9; Sequence=VSP_040833, VSP_040560, VSP_040561; Note=Produced by alternative promoter usage and alternative splicing.;', 'TISSUE SPECIFICITY: Ubiquitous. Isoforms are expressed in a wide range of normal tissues but in a tissue-dependent manner. 
Isoform 2 is expressed in most normal tissues but is not detected in brain, lung, prostate, muscle, fetal brain, spinal cord and fetal liver. Isoform 3 is expressed in most normal tissues but is not detected in lung, spleen, testis, fetal brain, spinal cord and fetal liver. Isoform 7 is expressed in most normal tissues but is not detected in prostate, uterus, skeletal muscle and breast. Isoform 8 is detected only in colon, bone marrow, testis, fetal brain and intestine. Isoform 9 is expressed in most normal tissues but is not detected in brain, heart, lung, fetal liver, salivary gland, breast or intestine. {ECO:0000269|PubMed:16131611}.', 'INDUCTION: Up-regulated in response to DNA damage. Isoform 2 is not induced in tumor cells in response to stress. {ECO:0000269|PubMed:10570149, ECO:0000269|PubMed:16131611}.', 'DOMAIN: The nuclear export signal acts as a transcriptional repression domain. The TADI and TADII motifs (residues 17 to 25 and 48 to 56) correspond both to 9aaTAD motifs which are transactivation domains present in a large number of yeast and animal transcription factors. {ECO:0000269|PubMed:17467953}.', 'PTM: Acetylated. Acetylation of Lys-382 by CREBBP enhances transcriptional activity. Deacetylation of Lys-382 by SIRT1 impairs its ability to induce proapoptotic program and modulate cell senescence. Deacetylation by SIRT2 impairs its ability to induce transcription activation in a AKT-dependent manner. {ECO:0000269|PubMed:10656795, ECO:0000269|PubMed:19608861, ECO:0000269|PubMed:20228809, ECO:0000269|PubMed:23431171}.', 'PTM: Phosphorylation on Ser residues mediates transcriptional activation. Phosphorylated by HIPK1 (By similarity). Phosphorylation at Ser-9 by HIPK4 increases repression activity on BIRC5 promoter. Phosphorylated on Thr-18 by VRK1. Phosphorylated on Ser-20 by CHEK2 in response to DNA damage, which prevents ubiquitination by MDM2. 
Phosphorylated on Ser-20 by PLK3 in response to reactive oxygen species (ROS), promoting p53/TP53- mediated apoptosis. Phosphorylated on Thr-55 by TAF1, which promotes MDM2-mediated degradation. Phosphorylated on Ser-33 by CDK7 in a CAK complex in response to DNA damage. Phosphorylated on Ser-46 by HIPK2 upon UV irradiation. Phosphorylation on Ser-46 is required for acetylation by CREBBP. Phosphorylated on Ser-392 following UV but not gamma irradiation. Phosphorylated on Ser-15 upon ultraviolet irradiation; which is enhanced by interaction with BANP. Phosphorylated by NUAK1 at Ser-15 and Ser-392; was intially thought to be mediated by STK11/LKB1 but it was later shown that it is indirect and that STK11/LKB1-dependent phosphorylation is probably mediated by downstream NUAK1 (PubMed:21317932). It is unclear whether AMP directly mediates phosphorylation at Ser-15. Phosphorylated on Thr-18 by isoform 1 and isoform 2 of VRK2. Phosphorylation on Thr-18 by isoform 2 of VRK2 results in a reduction in ubiquitination by MDM2 and an increase in acetylation by EP300. Stabilized by CDK5-mediated phosphorylation in response to genotoxic and oxidative stresses at Ser-15, Ser-33 and Ser-46, leading to accumulation of p53/TP53, particularly in the nucleus, thus inducing the transactivation of p53/TP53 target genes. Phosphorylated by DYRK2 at Ser-46 in response to genotoxic stress. Phosphorylated at Ser-315 and Ser- 392 by CDK2 in response to DNA-damage. 
{ECO:0000250, ECO:0000269|PubMed:10570149, ECO:0000269|PubMed:10606744, ECO:0000269|PubMed:10884347, ECO:0000269|PubMed:10951572, ECO:0000269|PubMed:11239457, ECO:0000269|PubMed:11447225, ECO:0000269|PubMed:11546806, ECO:0000269|PubMed:11551930, ECO:0000269|PubMed:11554766, ECO:0000269|PubMed:11740489, ECO:0000269|PubMed:11780126, ECO:0000269|PubMed:12810724, ECO:0000269|PubMed:14702041, ECO:0000269|PubMed:15053879, ECO:0000269|PubMed:15701641, ECO:0000269|PubMed:15866171, ECO:0000269|PubMed:16377624, ECO:0000269|PubMed:16704422, ECO:0000269|PubMed:1705009, ECO:0000269|PubMed:17108107, ECO:0000269|PubMed:17254968, ECO:0000269|PubMed:17349958, ECO:0000269|PubMed:17591690, ECO:0000269|PubMed:17967874, ECO:0000269|PubMed:18022393, ECO:0000269|PubMed:20041275, ECO:0000269|PubMed:20124405, ECO:0000269|PubMed:20959462, ECO:0000269|PubMed:21317932, ECO:0000269|PubMed:2141171, ECO:0000269|PubMed:22214662, ECO:0000269|PubMed:9372954}.', 'PTM: Dephosphorylated by PP2A-PPP2R5C holoenzyme at Thr-55. SV40 small T antigen inhibits the dephosphorylation by the AC form of PP2A.', 'PTM: May be O-glycosylated in the C-terminal basic region. Studied in EB-1 cell line. {ECO:0000269|PubMed:8632915}.', 'PTM: Ubiquitinated by MDM2 and SYVN1, which leads to proteasomal degradation. Ubiquitinated by RFWD3, which works in cooperation with MDM2 and may catalyze the formation of short polyubiquitin chains on p53/TP53 that are not targeted to the proteasome. Ubiquitinated by MKRN1 at Lys-291 and Lys-292, which leads to proteasomal degradation. Deubiquitinated by USP10, leading to its stabilization. Ubiquitinated by TRIM24, RFFL and RNF34, which leads to proteasomal degradation. Ubiquitination by TOPORS induces degradation. Deubiquitination by USP7, leading to stabilization. Isoform 4 is monoubiquitinated in an MDM2-independent manner. Ubiquitinated by RFWD2, which leads to proteasomal degradation. 
{ECO:0000269|PubMed:17121812, ECO:0000269|PubMed:19536131}.', 'PTM: Monomethylated at Lys-372 by SETD7, leading to stabilization and increased transcriptional activation. Monomethylated at Lys- 370 by SMYD2, leading to decreased DNA-binding activity and subsequent transcriptional regulation activity. Lys-372 monomethylation prevents interaction with SMYD2 and subsequent monomethylation at Lys-370. Dimethylated at Lys-373 by EHMT1 and EHMT2. Monomethylated at Lys-382 by SETD8, promoting interaction with L3MBTL1 and leading to repress transcriptional activity. Dimethylation at Lys-370 and Lys-382 diminishes p53 ubiquitination, through stabilizing association with the methyl reader PHF20. Demethylation of dimethylated Lys-370 by KDM1A prevents interaction with TP53BP1 and represses TP53-mediated transcriptional activation.', 'PTM: Sumoylated with SUMO1. Sumoylated at Lys-386 by UBC9. {ECO:0000269|PubMed:11124955, ECO:0000269|PubMed:22214662, ECO:0000269|Ref.35}.', 'DISEASE: Note=TP53 is found in increased amounts in a wide variety of transformed cells. TP53 is frequently mutated or inactivated in about 60% of cancers. TP53 defects are found in Barrett metaplasia a condition in which the normally stratified squamous epithelium of the lower esophagus is replaced by a metaplastic columnar epithelium. The condition develops as a complication in approximately 10% of patients with chronic gastroesophageal reflux disease and predisposes to the development of esophageal adenocarcinoma.', 'DISEASE: Esophageal cancer (ESCR) [MIM:133239]: A malignancy of the esophagus. The most common types are esophageal squamous cell carcinoma and adenocarcinoma. Cancer of the esophagus remains a devastating disease because it is usually not detected until it has progressed to an advanced incurable stage. 
Note=The disease is caused by mutations affecting the gene represented in this entry.', 'DISEASE: Li-Fraumeni syndrome (LFS) [MIM:151623]: Autosomal dominant familial cancer syndrome that in its classic form is defined by the existence of a proband affected by a sarcoma before 45 years with a first degree relative affected by any tumor before 45 years and another first degree relative with any tumor before 45 years or a sarcoma at any age. Other clinical definitions for LFS have been proposed (PubMed:8118819 and PubMed:8718514) and called Li-Fraumeni like syndrome (LFL). In these families affected relatives develop a diverse set of malignancies at unusually early ages. Four types of cancers account for 80% of tumors occurring in TP53 germline mutation carriers: breast cancers, soft tissue and bone sarcomas, brain tumors (astrocytomas) and adrenocortical carcinomas. Less frequent tumors include choroid plexus carcinoma or papilloma before the age of 15, rhabdomyosarcoma before the age of 5, leukemia, Wilms tumor, malignant phyllodes tumor, colorectal and gastric cancers. {ECO:0000269|PubMed:10484981, ECO:0000269|PubMed:1565144, ECO:0000269|PubMed:1737852, ECO:0000269|PubMed:1933902, ECO:0000269|PubMed:1978757, ECO:0000269|PubMed:2259385, ECO:0000269|PubMed:7887414, ECO:0000269|PubMed:8825920, ECO:0000269|PubMed:9452042}. Note=The disease is caused by mutations affecting the gene represented in this entry.', 'DISEASE: Squamous cell carcinoma of the head and neck (HNSCC) [MIM:275355]: A non-melanoma skin cancer affecting the head and neck. The hallmark of cutaneous SCC is malignant transformation of normal epidermal keratinocytes. Note=The gene represented in this entry is involved in disease pathogenesis.', 'DISEASE: Lung cancer (LNCR) [MIM:211980]: A common malignancy affecting tissues of the lung. 
The most common form of lung cancer is non-small cell lung cancer (NSCLC) that can be divided into 3 major histologic subtypes: squamous cell carcinoma, adenocarcinoma, and large cell lung cancer. NSCLC is often diagnosed at an advanced stage and has a poor prognosis. Note=The disease is caused by mutations affecting the gene represented in this entry.', 'DISEASE: Papilloma of choroid plexus (CPP) [MIM:260500]: A benign tumor of neuroectodermal origin that generally occurs in childhood, but has also been reported in adults. Although generally found within the ventricular system, choroid plexus papillomas can arise ectopically in the brain parenchyma or disseminate throughout the neuraxis. Patients present with signs and symptoms of increased intracranial pressure including headache, hydrocephalus, papilledema, nausea, vomiting, cranial nerve deficits, gait impairment, and seizures. {ECO:0000269|PubMed:12085209}. Note=The disease is caused by mutations affecting the gene represented in this entry.', 'DISEASE: Adrenocortical carcinoma (ADCC) [MIM:202300]: A malignant neoplasm of the adrenal cortex and a rare childhood tumor. It occurs with increased frequency in patients with Beckwith- Wiedemann syndrome and Li-Fraumeni syndrome. {ECO:0000269|PubMed:11481490}. Note=The disease is caused by mutations affecting the gene represented in this entry.', 'DISEASE: Basal cell carcinoma 7 (BCC7) [MIM:614740]: A common malignant skin neoplasm that typically appears on hair-bearing skin, most commonly on sun-exposed areas. It is slow growing and rarely metastasizes, but has potentialities for local invasion and destruction. It usually develops as a flat, firm, pale area that is small, raised, pink or red, translucent, shiny, and waxy, and the area may bleed following minor injury. Tumor size can vary from a few millimeters to several centimeters in diameter. {ECO:0000269|PubMed:21946351}. 
Note=Disease susceptibility is associated with variations affecting the gene represented in this entry.', 'SIMILARITY: Belongs to the p53 family. {ECO:0000305}.', 'WEB RESOURCE: Name=IARC TP53 mutation database; Note=Somatic and germline TP53 mutations in human cancers; URL=\"http://p53.iarc.fr/\";', 'WEB RESOURCE: Name=The TP53 mutant web site; Note=TP53 mutations and cancer; URL=\"http://p53.fr/\";', 'WEB RESOURCE: Name=Atlas of Genetics and Cytogenetics in Oncology and Haematology; URL=\"http://atlasgeneticsoncology.org/Genes/P53ID88.html\";', 'WEB RESOURCE: Name=NIEHS-SNPs; URL=\"http://egp.gs.washington.edu/data/tp53/\";', 'WEB RESOURCE: Name=SHMPD; Note=The Singapore human mutation and polymorphism database; URL=\"http://shmpd.bii.a-star.edu.sg/gene.php?genestart=A&genename=TP53\";', 'WEB RESOURCE: Name=Wikipedia; Note=P53 entry; URL=\"http://en.wikipedia.org/wiki/P53\";']\n", "['3D-structure', 'Acetylation', 'Activator', 'Alternative promoter usage', 'Alternative splicing', 'Apoptosis', 'Cell cycle', 'Complete proteome', 'Cytoplasm', 'Disease mutation', 'DNA-binding', 'Endoplasmic reticulum', 'Glycoprotein', 'Host-virus interaction', 'Isopeptide bond', 'Li-Fraumeni syndrome', 'Metal-binding', 'Methylation', 'Mitochondrion', 'Necrosis', 'Nucleus', 'Phosphoprotein', 'Polymorphism', 'Reference proteome', 'Transcription', 'Transcription regulation', 'Tumor suppressor', 'Ubl conjugation', 'Zinc']\n" ] } ], "source": [ "print(sp_rec.comments)\n", "print(sp_rec.keywords)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Help on Record in module Bio.SwissProt object:\n", "\n", "class Record(__builtin__.object)\n", " | Holds information from a SwissProt record.\n", " | \n", " | Members:\n", " | \n", " | - entry_name Name of this entry, e.g. 
RL1_ECOLI.\n", " | - data_class Either 'STANDARD' or 'PRELIMINARY'.\n", " | - molecule_type Type of molecule, 'PRT',\n", " | - sequence_length Number of residues.\n", " | \n", " | - accessions List of the accession numbers, e.g. ['P00321']\n", " | - created A tuple of (date, release).\n", " | - sequence_update A tuple of (date, release).\n", " | - annotation_update A tuple of (date, release).\n", " | \n", " | - description Free-format description.\n", " | - gene_name Gene name. See userman.txt for description.\n", " | - organism The source of the sequence.\n", " | - organelle The origin of the sequence.\n", " | - organism_classification The taxonomy classification. List of strings.\n", " | (http://www.ncbi.nlm.nih.gov/Taxonomy/)\n", " | - taxonomy_id A list of NCBI taxonomy id's.\n", " | - host_organism A list of names of the hosts of a virus, if any.\n", " | - host_taxonomy_id A list of NCBI taxonomy id's of the hosts, if any.\n", " | - references List of Reference objects.\n", " | - comments List of strings.\n", " | - cross_references List of tuples (db, id1[, id2][, id3]). 
See the docs.\n", " | - keywords List of the keywords.\n", " | - features List of tuples (key name, from, to, description).\n", " | from and to can be either integers for the residue\n", " | numbers, '<', '>', or '?'\n", " | \n", " | - seqinfo tuple of (length, molecular weight, CRC32 value)\n", " | - sequence The sequence.\n", " | \n", " | Methods defined here:\n", " | \n", " | __init__(self)\n", " | \n", " | ----------------------------------------------------------------------\n", " | Data descriptors defined here:\n", " | \n", " | __dict__\n", " | dictionary for instance variables (if defined)\n", " | \n", " | __weakref__\n", " | list of weak references to the object (if defined)\n", "\n" ] } ], "source": [ "help(sp_rec)" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1493\n", "('CHAIN', 1, 393, 'Cellular tumor antigen p53.', 'PRO_0000185703')\n", "('DNA_BIND', 102, 292, '', '')\n", "('REGION', 1, 83, 'Interaction with HRMT1L2.', '')\n", "('MOTIF', 17, 25, 'TADI.', '')\n", "('METAL', 176, 176, 'Zinc.', '')\n", "('SITE', 120, 120, 'Interaction with DNA.', '')\n", "('MOD_RES', 9, 9, 'Phosphoserine; by HIPK4. {ECO:0000269|PubMed:18022393}.', '')\n", "('CROSSLNK', 291, 291, 'Glycyl lysine isopeptide (Lys-Gly) (interchain with G-Cter in ubiquitin). {ECO:0000269|PubMed:19536131}.', '')\n", "('VAR_SEQ', 1, 132, 'Missing (in isoform 7, isoform 8 and isoform 9). {ECO:0000303|PubMed:16131611}.', 'VSP_040833')\n", "('VARIANT', 5, 5, 'Q -> H (in a sporadic cancer; somatic mutation; abolishes strongly phosphorylation).', 'VAR_044543')\n", "('MUTAGEN', 15, 15, 'S->A: Loss of interaction with PPP2R5C, PPP2CA AND PPP2R1A. 
{ECO:0000269|PubMed:17967874}.', '')\n", "('HELIX', 19, 23, '{ECO:0000244|PDB:3DAC}.', '')\n", "('STRAND', 27, 29, '{ECO:0000244|PDB:2K8F}.', '')\n", "('TURN', 105, 108, '{ECO:0000244|PDB:3D06}.', '')\n", "604\n", "['GeneReviews', 'DNASU', 'MIM', 'SUPFAM', 'Genevestigator', 'HOVERGEN', 'ExpressionAtlas', 'MaxQB', 'GeneWiki', 'SMR', 'Orphanet', 'CTD', 'GO', 'PhylomeDB', 'CCDS', 'neXtProt', 'BindingDB', 'RefSeq', 'PRIDE', 'DMDM', 'Reactome', 'PROSITE', 'TreeFam', 'SWISS-2DPAGE', 'NextBio', 'DIP', 'PRO', 'PANTHER', 'TCDB', 'Gene3D', 'DrugBank', 'PMAP-CutDB', 'Bgee', 'EvolutionaryTrace', 'ChEMBL', 'PIR', 'InParanoid', 'GeneCards', 'Pfam', 'PDBsum', 'KEGG', 'eggNOG', 'EMBL', 'PaxDb', 'DisProt', 'Proteomes', 'ProteinModelPortal', 'Ensembl', 'ChiTaRS', 'SignaLink', 'HPA', 'IntAct', 'MINT', 'PDB', 'UniGene', 'OMA', 'InterPro', 'PharmGKB', 'PhosphoSite', 'GenomeRNAi', 'KO', 'BioGrid', 'UCSC', 'HGNC', 'PRINTS', 'GeneTree', 'GeneID']\n", "133\n", "('GO:0000785', 'C:chromatin', 'IBA:GO_Central')\n", "('GO:0005524', 'F:ATP binding', 'IDA:UniProtKB')\n", "('GO:0006915', 'P:apoptotic process', 'TAS:Reactome')\n" ] } ], "source": [ "done_features = set()\n", "print(len(sp_rec.features))\n", "for feature in sp_rec.features:\n", " if feature[0] in done_features:\n", " continue\n", " else:\n", " done_features.add(feature[0])\n", " print(feature)\n", "print(len(sp_rec.cross_references))\n", "per_source = defaultdict(list)\n", "for xref in sp_rec.cross_references:\n", " source = xref[0]\n", " per_source[source].append(xref[1:])\n", "print(per_source.keys())\n", "done_GOs = set()\n", "print(len(per_source['GO']))\n", "for annot in per_source['GO']:\n", " if annot[1][0] in done_GOs:\n", " continue\n", " else:\n", " done_GOs.add(annot[1][0])\n", " print(annot)" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": false }, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": false }, "outputs": [], "source": [] }, 
{ "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": false }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 2", "language": "python", "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.9" } }, "nbformat": 4, "nbformat_minor": 0 }