{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": false }, "outputs": [], "source": [ "from Bio import Entrez, Medline, SeqIO" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Do not forget to inform NCBI of your email address (change below)" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false }, "outputs": [], "source": [ "Entrez.email = \"put@your_email.here\" " ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{u'DbList': ['pubmed', 'protein', 'nuccore', 'nucleotide', 'nucgss', 'nucest', 'structure', 'genome', 'assembly', 'genomeprj', 'bioproject', 'biosample', 'blastdbinfo', 'books', 'cdd', 'clinvar', 'clone', 'gap', 'gapplus', 'grasp', 'dbvar', 'epigenomics', 'gene', 'gds', 'geoprofiles', 'homologene', 'medgen', 'journals', 'mesh', 'ncbisearch', 'nlmcatalog', 'omim', 'orgtrack', 'pmc', 'popset', 'probe', 'proteinclusters', 'pcassay', 'biosystems', 'pccompound', 'pcsubstance', 'pubmedhealth', 'seqannot', 'snp', 'sra', 'taxonomy', 'toolkit', 'toolkitall', 'toolkitbook', 'unigene', 'gencoll', 'gtr']}\n" ] } ], "source": [ "# This gives you the list of available databases\n", "handle = Entrez.einfo()\n", "rec = Entrez.read(handle)\n", "handle.close()  # the parsed result is all we need; release the connection\n", "print(rec)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": false }, "outputs": [], "source": [ "# Search the nucleotide database for the P. falciparum CRT gene\n", "query = 'CRT[Gene Name] AND \"Plasmodium falciparum\"[Organism]'\n", "handle = Entrez.esearch(db=\"nucleotide\", term=query)\n", "rec_list = Entrez.read(handle)\n", "handle.close()\n", "# RetMax and Count come back from Entrez.read as strings: compare them as\n", "# integers, a plain string comparison is lexicographic ('20' < '100' is False)\n", "if int(rec_list['RetMax']) < int(rec_list['Count']):\n", "    # the first call returned only part of the ids; ask again for all of them\n", "    handle = Entrez.esearch(db=\"nucleotide\", term=query,\n", "                            retmax=rec_list['Count'])\n", "    rec_list = Entrez.read(handle)\n", "    handle.close()" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": false }, "outputs": [], "source": [ "id_list = rec_list['IdList']\n", "hdl = 
Entrez.efetch(db='nucleotide', id=id_list, rettype='gb')" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": false }, "outputs": [], "source": [ "# Read every GenBank record into a list first, then release the handle\n", "recs = list(SeqIO.parse(hdl, 'gb'))\n", "hdl.close()" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "KM288867\n", "Plasmodium falciparum clone PF3D7_0709000 chloroquine resistance transporter (CRT) gene, complete cds.\n" ] } ], "source": [ "# Pick the record of interest; `rec` is used by all the cells below\n", "for rec in recs:\n", "    if rec.name == 'KM288867':\n", "        break\n", "else:\n", "    # without this guard `rec` would silently be the last record fetched\n", "    # (or a stale value from an earlier cell when nothing was fetched)\n", "    raise ValueError('KM288867 not found among the fetched records')\n", "print(rec.name)\n", "print(rec.description)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "not processed:\n", "type: source\n", "location: [0:10000](+)\n", "qualifiers:\n", " Key: clone, Value: ['PF3D7_0709000']\n", " Key: db_xref, Value: ['taxon:5833']\n", " Key: mol_type, Value: ['genomic DNA']\n", " Key: organism, Value: ['Plasmodium falciparum']\n", "\n", "['CRT']\n", "not processed:\n", "type: mRNA\n", "location: join{[2751:3543](+), [3720:3989](+), [4168:4341](+), [4513:4646](+), [4799:4871](+), [4994:5070](+), [5166:5249](+), [5376:5427](+), [5564:5621](+), [5769:5862](+), [6055:6100](+), [6247:6302](+), [6471:7598](+)}\n", "qualifiers:\n", " Key: gene, Value: ['CRT']\n", " Key: product, Value: ['chloroquine resistance transporter']\n", "Sub-Features\n", "type: mRNA\n", "location: [2751:3543](+)\n", "qualifiers:\n", "\n", "type: mRNA\n", "location: [3720:3989](+)\n", "qualifiers:\n", "\n", "type: mRNA\n", "location: [4168:4341](+)\n", "qualifiers:\n", "\n", "type: mRNA\n", "location: [4513:4646](+)\n", "qualifiers:\n", "\n", "type: mRNA\n", "location: [4799:4871](+)\n", "qualifiers:\n", "\n", "type: mRNA\n", "location: [4994:5070](+)\n", "qualifiers:\n", "\n", "type: mRNA\n", "location: [5166:5249](+)\n", "qualifiers:\n", "\n", "type: mRNA\n", "location: [5376:5427](+)\n", "qualifiers:\n", 
"\n", "type: mRNA\n", "location: [5564:5621](+)\n", "qualifiers:\n", "\n", "type: mRNA\n", "location: [5769:5862](+)\n", "qualifiers:\n", "\n", "type: mRNA\n", "location: [6055:6100](+)\n", "qualifiers:\n", "\n", "type: mRNA\n", "location: [6247:6302](+)\n", "qualifiers:\n", "\n", "type: mRNA\n", "location: [6471:7598](+)\n", "qualifiers:\n", "\n", "\n", "not processed:\n", "type: 5'UTR\n", "location: [2751:3452](+)\n", "qualifiers:\n", " Key: gene, Value: ['CRT']\n", "\n", "not processed:\n", "type: primer_bind\n", "location: [2935:2958](+)\n", "qualifiers:\n", "\n", "not processed:\n", "type: primer_bind\n", "location: [3094:3121](+)\n", "qualifiers:\n", "\n", "not processed:\n", "type: CDS\n", "location: join{[3452:3543](+), [3720:3989](+), [4168:4341](+), [4513:4646](+), [4799:4871](+), [4994:5070](+), [5166:5249](+), [5376:5427](+), [5564:5621](+), [5769:5862](+), [6055:6100](+), [6247:6302](+), [6471:6548](+)}\n", "qualifiers:\n", " Key: codon_start, Value: ['1']\n", " Key: db_xref, Value: ['GI:706072609']\n", " Key: gene, Value: ['CRT']\n", " Key: product, Value: ['chloroquine resistance transporter']\n", " Key: protein_id, Value: ['AIW62921.1']\n", " Key: translation, Value: ['MKFASKKNNQKNSSKNDERYRELDNLVQEGNGSRLGGGSCLGKCAHVFKLIFKEIKDNIFIYILSIIYLSVCVMNKIFAKRTLNKIGNYSFVTSETHNFICMIMFFIVYSLFGNKKGNSKERHRSFNLQFFAISMLDACSVILAFIGLTRTTGNIQSFVLQLSIPINMFFCFLILRYRYHLYNYLGAVIIVVTIALVEMKLSFETQEENSIIFNLVLISALIPVCFSNMTREIVFKKYKIDILRLNAMVSFFQLFTSCLILPVYTLPFLKQLHLPYNEIWTNIKNGFACLFLGRNTVVENCGLGMAKLCDDCDGAWKTFALFSFFNICDNLITSYIIDKFSTMTYTIVSCIQGPAIAIAYYFKFLAGDVVREPRLLDFVTLFGYLFGSIIYRVGNIILERKKMRNEENEDSEGELTNVDSIITQ']\n", "Sub-Features\n", "type: CDS\n", "location: [3452:3543](+)\n", "qualifiers:\n", "\n", "type: CDS\n", "location: [3720:3989](+)\n", "qualifiers:\n", "\n", "type: CDS\n", "location: [4168:4341](+)\n", "qualifiers:\n", "\n", "type: CDS\n", "location: [4513:4646](+)\n", "qualifiers:\n", "\n", "type: CDS\n", "location: [4799:4871](+)\n", "qualifiers:\n", "\n", "type: 
CDS\n", "location: [4994:5070](+)\n", "qualifiers:\n", "\n", "type: CDS\n", "location: [5166:5249](+)\n", "qualifiers:\n", "\n", "type: CDS\n", "location: [5376:5427](+)\n", "qualifiers:\n", "\n", "type: CDS\n", "location: [5564:5621](+)\n", "qualifiers:\n", "\n", "type: CDS\n", "location: [5769:5862](+)\n", "qualifiers:\n", "\n", "type: CDS\n", "location: [6055:6100](+)\n", "qualifiers:\n", "\n", "type: CDS\n", "location: [6247:6302](+)\n", "qualifiers:\n", "\n", "type: CDS\n", "location: [6471:6548](+)\n", "qualifiers:\n", "\n", "\n", "('Exon', ExactPosition(3452), ExactPosition(3543), 1)\n", "('Exon', ExactPosition(3720), ExactPosition(3989), 1)\n", "('Exon', ExactPosition(4168), ExactPosition(4341), 1)\n", "not processed:\n", "type: primer_bind\n", "location: [4288:4323](-)\n", "qualifiers:\n", "\n", "('Exon', ExactPosition(4513), ExactPosition(4646), 1)\n", "('Exon', ExactPosition(4799), ExactPosition(4871), 1)\n", "('Exon', ExactPosition(4994), ExactPosition(5070), 1)\n", "('Exon', ExactPosition(5166), ExactPosition(5249), 1)\n", "('Exon', ExactPosition(5376), ExactPosition(5427), 1)\n", "('Exon', ExactPosition(5564), ExactPosition(5621), 1)\n", "('Exon', ExactPosition(5769), ExactPosition(5862), 1)\n", "('Exon', ExactPosition(6055), ExactPosition(6100), 1)\n", "('Exon', ExactPosition(6247), ExactPosition(6302), 1)\n", "('Exon', ExactPosition(6471), ExactPosition(6548), 1)\n", "not processed:\n", "type: 3'UTR\n", "location: [6548:7598](+)\n", "qualifiers:\n", " Key: gene, Value: ['CRT']\n", "\n", "not processed:\n", "type: primer_bind\n", "location: [7833:7856](-)\n", "qualifiers:\n", "\n" ] } ], "source": [ "# Walk the record's features: show gene names and exon coordinates,\n", "# and dump every other feature type verbatim\n", "for feat in rec.features:\n", "    kind = feat.type\n", "    if kind == 'gene':\n", "        print(feat.qualifiers['gene'])\n", "        continue\n", "    if kind == 'exon':\n", "        span = feat.location\n", "        print('Exon', span.start, span.end, span.strand)\n", "        continue\n", "    print('not processed:\\n%s' % feat)" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": 
false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "sequence_version=1\n", "source=Plasmodium falciparum (malaria parasite P. falciparum)\n", "taxonomy=['Eukaryota', 'Alveolata', 'Apicomplexa', 'Aconoidasida', 'Haemosporida', 'Plasmodium', 'Plasmodium (Laverania)']\n", "keywords=['']\n", "references=[Reference(title='Versatile control of Plasmodium falciparum gene expression with an inducible protein-RNA interaction', ...), Reference(title='Direct Submission', ...)]\n", "accessions=['KM288867']\n", "data_file_division=INV\n", "date=12-NOV-2014\n", "organism=Plasmodium falciparum\n", "gi=706072608\n" ] } ], "source": [ "# Print every annotation of the selected record as key=value\n", "annotations = rec.annotations\n", "for key in annotations:\n", "    print('%s=%s' % (key, annotations[key]))" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "10000\n" ] } ], "source": [ "# Length of the record's sequence\n", "seq_length = len(rec.seq)\n", "print(seq_length)" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "25370483\n", "LID: 10.1038/ncomms6329 [doi]\n", "STAT: In-Process\n", "DEP: 20141105\n", "MID: ['NIHMS630149']\n", "DA: 20141105\n", "AID: ['ncomms6329 [pii]', '10.1038/ncomms6329 [doi]']\n", "CRDT: ['2014/11/06 06:00']\n", "DP: 2014\n", "GR: ['1DP2OD007124/OD/NIH HHS/United States', '5-T32-ES007020/ES/NIEHS NIH HHS/United States', '5-T32-GM08334/GM/NIGMS NIH HHS/United States', 'DP2 OD007124/OD/NIH HHS/United States', 'P30 ES002109/ES/NIEHS NIH HHS/United States']\n", "OWN: NLM\n", "PT: ['Journal Article', 'Research Support, N.I.H., Extramural', \"Research Support, Non-U.S. 
Gov't\"]\n", "LA: ['eng']\n", "FAU: ['Goldfless, Stephen J', 'Wagner, Jeffrey C', 'Niles, Jacquin C']\n", "JT: Nature communications\n", "LR: 20150117\n", "PG: 5329\n", "TI: Versatile control of Plasmodium falciparum gene expression with an inducible protein-RNA interaction.\n", "PMCR: ['2015/05/05 00:00']\n", "PL: England\n", "TA: Nat Commun\n", "JID: 101528555\n", "AB: The available tools for conditional gene expression in Plasmodium falciparum are limited. Here, to enable reliable control of target gene expression, we build a system to efficiently modulate translation. We overcame several problems associated with other approaches for regulating gene expression in P. falciparum. Specifically, our system functions predictably across several native and engineered promoter contexts, and affords control over reporter and native parasite proteins irrespective of their subcellular compartmentalization. Induction and repression of gene expression are rapid, homogeneous and stable over prolonged periods. To demonstrate practical application of our system, we used it to reveal direct links between antimalarial drugs and their native parasite molecular target. This is an important outcome given the rapid spread of resistance, and intensified efforts to efficiently discover and optimize new antimalarial drugs. Overall, the studies presented highlight the utility of our system for broadly controlling gene expression and performing functional genetics in P. falciparum.\n", "AD: Department of Biological Engineering, Massachusetts Institute of Technology, 77 Massachusetts Avenue, Cambridge, Massachusetts 02139, USA. Department of Biological Engineering, Massachusetts Institute of Technology, 77 Massachusetts Avenue, Cambridge, Massachusetts 02139, USA. 
Department of Biological Engineering, Massachusetts Institute of Technology, 77 Massachusetts Avenue, Cambridge, Massachusetts 02139, USA.\n", "VI: 5\n", "IS: 2041-1723 (Electronic) 2041-1723 (Linking)\n", "PMC: PMC4223869\n", "AU: ['Goldfless SJ', 'Wagner JC', 'Niles JC']\n", "MHDA: 2014/11/06 06:00\n", "PHST: ['2014/04/15 [received]', '2014/09/20 [accepted]']\n", "OID: ['NLM: NIHMS630149 [Available on 05/05/15]', 'NLM: PMC4223869 [Available on 05/05/15]']\n", "EDAT: 2014/11/06 06:00\n", "SI: ['GENBANK/KM288848', 'GENBANK/KM288849', 'GENBANK/KM288850', 'GENBANK/KM288851', 'GENBANK/KM288852', 'GENBANK/KM288853', 'GENBANK/KM288854', 'GENBANK/KM288855', 'GENBANK/KM288856', 'GENBANK/KM288857', 'GENBANK/KM288858', 'GENBANK/KM288859', 'GENBANK/KM288860', 'GENBANK/KM288861', 'GENBANK/KM288862', 'GENBANK/KM288863', 'GENBANK/KM288864', 'GENBANK/KM288865', 'GENBANK/KM288866', 'GENBANK/KM288867']\n", "SO: Nat Commun. 2014 Nov 5;5:5329. doi: 10.1038/ncomms6329.\n", "SB: IM\n", "PMID: 25370483\n", "PST: epublish\n" ] } ], "source": [ "# For every literature reference that carries a PubMed id, fetch its\n", "# MEDLINE entry and print all of its fields\n", "refs = rec.annotations.get('references', [])  # tolerate records without references\n", "for ref in refs:\n", "    if not ref.pubmed_id:\n", "        # nothing to look up for references without a PubMed id\n", "        continue\n", "    print(ref.pubmed_id)\n", "    handle = Entrez.efetch(db=\"pubmed\", id=[ref.pubmed_id],\n", "                           rettype=\"medline\", retmode=\"text\")\n", "    # explicit try/finally: the handle is not a context manager on Python 2,\n", "    # and it must stay open while the parser is being iterated\n", "    try:\n", "        for med_rec in Medline.parse(handle):\n", "            for k, v in med_rec.items():\n", "                print('%s: %s' % (k, v))\n", "    finally:\n", "        handle.close()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 2", "language": "python", "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.10" } }, "nbformat": 4, "nbformat_minor": 0 }