{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Fetching, saving and translating the human lactase (LCT) mRNA\n",
    "\n",
    "Downloads NCBI nucleotide record `NM_002299` with Biopython, writes a slice of it to `example.fasta`, reads the file back, and then transcribes and translates the sequence.\n",
    "\n",
    "**Note:** the download cell needs network access; replace the placeholder in `Entrez.email` with your own address before running."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "from Bio import Entrez, Seq, SeqIO\n",
    "from Bio.Alphabet import IUPAC"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Download the record"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "Entrez.email = \"put@your_email.here\"  # placeholder: replace with your own address\n",
    "hdl = Entrez.efetch(db='nucleotide', id=['NM_002299'], rettype='fasta')  # Lactase gene\n",
    "try:\n",
    "    # SeqIO.read expects exactly one record in the handle\n",
    "    lct_record = SeqIO.read(hdl, 'fasta')\n",
    "finally:\n",
    "    hdl.close()  # always release the Entrez handle, even if parsing fails"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Slice the record and save it as FASTA"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "SeqRecord(seq=Seq('ATGGAGCTGTCTTGGCATGTAGTCTTTATTGCCCTGCTAAGTTTTTCATGCTGG...TGA', SingleLetterAlphabet()), id='gi|32481205|ref|NM_002299.2|', name='gi|32481205|ref|NM_002299.2|', description='gi|32481205|ref|NM_002299.2| Homo sapiens lactase (LCT), mRNA', dbxrefs=[])"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 0-based, end-exclusive slice bounds. The slice starts with ATG and ends with TGA\n",
    "# (see the output), so this is presumably the coding region -- TODO confirm against\n",
    "# the GenBank annotation of the record.\n",
    "CDS_START, CDS_END = 11, 5795\n",
    "w_seq = lct_record[CDS_START:CDS_END]\n",
    "w_seq"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# The with-block closes the file even if SeqIO.write raises\n",
    "with open('example.fasta', 'w') as w_hdl:\n",
    "    SeqIO.write([w_seq], w_hdl, 'fasta')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Read the FASTA file back"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'Bio.SeqRecord.SeqRecord'>\n",
      "gi|32481205|ref|NM_002299.2| Homo sapiens lactase (LCT), mRNA\n",
      "ATGGAGCTGT\n",
      "SingleLetterAlphabet()\n"
     ]
    }
   ],
   "source": [
    "# Materialise the parsed records so later cells can refer to them by index\n",
    "# instead of relying on a variable leaked from the loop body\n",
    "recs = list(SeqIO.parse('example.fasta', 'fasta'))\n",
    "for rec in recs:\n",
    "    print(type(rec))\n",
    "    print(rec.description)\n",
    "    print(rec.seq[:10])\n",
    "    print(rec.seq.alphabet)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Transcribe and translate"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Seq('ATGGAGCTGTCTTGGCATGTAGTCTTTATTGCCCTGCTAAGTTTTTCATGCTGG...TGA', IUPACUnambiguousDNA())"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# The parsed record reports SingleLetterAlphabet (see above), so rebuild the\n",
    "# sequence with an explicit unambiguous-DNA alphabet under a new name\n",
    "dna = Seq.Seq(str(recs[0].seq), IUPAC.unambiguous_dna)\n",
    "dna"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(Seq('ATGGAGCTGTCT', IUPACUnambiguousDNA()), Seq('TCTTCATTCTGA', IUPACUnambiguousDNA()))\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "Seq('AUGGAGCUGUCUUGGCAUGUAGUCUUUAUUGCCCUGCUAAGUUUUUCAUGCUGG...UGA', IUPACUnambiguousRNA())"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Show the first and last 12 bases, then transcribe the DNA to RNA\n",
    "print((dna[:12], dna[-12:]))\n",
    "rna = dna.transcribe()\n",
    "rna"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Seq('MELSWHVVFIALLSFSCWGSDWESDRNFISTAGPLTNDLLHNLSGLLGDQSSNF...SF*', HasStopCodon(IUPACProtein(), '*'))"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Translate the DNA directly; the trailing '*' in the output marks the stop codon\n",
    "prot = dna.translate()\n",
    "prot"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}