{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## epitopepredict plotting\n", "\n", "Testing code for plotting results with epitopepredict. We use matplotlib and bokeh." ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "\n", "
" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/javascript": [ "\n", "(function(global) {\n", " function now() {\n", " return new Date();\n", " }\n", "\n", " if (typeof (window._bokeh_onload_callbacks) === \"undefined\") {\n", " window._bokeh_onload_callbacks = [];\n", " }\n", "\n", " function run_callbacks() {\n", " window._bokeh_onload_callbacks.forEach(function(callback) { callback() });\n", " delete window._bokeh_onload_callbacks\n", " console.info(\"Bokeh: all callbacks have finished\");\n", " }\n", "\n", " function load_libs(js_urls, callback) {\n", " window._bokeh_onload_callbacks.push(callback);\n", " if (window._bokeh_is_loading > 0) {\n", " console.log(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n", " return null;\n", " }\n", " if (js_urls == null || js_urls.length === 0) {\n", " run_callbacks();\n", " return null;\n", " }\n", " console.log(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n", " window._bokeh_is_loading = js_urls.length;\n", " for (var i = 0; i < js_urls.length; i++) {\n", " var url = js_urls[i];\n", " var s = document.createElement('script');\n", " s.src = url;\n", " s.async = false;\n", " s.onreadystatechange = s.onload = function() {\n", " window._bokeh_is_loading--;\n", " if (window._bokeh_is_loading === 0) {\n", " console.log(\"Bokeh: all BokehJS libraries loaded\");\n", " run_callbacks()\n", " }\n", " };\n", " s.onerror = function() {\n", " console.warn(\"failed to load library \" + url);\n", " };\n", " console.log(\"Bokeh: injecting script tag for BokehJS library: \", url);\n", " document.getElementsByTagName(\"head\")[0].appendChild(s);\n", " }\n", " };\n", "\n", " var js_urls = ['https://cdn.pydata.org/bokeh/release/bokeh-0.11.1.min.js', 'https://cdn.pydata.org/bokeh/release/bokeh-widgets-0.11.1.min.js', 'https://cdn.pydata.org/bokeh/release/bokeh-compiler-0.11.1.min.js'];\n", "\n", " var inline_js = [\n", " function(Bokeh) {\n", " 
Bokeh.set_log_level(\"info\");\n", " },\n", " \n", " function(Bokeh) {\n", " Bokeh.$(\"#1abd4f1c-3e09-4900-8838-b422a24538af\").text(\"BokehJS successfully loaded\");\n", " },\n", " function(Bokeh) {\n", " console.log(\"Bokeh: injecting CSS: https://cdn.pydata.org/bokeh/release/bokeh-0.11.1.min.css\");\n", " Bokeh.embed.inject_css(\"https://cdn.pydata.org/bokeh/release/bokeh-0.11.1.min.css\");\n", " console.log(\"Bokeh: injecting CSS: https://cdn.pydata.org/bokeh/release/bokeh-widgets-0.11.1.min.css\");\n", " Bokeh.embed.inject_css(\"https://cdn.pydata.org/bokeh/release/bokeh-widgets-0.11.1.min.css\");\n", " }\n", " ];\n", "\n", " function run_inline_js() {\n", " for (var i = 0; i < inline_js.length; i++) {\n", " inline_js[i](window.Bokeh);\n", " }\n", " }\n", "\n", " if (window._bokeh_is_loading === 0) {\n", " console.log(\"Bokeh: BokehJS loaded, going straight to plotting\");\n", " run_inline_js();\n", " } else {\n", " load_libs(js_urls, function() {\n", " console.log(\"Bokeh: BokehJS plotting callback run at\", now());\n", " run_inline_js();\n", " });\n", " }\n", "}(this));" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "from bokeh.io import output_notebook, show\n", "output_notebook()\n", "import os, types\n", "from collections import OrderedDict\n", "import numpy as np\n", "import pandas as pd\n", "from bokeh.plotting import Figure\n", "from bokeh.models import Grid, Range1d, ColumnDataSource, HoverTool\n", "from bokeh.models.renderers import GlyphRenderer\n", "from epitopepredict import base, sequtils, analysis\n", "genbankfile = 'testing/zaire-ebolavirus.gb'" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "predictions done for 9 proteins in 3 alleles\n" ] } ], "source": [ "df = sequtils.genbank2Dataframe(genbankfile, cds=True)\n", "reload(base)\n", "P = base.getPredictor('tepitope')\n", "savepath1 = 'tepitope'\n", "#run prediction 
for several alleles and save results to savepath\n", "alleles = [\"HLA-DRB1*0101\", \"HLA-DRB1*0108\", \"HLA-DRB1*0305\", \"HLA-DRB1*0401\", \n", " \"HLA-DRB1*0404\", \"HLA-DRB3*0101\", \"HLA-DRB4*0104\"]\n", "#P.predictProteins(df,length=11,alleles=alleles,save=True,path=savepath1)\n", "\n", "#iedb mhcI\n", "P2 = base.getPredictor('iedbmhc1')\n", "savepath2 = 'iedbmhc1'\n", "mhc1alleles = [\"HLA-A*01:01\",\"HLA-A*68:02\"]\n", "mhc1alleles = pd.read_csv('testing/mhc1_common.csv').allele[:3]\n", "P2.predictProteins(df,length=11,alleles=mhc1alleles,save=True,path=savepath2)\n" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": false }, "outputs": [], "source": [ "filename = 'tepitope/ZEBOVgp5.mpk'\n", "filename2 = 'iedbmhc1/ZEBOVgp5.mpk'\n", "P.data = pd.read_msgpack(filename)\n", "P2.data = pd.read_msgpack(filename2)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## a self contained version of plotTracks from epitopemap" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": false }, "outputs": [], "source": [ "colormaps={'tepitope':'Greens','netmhciipan':'Oranges','iedbmhc2':'Pinks',\n", " 'threading':'Purples','iedbmhc1':'Blues'}\n", "colors = {'tepitope':'green','netmhciipan':'orange',\n", " 'iedbmhc1':'blue','iedbmhc2':'pink','threading':'purple'}\n", "\n", "def plotTracks(predictors, title='', alleles=2, width=900, height=None,\n", " seqdepot=None, bcell=None, exp=None, tools=True):\n", " \"\"\"Plot binding predictions in multiple alleles for a single protein.\n", " predictors: a dictionary of Predictor objects\n", " with their predicted binder data usually for a single protein. 
If data from \n", " multiple proteins is provided the first one is used\n", " alleles: the minimum number of alleles for a binder to be shown\n", " \"\"\"\n", "\n", " from collections import OrderedDict\n", " from bokeh.palettes import Spectral3\n", " \n", " if type(predictors) is not types.ListType:\n", " predictors = [predictors]\n", " if tools == True:\n", " tools=\"xpan, xwheel_zoom, resize, hover, reset, save\"\n", " else:\n", " tools=''\n", " \n", " #get title from the dataframe?\n", " \n", " alls=1\n", " n = alleles\n", " for p in predictors:\n", " alls += len(p.data.groupby('allele'))\n", " if height==None:\n", " height = 130+10*alls\n", " yrange = Range1d(start=0, end=alls+3)\n", " plot = Figure(title=title,title_text_font_size=\"11pt\",plot_width=width,\n", " plot_height=height, y_range=yrange,\n", " y_axis_label='allele',\n", " tools=tools,\n", " background_fill_color=\"#FAFAFA\",\n", " toolbar_location=\"below\")\n", " h=3\n", " '''if bcell != None:\n", " plotBCell(plot, bcell, alls)\n", " if seqdepot != None:\n", " plotAnnotations(plot,seqdepot)\n", " if exp is not None:\n", " plotExp(plot, exp)'''\n", "\n", " #lists for hover data\n", " #we plot all rects at once\n", " x=[];y=[];allele=[];widths=[];clrs=[];peptide=[]\n", " predictor=[];position=[];score=[];leg=[]\n", " l=80\n", " for pred in predictors: \n", " m = pred.name\n", " print m, pred \n", " df = pred.data \n", " sckey = pred.scorekey\n", " pb = pred.getPromiscuousBinders(data=df,n=n)\n", " if len(pb) == 0:\n", " continue\n", " l = pred.getLength()\n", " grps = df.groupby('allele')\n", " alleles = grps.groups.keys()\n", " if len(pb)==0:\n", " continue\n", " c=colors[m]\n", " leg.append(m)\n", "\n", " for a,g in grps:\n", " b = pred.getBinders(data=g) \n", " b = b[b.pos.isin(pb.pos)] #only promiscuous\n", " b.sort_values('pos',inplace=True)\n", " scores = b[sckey].values\n", " score.extend(scores)\n", " pos = b['pos'].values\n", " position.extend(pos)\n", " x.extend(pos+(l/2.0)) #offset as 
coords are rect centers\n", " widths.extend([l for i in scores])\n", " clrs.extend([c for i in scores])\n", " y.extend([h+0.5 for i in scores])\n", " alls = [a for i in scores]\n", " allele.extend(alls)\n", " peptide.extend(list(b.peptide.values))\n", " predictor.extend([m for i in scores])\n", " h+=1\n", "\n", " source = ColumnDataSource(data=dict(x=x,y=y,allele=allele,peptide=peptide,\n", " predictor=predictor,position=position,score=score))\n", " plot.rect(x,y, width=widths, height=0.8,\n", " #x_range=Range1d(start=1, end=seqlen+l),\n", " color=clrs,line_color='gray',alpha=0.7,source=source)\n", " \n", " hover = plot.select(dict(type=HoverTool))\n", " hover.tooltips = OrderedDict([\n", " (\"allele\", \"@allele\"),\n", " (\"position\", \"@position\"),\n", " (\"peptide\", \"@peptide\"),\n", " (\"score\", \"@score\"),\n", " (\"predictor\", \"@predictor\"),\n", " ])\n", "\n", " seqlen = pred.data.pos.max()+l\n", " plot.set(x_range=Range1d(start=0, end=seqlen+1))#, bounds=(0, seqlen+1)))\n", " plot.xaxis.major_label_text_font_size = \"8pt\"\n", " plot.xaxis.major_label_text_font_style = \"bold\"\n", " plot.ygrid.grid_line_color = None\n", " plot.yaxis.major_label_text_font_size = '0pt'\n", " plot.xaxis.major_label_orientation = np.pi/4 \n", " return plot\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "plot = plotTracks([P,P2],alleles=3)\n", "show(plot)" ] }, { "cell_type": "code", "execution_count": 96, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "\n", "\n", " \n", "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "<Bokeh Notebook handle for In[96]>
<Bokeh Notebook handle for In[148]>
<Bokeh Notebook handle for In[108]>
<Bokeh Notebook handle for In[145]>