{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "name": "intrahost_final.ipynb", "provenance": [], "collapsed_sections": [] }, "kernelspec": { "name": "python3", "display_name": "Python 3" } }, "cells": [ { "cell_type": "markdown", "metadata": { "id": "bGmGeK37JL9F" }, "source": [ "# Intrahost analysis in SARS-CoV-2 re-sequencing samples\n", "\n", "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/galaxyproject/SARS-CoV-2/blob/master/data/ipynb/intrahost.ipynb)" ] }, { "cell_type": "markdown", "metadata": { "id": "VWWDijVW51sq" }, "source": [ "## Run these first\n", "\n", "Three sections below are ingesting dependencies, initializating functions etc... So run it first!" ] }, { "cell_type": "markdown", "metadata": { "id": "mWrJm5X0H5Ez" }, "source": [ "### Requirements" ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "ENWKo5zIIDjm", "outputId": "de1f75fa-f956-48b6-c42b-a75613f1790c" }, "source": [ "!pip install -U pandasql" ], "execution_count": null, "outputs": [ { "output_type": "stream", "text": [ "Collecting pandasql\n", " Downloading https://files.pythonhosted.org/packages/6b/c4/ee4096ffa2eeeca0c749b26f0371bd26aa5c8b611c43de99a4f86d3de0a7/pandasql-0.7.3.tar.gz\n", "Requirement already satisfied, skipping upgrade: numpy in /usr/local/lib/python3.7/dist-packages (from pandasql) (1.19.5)\n", "Requirement already satisfied, skipping upgrade: pandas in /usr/local/lib/python3.7/dist-packages (from pandasql) (1.1.5)\n", "Requirement already satisfied, skipping upgrade: sqlalchemy in /usr/local/lib/python3.7/dist-packages (from pandasql) (1.3.23)\n", "Requirement already satisfied, skipping upgrade: python-dateutil>=2.7.3 in /usr/local/lib/python3.7/dist-packages (from pandas->pandasql) (2.8.1)\n", "Requirement already satisfied, skipping upgrade: pytz>=2017.2 in /usr/local/lib/python3.7/dist-packages (from pandas->pandasql) 
(2018.9)\n", "Requirement already satisfied, skipping upgrade: six>=1.5 in /usr/local/lib/python3.7/dist-packages (from python-dateutil>=2.7.3->pandas->pandasql) (1.15.0)\n", "Building wheels for collected packages: pandasql\n", " Building wheel for pandasql (setup.py) ... \u001b[?25l\u001b[?25hdone\n", " Created wheel for pandasql: filename=pandasql-0.7.3-cp37-none-any.whl size=26820 sha256=767c3136b6fc6e57fdd0a5b5742b9ba823198a9bc280b04e232eca84412a62ef\n", " Stored in directory: /root/.cache/pip/wheels/53/6c/18/b87a2e5fa8a82e9c026311de56210b8d1c01846e18a9607fc9\n", "Successfully built pandasql\n", "Installing collected packages: pandasql\n", "Successfully installed pandasql-0.7.3\n" ], "name": "stdout" } ] }, { "cell_type": "code", "metadata": { "id": "pPkdwxd1H_Xj" }, "source": [ "import pandas as pd\n", "import numpy as np\n", "from pandasql import sqldf\n", "pysqldf = lambda q: sqldf(q, globals())" ], "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "K3HjNcTmJB5N" }, "source": [ "### Aux datasets" ] }, { "cell_type": "code", "metadata": { "id": "4GeP3k6MIOk6" }, "source": [ "gnm_url = 'https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/009/858/895/GCF_009858895.2_ASM985889v3/GCF_009858895.2_ASM985889v3_genomic.fna.gz'\n", "gnm_file = gnm_url.split('/')[-1]" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "sDQGUEU-Id1a", "outputId": "ca567f20-720f-4522-c47d-563f3987fb28" }, "source": [ "# Get SARS-CoV-2 RefSeq genomes (in GenBank format) from NCBI\n", "import os.path\n", "from os import path\n", "if not path.exists(gnm_file[:-3]):\n", " !wget -nc {gnm_url}\n", " !gunzip {gnm_file}\n", "else:\n", " print('File {} is already here\\nDoing nothing!'.format(gnm_file))" ], "execution_count": null, "outputs": [ { "output_type": "stream", "text": [ "--2021-03-24 15:51:40-- 
https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/009/858/895/GCF_009858895.2_ASM985889v3/GCF_009858895.2_ASM985889v3_genomic.fna.gz\n", "Resolving ftp.ncbi.nlm.nih.gov (ftp.ncbi.nlm.nih.gov)... 130.14.250.10, 130.14.250.11, 2607:f220:41e:250::10, ...\n", "Connecting to ftp.ncbi.nlm.nih.gov (ftp.ncbi.nlm.nih.gov)|130.14.250.10|:443... connected.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 9591 (9.4K) [application/x-gzip]\n", "Saving to: ‘GCF_009858895.2_ASM985889v3_genomic.fna.gz’\n", "\n", "\r GCF_00985 0%[ ] 0 --.-KB/s \rGCF_009858895.2_ASM 100%[===================>] 9.37K --.-KB/s in 0s \n", "\n", "2021-03-24 15:51:40 (104 MB/s) - ‘GCF_009858895.2_ASM985889v3_genomic.fna.gz’ saved [9591/9591]\n", "\n" ], "name": "stdout" } ] }, { "cell_type": "code", "metadata": { "id": "DGUP48yVIiMB" }, "source": [ "seq = \"\"\n", "with open(gnm_file[:-3],'r') as f:\n", " for line in f:\n", " if not line.startswith('>'):\n", " seq += line.rstrip()" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "6TG1XhHDTz8G" }, "source": [ "annot = {\n", " 'start': [ 265, 805, 2719, 8554, 10054, 10972, 11842, 12091, 12685, 13024, 13441, 16236, 18039, 19620, 20658, 13441, 21562, 25392, 26244, 26522, 27201, 27393, 27755, 27893, 28273, 29557],\n", " 'end': [ 805, 2719, 8554, 10054, 10972, 11842, 12091, 12685, 13024, 13441, 16236, 18039, 19620, 20658, 21552, 13480, 25384, 26220, 26472, 27191, 27387, 27759, 27887, 28259, 29533, 29674 ], \n", " 'func': ['leader', 'nsp2', 'nsp3', 'nsp4', '3Cpro', 'nsp6', 'nsp7', 'nsp8', 'nsp9', 'nsp10', 'RdRp', 'helicase', 'ExoN', 'endoR', 'MethTr', 'nsp11', 'S', 'orf3a', 'E', 'M', 'orf6', 'orf7a', 'orf7b', 'orf8', 'N', 'orf10'], \n", " }" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "dmgYyVrtd925" }, "source": [ "gene_track = pd.DataFrame.from_dict(annot)[['start','end','func']].sort_values(by=['start']).reset_index()\n", "gene_track['top']= gene_track.index % 2\n", 
"gene_track['bottom'] = gene_track['top']-1\n", "gene_track.loc[gene_track['top'] == 0, 'color'] = 'red'\n", "gene_track.loc[gene_track['top'] != 0, 'color'] = 'blue'" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "UT_10JCZWwM6" }, "source": [ "voc_url = 'https://github.com/galaxyproject/SARS-CoV-2/raw/master/data/voc/voc.tsv.gz'" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "AIHd7KV_b8oF" }, "source": [ "# Sites under selection URL\n", "sel_url = 'https://github.com/galaxyproject/SARS-CoV-2/raw/master/data/selection/selection.tsv.gz'\n", "sel = pd.read_csv(sel_url,sep='\\t')" ], "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "M76zbHqhI96a" }, "source": [ "### Functions" ] }, { "cell_type": "code", "metadata": { "id": "FQXgtsgyIkOJ" }, "source": [ "# Validation function for checking against genome\n", "\n", "def check_against_genome(seq,df,fields,name,debug=False):\n", " \n", " \"\"\" Takes sequence (seq), dataframe (df),\n", " names of columns containing position and reference allele\n", " (e.g., ['POS','REF']), and dataframe name.\n", " Setting debug to True outputs a list of problematic sites.\n", "\n", " Returns a dict with counts and list of wrong sites if any.\n", " \"\"\"\n", "\n", " df.name = name\n", " wrong = []\n", " bad = 0\n", " good = 0\n", " pb = df[fields].to_numpy()\n", " for pos,ref in pb:\n", " base = seq[pos:pos+len(ref)]\n", " if base == ref:\n", " good += 1\n", " else:\n", " bad += 1\n", " wrong.append([pos,ref])\n", " if debug is True:\n", " return(wrong)\n", " else:\n", " return({\n", " 'name':name,\n", " 'good':good,\n", " 'bad':bad,\n", " 'num_sites':len(df)\n", " })" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "trIQgKB6I020" }, "source": [ "class coordinateError(Exception): \n", " def __init__(self, data): \n", " self.data = data\n", " def __str__(self):\n", " return 
repr(self.data)" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "XPindc6qjSWm" }, "source": [ "# Computing poisson expectations\n", "import math\n", "\n", "def ps(l,s,N):\n", " el = math.exp(-l)\n", " lkf = 1\n", " i = 0\n", " sp = {'samples':[],'N':[]}\n", " while (el*lkf*N)>0.1:\n", " sp['samples'].append(i)\n", " sp['N'].append((el*lkf*N))\n", " i += 1\n", " lkf *= l/i\n", " return(sp) " ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "r9VK55w8qiyi" }, "source": [ "# Histogram generation\n", "\n", "def create_hist(df,col,bins=100):\n", " hist, edges = np.histogram(df[col],bins)\n", " hist_df = pd.DataFrame({col: hist, \"left\": edges[:-1], \"right\": edges[1:]})\n", " hist_df[\"interval\"] = [\"%d to %d\" % (left, right) for left, right in zip(hist_df[\"left\"], hist_df[\"right\"])]\n", " return(hist_df)" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "dR0rglPQt8Og" }, "source": [ "# Simple counts\n", "def site_stats(df):\n", " tot_var = len(df)\n", " tot_sites = len(df.groupby(['POS','ALT','REF']).groups)\n", " tot_samples = df['Sample'].nunique()\n", " return(tot_var,tot_sites,tot_samples)" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "GEMDYoZo3cNQ" }, "source": [ "# Plotting AF distribution for a particular site\n", "\n", "import scipy.stats as stats\n", "from bokeh.models import Rect\n", "\n", "# non-parametric pdf\n", "def site_example(site):\n", " af = var[var['POS']==site]['AF']\n", " nparam_density = stats.kde.gaussian_kde(af.values.ravel())\n", " x = np.linspace(0, 1, 100)\n", " pdf = nparam_density(x)\n", " plot = figure(\n", " plot_height = 200, plot_width = 800,\n", " title = 'Site {} ({} Samples)'.format(site,len(af)),\n", " x_axis_label = 'AF',\n", " y_axis_label = \"Density\",\n", " x_range=Range1d(start=0, end=1,bounds=(0, 1)),\n", " y_range=Range1d(start=0, end=5,bounds=(0, 
30)),\n", " toolbar_location=\"below\"\n", " ) \n", "\n", " ticks = ColumnDataSource({'tick':af.values.ravel()})\n", " plot.line(\n", " x=x, \n", " y=pdf,\n", " line_color='orange',\n", " line_width=5\n", " )\n", " glyph = Rect(\n", " x='tick',\n", " y=0,\n", " width=0.0001, \n", " height=1,\n", " line_color='red',\n", " line_alpha=.5,\n", " fill_alpha =.5)\n", " plot.add_glyph(ticks,glyph)\n", " show(plot)" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "B_QE0TR31KmK" }, "source": [ "" ], "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "pGk58UsXJYKj" }, "source": [ "## Ingesting and validating\n", "\n", "- Translate `FUNCLASS` names\n", "- Convert coordinates to 0-based (because VCFs are 1-based)\n", "- Validate all sites by comparing contents of `REF` field to genome" ] }, { "cell_type": "code", "metadata": { "id": "zdF6zxa4KObD" }, "source": [ "# URL containing output of Galaxy workflow\n", "# These are stored here -> https://github.com/galaxyproject/SARS-CoV-2/tree/master/data\n", "per_sample_url = 'https://github.com/galaxyproject/SARS-CoV-2/raw/master/data/var/cog_20201120_by_sample.tsv.gz'\n", "per_variant_url = 'https://github.com/galaxyproject/SARS-CoV-2/raw/master/data/var/cog_20201120_by_var.tsv.gz'\n", "# Name of dataset that will be used in plots and reports\n", "dataset = 'COG-Post'" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "HveB8NVdH3QR" }, "source": [ "# Read data into Pandas dataframe\n", "var = pd.read_csv(per_sample_url,sep='\\t')" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "MErz2NAAK030" }, "source": [ "# Humanize FUNCLASS names\n", "funclass_translation = {'SILENT':'Synonymous','MISSENSE':'Non-synonymous','NONSENSE':'Stop','.':'Non-coding','NONE':'Indel'}\n", "var = var.replace({'FUNCLASS':funclass_translation})" ], "execution_count": null, "outputs": [] }, { "cell_type": 
"code", "metadata": { "id": "ymMn9HkKLBK6" }, "source": [ "# Change coordinates to 0-based\n", "var['POS'] = var['POS']-1" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "_WrL5E50LNbW" }, "source": [ "# Shorten names of some columns\n", "var = var.rename(columns={'countunique(change)':'unique_changes', # Number of unique changes\n", " 'min(AF)':'mAF', # Minimum AF\n", " 'max(AF)':'xAF', # Max AF\n", " 'countunique(FUNCLASS)':'unique_funclass', # Number of unique FUNCLASS values\n", " })" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "MnitsWA0Q3uU", "outputId": "b963e897-b8e1-4344-9bef-2d324b991b2f" }, "source": [ "# Check against Genome\n", "# This step matches content of REF field\n", "# Against genome\n", "# 'bad' should be 0\n", "\n", "outcome = check_against_genome(seq,var,['POS','REF'],'var')\n", "if outcome['bad'] > 0:\n", " raise coordinateError(\"{} sites were not verified. 
Run check_against_genome with debug=True\".format(outcome['bad']))\n", "print(outcome)" ], "execution_count": null, "outputs": [ { "output_type": "stream", "text": [ "{'name': 'var', 'good': 38919, 'bad': 0, 'num_sites': 38919}\n" ], "name": "stdout" }, { "output_type": "stream", "text": [ "/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:13: UserWarning: Pandas doesn't allow columns to be created via a new attribute name - see https://pandas.pydata.org/pandas-docs/stable/indexing.html#attribute-access\n", " del sys.path[0]\n" ], "name": "stderr" } ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "EVGxy0g48ylr", "outputId": "0edb84f3-14d8-4ef6-e883-ecc834a6fe5f" }, "source": [ "site_stats(var)" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "(38919, 5760, 1818)" ] }, "metadata": { "tags": [] }, "execution_count": 22 } ] }, { "cell_type": "markdown", "metadata": { "id": "o9GRyUMOZizX" }, "source": [ "## Thresholding\n", "\n", "Here we assess how common variants are (how many samples contain each variant) and compute a threshold of how many samples should share a variant for us to consider it in future analysis" ] }, { "cell_type": "markdown", "metadata": { "id": "ssk4SJLKZyJU" }, "source": [ "For thresholding we only select sites with allele frequencies between 5% and 50% because these are more likely to be erroneous than sites with allele frequencies between 50% and 100%." 
] }, { "cell_type": "code", "metadata": { "id": "_cCTEgXEOW_3" }, "source": [ "# For each genome position count how many unique samples contain a variant at that position with freq <= 0.5\n", "# This is a very slow approach (make it faster in the future)\n", "\n", "cpb = []\n", "for i in range(len(seq)):\n", " cpb.append(len( np.unique( var[ ( var['POS']==i ) & ( var['AF']<=0.5 ) ]['Sample'].values ) ) )" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "Ms_ssnxoVnye" }, "source": [ "# Poisson lambda\n", "l = np.sum(cpb)/len(seq)" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "NUmhcg693lyt", "outputId": "7e7d1309-bb19-42f6-a462-575984dae33d" }, "source": [ "print(l)" ], "execution_count": null, "outputs": [ { "output_type": "stream", "text": [ "0.1502190415677357\n" ], "name": "stdout" } ] }, { "cell_type": "code", "metadata": { "id": "biYeGiuATXgy" }, "source": [ "# Aggregate sample counts\n", "# How many sites are found in 1, 2, 3, 4 and so on samples\n", "unique, counts = np.unique(cpb, return_counts=True)\n", "bySiteCount = {'samples':unique,'N':counts}" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "VDD8Rnw4qpvj" }, "source": [ "# Compute Poisson estimate (see \"Functions\" section above)\n", "estimate = ps(l,bySiteCount.values(),len(seq))" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 317 }, "id": "Fa-pqRq8q5nx", "outputId": "aead2fe3-f03f-4a80-973a-ae1950b84285" }, "source": [ "import bokeh.io\n", "import bokeh.plotting\n", "from bokeh.models import ColumnDataSource,Range1d\n", "from bokeh.plotting import figure, show,output_file,save\n", "bokeh.io.output_notebook()\n", "\n", "exp = ColumnDataSource(estimate)\n", "obs = ColumnDataSource(bySiteCount)\n", "p = figure(plot_width=600, \n", " 
plot_height=300,\n", " y_axis_type='log',\n", " x_axis_type='linear',\n", "\n", " y_axis_label='# Individual variants',\n", " x_axis_label='# Samples sharing a variant',\n", " x_range=Range1d(start=0, end=20,bounds=(0, 100)),\n", " y_range=Range1d(start=0, end=100000,bounds=(0, 100000)),\n", " \n", " )\n", "p.line(y='N',x='samples',source=obs,line_color='red')\n", "p.circle(y='N',x='samples',source=obs,line_color='red')\n", "p.line(x='samples',y='N',source=exp,line_color='orange')\n", "output_file(\"thresholding_{}.html\".format(dataset))\n", "show(p)" ], "execution_count": null, "outputs": [ { "output_type": "display_data", "data": { "application/javascript": [ "\n", "(function(root) {\n", " function now() {\n", " return new Date();\n", " }\n", "\n", " var force = true;\n", "\n", " if (typeof root._bokeh_onload_callbacks === \"undefined\" || force === true) {\n", " root._bokeh_onload_callbacks = [];\n", " root._bokeh_is_loading = undefined;\n", " }\n", "\n", " var JS_MIME_TYPE = 'application/javascript';\n", " var HTML_MIME_TYPE = 'text/html';\n", " var EXEC_MIME_TYPE = 'application/vnd.bokehjs_exec.v0+json';\n", " var CLASS_NAME = 'output_bokeh rendered_html';\n", "\n", " /**\n", " * Render data to the DOM node\n", " */\n", " function render(props, node) {\n", " var script = document.createElement(\"script\");\n", " node.appendChild(script);\n", " }\n", "\n", " /**\n", " * Handle when an output is cleared or removed\n", " */\n", " function handleClearOutput(event, handle) {\n", " var cell = handle.cell;\n", "\n", " var id = cell.output_area._bokeh_element_id;\n", " var server_id = cell.output_area._bokeh_server_id;\n", " // Clean up Bokeh references\n", " if (id != null && id in Bokeh.index) {\n", " Bokeh.index[id].model.document.clear();\n", " delete Bokeh.index[id];\n", " }\n", "\n", " if (server_id !== undefined) {\n", " // Clean up Bokeh references\n", " var cmd = \"from bokeh.io.state import curstate; print(curstate().uuid_to_server['\" + server_id + 
\"'].get_sessions()[0].document.roots[0]._id)\";\n", " cell.notebook.kernel.execute(cmd, {\n", " iopub: {\n", " output: function(msg) {\n", " var id = msg.content.text.trim();\n", " if (id in Bokeh.index) {\n", " Bokeh.index[id].model.document.clear();\n", " delete Bokeh.index[id];\n", " }\n", " }\n", " }\n", " });\n", " // Destroy server and session\n", " var cmd = \"import bokeh.io.notebook as ion; ion.destroy_server('\" + server_id + \"')\";\n", " cell.notebook.kernel.execute(cmd);\n", " }\n", " }\n", "\n", " /**\n", " * Handle when a new output is added\n", " */\n", " function handleAddOutput(event, handle) {\n", " var output_area = handle.output_area;\n", " var output = handle.output;\n", "\n", " // limit handleAddOutput to display_data with EXEC_MIME_TYPE content only\n", " if ((output.output_type != \"display_data\") || (!output.data.hasOwnProperty(EXEC_MIME_TYPE))) {\n", " return\n", " }\n", "\n", " var toinsert = output_area.element.find(\".\" + CLASS_NAME.split(' ')[0]);\n", "\n", " if (output.metadata[EXEC_MIME_TYPE][\"id\"] !== undefined) {\n", " toinsert[toinsert.length - 1].firstChild.textContent = output.data[JS_MIME_TYPE];\n", " // store reference to embed id on output_area\n", " output_area._bokeh_element_id = output.metadata[EXEC_MIME_TYPE][\"id\"];\n", " }\n", " if (output.metadata[EXEC_MIME_TYPE][\"server_id\"] !== undefined) {\n", " var bk_div = document.createElement(\"div\");\n", " bk_div.innerHTML = output.data[HTML_MIME_TYPE];\n", " var script_attrs = bk_div.children[0].attributes;\n", " for (var i = 0; i < script_attrs.length; i++) {\n", " toinsert[toinsert.length - 1].firstChild.setAttribute(script_attrs[i].name, script_attrs[i].value);\n", " toinsert[toinsert.length - 1].firstChild.textContent = bk_div.children[0].textContent\n", " }\n", " // store reference to server id on output_area\n", " output_area._bokeh_server_id = output.metadata[EXEC_MIME_TYPE][\"server_id\"];\n", " }\n", " }\n", "\n", " function register_renderer(events, 
OutputArea) {\n", "\n", " function append_mime(data, metadata, element) {\n", " // create a DOM node to render to\n", " var toinsert = this.create_output_subarea(\n", " metadata,\n", " CLASS_NAME,\n", " EXEC_MIME_TYPE\n", " );\n", " this.keyboard_manager.register_events(toinsert);\n", " // Render to node\n", " var props = {data: data, metadata: metadata[EXEC_MIME_TYPE]};\n", " render(props, toinsert[toinsert.length - 1]);\n", " element.append(toinsert);\n", " return toinsert\n", " }\n", "\n", " /* Handle when an output is cleared or removed */\n", " events.on('clear_output.CodeCell', handleClearOutput);\n", " events.on('delete.Cell', handleClearOutput);\n", "\n", " /* Handle when a new output is added */\n", " events.on('output_added.OutputArea', handleAddOutput);\n", "\n", " /**\n", " * Register the mime type and append_mime function with output_area\n", " */\n", " OutputArea.prototype.register_mime_type(EXEC_MIME_TYPE, append_mime, {\n", " /* Is output safe? */\n", " safe: true,\n", " /* Index of renderer in `output_area.display_order` */\n", " index: 0\n", " });\n", " }\n", "\n", " // register the mime type if in Jupyter Notebook environment and previously unregistered\n", " if (root.Jupyter !== undefined) {\n", " var events = require('base/js/events');\n", " var OutputArea = require('notebook/js/outputarea').OutputArea;\n", "\n", " if (OutputArea.prototype.mime_types().indexOf(EXEC_MIME_TYPE) == -1) {\n", " register_renderer(events, OutputArea);\n", " }\n", " }\n", "\n", " \n", " if (typeof (root._bokeh_timeout) === \"undefined\" || force === true) {\n", " root._bokeh_timeout = Date.now() + 5000;\n", " root._bokeh_failed_load = false;\n", " }\n", "\n", " var NB_LOAD_WARNING = {'data': {'text/html':\n", " \"
\\n\"+\n", " \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n", " \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n", " \"
\\n\"+\n", " \"\\n\"+\n",
" \"from bokeh.resources import INLINE\\n\"+\n",
" \"output_notebook(resources=INLINE)\\n\"+\n",
" \"
\\n\"+\n",
" \"\\n\"+\n \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n \"
\\n\"+\n \"\\n\"+\n \"from bokeh.resources import INLINE\\n\"+\n \"output_notebook(resources=INLINE)\\n\"+\n \"
\\n\"+\n \"\n", " | REF | \n", "ALT | \n", "Samplenunique | \n", "
---|---|---|---|
0 | \n", "A | \n", "C | \n", "79 | \n", "
1 | \n", "A | \n", "G | \n", "2965 | \n", "
2 | \n", "A | \n", "T | \n", "361 | \n", "
3 | \n", "C | \n", "A | \n", "307 | \n", "
4 | \n", "C | \n", "G | \n", "1281 | \n", "
5 | \n", "C | \n", "T | \n", "18521 | \n", "
6 | \n", "G | \n", "A | \n", "1318 | \n", "
7 | \n", "G | \n", "C | \n", "1900 | \n", "
8 | \n", "G | \n", "T | \n", "4867 | \n", "
9 | \n", "T | \n", "A | \n", "147 | \n", "
10 | \n", "T | \n", "C | \n", "2318 | \n", "
11 | \n", "T | \n", "G | \n", "75 | \n", "
\\n\"+\n", " \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n", " \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n", " \"
\\n\"+\n", " \"\\n\"+\n",
" \"from bokeh.resources import INLINE\\n\"+\n",
" \"output_notebook(resources=INLINE)\\n\"+\n",
" \"
\\n\"+\n",
" \"\\n\"+\n \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n \"
\\n\"+\n \"\\n\"+\n \"from bokeh.resources import INLINE\\n\"+\n \"output_notebook(resources=INLINE)\\n\"+\n \"
\\n\"+\n \"\\n\"+\n", " \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n", " \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n", " \"
\\n\"+\n", " \"\\n\"+\n",
" \"from bokeh.resources import INLINE\\n\"+\n",
" \"output_notebook(resources=INLINE)\\n\"+\n",
" \"
\\n\"+\n",
" \"\\n\"+\n \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n \"
\\n\"+\n \"\\n\"+\n \"from bokeh.resources import INLINE\\n\"+\n \"output_notebook(resources=INLINE)\\n\"+\n \"
\\n\"+\n \"\\n\"+\n", " \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n", " \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n", " \"
\\n\"+\n", " \"\\n\"+\n",
" \"from bokeh.resources import INLINE\\n\"+\n",
" \"output_notebook(resources=INLINE)\\n\"+\n",
" \"
\\n\"+\n",
" \"\\n\"+\n \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n \"
\\n\"+\n \"\\n\"+\n \"from bokeh.resources import INLINE\\n\"+\n \"output_notebook(resources=INLINE)\\n\"+\n \"
\\n\"+\n \"\\n\"+\n", " \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n", " \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n", " \"
\\n\"+\n", " \"\\n\"+\n",
" \"from bokeh.resources import INLINE\\n\"+\n",
" \"output_notebook(resources=INLINE)\\n\"+\n",
" \"
\\n\"+\n",
" \"\\n\"+\n \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n \"
\\n\"+\n \"\\n\"+\n \"from bokeh.resources import INLINE\\n\"+\n \"output_notebook(resources=INLINE)\\n\"+\n \"
\\n\"+\n \"