{ "cells": [ { "cell_type": "markdown", "id": "04fa53a2", "metadata": {}, "source": [ "# Synthetic expression data from asynchronous random walks on star network\n", "\n", "In this series of notebooks, we demonstrate how scBoolSeq can be employed to generate synthetic scRNA-Seq datasets from Boolean states of trajectories of mechanistic Boolean models.\n", "\n", "This notebook focuses on a toy model where a transcription factor progressively activates its target genes." ] }, { "cell_type": "code", "execution_count": 1, "id": "19cbcd2e", "metadata": {}, "outputs": [ { "data": { "text/markdown": [ "This notebook has been executed using the docker image `bnediction/scboolseq:v0`" ], "text/plain": [ "<IPython.core.display.Markdown object>" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "<script type=\"text/javascript\" id=\"colomoto-setup-5747392\" class=\"to-be-removed\">\n", " if (typeof Jupyter != 'undefined') {\n", " \n", "function detect_import(cell, module) {\n", " var code = cell.get_text();\n", " code = code.replace(/\\\\\\n/g, \"\");\n", " var lines = code.split(\"\\n\");\n", " var r_simple = new RegExp(\"^(\"+module+\")$\");\n", " var r_alias = new RegExp(\"^\"+module+\"\\\\s+as\\\\s+(\\\\w+)$\");\n", " for (var i = 0; i < lines.length; ++i) {\n", " if (/^import\\s/.test(lines[i])) {\n", " code = lines[i].substr(7);\n", " var parts = code.split(\",\")\n", " for (var j = 0; j < parts.length; ++j) {\n", " code = parts[j].trim();\n", " var m = code.match(r_simple);\n", " if (!m) {\n", " m = code.match(r_alias);\n", " }\n", " if (m) {\n", " return m[1];\n", " }\n", " }\n", " }\n", " }\n", " return module;\n", "}\n", "\n", "function colomoto_replace_call(cell, orig, dest, args, comment=false) {\n", " var call_regexp = new RegExp(\"\\\\.\"+orig.replace(\".\",\"\\\\.\")+\"\\\\(\");\n", " var call_replacer = new RegExp(\"\\\\.\"+orig.replace(\".\",\"\\\\.\")\n", " + \"\\\\(\\\\s*([^\\\\)]*)?\\\\)\");\n", " var code = 
cell.get_text();\n", " var lines = code.split(\"\\n\");\n", " if (args) {\n", " var strargs = \", \"+args.join(\", \");\n", " } else {\n", " var strargs = \"\";\n", " }\n", " for (var i = 0; i < lines.length; ++i) {\n", " if (call_regexp.test(lines[i])) {\n", " var code = \"\"\n", " if (comment) {\n", " code += \"#\"+lines[i]+\"\\n\";\n", " }\n", " code += lines[i].replace(call_replacer, \".\"+dest+\"($1\"+strargs+\")\")\n", " lines[i] = code;\n", " }\n", " }\n", " cell.set_text(code)\n", "}\n", "\n", "function colomoto_upload(Jupyter, ssid, input, py_callback_name, orig, dest) {\n", "\n", " function callback(out_data) {\n", " var cell_element = $(\"#\"+ssid).parents('.cell');\n", " var cell_idx = Jupyter.notebook.get_cell_elements().index(cell_element);\n", " var cell = Jupyter.notebook.get_cell(cell_idx);\n", "\n", " var filename = out_data.content.text;\n", "\n", " var code = cell.get_text();\n", " code = code.replace(new RegExp(\"\\\\b\" + orig.replace('.', '\\\\.')\n", " + \"\\\\(\\\\s*((\\\\w+)=[^\\\\)]*)?\\\\)\"),\n", " dest+\"(\\\"\"+filename+\"\\\",$1)\");\n", " code = code.replace('\",)', '\")')\n", " cell.set_text(code);\n", "\n", " Jupyter.notebook.select(cell_idx);\n", " Jupyter.notebook.execute_cell_and_select_below();\n", " }\n", "\n", " if (! 
(window.File && window.FileReader && window.FileList && window.Blob)) {\n", " alert(\"Interactive file upload is not supported by your browser.\");\n", " return;\n", " }\n", "\n", " input.disabled = true;\n", " input.style.cursor = \"wait\";\n", " input.parentElement.style.cursor = \"wait\";\n", "\n", " var f = input.files[0];\n", " var reader = new FileReader();\n", " reader.onload = (function(f) {\n", " return function (e) {\n", " var obj = {\n", " content: e.target.result,\n", " name: f.name\n", " };\n", "\n", " //var pycb = py_callback_name+\"(\"+JSON.stringify(obj)+\")\"\n", " // hack/workaround:\n", " // it seems that Jupyter does not like very long lines\n", " // so we split the data in chunks\n", " var chunk_length = 100;\n", " var pycb = \"__colomoto_upload_name = \" + JSON.stringify(obj.name) + \"\\n\";\n", " pycb += \"__colomoto_upload_content = \\\\\\n\";\n", " for (var i = 0; i < obj.content.length; i += chunk_length) {\n", " pycb += \"\\\"\" + obj.content.substr(i, chunk_length)+\"\\\"\\\\\\n\"\n", " }\n", " pycb += \"\\n\"\n", " pycb += py_callback_name+\"({'name':__colomoto_upload_name, 'content': __colomoto_upload_content})\\n\";\n", " pycb += \"del __colomoto_upload_name, __colomoto_upload_content\"\n", "\n", " IPython.notebook.kernel.execute(pycb, {iopub: {output: callback}});\n", " };\n", " })(f);\n", " reader.readAsDataURL(f);\n", "}\n", "\n", "function resolve_function(tool_api, funcname) {\n", " if (tool_api.hasOwnProperty(funcname)) {\n", " return tool_api[funcname];\n", " } else {\n", " return window[funcname];\n", " }\n", "}\n", "\n", "function colomoto_extension(Jupyter, ssid, name, menu, toolbar, tool_api) {\n", "\n", " function insert_snippet_code(snippet) {\n", " var cell = Jupyter.notebook.get_selected_cell();\n", " Jupyter.notebook.edit_mode();\n", " cell.code_mirror.replaceSelection(snippet, 'around');\n", " //cell.focus_editor();\n", " }\n", "\n", " /**\n", " from https://github.com/moble/jupyter_boilerplate/blob/master/main.js\n", 
" */\n", " function callback_insert_snippet (evt) {\n", " // this (or event.currentTarget, see below) always refers to the DOM\n", " // element the listener was attached to - see\n", " // http://stackoverflow.com/questions/12077859\n", " insert_snippet_code($(evt.currentTarget).data('snippet-code'));\n", " }\n", " function build_menu_element (menu_item_spec, direction) {\n", " // Create the menu item html element\n", " var element = $('<li/>');\n", "\n", " if (typeof menu_item_spec == 'string') {\n", " if (menu_item_spec != '---') {\n", " return element.html(menu_item_spec)\n", " .addClass('ui-state-disabled')\n", " .attr({\"style\": \"padding:2px .4em\"})\n", " ;\n", " }\n", " return element.addClass('divider');\n", " }\n", "\n", " var a = $('<a/>')\n", " .attr('href', '#')\n", " .html(menu_item_spec.name)\n", " .appendTo(element);\n", " if (menu_item_spec.hasOwnProperty('snippet')) {\n", " var snippet = menu_item_spec.snippet;\n", " if (typeof snippet == 'string' || snippet instanceof String) {\n", " snippet = [snippet];\n", " }\n", " a.attr({\n", " 'title' : \"\", // Do not remove this, even though it's empty!\n", " 'data-snippet-code' : snippet.join('\\n'),\n", " })\n", " .on('click', callback_insert_snippet)\n", " .addClass('snippet');\n", " }\n", " else if (menu_item_spec.hasOwnProperty('internal-link')) {\n", " a.attr('href', menu_item_spec['internal-link']);\n", " }\n", " else if (menu_item_spec.hasOwnProperty('external-link')) {\n", " a.empty();\n", " a.attr('href', menu_item_spec['external-link']);\n", " a.attr({\n", " 'target' : '_blank',\n", " 'title' : 'Opens in a new window',\n", " });\n", " $('<i class=\"fa fa-external-link menu-icon pull-right\"/>').appendTo(a);\n", " $('<span/>').html(menu_item_spec.name).appendTo(a);\n", " }\n", "\n", " if (menu_item_spec.hasOwnProperty('sub-menu')) {\n", " element\n", " .addClass('dropdown-submenu')\n", " .toggleClass('dropdown-submenu-left', direction === 'left');\n", " var sub_element = $('<ul 
class=\"dropdown-menu\"/>')\n", " .toggleClass('dropdown-menu-compact', menu_item_spec.overlay === true) // For space-saving menus\n", " .appendTo(element);\n", "\n", " var new_direction = (menu_item_spec['sub-menu-direction'] === 'left') ? 'left' : 'right';\n", " for (var j=0; j<menu_item_spec['sub-menu'].length; ++j) {\n", " var sub_menu_item_spec = build_menu_element(menu_item_spec['sub-menu'][j], new_direction);\n", " if(sub_menu_item_spec !== null) {\n", " sub_menu_item_spec.appendTo(sub_element);\n", " }\n", " }\n", " }\n", "\n", " return element;\n", " }\n", "\n", " function menu_setup (menu_item_specs, sibling, insert_before_sibling) {\n", " for (var i=0; i<menu_item_specs.length; ++i) {\n", " var menu_item_spec;\n", " if (insert_before_sibling) {\n", " menu_item_spec = menu_item_specs[i];\n", " } else {\n", " menu_item_spec = menu_item_specs[menu_item_specs.length-1-i];\n", " }\n", " var direction = (menu_item_spec['menu-direction'] == 'left') ? 'left' : 'right';\n", " var menu_element = build_menu_element(menu_item_spec, direction);\n", " // We need special properties if this item is in the navbar\n", " if ($(sibling).parent().is('ul.nav.navbar-nav')) {\n", " menu_element\n", " .addClass('dropdown')\n", " .removeClass('dropdown-submenu dropdown-submenu-left');\n", " menu_element.children('a')\n", " .addClass('dropdown-toggle')\n", " .attr({\n", " 'id': name+'_menu',\n", " 'data-toggle' : 'dropdown',\n", " 'aria-expanded' : 'false'\n", " });\n", " }\n", "\n", " // Insert the menu element into DOM\n", " menu_element[insert_before_sibling ? 
'insertBefore': 'insertAfter'](sibling);\n", " }\n", " }\n", " /** end from */\n", "\n", "\n", " function self_cleanup() {\n", " var cell_element = $(\"script[class='to-be-removed']\").parents('.cell');\n", " var cell_idx = Jupyter.notebook.get_cell_elements().index(cell_element);\n", " var cell = Jupyter.notebook.get_cell(cell_idx);\n", " var to_remove = -1;\n", " for (var i = 0; i < cell.output_area.outputs.length; ++i) {\n", " var oa = cell.output_area.outputs[i];\n", " if (oa.output_type == \"display_data\"\n", " && typeof oa.data[\"text/html\"] != 'undefined'\n", " && oa.data[\"text/html\"].indexOf(' class=\"to-be-removed\"') >= 0) {\n", " to_remove = i;\n", " break;\n", " }\n", " }\n", " if (to_remove == -1) {\n", " console.log(\"cannot find toberemoved\");\n", " } else {\n", " cell.output_area.outputs.splice(to_remove, 1);\n", " }\n", " }\n", "\n", " function toolbar_setup(actions) {\n", " var buttons = [];\n", " for (var i = 0; i < actions.length; ++i) {\n", " var setup = actions[i].setup;\n", " if (typeof setup.handler == 'string') {\n", " setup.handler = resolve_function(tool_api, setup.handler);\n", " }\n", " buttons.push(Jupyter.actions.register(actions[i].setup,\n", " actions[i].name, name));\n", " }\n", " $(\"#\"+name+\"-toolbar\").remove();\n", " Jupyter.toolbar.add_buttons_group(buttons, name+\"-toolbar\");\n", " }\n", "\n", " function replace_menu_snippets(menu_spec, orig, dest) {\n", " if (menu_spec.hasOwnProperty(\"snippet\")) {\n", " var snippet = menu_spec.snippet;\n", " if (typeof snippet == \"string\" || snippet instanceof String) {\n", " menu_spec[\"snippet\"] = snippet.replace(orig, dest);\n", " } else {\n", " for (var i = 0; i < snippet.length; ++i) {\n", " menu_spec[\"snippet\"][i] = snippet[i].replace(orig, dest);\n", " }\n", " }\n", " }\n", " if (menu_spec.hasOwnProperty(\"sub-menu\")) {\n", " for (var i = 0; i < menu_spec[\"sub-menu\"].length; ++i) {\n", " replace_menu_snippets(menu_spec[\"sub-menu\"][i], orig, dest);\n", " }\n", " 
}\n", " }\n", "\n", " function load_ipython_extension() {\n", "\n", " var mycellelt = $(\"#\"+ssid).parents('.cell');\n", " var myidx = Jupyter.notebook.get_cell_elements().index(mycellelt);\n", " var import_cell = Jupyter.notebook.get_cell(myidx);\n", "\n", " var alias = detect_import(import_cell, name);\n", " tool_api.module_alias = alias;\n", " if (alias && alias != name) {\n", " var orig = new RegExp(\"\\\\b\"+name+\"\\\\b\", \"g\");\n", " replace_menu_snippets(menu, orig, alias);\n", " }\n", "\n", " if (toolbar) {\n", " toolbar_setup(toolbar);\n", " }\n", "\n", " $(\"#\"+name+\"_menu\").parent().remove();\n", " if (menu) {\n", " menu_setup([menu], $(\"#help_menu\").parent(), true);\n", " }\n", "\n", " if (tool_api.hasOwnProperty(\"post_install_callback\")) {\n", " tool_api.post_install_callback();\n", " }\n", "\n", " setTimeout(self_cleanup, 5000);\n", " };\n", "\n", " load_ipython_extension();\n", "}\n", "\n", "function resolve_toolbar_handlers(tool_api, toolbar_spec) {\n", " for (var i = 0; i < toolbar_spec.length; ++i) {\n", " func = resolve_function(tool_api, toolbar_spec[i][\"setup\"][\"handler\"]);\n", " toolbar_spec[i][\"setup\"][\"handler\"] = func;\n", " }\n", " return toolbar_spec\n", "}\n", "\n", "\n", " var minibn_jsapi = { };\n", " colomoto_extension(Jupyter, \"colomoto-setup-5747392\", \"minibn\", null, null, minibn_jsapi);\n", " }</script>" ], "text/plain": [ "<IPython.core.display.HTML object>" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "import pandas as pd\n", "import numpy as np\n", "import random\n", "from colomoto.minibn import * # for Boolean network manipulation\n", "from scboolseq import scBoolSeq\n", "\n", "# set seed for reproducible results\n", "_rng_seed = 19834650\n", "# use a Generator instead of numpy's singleton\n", "_rng = np.random.default_rng(_rng_seed)\n", "random.seed(_rng_seed)" ] }, { "cell_type": "markdown", "id": "a4b051c2", "metadata": {}, "source": [ "## Load Boolean network model" ] }, { 
"cell_type": "code", "execution_count": 2, "id": "4a9e3bae", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "gene1 <- tf\n", "gene10 <- tf\n", "gene2 <- tf\n", "gene3 <- tf\n", "gene4 <- tf\n", "gene5 <- tf\n", "gene6 <- tf\n", "gene7 <- tf\n", "gene8 <- tf\n", "gene9 <- tf\n", "tf <- 1" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "bn = BooleanNetwork.load(\"models/star.bnet\")\n", "bn" ] }, { "cell_type": "code", "execution_count": 3, "id": "09653812", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "# computing graph layout...\n" ] }, { "data": { "image/svg+xml": [ "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n", "<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n", " \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n", "<!-- Generated by graphviz version 3.0.0 (20220315.2325)\n", " -->\n", "<!-- Pages: 1 -->\n", "<svg width=\"962pt\" height=\"131pt\"\n", " viewBox=\"0.00 0.00 961.84 131.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n", "<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 127)\">\n", "<polygon fill=\"white\" stroke=\"transparent\" points=\"-4,4 -4,-127 957.84,-127 957.84,4 -4,4\"/>\n", "<!-- tf -->\n", "<g id=\"node1\" class=\"node\">\n", "<title>tf</title>\n", "<ellipse fill=\"none\" stroke=\"black\" cx=\"471\" cy=\"-105\" rx=\"27\" ry=\"18\"/>\n", "<text text-anchor=\"middle\" x=\"471\" y=\"-101.3\" font-family=\"Times,serif\" font-size=\"14.00\">tf</text>\n", "</g>\n", "<!-- gene1 -->\n", "<g id=\"node2\" class=\"node\">\n", "<title>gene1</title>\n", "<ellipse fill=\"none\" stroke=\"black\" cx=\"39\" cy=\"-18\" rx=\"38.99\" ry=\"18\"/>\n", "<text text-anchor=\"middle\" x=\"39\" y=\"-14.3\" font-family=\"Times,serif\" font-size=\"14.00\">gene1</text>\n", "</g>\n", "<!-- tf->gene1 -->\n", "<g id=\"edge3\" class=\"edge\">\n", "<title>tf->gene1</title>\n", "<path 
fill=\"none\" stroke=\"black\" d=\"M444.5,-101.22C381.56,-94.2 218.49,-73.59 87,-36 84.03,-35.15 80.98,-34.22 77.93,-33.24\"/>\n", "<polygon fill=\"black\" stroke=\"black\" points=\"78.85,-29.86 68.26,-30.01 76.63,-36.5 78.85,-29.86\"/>\n", "<text text-anchor=\"middle\" x=\"229.5\" y=\"-57.8\" font-family=\"Times,serif\" font-size=\"14.00\">+</text>\n", "</g>\n", "<!-- gene2 -->\n", "<g id=\"node3\" class=\"node\">\n", "<title>gene2</title>\n", "<ellipse fill=\"none\" stroke=\"black\" cx=\"135\" cy=\"-18\" rx=\"38.99\" ry=\"18\"/>\n", "<text text-anchor=\"middle\" x=\"135\" y=\"-14.3\" font-family=\"Times,serif\" font-size=\"14.00\">gene2</text>\n", "</g>\n", "<!-- tf->gene2 -->\n", "<g id=\"edge6\" class=\"edge\">\n", "<title>tf->gene2</title>\n", "<path fill=\"none\" stroke=\"black\" d=\"M445.31,-98.95C394.67,-88.78 278.55,-64.35 183,-36 180.04,-35.12 177,-34.17 173.95,-33.18\"/>\n", "<polygon fill=\"black\" stroke=\"black\" points=\"174.88,-29.8 164.28,-29.92 172.64,-36.43 174.88,-29.8\"/>\n", "<text text-anchor=\"middle\" x=\"307.5\" y=\"-57.8\" font-family=\"Times,serif\" font-size=\"14.00\">+</text>\n", "</g>\n", "<!-- gene3 -->\n", "<g id=\"node4\" class=\"node\">\n", "<title>gene3</title>\n", "<ellipse fill=\"none\" stroke=\"black\" cx=\"231\" cy=\"-18\" rx=\"38.99\" ry=\"18\"/>\n", "<text text-anchor=\"middle\" x=\"231\" y=\"-14.3\" font-family=\"Times,serif\" font-size=\"14.00\">gene3</text>\n", "</g>\n", "<!-- tf->gene3 -->\n", "<g id=\"edge5\" class=\"edge\">\n", "<title>tf->gene3</title>\n", "<path fill=\"none\" stroke=\"black\" d=\"M447.55,-95.7C406.41,-81.13 320.84,-50.82 270.46,-32.98\"/>\n", "<polygon fill=\"black\" stroke=\"black\" points=\"271.47,-29.62 260.87,-29.58 269.13,-36.22 271.47,-29.62\"/>\n", "<text text-anchor=\"middle\" x=\"372.5\" y=\"-57.8\" font-family=\"Times,serif\" font-size=\"14.00\">+</text>\n", "</g>\n", "<!-- gene4 -->\n", "<g id=\"node5\" class=\"node\">\n", "<title>gene4</title>\n", "<ellipse fill=\"none\" stroke=\"black\" 
cx=\"327\" cy=\"-18\" rx=\"38.99\" ry=\"18\"/>\n", "<text text-anchor=\"middle\" x=\"327\" y=\"-14.3\" font-family=\"Times,serif\" font-size=\"14.00\">gene4</text>\n", "</g>\n", "<!-- tf->gene4 -->\n", "<g id=\"edge1\" class=\"edge\">\n", "<title>tf->gene4</title>\n", "<path fill=\"none\" stroke=\"black\" d=\"M451.28,-92.36C427.51,-78.33 387.24,-54.56 359,-37.89\"/>\n", "<polygon fill=\"black\" stroke=\"black\" points=\"360.68,-34.82 350.29,-32.75 357.12,-40.85 360.68,-34.82\"/>\n", "<text text-anchor=\"middle\" x=\"414.5\" y=\"-57.8\" font-family=\"Times,serif\" font-size=\"14.00\">+</text>\n", "</g>\n", "<!-- gene5 -->\n", "<g id=\"node6\" class=\"node\">\n", "<title>gene5</title>\n", "<ellipse fill=\"none\" stroke=\"black\" cx=\"423\" cy=\"-18\" rx=\"38.99\" ry=\"18\"/>\n", "<text text-anchor=\"middle\" x=\"423\" y=\"-14.3\" font-family=\"Times,serif\" font-size=\"14.00\">gene5</text>\n", "</g>\n", "<!-- tf->gene5 -->\n", "<g id=\"edge8\" class=\"edge\">\n", "<title>tf->gene5</title>\n", "<path fill=\"none\" stroke=\"black\" d=\"M461.97,-88.01C455.02,-75.7 445.28,-58.46 437.27,-44.28\"/>\n", "<polygon fill=\"black\" stroke=\"black\" points=\"440.28,-42.48 432.31,-35.5 434.18,-45.92 440.28,-42.48\"/>\n", "<text text-anchor=\"middle\" x=\"456.5\" y=\"-57.8\" font-family=\"Times,serif\" font-size=\"14.00\">+</text>\n", "</g>\n", "<!-- gene6 -->\n", "<g id=\"node7\" class=\"node\">\n", "<title>gene6</title>\n", "<ellipse fill=\"none\" stroke=\"black\" cx=\"519\" cy=\"-18\" rx=\"38.99\" ry=\"18\"/>\n", "<text text-anchor=\"middle\" x=\"519\" y=\"-14.3\" font-family=\"Times,serif\" font-size=\"14.00\">gene6</text>\n", "</g>\n", "<!-- tf->gene6 -->\n", "<g id=\"edge7\" class=\"edge\">\n", "<title>tf->gene6</title>\n", "<path fill=\"none\" stroke=\"black\" d=\"M480.02,-88.01C486.98,-75.7 496.72,-58.46 504.72,-44.28\"/>\n", "<polygon fill=\"black\" stroke=\"black\" points=\"507.81,-45.92 509.68,-35.5 501.72,-42.48 507.81,-45.92\"/>\n", "<text text-anchor=\"middle\" 
x=\"504.5\" y=\"-57.8\" font-family=\"Times,serif\" font-size=\"14.00\">+</text>\n", "</g>\n", "<!-- gene7 -->\n", "<g id=\"node8\" class=\"node\">\n", "<title>gene7</title>\n", "<ellipse fill=\"none\" stroke=\"black\" cx=\"615\" cy=\"-18\" rx=\"38.99\" ry=\"18\"/>\n", "<text text-anchor=\"middle\" x=\"615\" y=\"-14.3\" font-family=\"Times,serif\" font-size=\"14.00\">gene7</text>\n", "</g>\n", "<!-- tf->gene7 -->\n", "<g id=\"edge10\" class=\"edge\">\n", "<title>tf->gene7</title>\n", "<path fill=\"none\" stroke=\"black\" d=\"M490.72,-92.36C514.49,-78.33 554.75,-54.56 582.99,-37.89\"/>\n", "<polygon fill=\"black\" stroke=\"black\" points=\"584.87,-40.85 591.7,-32.75 581.31,-34.82 584.87,-40.85\"/>\n", "<text text-anchor=\"middle\" x=\"558.5\" y=\"-57.8\" font-family=\"Times,serif\" font-size=\"14.00\">+</text>\n", "</g>\n", "<!-- gene8 -->\n", "<g id=\"node9\" class=\"node\">\n", "<title>gene8</title>\n", "<ellipse fill=\"none\" stroke=\"black\" cx=\"711\" cy=\"-18\" rx=\"38.99\" ry=\"18\"/>\n", "<text text-anchor=\"middle\" x=\"711\" y=\"-14.3\" font-family=\"Times,serif\" font-size=\"14.00\">gene8</text>\n", "</g>\n", "<!-- tf->gene8 -->\n", "<g id=\"edge9\" class=\"edge\">\n", "<title>tf->gene8</title>\n", "<path fill=\"none\" stroke=\"black\" d=\"M494.45,-95.7C535.58,-81.13 621.15,-50.82 671.54,-32.98\"/>\n", "<polygon fill=\"black\" stroke=\"black\" points=\"672.86,-36.22 681.12,-29.58 670.52,-29.62 672.86,-36.22\"/>\n", "<text text-anchor=\"middle\" x=\"612.5\" y=\"-57.8\" font-family=\"Times,serif\" font-size=\"14.00\">+</text>\n", "</g>\n", "<!-- gene9 -->\n", "<g id=\"node10\" class=\"node\">\n", "<title>gene9</title>\n", "<ellipse fill=\"none\" stroke=\"black\" cx=\"807\" cy=\"-18\" rx=\"38.99\" ry=\"18\"/>\n", "<text text-anchor=\"middle\" x=\"807\" y=\"-14.3\" font-family=\"Times,serif\" font-size=\"14.00\">gene9</text>\n", "</g>\n", "<!-- tf->gene9 -->\n", "<g id=\"edge2\" class=\"edge\">\n", "<title>tf->gene9</title>\n", "<path fill=\"none\" 
stroke=\"black\" d=\"M496.68,-98.97C547.33,-88.82 663.47,-64.42 759,-36 761.95,-35.12 764.99,-34.17 768.04,-33.17\"/>\n", "<polygon fill=\"black\" stroke=\"black\" points=\"769.35,-36.42 777.71,-29.91 767.11,-29.79 769.35,-36.42\"/>\n", "<text text-anchor=\"middle\" x=\"695.5\" y=\"-57.8\" font-family=\"Times,serif\" font-size=\"14.00\">+</text>\n", "</g>\n", "<!-- gene10 -->\n", "<g id=\"node11\" class=\"node\">\n", "<title>gene10</title>\n", "<ellipse fill=\"none\" stroke=\"black\" cx=\"909\" cy=\"-18\" rx=\"44.69\" ry=\"18\"/>\n", "<text text-anchor=\"middle\" x=\"909\" y=\"-14.3\" font-family=\"Times,serif\" font-size=\"14.00\">gene10</text>\n", "</g>\n", "<!-- tf->gene10 -->\n", "<g id=\"edge4\" class=\"edge\">\n", "<title>tf->gene10</title>\n", "<path fill=\"none\" stroke=\"black\" d=\"M497.41,-100.92C560.18,-93.25 722.88,-71.35 855,-36 858.53,-35.06 862.17,-34.03 865.82,-32.96\"/>\n", "<polygon fill=\"black\" stroke=\"black\" points=\"866.86,-36.3 875.43,-30.07 864.84,-29.6 866.86,-36.3\"/>\n", "<text text-anchor=\"middle\" x=\"778.5\" y=\"-57.8\" font-family=\"Times,serif\" font-size=\"14.00\">+</text>\n", "</g>\n", "</g>\n", "</svg>\n" ], "text/plain": [ "<networkx.classes.multidigraph.MultiDiGraph at 0x7f2e843b44c0>" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "bn.influence_graph()" ] }, { "cell_type": "markdown", "id": "77abe8f3", "metadata": {}, "source": [ "## Simulation with random walk\n", "\n", "With the asynchronous update mode, the genes can be activated in any order. 
Here, we randomly sample one trajectory of this model, which essentially boils down to selecting a random ordering of genes that get activated.\n", "\n", "Let us first specify the initial state of the network:" ] }, { "cell_type": "code", "execution_count": 4, "id": "ee4a6403", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'tf': 1,\n", " 'gene1': 0,\n", " 'gene2': 0,\n", " 'gene3': 0,\n", " 'gene4': 0,\n", " 'gene5': 0,\n", " 'gene6': 0,\n", " 'gene7': 0,\n", " 'gene8': 0,\n", " 'gene9': 0,\n", " 'gene10': 0}" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "initial_state = bn.zero()\n", "initial_state[\"tf\"] = 1\n", "initial_state" ] }, { "cell_type": "markdown", "id": "60cedafe", "metadata": {}, "source": [ "Then, we use `minibn` to generate a random walk in the asynchronous dynamics of the Boolean network from the given initial state:" ] }, { "cell_type": "code", "execution_count": 5, "id": "05646298", "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>tf</th>\n", " <th>gene1</th>\n", " <th>gene2</th>\n", " <th>gene3</th>\n", " <th>gene4</th>\n", " <th>gene5</th>\n", " <th>gene6</th>\n", " <th>gene7</th>\n", " <th>gene8</th>\n", " <th>gene9</th>\n", " <th>gene10</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>1</td>\n", " <td>0</td>\n", " <td>0</td>\n", " <td>0</td>\n", " <td>0</td>\n", " <td>0</td>\n", " <td>0</td>\n", " <td>0</td>\n", " <td>0</td>\n", " <td>0</td>\n", " <td>0</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>1</td>\n", " <td>0</td>\n", " <td>0</td>\n", " 
<td>0</td>\n", " <td>0</td>\n", " <td>0</td>\n", " <td>0</td>\n", " <td>0</td>\n", " <td>1</td>\n", " <td>0</td>\n", " <td>0</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>1</td>\n", " <td>1</td>\n", " <td>0</td>\n", " <td>0</td>\n", " <td>0</td>\n", " <td>0</td>\n", " <td>0</td>\n", " <td>0</td>\n", " <td>1</td>\n", " <td>0</td>\n", " <td>0</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>1</td>\n", " <td>1</td>\n", " <td>0</td>\n", " <td>1</td>\n", " <td>0</td>\n", " <td>0</td>\n", " <td>0</td>\n", " <td>0</td>\n", " <td>1</td>\n", " <td>0</td>\n", " <td>0</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>1</td>\n", " <td>1</td>\n", " <td>0</td>\n", " <td>1</td>\n", " <td>0</td>\n", " <td>0</td>\n", " <td>1</td>\n", " <td>0</td>\n", " <td>1</td>\n", " <td>0</td>\n", " <td>0</td>\n", " </tr>\n", " <tr>\n", " <th>5</th>\n", " <td>1</td>\n", " <td>1</td>\n", " <td>0</td>\n", " <td>1</td>\n", " <td>0</td>\n", " <td>0</td>\n", " <td>1</td>\n", " <td>0</td>\n", " <td>1</td>\n", " <td>1</td>\n", " <td>0</td>\n", " </tr>\n", " <tr>\n", " <th>6</th>\n", " <td>1</td>\n", " <td>1</td>\n", " <td>0</td>\n", " <td>1</td>\n", " <td>0</td>\n", " <td>0</td>\n", " <td>1</td>\n", " <td>0</td>\n", " <td>1</td>\n", " <td>1</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>7</th>\n", " <td>1</td>\n", " <td>1</td>\n", " <td>0</td>\n", " <td>1</td>\n", " <td>1</td>\n", " <td>0</td>\n", " <td>1</td>\n", " <td>0</td>\n", " <td>1</td>\n", " <td>1</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>8</th>\n", " <td>1</td>\n", " <td>1</td>\n", " <td>1</td>\n", " <td>1</td>\n", " <td>1</td>\n", " <td>0</td>\n", " <td>1</td>\n", " <td>0</td>\n", " <td>1</td>\n", " <td>1</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>9</th>\n", " <td>1</td>\n", " <td>1</td>\n", " <td>1</td>\n", " <td>1</td>\n", " <td>1</td>\n", " <td>0</td>\n", " <td>1</td>\n", " <td>1</td>\n", " <td>1</td>\n", " <td>1</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>10</th>\n", " 
<td>1</td>\n", " <td>1</td>\n", " <td>1</td>\n", " <td>1</td>\n", " <td>1</td>\n", " <td>1</td>\n", " <td>1</td>\n", " <td>1</td>\n", " <td>1</td>\n", " <td>1</td>\n", " <td>1</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " tf gene1 gene2 gene3 gene4 gene5 gene6 gene7 gene8 gene9 gene10\n", "0 1 0 0 0 0 0 0 0 0 0 0\n", "1 1 0 0 0 0 0 0 0 1 0 0\n", "2 1 1 0 0 0 0 0 0 1 0 0\n", "3 1 1 0 1 0 0 0 0 1 0 0\n", "4 1 1 0 1 0 0 1 0 1 0 0\n", "5 1 1 0 1 0 0 1 0 1 1 0\n", "6 1 1 0 1 0 0 1 0 1 1 1\n", "7 1 1 0 1 1 0 1 0 1 1 1\n", "8 1 1 1 1 1 0 1 0 1 1 1\n", "9 1 1 1 1 1 0 1 1 1 1 1\n", "10 1 1 1 1 1 1 1 1 1 1 1" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dynamics = FullyAsynchronousDynamics(bn)\n", "random_walk_df = pd.DataFrame(dynamics.random_walk(initial_state, steps=10))\n", "random_walk_df" ] }, { "cell_type": "markdown", "id": "a835af87", "metadata": {}, "source": [ "## Retrieve statistics of real expression datasets\n", "\n", "In order to generate synthetic RNA counts, scBoolSeq relies on statistical criteria learnt from real scRNA-Seq datasets. 
Then, the nodes of the Boolean model used to generate the Boolean states need to be associated with real gene names: scBoolSeq will then generate RNA counts from the corresponding distribution, biased by the Boolean state of the gene.\n", "\n", "In this example, we re-use the simulation criteria learnt from the Nestorowa dataset in the [1 - Binarization and synthetic data generation](1%20-%20Binarization%20and%20synthetic%20data%20generation.ipynb) notebook:" ] }, { "cell_type": "code", "execution_count": 6, "id": "7e1f185f", "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>Dip</th>\n", " <th>BI</th>\n", " <th>Kurtosis</th>\n", " <th>DropOutRate</th>\n", " <th>MeanNZ</th>\n", " <th>DenPeak</th>\n", " <th>Amplitude</th>\n", " <th>gaussian_prob1</th>\n", " <th>gaussian_prob2</th>\n", " <th>gaussian_mean1</th>\n", " <th>gaussian_mean2</th>\n", " <th>gaussian_variance</th>\n", " <th>mean</th>\n", " <th>variance</th>\n", " <th>unimodal_margin_quantile</th>\n", " <th>unimodal_low_quantile</th>\n", " <th>unimodal_high_quantile</th>\n", " <th>IQR</th>\n", " <th>q50</th>\n", " <th>bim_thresh_down</th>\n", " <th>bim_thresh_up</th>\n", " <th>Category</th>\n", " <th>dor_threshold</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>Clec1b</th>\n", " <td>9.948487e-01</td>\n", " <td>1.635698</td>\n", " <td>6.166711</td>\n", " <td>0.876208</td>\n", " <td>1.520978</td>\n", " <td>-0.007249</td>\n", " <td>8.852181</td>\n", " <td>0.986140</td>\n", " <td>0.013860</td>\n", " <td>0.111291</td>\n", " <td>5.666490</td>\n", " <td>0.157649</td>\n", " <td>1.520978</td>\n", " 
<td>2.666760</td>\n", " <td>0.25</td>\n", " <td>0.667271</td>\n", " <td>1.555290</td>\n", " <td>0.888020</td>\n", " <td>0.968776</td>\n", " <td>2.785740</td>\n", " <td>3.094168</td>\n", " <td>Unimodal</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>Kdm3a</th>\n", " <td>0.000000e+00</td>\n", " <td>2.407548</td>\n", " <td>-0.784019</td>\n", " <td>0.326087</td>\n", " <td>3.847940</td>\n", " <td>0.209239</td>\n", " <td>10.126676</td>\n", " <td>0.714520</td>\n", " <td>0.285480</td>\n", " <td>0.872643</td>\n", " <td>6.899449</td>\n", " <td>1.278247</td>\n", " <td>2.593177</td>\n", " <td>8.692586</td>\n", " <td>0.25</td>\n", " <td>0.000000</td>\n", " <td>5.258984</td>\n", " <td>5.258984</td>\n", " <td>1.268040</td>\n", " <td>3.432251</td>\n", " <td>4.748643</td>\n", " <td>Bimodal</td>\n", " <td>0.95</td>\n", " </tr>\n", " <tr>\n", " <th>Coro2b</th>\n", " <td>4.684039e-03</td>\n", " <td>2.320060</td>\n", " <td>0.327060</td>\n", " <td>0.658213</td>\n", " <td>2.383819</td>\n", " <td>0.004597</td>\n", " <td>9.475577</td>\n", " <td>0.919508</td>\n", " <td>0.080492</td>\n", " <td>0.335546</td>\n", " <td>6.289079</td>\n", " <td>0.487372</td>\n", " <td>2.383819</td>\n", " <td>5.370521</td>\n", " <td>0.25</td>\n", " <td>0.827740</td>\n", " <td>2.912944</td>\n", " <td>2.085205</td>\n", " <td>1.290666</td>\n", " <td>3.183596</td>\n", " <td>3.879537</td>\n", " <td>Bimodal</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>8430408G22Rik</th>\n", " <td>7.236739e-08</td>\n", " <td>3.121069</td>\n", " <td>-0.993979</td>\n", " <td>0.884058</td>\n", " <td>2.983472</td>\n", " <td>0.005663</td>\n", " <td>9.067857</td>\n", " <td>0.964962</td>\n", " <td>0.035038</td>\n", " <td>0.098898</td>\n", " <td>7.148808</td>\n", " <td>0.172506</td>\n", " <td>2.983472</td>\n", " <td>8.154647</td>\n", " <td>0.25</td>\n", " <td>0.825298</td>\n", " <td>6.465074</td>\n", " <td>5.639776</td>\n", " <td>1.449779</td>\n", " <td>3.612061</td>\n", " <td>4.175572</td>\n", " <td>Bimodal</td>\n", " 
<td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>Clec9a</th>\n", " <td>1.000000e+00</td>\n", " <td>2.081717</td>\n", " <td>140.089285</td>\n", " <td>0.965580</td>\n", " <td>2.280293</td>\n", " <td>-0.009361</td>\n", " <td>9.614233</td>\n", " <td>0.993961</td>\n", " <td>0.006039</td>\n", " <td>0.035599</td>\n", " <td>7.138099</td>\n", " <td>0.069870</td>\n", " <td>0.078488</td>\n", " <td>0.372878</td>\n", " <td>0.25</td>\n", " <td>0.000000</td>\n", " <td>0.000000</td>\n", " <td>0.000000</td>\n", " <td>0.000000</td>\n", " <td>3.113410</td>\n", " <td>4.607253</td>\n", " <td>Discarded</td>\n", " <td>0.95</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " Dip BI Kurtosis DropOutRate MeanNZ \\\n", "Clec1b 9.948487e-01 1.635698 6.166711 0.876208 1.520978 \n", "Kdm3a 0.000000e+00 2.407548 -0.784019 0.326087 3.847940 \n", "Coro2b 4.684039e-03 2.320060 0.327060 0.658213 2.383819 \n", "8430408G22Rik 7.236739e-08 3.121069 -0.993979 0.884058 2.983472 \n", "Clec9a 1.000000e+00 2.081717 140.089285 0.965580 2.280293 \n", "\n", " DenPeak Amplitude gaussian_prob1 gaussian_prob2 \\\n", "Clec1b -0.007249 8.852181 0.986140 0.013860 \n", "Kdm3a 0.209239 10.126676 0.714520 0.285480 \n", "Coro2b 0.004597 9.475577 0.919508 0.080492 \n", "8430408G22Rik 0.005663 9.067857 0.964962 0.035038 \n", "Clec9a -0.009361 9.614233 0.993961 0.006039 \n", "\n", " gaussian_mean1 gaussian_mean2 gaussian_variance mean \\\n", "Clec1b 0.111291 5.666490 0.157649 1.520978 \n", "Kdm3a 0.872643 6.899449 1.278247 2.593177 \n", "Coro2b 0.335546 6.289079 0.487372 2.383819 \n", "8430408G22Rik 0.098898 7.148808 0.172506 2.983472 \n", "Clec9a 0.035599 7.138099 0.069870 0.078488 \n", "\n", " variance unimodal_margin_quantile unimodal_low_quantile \\\n", "Clec1b 2.666760 0.25 0.667271 \n", "Kdm3a 8.692586 0.25 0.000000 \n", "Coro2b 5.370521 0.25 0.827740 \n", "8430408G22Rik 8.154647 0.25 0.825298 \n", "Clec9a 0.372878 0.25 0.000000 \n", "\n", " unimodal_high_quantile IQR q50 bim_thresh_down 
\\\n", "Clec1b 1.555290 0.888020 0.968776 2.785740 \n", "Kdm3a 5.258984 5.258984 1.268040 3.432251 \n", "Coro2b 2.912944 2.085205 1.290666 3.183596 \n", "8430408G22Rik 6.465074 5.639776 1.449779 3.612061 \n", "Clec9a 0.000000 0.000000 0.000000 3.113410 \n", "\n", " bim_thresh_up Category dor_threshold \n", "Clec1b 3.094168 Unimodal NaN \n", "Kdm3a 4.748643 Bimodal 0.95 \n", "Coro2b 3.879537 Bimodal NaN \n", "8430408G22Rik 4.175572 Bimodal NaN \n", "Clec9a 4.607253 Discarded 0.95 " ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "criteria = pd.read_csv(\"cache_scBoolSeq_Nestorowa_simulation_criteria.csv\", index_col=0)\n", "criteria.head()" ] }, { "cell_type": "code", "execution_count": 7, "id": "c0d064c8", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Bimodal 2987\n", "Unimodal 1580\n", "Discarded 201\n", "Name: Category, dtype: int64" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "criteria.Category.value_counts()" ] }, { "cell_type": "markdown", "id": "8027f897", "metadata": {}, "source": [ "We randomly select bimodal genes for each of the nodes of the Boolean model to obtain simulation criteria:" ] }, { "cell_type": "code", "execution_count": 8, "id": "9e9b9663", "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>Dip</th>\n", " <th>BI</th>\n", " <th>Kurtosis</th>\n", " <th>DropOutRate</th>\n", " <th>MeanNZ</th>\n", " <th>DenPeak</th>\n", " <th>Amplitude</th>\n", " <th>gaussian_prob1</th>\n", " <th>gaussian_prob2</th>\n", " <th>gaussian_mean1</th>\n", " 
<th>gaussian_mean2</th>\n", " <th>gaussian_variance</th>\n", " <th>mean</th>\n", " <th>variance</th>\n", " <th>unimodal_margin_quantile</th>\n", " <th>unimodal_low_quantile</th>\n", " <th>unimodal_high_quantile</th>\n", " <th>IQR</th>\n", " <th>q50</th>\n", " <th>bim_thresh_down</th>\n", " <th>bim_thresh_up</th>\n", " <th>Category</th>\n", " <th>dor_threshold</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>tf</th>\n", " <td>0.000250</td>\n", " <td>1.982430</td>\n", " <td>-1.118390</td>\n", " <td>0.027778</td>\n", " <td>4.454848</td>\n", " <td>2.231509</td>\n", " <td>10.458465</td>\n", " <td>0.634321</td>\n", " <td>0.365679</td>\n", " <td>2.449688</td>\n", " <td>7.594680</td>\n", " <td>1.562365</td>\n", " <td>4.331102</td>\n", " <td>7.707162</td>\n", " <td>0.25</td>\n", " <td>2.070391</td>\n", " <td>6.920502</td>\n", " <td>4.850112</td>\n", " <td>3.321480</td>\n", " <td>4.277351</td>\n", " <td>6.081385</td>\n", " <td>Bimodal</td>\n", " <td>0.95</td>\n", " </tr>\n", " <tr>\n", " <th>gene1</th>\n", " <td>0.000000</td>\n", " <td>2.171544</td>\n", " <td>-1.148314</td>\n", " <td>0.021135</td>\n", " <td>6.223047</td>\n", " <td>8.243204</td>\n", " <td>11.193184</td>\n", " <td>0.363906</td>\n", " <td>0.636094</td>\n", " <td>2.579439</td>\n", " <td>8.100760</td>\n", " <td>1.496439</td>\n", " <td>6.091521</td>\n", " <td>8.558219</td>\n", " <td>0.25</td>\n", " <td>3.183970</td>\n", " <td>8.483865</td>\n", " <td>5.299895</td>\n", " <td>7.282884</td>\n", " <td>4.384796</td>\n", " <td>5.990926</td>\n", " <td>Bimodal</td>\n", " <td>0.95</td>\n", " </tr>\n", " <tr>\n", " <th>gene2</th>\n", " <td>0.000000</td>\n", " <td>1.957530</td>\n", " <td>-1.273249</td>\n", " <td>0.044082</td>\n", " <td>5.641087</td>\n", " <td>8.029818</td>\n", " <td>10.794851</td>\n", " <td>0.409065</td>\n", " <td>0.590935</td>\n", " <td>2.258246</td>\n", " <td>7.561991</td>\n", " <td>1.774518</td>\n", " <td>5.392416</td>\n", " <td>8.579517</td>\n", " <td>0.25</td>\n", " 
<td>2.585510</td>\n", " <td>7.994901</td>\n", " <td>5.409391</td>\n", " <td>6.129123</td>\n", " <td>3.802762</td>\n", " <td>5.780924</td>\n", " <td>Bimodal</td>\n", " <td>0.95</td>\n", " </tr>\n", " <tr>\n", " <th>gene3</th>\n", " <td>0.000000</td>\n", " <td>2.123630</td>\n", " <td>-1.315039</td>\n", " <td>0.036836</td>\n", " <td>5.724870</td>\n", " <td>8.076334</td>\n", " <td>10.811316</td>\n", " <td>0.400338</td>\n", " <td>0.599662</td>\n", " <td>2.237126</td>\n", " <td>7.701644</td>\n", " <td>1.589570</td>\n", " <td>5.513990</td>\n", " <td>8.763508</td>\n", " <td>0.25</td>\n", " <td>2.536712</td>\n", " <td>8.106769</td>\n", " <td>5.570057</td>\n", " <td>6.452676</td>\n", " <td>3.963904</td>\n", " <td>5.713590</td>\n", " <td>Bimodal</td>\n", " <td>0.95</td>\n", " </tr>\n", " <tr>\n", " <th>gene4</th>\n", " <td>0.000000</td>\n", " <td>2.161170</td>\n", " <td>-1.344687</td>\n", " <td>0.018116</td>\n", " <td>6.559400</td>\n", " <td>9.543728</td>\n", " <td>12.260558</td>\n", " <td>0.403231</td>\n", " <td>0.596769</td>\n", " <td>2.734372</td>\n", " <td>8.944811</td>\n", " <td>1.987133</td>\n", " <td>6.440570</td>\n", " <td>11.275156</td>\n", " <td>0.25</td>\n", " <td>3.109483</td>\n", " <td>9.439844</td>\n", " <td>6.330361</td>\n", " <td>7.471017</td>\n", " <td>4.771986</td>\n", " <td>6.660780</td>\n", " <td>Bimodal</td>\n", " <td>0.95</td>\n", " </tr>\n", " <tr>\n", " <th>gene5</th>\n", " <td>0.000000</td>\n", " <td>2.167416</td>\n", " <td>-1.386505</td>\n", " <td>0.038647</td>\n", " <td>5.661639</td>\n", " <td>8.154579</td>\n", " <td>10.498523</td>\n", " <td>0.436231</td>\n", " <td>0.563769</td>\n", " <td>2.322133</td>\n", " <td>7.857554</td>\n", " <td>1.604112</td>\n", " <td>5.442832</td>\n", " <td>9.145257</td>\n", " <td>0.25</td>\n", " <td>2.480269</td>\n", " <td>8.141719</td>\n", " <td>5.661451</td>\n", " <td>6.272476</td>\n", " <td>4.156733</td>\n", " <td>5.870370</td>\n", " <td>Bimodal</td>\n", " <td>0.95</td>\n", " </tr>\n", " <tr>\n", " <th>gene6</th>\n", " 
<td>0.000000</td>\n", " <td>2.355317</td>\n", " <td>-1.505761</td>\n", " <td>0.034420</td>\n", " <td>5.745117</td>\n", " <td>8.722360</td>\n", " <td>11.401288</td>\n", " <td>0.433551</td>\n", " <td>0.566449</td>\n", " <td>2.015794</td>\n", " <td>8.250379</td>\n", " <td>1.720748</td>\n", " <td>5.547368</td>\n", " <td>11.273438</td>\n", " <td>0.25</td>\n", " <td>2.068712</td>\n", " <td>8.581835</td>\n", " <td>6.513123</td>\n", " <td>6.533317</td>\n", " <td>4.226884</td>\n", " <td>5.879037</td>\n", " <td>Bimodal</td>\n", " <td>0.95</td>\n", " </tr>\n", " <tr>\n", " <th>gene7</th>\n", " <td>0.000000</td>\n", " <td>2.307954</td>\n", " <td>-0.861903</td>\n", " <td>0.005435</td>\n", " <td>6.800339</td>\n", " <td>8.655035</td>\n", " <td>10.931065</td>\n", " <td>0.303185</td>\n", " <td>0.696815</td>\n", " <td>2.875043</td>\n", " <td>8.455204</td>\n", " <td>1.234993</td>\n", " <td>6.763381</td>\n", " <td>7.818094</td>\n", " <td>0.25</td>\n", " <td>4.108445</td>\n", " <td>8.919568</td>\n", " <td>4.811123</td>\n", " <td>7.855844</td>\n", " <td>4.817005</td>\n", " <td>6.135923</td>\n", " <td>Bimodal</td>\n", " <td>0.95</td>\n", " </tr>\n", " <tr>\n", " <th>gene8</th>\n", " <td>0.000000</td>\n", " <td>2.296122</td>\n", " <td>-1.483351</td>\n", " <td>0.043478</td>\n", " <td>5.821394</td>\n", " <td>8.961177</td>\n", " <td>11.437198</td>\n", " <td>0.489454</td>\n", " <td>0.510546</td>\n", " <td>2.422383</td>\n", " <td>8.584236</td>\n", " <td>1.799614</td>\n", " <td>5.568290</td>\n", " <td>11.294318</td>\n", " <td>0.25</td>\n", " <td>2.438392</td>\n", " <td>8.763361</td>\n", " <td>6.324968</td>\n", " <td>5.836308</td>\n", " <td>4.631891</td>\n", " <td>6.354470</td>\n", " <td>Bimodal</td>\n", " <td>0.95</td>\n", " </tr>\n", " <tr>\n", " <th>gene9</th>\n", " <td>0.001038</td>\n", " <td>1.593895</td>\n", " <td>0.246388</td>\n", " <td>0.035628</td>\n", " <td>4.116075</td>\n", " <td>3.382096</td>\n", " <td>10.321775</td>\n", " <td>0.813953</td>\n", " <td>0.186047</td>\n", " 
<td>3.118572</td>\n", " <td>7.691919</td>\n", " <td>1.246723</td>\n", " <td>3.969428</td>\n", " <td>4.416692</td>\n", " <td>0.25</td>\n", " <td>2.683685</td>\n", " <td>4.560455</td>\n", " <td>1.876770</td>\n", " <td>3.506317</td>\n", " <td>4.993969</td>\n", " <td>6.611602</td>\n", " <td>Bimodal</td>\n", " <td>0.95</td>\n", " </tr>\n", " <tr>\n", " <th>gene10</th>\n", " <td>0.000000</td>\n", " <td>2.252029</td>\n", " <td>-1.216598</td>\n", " <td>0.006039</td>\n", " <td>6.281443</td>\n", " <td>8.383876</td>\n", " <td>10.891227</td>\n", " <td>0.365452</td>\n", " <td>0.634548</td>\n", " <td>2.963747</td>\n", " <td>8.132413</td>\n", " <td>1.221529</td>\n", " <td>6.243511</td>\n", " <td>7.421162</td>\n", " <td>0.25</td>\n", " <td>3.464664</td>\n", " <td>8.508720</td>\n", " <td>5.044056</td>\n", " <td>7.298677</td>\n", " <td>4.705917</td>\n", " <td>6.140867</td>\n", " <td>Bimodal</td>\n", " <td>0.95</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " Dip BI Kurtosis DropOutRate MeanNZ DenPeak \\\n", "tf 0.000250 1.982430 -1.118390 0.027778 4.454848 2.231509 \n", "gene1 0.000000 2.171544 -1.148314 0.021135 6.223047 8.243204 \n", "gene2 0.000000 1.957530 -1.273249 0.044082 5.641087 8.029818 \n", "gene3 0.000000 2.123630 -1.315039 0.036836 5.724870 8.076334 \n", "gene4 0.000000 2.161170 -1.344687 0.018116 6.559400 9.543728 \n", "gene5 0.000000 2.167416 -1.386505 0.038647 5.661639 8.154579 \n", "gene6 0.000000 2.355317 -1.505761 0.034420 5.745117 8.722360 \n", "gene7 0.000000 2.307954 -0.861903 0.005435 6.800339 8.655035 \n", "gene8 0.000000 2.296122 -1.483351 0.043478 5.821394 8.961177 \n", "gene9 0.001038 1.593895 0.246388 0.035628 4.116075 3.382096 \n", "gene10 0.000000 2.252029 -1.216598 0.006039 6.281443 8.383876 \n", "\n", " Amplitude gaussian_prob1 gaussian_prob2 gaussian_mean1 \\\n", "tf 10.458465 0.634321 0.365679 2.449688 \n", "gene1 11.193184 0.363906 0.636094 2.579439 \n", "gene2 10.794851 0.409065 0.590935 2.258246 \n", "gene3 
10.811316 0.400338 0.599662 2.237126 \n", "gene4 12.260558 0.403231 0.596769 2.734372 \n", "gene5 10.498523 0.436231 0.563769 2.322133 \n", "gene6 11.401288 0.433551 0.566449 2.015794 \n", "gene7 10.931065 0.303185 0.696815 2.875043 \n", "gene8 11.437198 0.489454 0.510546 2.422383 \n", "gene9 10.321775 0.813953 0.186047 3.118572 \n", "gene10 10.891227 0.365452 0.634548 2.963747 \n", "\n", " gaussian_mean2 gaussian_variance mean variance \\\n", "tf 7.594680 1.562365 4.331102 7.707162 \n", "gene1 8.100760 1.496439 6.091521 8.558219 \n", "gene2 7.561991 1.774518 5.392416 8.579517 \n", "gene3 7.701644 1.589570 5.513990 8.763508 \n", "gene4 8.944811 1.987133 6.440570 11.275156 \n", "gene5 7.857554 1.604112 5.442832 9.145257 \n", "gene6 8.250379 1.720748 5.547368 11.273438 \n", "gene7 8.455204 1.234993 6.763381 7.818094 \n", "gene8 8.584236 1.799614 5.568290 11.294318 \n", "gene9 7.691919 1.246723 3.969428 4.416692 \n", "gene10 8.132413 1.221529 6.243511 7.421162 \n", "\n", " unimodal_margin_quantile unimodal_low_quantile \\\n", "tf 0.25 2.070391 \n", "gene1 0.25 3.183970 \n", "gene2 0.25 2.585510 \n", "gene3 0.25 2.536712 \n", "gene4 0.25 3.109483 \n", "gene5 0.25 2.480269 \n", "gene6 0.25 2.068712 \n", "gene7 0.25 4.108445 \n", "gene8 0.25 2.438392 \n", "gene9 0.25 2.683685 \n", "gene10 0.25 3.464664 \n", "\n", " unimodal_high_quantile IQR q50 bim_thresh_down \\\n", "tf 6.920502 4.850112 3.321480 4.277351 \n", "gene1 8.483865 5.299895 7.282884 4.384796 \n", "gene2 7.994901 5.409391 6.129123 3.802762 \n", "gene3 8.106769 5.570057 6.452676 3.963904 \n", "gene4 9.439844 6.330361 7.471017 4.771986 \n", "gene5 8.141719 5.661451 6.272476 4.156733 \n", "gene6 8.581835 6.513123 6.533317 4.226884 \n", "gene7 8.919568 4.811123 7.855844 4.817005 \n", "gene8 8.763361 6.324968 5.836308 4.631891 \n", "gene9 4.560455 1.876770 3.506317 4.993969 \n", "gene10 8.508720 5.044056 7.298677 4.705917 \n", "\n", " bim_thresh_up Category dor_threshold \n", "tf 6.081385 Bimodal 0.95 \n", "gene1 
5.990926 Bimodal 0.95 \n", "gene2 5.780924 Bimodal 0.95 \n", "gene3 5.713590 Bimodal 0.95 \n", "gene4 6.660780 Bimodal 0.95 \n", "gene5 5.870370 Bimodal 0.95 \n", "gene6 5.879037 Bimodal 0.95 \n", "gene7 6.135923 Bimodal 0.95 \n", "gene8 6.354470 Bimodal 0.95 \n", "gene9 6.611602 Bimodal 0.95 \n", "gene10 6.140867 Bimodal 0.95 " ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "random_criteria = criteria[\n", " (criteria.Category == \"Bimodal\") &\n", " (criteria.DropOutRate < 0.05)\n", "].sample(11, random_state=_rng_seed)\n", "random_criteria.set_index(random_walk_df.columns, inplace=True)\n", "random_criteria" ] }, { "cell_type": "markdown", "id": "4ac21fef", "metadata": {}, "source": [ "## Generate synthetic RNA-Seq data\n", "\n", "We instantiate scBoolSeq with the simulation criteria, whose row names match the columns of the generated Boolean matrix." ] }, { "cell_type": "code", "execution_count": 9, "id": "1239447f", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "scBoolSeq(has_data=False, can_binarize=False, can_simulate=True)" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "scbool = scBoolSeq(simulation_criteria=random_criteria)\n", "scbool" ] }, { "cell_type": "markdown", "id": "18e159e5", "metadata": {}, "source": [ "Then, we generate 300 samples per Boolean state using the `.simulate` method:" ] }, { "cell_type": "code", "execution_count": 10, "id": "f627ea46", "metadata": {}, "outputs": [], "source": [ "n_samples = 300 # number of samples per row" ] }, { "cell_type": "code", "execution_count": 11, "id": "163cdddb", "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table 
border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>tf</th>\n", " <th>gene1</th>\n", " <th>gene2</th>\n", " <th>gene3</th>\n", " <th>gene4</th>\n", " <th>gene5</th>\n", " <th>gene6</th>\n", " <th>gene7</th>\n", " <th>gene8</th>\n", " <th>gene9</th>\n", " <th>gene10</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>7.343997</td>\n", " <td>4.423748</td>\n", " <td>1.455764</td>\n", " <td>2.666266</td>\n", " <td>3.622463</td>\n", " <td>2.339183</td>\n", " <td>3.145557</td>\n", " <td>2.355520</td>\n", " <td>4.226817</td>\n", " <td>2.450700</td>\n", " <td>4.954449</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>6.503459</td>\n", " <td>4.869516</td>\n", " <td>0.997902</td>\n", " <td>2.490826</td>\n", " <td>3.831914</td>\n", " <td>2.904771</td>\n", " <td>1.355320</td>\n", " <td>2.121728</td>\n", " <td>9.014141</td>\n", " <td>4.112613</td>\n", " <td>2.857409</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>7.194568</td>\n", " <td>8.060726</td>\n", " <td>1.803331</td>\n", " <td>3.395802</td>\n", " <td>2.964298</td>\n", " <td>2.039039</td>\n", " <td>2.856274</td>\n", " <td>3.078402</td>\n", " <td>10.793265</td>\n", " <td>2.710345</td>\n", " <td>1.965232</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>5.265355</td>\n", " <td>7.646248</td>\n", " <td>1.637069</td>\n", " <td>6.712579</td>\n", " <td>3.382125</td>\n", " <td>2.334264</td>\n", " <td>0.000000</td>\n", " <td>1.165221</td>\n", " <td>9.365778</td>\n", " <td>2.665426</td>\n", " <td>6.474557</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>7.519443</td>\n", " <td>8.754864</td>\n", " <td>2.471689</td>\n", " <td>7.591701</td>\n", " <td>3.109703</td>\n", " <td>2.208982</td>\n", " <td>7.996878</td>\n", " <td>1.137108</td>\n", " <td>10.456507</td>\n", " <td>2.603011</td>\n", " <td>4.516670</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " tf gene1 gene2 gene3 gene4 gene5 gene6 
\\\n", "0 7.343997 4.423748 1.455764 2.666266 3.622463 2.339183 3.145557 \n", "1 6.503459 4.869516 0.997902 2.490826 3.831914 2.904771 1.355320 \n", "2 7.194568 8.060726 1.803331 3.395802 2.964298 2.039039 2.856274 \n", "3 5.265355 7.646248 1.637069 6.712579 3.382125 2.334264 0.000000 \n", "4 7.519443 8.754864 2.471689 7.591701 3.109703 2.208982 7.996878 \n", "\n", " gene7 gene8 gene9 gene10 \n", "0 2.355520 4.226817 2.450700 4.954449 \n", "1 2.121728 9.014141 4.112613 2.857409 \n", "2 3.078402 10.793265 2.710345 1.965232 \n", "3 1.165221 9.365778 2.665426 6.474557 \n", "4 1.137108 10.456507 2.603011 4.516670 " ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "counts = scbool.simulate(random_walk_df, n_samples=n_samples)\n", "counts.head()" ] }, { "cell_type": "markdown", "id": "a95920eb", "metadata": {}, "source": [ "To ease post-analysis with STREAM, we generate unique identifiers for each simulated row (cell):" ] }, { "cell_type": "code", "execution_count": 12, "id": "9ace2312", "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>tf</th>\n", " <th>gene1</th>\n", " <th>gene2</th>\n", " <th>gene3</th>\n", " <th>gene4</th>\n", " <th>gene5</th>\n", " <th>gene6</th>\n", " <th>gene7</th>\n", " <th>gene8</th>\n", " <th>gene9</th>\n", " <th>gene10</th>\n", " </tr>\n", " <tr>\n", " <th>cellID</th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " 
<th>step0_0</th>\n", " <td>7.343997</td>\n", " <td>4.423748</td>\n", " <td>1.455764</td>\n", " <td>2.666266</td>\n", " <td>3.622463</td>\n", " <td>2.339183</td>\n", " <td>3.145557</td>\n", " <td>2.355520</td>\n", " <td>4.226817</td>\n", " <td>2.450700</td>\n", " <td>4.954449</td>\n", " </tr>\n", " <tr>\n", " <th>step1_0</th>\n", " <td>6.503459</td>\n", " <td>4.869516</td>\n", " <td>0.997902</td>\n", " <td>2.490826</td>\n", " <td>3.831914</td>\n", " <td>2.904771</td>\n", " <td>1.355320</td>\n", " <td>2.121728</td>\n", " <td>9.014141</td>\n", " <td>4.112613</td>\n", " <td>2.857409</td>\n", " </tr>\n", " <tr>\n", " <th>step2_0</th>\n", " <td>7.194568</td>\n", " <td>8.060726</td>\n", " <td>1.803331</td>\n", " <td>3.395802</td>\n", " <td>2.964298</td>\n", " <td>2.039039</td>\n", " <td>2.856274</td>\n", " <td>3.078402</td>\n", " <td>10.793265</td>\n", " <td>2.710345</td>\n", " <td>1.965232</td>\n", " </tr>\n", " <tr>\n", " <th>step3_0</th>\n", " <td>5.265355</td>\n", " <td>7.646248</td>\n", " <td>1.637069</td>\n", " <td>6.712579</td>\n", " <td>3.382125</td>\n", " <td>2.334264</td>\n", " <td>0.000000</td>\n", " <td>1.165221</td>\n", " <td>9.365778</td>\n", " <td>2.665426</td>\n", " <td>6.474557</td>\n", " </tr>\n", " <tr>\n", " <th>step4_0</th>\n", " <td>7.519443</td>\n", " <td>8.754864</td>\n", " <td>2.471689</td>\n", " <td>7.591701</td>\n", " <td>3.109703</td>\n", " <td>2.208982</td>\n", " <td>7.996878</td>\n", " <td>1.137108</td>\n", " <td>10.456507</td>\n", " <td>2.603011</td>\n", " <td>4.516670</td>\n", " </tr>\n", " <tr>\n", " <th>...</th>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " </tr>\n", " <tr>\n", " <th>step6_299</th>\n", " <td>7.019498</td>\n", " <td>11.833026</td>\n", " <td>4.191677</td>\n", " <td>7.636880</td>\n", " <td>4.569541</td>\n", " <td>2.178149</td>\n", " <td>8.812244</td>\n", " 
<td>2.681519</td>\n", " <td>8.103503</td>\n", " <td>8.350228</td>\n", " <td>8.577708</td>\n", " </tr>\n", " <tr>\n", " <th>step7_299</th>\n", " <td>6.869362</td>\n", " <td>8.494998</td>\n", " <td>0.987528</td>\n", " <td>6.191569</td>\n", " <td>8.565168</td>\n", " <td>1.742142</td>\n", " <td>7.830821</td>\n", " <td>0.221380</td>\n", " <td>10.301637</td>\n", " <td>4.230131</td>\n", " <td>7.762939</td>\n", " </tr>\n", " <tr>\n", " <th>step8_299</th>\n", " <td>9.318526</td>\n", " <td>8.263307</td>\n", " <td>5.660365</td>\n", " <td>6.102051</td>\n", " <td>8.717069</td>\n", " <td>4.543780</td>\n", " <td>6.495120</td>\n", " <td>2.918222</td>\n", " <td>8.388695</td>\n", " <td>6.171941</td>\n", " <td>4.913572</td>\n", " </tr>\n", " <tr>\n", " <th>step9_299</th>\n", " <td>7.768477</td>\n", " <td>8.415240</td>\n", " <td>8.041082</td>\n", " <td>5.957655</td>\n", " <td>9.119277</td>\n", " <td>2.507091</td>\n", " <td>10.057915</td>\n", " <td>8.073952</td>\n", " <td>8.294528</td>\n", " <td>5.635296</td>\n", " <td>8.548555</td>\n", " </tr>\n", " <tr>\n", " <th>step10_299</th>\n", " <td>9.363183</td>\n", " <td>7.610790</td>\n", " <td>6.505688</td>\n", " <td>6.694060</td>\n", " <td>6.872659</td>\n", " <td>7.633121</td>\n", " <td>7.101126</td>\n", " <td>7.106677</td>\n", " <td>8.166303</td>\n", " <td>7.856577</td>\n", " <td>8.372333</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "<p>3300 rows × 11 columns</p>\n", "</div>" ], "text/plain": [ " tf gene1 gene2 gene3 gene4 gene5 \\\n", "cellID \n", "step0_0 7.343997 4.423748 1.455764 2.666266 3.622463 2.339183 \n", "step1_0 6.503459 4.869516 0.997902 2.490826 3.831914 2.904771 \n", "step2_0 7.194568 8.060726 1.803331 3.395802 2.964298 2.039039 \n", "step3_0 5.265355 7.646248 1.637069 6.712579 3.382125 2.334264 \n", "step4_0 7.519443 8.754864 2.471689 7.591701 3.109703 2.208982 \n", "... ... ... ... ... ... ... 
\n", "step6_299 7.019498 11.833026 4.191677 7.636880 4.569541 2.178149 \n", "step7_299 6.869362 8.494998 0.987528 6.191569 8.565168 1.742142 \n", "step8_299 9.318526 8.263307 5.660365 6.102051 8.717069 4.543780 \n", "step9_299 7.768477 8.415240 8.041082 5.957655 9.119277 2.507091 \n", "step10_299 9.363183 7.610790 6.505688 6.694060 6.872659 7.633121 \n", "\n", " gene6 gene7 gene8 gene9 gene10 \n", "cellID \n", "step0_0 3.145557 2.355520 4.226817 2.450700 4.954449 \n", "step1_0 1.355320 2.121728 9.014141 4.112613 2.857409 \n", "step2_0 2.856274 3.078402 10.793265 2.710345 1.965232 \n", "step3_0 0.000000 1.165221 9.365778 2.665426 6.474557 \n", "step4_0 7.996878 1.137108 10.456507 2.603011 4.516670 \n", "... ... ... ... ... ... \n", "step6_299 8.812244 2.681519 8.103503 8.350228 8.577708 \n", "step7_299 7.830821 0.221380 10.301637 4.230131 7.762939 \n", "step8_299 6.495120 2.918222 8.388695 6.171941 4.913572 \n", "step9_299 10.057915 8.073952 8.294528 5.635296 8.548555 \n", "step10_299 7.101126 7.106677 8.166303 7.856577 8.372333 \n", "\n", "[3300 rows x 11 columns]" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ids = [f\"step{x}_{y}\" for y in range(n_samples) for x in random_walk_df.index]\n", "counts.index = ids\n", "counts.index.name = \"cellID\"\n", "counts" ] }, { "cell_type": "markdown", "id": "56f449c4", "metadata": {}, "source": [ "We write the result as a TSV file:" ] }, { "cell_type": "code", "execution_count": 13, "id": "edb10f46", "metadata": {}, "outputs": [], "source": [ "counts.T.to_csv(\"synthetic_data_star_counts.tsv\", sep=\"\\t\")" ] }, { "cell_type": "markdown", "id": "962ad85f", "metadata": {}, "source": [ "Then, we generate metadata to validate the trajectory reconstruction with STREAM:" ] }, { "cell_type": "code", "execution_count": 14, "id": "5ec64c76", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{0: '#A2F37E',\n", " 1: '#36873F',\n", " 2: '#81D278',\n", " 3: '#8CA2D4',\n", " 4: 
'#D0327B',\n", " 5: '#CDEF47',\n", " 6: '#CB6896',\n", " 7: '#590605',\n", " 8: '#3C27AB',\n", " 9: '#4A7BBC',\n", " 10: '#F0F94B'}" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "nb_active_genes = [cfg.sum()-1 for _, cfg in random_walk_df.iterrows()]\n", "_RGB_values = list(\"0123456789ABCDEF\")\n", "color_map = {nb: \"#\"+''.join([_rng.choice(_RGB_values) for _ in range(6)]) for nb in set(nb_active_genes)}\n", "color_map" ] }, { "cell_type": "code", "execution_count": 15, "id": "e5827936", "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>label</th>\n", " <th>label_color</th>\n", " </tr>\n", " <tr>\n", " <th>cellID</th>\n", " <th></th>\n", " <th></th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>step0_0</th>\n", " <td>0</td>\n", " <td>#A2F37E</td>\n", " </tr>\n", " <tr>\n", " <th>step1_0</th>\n", " <td>1</td>\n", " <td>#36873F</td>\n", " </tr>\n", " <tr>\n", " <th>step2_0</th>\n", " <td>2</td>\n", " <td>#81D278</td>\n", " </tr>\n", " <tr>\n", " <th>step3_0</th>\n", " <td>3</td>\n", " <td>#8CA2D4</td>\n", " </tr>\n", " <tr>\n", " <th>step4_0</th>\n", " <td>4</td>\n", " <td>#D0327B</td>\n", " </tr>\n", " <tr>\n", " <th>...</th>\n", " <td>...</td>\n", " <td>...</td>\n", " </tr>\n", " <tr>\n", " <th>step6_299</th>\n", " <td>6</td>\n", " <td>#CB6896</td>\n", " </tr>\n", " <tr>\n", " <th>step7_299</th>\n", " <td>7</td>\n", " <td>#590605</td>\n", " </tr>\n", " <tr>\n", " <th>step8_299</th>\n", " <td>8</td>\n", " <td>#3C27AB</td>\n", " </tr>\n", " <tr>\n", " <th>step9_299</th>\n", " <td>9</td>\n", " <td>#4A7BBC</td>\n", " 
</tr>\n", " <tr>\n", " <th>step10_299</th>\n", " <td>10</td>\n", " <td>#F0F94B</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "<p>3300 rows × 2 columns</p>\n", "</div>" ], "text/plain": [ " label label_color\n", "cellID \n", "step0_0 0 #A2F37E\n", "step1_0 1 #36873F\n", "step2_0 2 #81D278\n", "step3_0 3 #8CA2D4\n", "step4_0 4 #D0327B\n", "... ... ...\n", "step6_299 6 #CB6896\n", "step7_299 7 #590605\n", "step8_299 8 #3C27AB\n", "step9_299 9 #4A7BBC\n", "step10_299 10 #F0F94B\n", "\n", "[3300 rows x 2 columns]" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "metadata = [[nb, color_map[nb]] for nb in nb_active_genes]*n_samples\n", "metadata = pd.DataFrame(metadata, columns=[\"label\", \"label_color\"])\n", "metadata.index = counts.index\n", "metadata" ] }, { "cell_type": "code", "execution_count": 16, "id": "8887d91d", "metadata": {}, "outputs": [], "source": [ "metadata.to_csv(\"synthetic_data_star_metadata.tsv\", sep=\"\\t\")" ] }, { "cell_type": "markdown", "id": "76d0f0c6", "metadata": {}, "source": [ "STREAM analysis is performed in a separate notebook: [3.1 - STREAM - Trajectory reconstruction for star network synthetic scRNA data](3.1%20-%20STREAM%20-%20Trajectory%20reconstruction%20for%20star%20network%20synthetic%20scRNA%20data.ipynb). Note that its execution should be performed in the adequate software environment (e.g., STREAM Docker image)" ] }, { "cell_type": "code", "execution_count": null, "id": "f81a6be2", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.12" } }, "nbformat": 4, "nbformat_minor": 5 }