{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "04fa53a2",
   "metadata": {},
   "source": [
    "# Synthetic expression data from asynchronous random walks on a star network\n",
    "\n",
    "In this series of notebooks, we demonstrate how scBoolSeq can be employed to generate synthetic scRNA-Seq datasets from Boolean states of trajectories of mechanistic Boolean models.\n",
    "\n",
    "This notebook focuses on a toy model where a transcription factor progressively activates its target genes."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "19cbcd2e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/markdown": [
       "This notebook has been executed using the docker image `bnediction/scboolseq:v0`"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<script type=\"text/javascript\" id=\"colomoto-setup-5747392\" class=\"to-be-removed\">\n",
       "        if (typeof Jupyter != 'undefined') {\n",
       "            \n",
       "function detect_import(cell, module) {\n",
       "    var code = cell.get_text();\n",
       "    code = code.replace(/\\\\\\n/g, \"\");\n",
       "    var lines = code.split(\"\\n\");\n",
       "    var r_simple = new RegExp(\"^(\"+module+\")$\");\n",
       "    var r_alias = new RegExp(\"^\"+module+\"\\\\s+as\\\\s+(\\\\w+)$\");\n",
       "    for (var i = 0; i < lines.length; ++i) {\n",
       "        if (/^import\\s/.test(lines[i])) {\n",
       "            code = lines[i].substr(7);\n",
       "            var parts = code.split(\",\")\n",
       "            for (var j = 0; j < parts.length; ++j) {\n",
       "                code = parts[j].trim();\n",
       "                var m = code.match(r_simple);\n",
       "                if (!m) {\n",
       "                    m = code.match(r_alias);\n",
       "                }\n",
       "                if (m) {\n",
       "                    return m[1];\n",
       "                }\n",
       "            }\n",
       "        }\n",
       "    }\n",
       "    return module;\n",
       "}\n",
       "\n",
       "function colomoto_replace_call(cell, orig, dest, args, comment=false) {\n",
       "    var call_regexp = new RegExp(\"\\\\.\"+orig.replace(\".\",\"\\\\.\")+\"\\\\(\");\n",
       "    var call_replacer = new RegExp(\"\\\\.\"+orig.replace(\".\",\"\\\\.\")\n",
       "                    + \"\\\\(\\\\s*([^\\\\)]*)?\\\\)\");\n",
       "    var code = cell.get_text();\n",
       "    var lines = code.split(\"\\n\");\n",
       "    if (args) {\n",
       "        var strargs = \", \"+args.join(\", \");\n",
       "    } else {\n",
       "        var strargs = \"\";\n",
       "    }\n",
       "    for (var i = 0; i < lines.length; ++i) {\n",
       "        if (call_regexp.test(lines[i])) {\n",
       "            var code = \"\"\n",
       "            if (comment) {\n",
       "                code += \"#\"+lines[i]+\"\\n\";\n",
       "            }\n",
       "            code += lines[i].replace(call_replacer, \".\"+dest+\"($1\"+strargs+\")\")\n",
       "            lines[i] = code;\n",
       "        }\n",
       "    }\n",
       "    cell.set_text(code)\n",
       "}\n",
       "\n",
       "function colomoto_upload(Jupyter, ssid, input, py_callback_name, orig, dest) {\n",
       "\n",
       "    function callback(out_data) {\n",
       "        var cell_element = $(\"#\"+ssid).parents('.cell');\n",
       "        var cell_idx = Jupyter.notebook.get_cell_elements().index(cell_element);\n",
       "        var cell = Jupyter.notebook.get_cell(cell_idx);\n",
       "\n",
       "        var filename = out_data.content.text;\n",
       "\n",
       "        var code = cell.get_text();\n",
       "        code = code.replace(new RegExp(\"\\\\b\" + orig.replace('.', '\\\\.')\n",
       "                    + \"\\\\(\\\\s*((\\\\w+)=[^\\\\)]*)?\\\\)\"),\n",
       "                dest+\"(\\\"\"+filename+\"\\\",$1)\");\n",
       "        code = code.replace('\",)', '\")')\n",
       "        cell.set_text(code);\n",
       "\n",
       "        Jupyter.notebook.select(cell_idx);\n",
       "        Jupyter.notebook.execute_cell_and_select_below();\n",
       "    }\n",
       "\n",
       "    if (! (window.File && window.FileReader && window.FileList && window.Blob)) {\n",
       "        alert(\"Interactive file upload is not supported by your browser.\");\n",
       "        return;\n",
       "    }\n",
       "\n",
       "    input.disabled = true;\n",
       "    input.style.cursor = \"wait\";\n",
       "    input.parentElement.style.cursor = \"wait\";\n",
       "\n",
       "    var f = input.files[0];\n",
       "    var reader = new FileReader();\n",
       "    reader.onload = (function(f) {\n",
       "        return function (e) {\n",
       "            var obj = {\n",
       "                content: e.target.result,\n",
       "                name: f.name\n",
       "            };\n",
       "\n",
       "            //var pycb = py_callback_name+\"(\"+JSON.stringify(obj)+\")\"\n",
       "            // hack/workaround:\n",
       "            // it seems that Jupyter does not like very long lines\n",
       "            // so we split the data in chunks\n",
       "            var chunk_length = 100;\n",
       "            var pycb = \"__colomoto_upload_name = \" + JSON.stringify(obj.name) + \"\\n\";\n",
       "            pycb += \"__colomoto_upload_content = \\\\\\n\";\n",
       "            for (var i = 0; i < obj.content.length; i += chunk_length) {\n",
       "                pycb += \"\\\"\" + obj.content.substr(i, chunk_length)+\"\\\"\\\\\\n\"\n",
       "            }\n",
       "            pycb += \"\\n\"\n",
       "            pycb += py_callback_name+\"({'name':__colomoto_upload_name, 'content': __colomoto_upload_content})\\n\";\n",
       "            pycb += \"del __colomoto_upload_name, __colomoto_upload_content\"\n",
       "\n",
       "            IPython.notebook.kernel.execute(pycb, {iopub: {output: callback}});\n",
       "        };\n",
       "    })(f);\n",
       "    reader.readAsDataURL(f);\n",
       "}\n",
       "\n",
       "function resolve_function(tool_api, funcname) {\n",
       "    if (tool_api.hasOwnProperty(funcname)) {\n",
       "        return tool_api[funcname];\n",
       "    } else {\n",
       "        return window[funcname];\n",
       "    }\n",
       "}\n",
       "\n",
       "function colomoto_extension(Jupyter, ssid, name, menu, toolbar, tool_api) {\n",
       "\n",
       "    function insert_snippet_code(snippet) {\n",
       "        var cell = Jupyter.notebook.get_selected_cell();\n",
       "        Jupyter.notebook.edit_mode();\n",
       "        cell.code_mirror.replaceSelection(snippet, 'around');\n",
       "        //cell.focus_editor();\n",
       "    }\n",
       "\n",
       "    /**\n",
       "        from https://github.com/moble/jupyter_boilerplate/blob/master/main.js\n",
       "    */\n",
       "    function callback_insert_snippet (evt) {\n",
       "        // this (or event.currentTarget, see below) always refers to the DOM\n",
       "        // element the listener was attached to - see\n",
       "        // http://stackoverflow.com/questions/12077859\n",
       "        insert_snippet_code($(evt.currentTarget).data('snippet-code'));\n",
       "    }\n",
       "    function build_menu_element (menu_item_spec, direction) {\n",
       "        // Create the menu item html element\n",
       "        var element = $('<li/>');\n",
       "\n",
       "        if (typeof menu_item_spec == 'string') {\n",
       "            if (menu_item_spec != '---') {\n",
       "                return element.html(menu_item_spec)\n",
       "                        .addClass('ui-state-disabled')\n",
       "                        .attr({\"style\": \"padding:2px .4em\"})\n",
       "                       ;\n",
       "            }\n",
       "            return element.addClass('divider');\n",
       "        }\n",
       "\n",
       "        var a = $('<a/>')\n",
       "            .attr('href', '#')\n",
       "            .html(menu_item_spec.name)\n",
       "            .appendTo(element);\n",
       "        if (menu_item_spec.hasOwnProperty('snippet')) {\n",
       "            var snippet = menu_item_spec.snippet;\n",
       "            if (typeof snippet == 'string' || snippet instanceof String) {\n",
       "                snippet = [snippet];\n",
       "            }\n",
       "            a.attr({\n",
       "                'title' : \"\", // Do not remove this, even though it's empty!\n",
       "                'data-snippet-code' : snippet.join('\\n'),\n",
       "            })\n",
       "            .on('click', callback_insert_snippet)\n",
       "            .addClass('snippet');\n",
       "        }\n",
       "        else if (menu_item_spec.hasOwnProperty('internal-link')) {\n",
       "            a.attr('href', menu_item_spec['internal-link']);\n",
       "        }\n",
       "        else if (menu_item_spec.hasOwnProperty('external-link')) {\n",
       "            a.empty();\n",
       "            a.attr('href', menu_item_spec['external-link']);\n",
       "            a.attr({\n",
       "                'target' : '_blank',\n",
       "                'title' : 'Opens in a new window',\n",
       "            });\n",
       "            $('<i class=\"fa fa-external-link menu-icon pull-right\"/>').appendTo(a);\n",
       "            $('<span/>').html(menu_item_spec.name).appendTo(a);\n",
       "        }\n",
       "\n",
       "        if (menu_item_spec.hasOwnProperty('sub-menu')) {\n",
       "            element\n",
       "                .addClass('dropdown-submenu')\n",
       "                .toggleClass('dropdown-submenu-left', direction === 'left');\n",
       "            var sub_element = $('<ul class=\"dropdown-menu\"/>')\n",
       "                .toggleClass('dropdown-menu-compact', menu_item_spec.overlay === true) // For space-saving menus\n",
       "                .appendTo(element);\n",
       "\n",
       "            var new_direction = (menu_item_spec['sub-menu-direction'] === 'left') ? 'left' : 'right';\n",
       "            for (var j=0; j<menu_item_spec['sub-menu'].length; ++j) {\n",
       "                var sub_menu_item_spec = build_menu_element(menu_item_spec['sub-menu'][j], new_direction);\n",
       "                if(sub_menu_item_spec !== null) {\n",
       "                    sub_menu_item_spec.appendTo(sub_element);\n",
       "                }\n",
       "            }\n",
       "        }\n",
       "\n",
       "        return element;\n",
       "    }\n",
       "\n",
       "    function menu_setup (menu_item_specs, sibling, insert_before_sibling) {\n",
       "        for (var i=0; i<menu_item_specs.length; ++i) {\n",
       "            var menu_item_spec;\n",
       "            if (insert_before_sibling) {\n",
       "                menu_item_spec = menu_item_specs[i];\n",
       "            } else {\n",
       "                menu_item_spec = menu_item_specs[menu_item_specs.length-1-i];\n",
       "            }\n",
       "            var direction = (menu_item_spec['menu-direction'] == 'left') ? 'left' : 'right';\n",
       "            var menu_element = build_menu_element(menu_item_spec, direction);\n",
       "            // We need special properties if this item is in the navbar\n",
       "            if ($(sibling).parent().is('ul.nav.navbar-nav')) {\n",
       "                menu_element\n",
       "                    .addClass('dropdown')\n",
       "                    .removeClass('dropdown-submenu dropdown-submenu-left');\n",
       "                menu_element.children('a')\n",
       "                    .addClass('dropdown-toggle')\n",
       "                    .attr({\n",
       "                        'id': name+'_menu',\n",
       "                        'data-toggle' : 'dropdown',\n",
       "                        'aria-expanded' : 'false'\n",
       "                    });\n",
       "            }\n",
       "\n",
       "            // Insert the menu element into DOM\n",
       "            menu_element[insert_before_sibling ? 'insertBefore': 'insertAfter'](sibling);\n",
       "        }\n",
       "    }\n",
       "    /** end from */\n",
       "\n",
       "\n",
       "    function self_cleanup() {\n",
       "        var cell_element = $(\"script[class='to-be-removed']\").parents('.cell');\n",
       "        var cell_idx = Jupyter.notebook.get_cell_elements().index(cell_element);\n",
       "        var cell = Jupyter.notebook.get_cell(cell_idx);\n",
       "        var to_remove = -1;\n",
       "        for (var i = 0; i < cell.output_area.outputs.length; ++i) {\n",
       "            var oa = cell.output_area.outputs[i];\n",
       "            if (oa.output_type == \"display_data\"\n",
       "                && typeof oa.data[\"text/html\"] != 'undefined'\n",
       "                && oa.data[\"text/html\"].indexOf(' class=\"to-be-removed\"') >= 0) {\n",
       "                to_remove = i;\n",
       "                break;\n",
       "            }\n",
       "        }\n",
       "        if (to_remove == -1) {\n",
       "            console.log(\"cannot find toberemoved\");\n",
       "        } else {\n",
       "            cell.output_area.outputs.splice(to_remove, 1);\n",
       "        }\n",
       "    }\n",
       "\n",
       "    function toolbar_setup(actions) {\n",
       "        var buttons = [];\n",
       "        for (var i = 0; i < actions.length; ++i) {\n",
       "            var setup = actions[i].setup;\n",
       "            if (typeof setup.handler == 'string') {\n",
       "                setup.handler = resolve_function(tool_api, setup.handler);\n",
       "            }\n",
       "            buttons.push(Jupyter.actions.register(actions[i].setup,\n",
       "                actions[i].name, name));\n",
       "        }\n",
       "        $(\"#\"+name+\"-toolbar\").remove();\n",
       "        Jupyter.toolbar.add_buttons_group(buttons, name+\"-toolbar\");\n",
       "    }\n",
       "\n",
       "    function replace_menu_snippets(menu_spec, orig, dest) {\n",
       "        if (menu_spec.hasOwnProperty(\"snippet\")) {\n",
       "            var snippet = menu_spec.snippet;\n",
       "            if (typeof snippet == \"string\" || snippet instanceof String) {\n",
       "                menu_spec[\"snippet\"] = snippet.replace(orig, dest);\n",
       "            } else {\n",
       "                for (var i = 0; i < snippet.length; ++i) {\n",
       "                    menu_spec[\"snippet\"][i] = snippet[i].replace(orig, dest);\n",
       "                }\n",
       "            }\n",
       "        }\n",
       "        if (menu_spec.hasOwnProperty(\"sub-menu\")) {\n",
       "            for (var i = 0; i < menu_spec[\"sub-menu\"].length; ++i) {\n",
       "                replace_menu_snippets(menu_spec[\"sub-menu\"][i], orig, dest);\n",
       "            }\n",
       "        }\n",
       "    }\n",
       "\n",
       "    function load_ipython_extension() {\n",
       "\n",
       "        var mycellelt = $(\"#\"+ssid).parents('.cell');\n",
       "        var myidx = Jupyter.notebook.get_cell_elements().index(mycellelt);\n",
       "        var import_cell = Jupyter.notebook.get_cell(myidx);\n",
       "\n",
       "        var alias = detect_import(import_cell, name);\n",
       "        tool_api.module_alias = alias;\n",
       "        if (alias && alias != name) {\n",
       "            var orig = new RegExp(\"\\\\b\"+name+\"\\\\b\", \"g\");\n",
       "            replace_menu_snippets(menu, orig, alias);\n",
       "        }\n",
       "\n",
       "        if (toolbar) {\n",
       "            toolbar_setup(toolbar);\n",
       "        }\n",
       "\n",
       "        $(\"#\"+name+\"_menu\").parent().remove();\n",
       "        if (menu) {\n",
       "            menu_setup([menu], $(\"#help_menu\").parent(), true);\n",
       "        }\n",
       "\n",
       "        if (tool_api.hasOwnProperty(\"post_install_callback\")) {\n",
       "            tool_api.post_install_callback();\n",
       "        }\n",
       "\n",
       "        setTimeout(self_cleanup, 5000);\n",
       "    };\n",
       "\n",
       "    load_ipython_extension();\n",
       "}\n",
       "\n",
       "function resolve_toolbar_handlers(tool_api, toolbar_spec) {\n",
       "    for (var i = 0; i < toolbar_spec.length; ++i) {\n",
       "        func = resolve_function(tool_api, toolbar_spec[i][\"setup\"][\"handler\"]);\n",
       "        toolbar_spec[i][\"setup\"][\"handler\"] = func;\n",
       "    }\n",
       "    return toolbar_spec\n",
       "}\n",
       "\n",
       "\n",
       "    var minibn_jsapi = {  };\n",
       "    colomoto_extension(Jupyter, \"colomoto-setup-5747392\", \"minibn\", null, null, minibn_jsapi);\n",
       "     }</script>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import random\n",
    "from colomoto.minibn import * # for Boolean network manipulation\n",
    "from scboolseq import scBoolSeq\n",
    "\n",
    "# set seed for reproducible results\n",
    "_rng_seed = 19834650\n",
    "# use a Generator instead of numpy's singleton\n",
    "_rng = np.random.default_rng(_rng_seed)\n",
    "random.seed(_rng_seed)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a4b051c2",
   "metadata": {},
   "source": [
    "## Load Boolean network model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "4a9e3bae",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "gene1 <- tf\n",
       "gene10 <- tf\n",
       "gene2 <- tf\n",
       "gene3 <- tf\n",
       "gene4 <- tf\n",
       "gene5 <- tf\n",
       "gene6 <- tf\n",
       "gene7 <- tf\n",
       "gene8 <- tf\n",
       "gene9 <- tf\n",
       "tf <- 1"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "bn = BooleanNetwork.load(\"models/star.bnet\")\n",
    "bn"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "09653812",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "# computing graph layout...\n"
     ]
    },
    {
     "data": {
      "image/svg+xml": [
       "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n",
       "<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n",
       " \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n",
       "<!-- Generated by graphviz version 3.0.0 (20220315.2325)\n",
       " -->\n",
       "<!-- Pages: 1 -->\n",
       "<svg width=\"962pt\" height=\"131pt\"\n",
       " viewBox=\"0.00 0.00 961.84 131.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n",
       "<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 127)\">\n",
       "<polygon fill=\"white\" stroke=\"transparent\" points=\"-4,4 -4,-127 957.84,-127 957.84,4 -4,4\"/>\n",
       "<!-- tf -->\n",
       "<g id=\"node1\" class=\"node\">\n",
       "<title>tf</title>\n",
       "<ellipse fill=\"none\" stroke=\"black\" cx=\"471\" cy=\"-105\" rx=\"27\" ry=\"18\"/>\n",
       "<text text-anchor=\"middle\" x=\"471\" y=\"-101.3\" font-family=\"Times,serif\" font-size=\"14.00\">tf</text>\n",
       "</g>\n",
       "<!-- gene1 -->\n",
       "<g id=\"node2\" class=\"node\">\n",
       "<title>gene1</title>\n",
       "<ellipse fill=\"none\" stroke=\"black\" cx=\"39\" cy=\"-18\" rx=\"38.99\" ry=\"18\"/>\n",
       "<text text-anchor=\"middle\" x=\"39\" y=\"-14.3\" font-family=\"Times,serif\" font-size=\"14.00\">gene1</text>\n",
       "</g>\n",
       "<!-- tf&#45;&gt;gene1 -->\n",
       "<g id=\"edge3\" class=\"edge\">\n",
       "<title>tf&#45;&gt;gene1</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M444.5,-101.22C381.56,-94.2 218.49,-73.59 87,-36 84.03,-35.15 80.98,-34.22 77.93,-33.24\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"78.85,-29.86 68.26,-30.01 76.63,-36.5 78.85,-29.86\"/>\n",
       "<text text-anchor=\"middle\" x=\"229.5\" y=\"-57.8\" font-family=\"Times,serif\" font-size=\"14.00\">+</text>\n",
       "</g>\n",
       "<!-- gene2 -->\n",
       "<g id=\"node3\" class=\"node\">\n",
       "<title>gene2</title>\n",
       "<ellipse fill=\"none\" stroke=\"black\" cx=\"135\" cy=\"-18\" rx=\"38.99\" ry=\"18\"/>\n",
       "<text text-anchor=\"middle\" x=\"135\" y=\"-14.3\" font-family=\"Times,serif\" font-size=\"14.00\">gene2</text>\n",
       "</g>\n",
       "<!-- tf&#45;&gt;gene2 -->\n",
       "<g id=\"edge6\" class=\"edge\">\n",
       "<title>tf&#45;&gt;gene2</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M445.31,-98.95C394.67,-88.78 278.55,-64.35 183,-36 180.04,-35.12 177,-34.17 173.95,-33.18\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"174.88,-29.8 164.28,-29.92 172.64,-36.43 174.88,-29.8\"/>\n",
       "<text text-anchor=\"middle\" x=\"307.5\" y=\"-57.8\" font-family=\"Times,serif\" font-size=\"14.00\">+</text>\n",
       "</g>\n",
       "<!-- gene3 -->\n",
       "<g id=\"node4\" class=\"node\">\n",
       "<title>gene3</title>\n",
       "<ellipse fill=\"none\" stroke=\"black\" cx=\"231\" cy=\"-18\" rx=\"38.99\" ry=\"18\"/>\n",
       "<text text-anchor=\"middle\" x=\"231\" y=\"-14.3\" font-family=\"Times,serif\" font-size=\"14.00\">gene3</text>\n",
       "</g>\n",
       "<!-- tf&#45;&gt;gene3 -->\n",
       "<g id=\"edge5\" class=\"edge\">\n",
       "<title>tf&#45;&gt;gene3</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M447.55,-95.7C406.41,-81.13 320.84,-50.82 270.46,-32.98\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"271.47,-29.62 260.87,-29.58 269.13,-36.22 271.47,-29.62\"/>\n",
       "<text text-anchor=\"middle\" x=\"372.5\" y=\"-57.8\" font-family=\"Times,serif\" font-size=\"14.00\">+</text>\n",
       "</g>\n",
       "<!-- gene4 -->\n",
       "<g id=\"node5\" class=\"node\">\n",
       "<title>gene4</title>\n",
       "<ellipse fill=\"none\" stroke=\"black\" cx=\"327\" cy=\"-18\" rx=\"38.99\" ry=\"18\"/>\n",
       "<text text-anchor=\"middle\" x=\"327\" y=\"-14.3\" font-family=\"Times,serif\" font-size=\"14.00\">gene4</text>\n",
       "</g>\n",
       "<!-- tf&#45;&gt;gene4 -->\n",
       "<g id=\"edge1\" class=\"edge\">\n",
       "<title>tf&#45;&gt;gene4</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M451.28,-92.36C427.51,-78.33 387.24,-54.56 359,-37.89\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"360.68,-34.82 350.29,-32.75 357.12,-40.85 360.68,-34.82\"/>\n",
       "<text text-anchor=\"middle\" x=\"414.5\" y=\"-57.8\" font-family=\"Times,serif\" font-size=\"14.00\">+</text>\n",
       "</g>\n",
       "<!-- gene5 -->\n",
       "<g id=\"node6\" class=\"node\">\n",
       "<title>gene5</title>\n",
       "<ellipse fill=\"none\" stroke=\"black\" cx=\"423\" cy=\"-18\" rx=\"38.99\" ry=\"18\"/>\n",
       "<text text-anchor=\"middle\" x=\"423\" y=\"-14.3\" font-family=\"Times,serif\" font-size=\"14.00\">gene5</text>\n",
       "</g>\n",
       "<!-- tf&#45;&gt;gene5 -->\n",
       "<g id=\"edge8\" class=\"edge\">\n",
       "<title>tf&#45;&gt;gene5</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M461.97,-88.01C455.02,-75.7 445.28,-58.46 437.27,-44.28\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"440.28,-42.48 432.31,-35.5 434.18,-45.92 440.28,-42.48\"/>\n",
       "<text text-anchor=\"middle\" x=\"456.5\" y=\"-57.8\" font-family=\"Times,serif\" font-size=\"14.00\">+</text>\n",
       "</g>\n",
       "<!-- gene6 -->\n",
       "<g id=\"node7\" class=\"node\">\n",
       "<title>gene6</title>\n",
       "<ellipse fill=\"none\" stroke=\"black\" cx=\"519\" cy=\"-18\" rx=\"38.99\" ry=\"18\"/>\n",
       "<text text-anchor=\"middle\" x=\"519\" y=\"-14.3\" font-family=\"Times,serif\" font-size=\"14.00\">gene6</text>\n",
       "</g>\n",
       "<!-- tf&#45;&gt;gene6 -->\n",
       "<g id=\"edge7\" class=\"edge\">\n",
       "<title>tf&#45;&gt;gene6</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M480.02,-88.01C486.98,-75.7 496.72,-58.46 504.72,-44.28\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"507.81,-45.92 509.68,-35.5 501.72,-42.48 507.81,-45.92\"/>\n",
       "<text text-anchor=\"middle\" x=\"504.5\" y=\"-57.8\" font-family=\"Times,serif\" font-size=\"14.00\">+</text>\n",
       "</g>\n",
       "<!-- gene7 -->\n",
       "<g id=\"node8\" class=\"node\">\n",
       "<title>gene7</title>\n",
       "<ellipse fill=\"none\" stroke=\"black\" cx=\"615\" cy=\"-18\" rx=\"38.99\" ry=\"18\"/>\n",
       "<text text-anchor=\"middle\" x=\"615\" y=\"-14.3\" font-family=\"Times,serif\" font-size=\"14.00\">gene7</text>\n",
       "</g>\n",
       "<!-- tf&#45;&gt;gene7 -->\n",
       "<g id=\"edge10\" class=\"edge\">\n",
       "<title>tf&#45;&gt;gene7</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M490.72,-92.36C514.49,-78.33 554.75,-54.56 582.99,-37.89\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"584.87,-40.85 591.7,-32.75 581.31,-34.82 584.87,-40.85\"/>\n",
       "<text text-anchor=\"middle\" x=\"558.5\" y=\"-57.8\" font-family=\"Times,serif\" font-size=\"14.00\">+</text>\n",
       "</g>\n",
       "<!-- gene8 -->\n",
       "<g id=\"node9\" class=\"node\">\n",
       "<title>gene8</title>\n",
       "<ellipse fill=\"none\" stroke=\"black\" cx=\"711\" cy=\"-18\" rx=\"38.99\" ry=\"18\"/>\n",
       "<text text-anchor=\"middle\" x=\"711\" y=\"-14.3\" font-family=\"Times,serif\" font-size=\"14.00\">gene8</text>\n",
       "</g>\n",
       "<!-- tf&#45;&gt;gene8 -->\n",
       "<g id=\"edge9\" class=\"edge\">\n",
       "<title>tf&#45;&gt;gene8</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M494.45,-95.7C535.58,-81.13 621.15,-50.82 671.54,-32.98\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"672.86,-36.22 681.12,-29.58 670.52,-29.62 672.86,-36.22\"/>\n",
       "<text text-anchor=\"middle\" x=\"612.5\" y=\"-57.8\" font-family=\"Times,serif\" font-size=\"14.00\">+</text>\n",
       "</g>\n",
       "<!-- gene9 -->\n",
       "<g id=\"node10\" class=\"node\">\n",
       "<title>gene9</title>\n",
       "<ellipse fill=\"none\" stroke=\"black\" cx=\"807\" cy=\"-18\" rx=\"38.99\" ry=\"18\"/>\n",
       "<text text-anchor=\"middle\" x=\"807\" y=\"-14.3\" font-family=\"Times,serif\" font-size=\"14.00\">gene9</text>\n",
       "</g>\n",
       "<!-- tf&#45;&gt;gene9 -->\n",
       "<g id=\"edge2\" class=\"edge\">\n",
       "<title>tf&#45;&gt;gene9</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M496.68,-98.97C547.33,-88.82 663.47,-64.42 759,-36 761.95,-35.12 764.99,-34.17 768.04,-33.17\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"769.35,-36.42 777.71,-29.91 767.11,-29.79 769.35,-36.42\"/>\n",
       "<text text-anchor=\"middle\" x=\"695.5\" y=\"-57.8\" font-family=\"Times,serif\" font-size=\"14.00\">+</text>\n",
       "</g>\n",
       "<!-- gene10 -->\n",
       "<g id=\"node11\" class=\"node\">\n",
       "<title>gene10</title>\n",
       "<ellipse fill=\"none\" stroke=\"black\" cx=\"909\" cy=\"-18\" rx=\"44.69\" ry=\"18\"/>\n",
       "<text text-anchor=\"middle\" x=\"909\" y=\"-14.3\" font-family=\"Times,serif\" font-size=\"14.00\">gene10</text>\n",
       "</g>\n",
       "<!-- tf&#45;&gt;gene10 -->\n",
       "<g id=\"edge4\" class=\"edge\">\n",
       "<title>tf&#45;&gt;gene10</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M497.41,-100.92C560.18,-93.25 722.88,-71.35 855,-36 858.53,-35.06 862.17,-34.03 865.82,-32.96\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"866.86,-36.3 875.43,-30.07 864.84,-29.6 866.86,-36.3\"/>\n",
       "<text text-anchor=\"middle\" x=\"778.5\" y=\"-57.8\" font-family=\"Times,serif\" font-size=\"14.00\">+</text>\n",
       "</g>\n",
       "</g>\n",
       "</svg>\n"
      ],
      "text/plain": [
       "<networkx.classes.multidigraph.MultiDiGraph at 0x7f2e843b44c0>"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "bn.influence_graph()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "77abe8f3",
   "metadata": {},
   "source": [
    "## Simulation with random walk\n",
    "\n",
    "With the asynchronous update mode, the genes can be activated in any order. Here, we randomly sample one trajectory of this model, which essentially boils down to selecting a random order in which the genes are activated.\n",
    "\n",
    "Let us first specify the initial state of the network:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "ee4a6403",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'tf': 1,\n",
       " 'gene1': 0,\n",
       " 'gene2': 0,\n",
       " 'gene3': 0,\n",
       " 'gene4': 0,\n",
       " 'gene5': 0,\n",
       " 'gene6': 0,\n",
       " 'gene7': 0,\n",
       " 'gene8': 0,\n",
       " 'gene9': 0,\n",
       " 'gene10': 0}"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "initial_state = bn.zero()\n",
    "initial_state[\"tf\"] = 1\n",
    "initial_state"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "60cedafe",
   "metadata": {},
   "source": [
    "Then, we use `minibn` to generate a random walk in the asynchronous dynamics of the Boolean network from the given initial state:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "05646298",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>tf</th>\n",
       "      <th>gene1</th>\n",
       "      <th>gene2</th>\n",
       "      <th>gene3</th>\n",
       "      <th>gene4</th>\n",
       "      <th>gene5</th>\n",
       "      <th>gene6</th>\n",
       "      <th>gene7</th>\n",
       "      <th>gene8</th>\n",
       "      <th>gene9</th>\n",
       "      <th>gene10</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    tf  gene1  gene2  gene3  gene4  gene5  gene6  gene7  gene8  gene9  gene10\n",
       "0    1      0      0      0      0      0      0      0      0      0       0\n",
       "1    1      0      0      0      0      0      0      0      1      0       0\n",
       "2    1      1      0      0      0      0      0      0      1      0       0\n",
       "3    1      1      0      1      0      0      0      0      1      0       0\n",
       "4    1      1      0      1      0      0      1      0      1      0       0\n",
       "5    1      1      0      1      0      0      1      0      1      1       0\n",
       "6    1      1      0      1      0      0      1      0      1      1       1\n",
       "7    1      1      0      1      1      0      1      0      1      1       1\n",
       "8    1      1      1      1      1      0      1      0      1      1       1\n",
       "9    1      1      1      1      1      0      1      1      1      1       1\n",
       "10   1      1      1      1      1      1      1      1      1      1       1"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dynamics = FullyAsynchronousDynamics(bn)\n",
    "random_walk_df = pd.DataFrame(dynamics.random_walk(initial_state, steps=10))\n",
    "random_walk_df"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a835af87",
   "metadata": {},
   "source": [
    "## Retrieve statistics of real expression datasets\n",
    "\n",
    "In order to generate synthetic RNA counts, scBoolSeq relies on statistical criteria learnt from real scRNA-Seq datasets. Then, the nodes of the Boolean model used to generate the Boolean states need to be associated with real gene names: scBoolSeq will then generate RNA counts from the corresponding distribution, biased by the Boolean state of the gene.\n",
    "\n",
    "In this example, we re-use the simulation criteria learnt from the Nestorowa dataset in the [1 - Binarization and synthetic data generation](1%20-%20Binarization%20and%20synthetic%20data%20generation.ipynb) notebook:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "7e1f185f",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Dip</th>\n",
       "      <th>BI</th>\n",
       "      <th>Kurtosis</th>\n",
       "      <th>DropOutRate</th>\n",
       "      <th>MeanNZ</th>\n",
       "      <th>DenPeak</th>\n",
       "      <th>Amplitude</th>\n",
       "      <th>gaussian_prob1</th>\n",
       "      <th>gaussian_prob2</th>\n",
       "      <th>gaussian_mean1</th>\n",
       "      <th>gaussian_mean2</th>\n",
       "      <th>gaussian_variance</th>\n",
       "      <th>mean</th>\n",
       "      <th>variance</th>\n",
       "      <th>unimodal_margin_quantile</th>\n",
       "      <th>unimodal_low_quantile</th>\n",
       "      <th>unimodal_high_quantile</th>\n",
       "      <th>IQR</th>\n",
       "      <th>q50</th>\n",
       "      <th>bim_thresh_down</th>\n",
       "      <th>bim_thresh_up</th>\n",
       "      <th>Category</th>\n",
       "      <th>dor_threshold</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Clec1b</th>\n",
       "      <td>9.948487e-01</td>\n",
       "      <td>1.635698</td>\n",
       "      <td>6.166711</td>\n",
       "      <td>0.876208</td>\n",
       "      <td>1.520978</td>\n",
       "      <td>-0.007249</td>\n",
       "      <td>8.852181</td>\n",
       "      <td>0.986140</td>\n",
       "      <td>0.013860</td>\n",
       "      <td>0.111291</td>\n",
       "      <td>5.666490</td>\n",
       "      <td>0.157649</td>\n",
       "      <td>1.520978</td>\n",
       "      <td>2.666760</td>\n",
       "      <td>0.25</td>\n",
       "      <td>0.667271</td>\n",
       "      <td>1.555290</td>\n",
       "      <td>0.888020</td>\n",
       "      <td>0.968776</td>\n",
       "      <td>2.785740</td>\n",
       "      <td>3.094168</td>\n",
       "      <td>Unimodal</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Kdm3a</th>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>2.407548</td>\n",
       "      <td>-0.784019</td>\n",
       "      <td>0.326087</td>\n",
       "      <td>3.847940</td>\n",
       "      <td>0.209239</td>\n",
       "      <td>10.126676</td>\n",
       "      <td>0.714520</td>\n",
       "      <td>0.285480</td>\n",
       "      <td>0.872643</td>\n",
       "      <td>6.899449</td>\n",
       "      <td>1.278247</td>\n",
       "      <td>2.593177</td>\n",
       "      <td>8.692586</td>\n",
       "      <td>0.25</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>5.258984</td>\n",
       "      <td>5.258984</td>\n",
       "      <td>1.268040</td>\n",
       "      <td>3.432251</td>\n",
       "      <td>4.748643</td>\n",
       "      <td>Bimodal</td>\n",
       "      <td>0.95</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Coro2b</th>\n",
       "      <td>4.684039e-03</td>\n",
       "      <td>2.320060</td>\n",
       "      <td>0.327060</td>\n",
       "      <td>0.658213</td>\n",
       "      <td>2.383819</td>\n",
       "      <td>0.004597</td>\n",
       "      <td>9.475577</td>\n",
       "      <td>0.919508</td>\n",
       "      <td>0.080492</td>\n",
       "      <td>0.335546</td>\n",
       "      <td>6.289079</td>\n",
       "      <td>0.487372</td>\n",
       "      <td>2.383819</td>\n",
       "      <td>5.370521</td>\n",
       "      <td>0.25</td>\n",
       "      <td>0.827740</td>\n",
       "      <td>2.912944</td>\n",
       "      <td>2.085205</td>\n",
       "      <td>1.290666</td>\n",
       "      <td>3.183596</td>\n",
       "      <td>3.879537</td>\n",
       "      <td>Bimodal</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8430408G22Rik</th>\n",
       "      <td>7.236739e-08</td>\n",
       "      <td>3.121069</td>\n",
       "      <td>-0.993979</td>\n",
       "      <td>0.884058</td>\n",
       "      <td>2.983472</td>\n",
       "      <td>0.005663</td>\n",
       "      <td>9.067857</td>\n",
       "      <td>0.964962</td>\n",
       "      <td>0.035038</td>\n",
       "      <td>0.098898</td>\n",
       "      <td>7.148808</td>\n",
       "      <td>0.172506</td>\n",
       "      <td>2.983472</td>\n",
       "      <td>8.154647</td>\n",
       "      <td>0.25</td>\n",
       "      <td>0.825298</td>\n",
       "      <td>6.465074</td>\n",
       "      <td>5.639776</td>\n",
       "      <td>1.449779</td>\n",
       "      <td>3.612061</td>\n",
       "      <td>4.175572</td>\n",
       "      <td>Bimodal</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Clec9a</th>\n",
       "      <td>1.000000e+00</td>\n",
       "      <td>2.081717</td>\n",
       "      <td>140.089285</td>\n",
       "      <td>0.965580</td>\n",
       "      <td>2.280293</td>\n",
       "      <td>-0.009361</td>\n",
       "      <td>9.614233</td>\n",
       "      <td>0.993961</td>\n",
       "      <td>0.006039</td>\n",
       "      <td>0.035599</td>\n",
       "      <td>7.138099</td>\n",
       "      <td>0.069870</td>\n",
       "      <td>0.078488</td>\n",
       "      <td>0.372878</td>\n",
       "      <td>0.25</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>3.113410</td>\n",
       "      <td>4.607253</td>\n",
       "      <td>Discarded</td>\n",
       "      <td>0.95</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                        Dip        BI    Kurtosis  DropOutRate    MeanNZ  \\\n",
       "Clec1b         9.948487e-01  1.635698    6.166711     0.876208  1.520978   \n",
       "Kdm3a          0.000000e+00  2.407548   -0.784019     0.326087  3.847940   \n",
       "Coro2b         4.684039e-03  2.320060    0.327060     0.658213  2.383819   \n",
       "8430408G22Rik  7.236739e-08  3.121069   -0.993979     0.884058  2.983472   \n",
       "Clec9a         1.000000e+00  2.081717  140.089285     0.965580  2.280293   \n",
       "\n",
       "                DenPeak  Amplitude  gaussian_prob1  gaussian_prob2  \\\n",
       "Clec1b        -0.007249   8.852181        0.986140        0.013860   \n",
       "Kdm3a          0.209239  10.126676        0.714520        0.285480   \n",
       "Coro2b         0.004597   9.475577        0.919508        0.080492   \n",
       "8430408G22Rik  0.005663   9.067857        0.964962        0.035038   \n",
       "Clec9a        -0.009361   9.614233        0.993961        0.006039   \n",
       "\n",
       "               gaussian_mean1  gaussian_mean2  gaussian_variance      mean  \\\n",
       "Clec1b               0.111291        5.666490           0.157649  1.520978   \n",
       "Kdm3a                0.872643        6.899449           1.278247  2.593177   \n",
       "Coro2b               0.335546        6.289079           0.487372  2.383819   \n",
       "8430408G22Rik        0.098898        7.148808           0.172506  2.983472   \n",
       "Clec9a               0.035599        7.138099           0.069870  0.078488   \n",
       "\n",
       "               variance  unimodal_margin_quantile  unimodal_low_quantile  \\\n",
       "Clec1b         2.666760                      0.25               0.667271   \n",
       "Kdm3a          8.692586                      0.25               0.000000   \n",
       "Coro2b         5.370521                      0.25               0.827740   \n",
       "8430408G22Rik  8.154647                      0.25               0.825298   \n",
       "Clec9a         0.372878                      0.25               0.000000   \n",
       "\n",
       "               unimodal_high_quantile       IQR       q50  bim_thresh_down  \\\n",
       "Clec1b                       1.555290  0.888020  0.968776         2.785740   \n",
       "Kdm3a                        5.258984  5.258984  1.268040         3.432251   \n",
       "Coro2b                       2.912944  2.085205  1.290666         3.183596   \n",
       "8430408G22Rik                6.465074  5.639776  1.449779         3.612061   \n",
       "Clec9a                       0.000000  0.000000  0.000000         3.113410   \n",
       "\n",
       "               bim_thresh_up   Category  dor_threshold  \n",
       "Clec1b              3.094168   Unimodal            NaN  \n",
       "Kdm3a               4.748643    Bimodal           0.95  \n",
       "Coro2b              3.879537    Bimodal            NaN  \n",
       "8430408G22Rik       4.175572    Bimodal            NaN  \n",
       "Clec9a              4.607253  Discarded           0.95  "
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "criteria = pd.read_csv(\"cache_scBoolSeq_Nestorowa_simulation_criteria.csv\", index_col=0)\n",
    "criteria.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "c0d064c8",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Bimodal      2987\n",
       "Unimodal     1580\n",
       "Discarded     201\n",
       "Name: Category, dtype: int64"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "criteria.Category.value_counts()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "8027f897",
   "metadata": {},
   "source": [
    "We randomly select bimodal genes for each of the nodes of the Boolean model to obtain simulation criteria:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "9e9b9663",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Dip</th>\n",
       "      <th>BI</th>\n",
       "      <th>Kurtosis</th>\n",
       "      <th>DropOutRate</th>\n",
       "      <th>MeanNZ</th>\n",
       "      <th>DenPeak</th>\n",
       "      <th>Amplitude</th>\n",
       "      <th>gaussian_prob1</th>\n",
       "      <th>gaussian_prob2</th>\n",
       "      <th>gaussian_mean1</th>\n",
       "      <th>gaussian_mean2</th>\n",
       "      <th>gaussian_variance</th>\n",
       "      <th>mean</th>\n",
       "      <th>variance</th>\n",
       "      <th>unimodal_margin_quantile</th>\n",
       "      <th>unimodal_low_quantile</th>\n",
       "      <th>unimodal_high_quantile</th>\n",
       "      <th>IQR</th>\n",
       "      <th>q50</th>\n",
       "      <th>bim_thresh_down</th>\n",
       "      <th>bim_thresh_up</th>\n",
       "      <th>Category</th>\n",
       "      <th>dor_threshold</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>tf</th>\n",
       "      <td>0.000250</td>\n",
       "      <td>1.982430</td>\n",
       "      <td>-1.118390</td>\n",
       "      <td>0.027778</td>\n",
       "      <td>4.454848</td>\n",
       "      <td>2.231509</td>\n",
       "      <td>10.458465</td>\n",
       "      <td>0.634321</td>\n",
       "      <td>0.365679</td>\n",
       "      <td>2.449688</td>\n",
       "      <td>7.594680</td>\n",
       "      <td>1.562365</td>\n",
       "      <td>4.331102</td>\n",
       "      <td>7.707162</td>\n",
       "      <td>0.25</td>\n",
       "      <td>2.070391</td>\n",
       "      <td>6.920502</td>\n",
       "      <td>4.850112</td>\n",
       "      <td>3.321480</td>\n",
       "      <td>4.277351</td>\n",
       "      <td>6.081385</td>\n",
       "      <td>Bimodal</td>\n",
       "      <td>0.95</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>gene1</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>2.171544</td>\n",
       "      <td>-1.148314</td>\n",
       "      <td>0.021135</td>\n",
       "      <td>6.223047</td>\n",
       "      <td>8.243204</td>\n",
       "      <td>11.193184</td>\n",
       "      <td>0.363906</td>\n",
       "      <td>0.636094</td>\n",
       "      <td>2.579439</td>\n",
       "      <td>8.100760</td>\n",
       "      <td>1.496439</td>\n",
       "      <td>6.091521</td>\n",
       "      <td>8.558219</td>\n",
       "      <td>0.25</td>\n",
       "      <td>3.183970</td>\n",
       "      <td>8.483865</td>\n",
       "      <td>5.299895</td>\n",
       "      <td>7.282884</td>\n",
       "      <td>4.384796</td>\n",
       "      <td>5.990926</td>\n",
       "      <td>Bimodal</td>\n",
       "      <td>0.95</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>gene2</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>1.957530</td>\n",
       "      <td>-1.273249</td>\n",
       "      <td>0.044082</td>\n",
       "      <td>5.641087</td>\n",
       "      <td>8.029818</td>\n",
       "      <td>10.794851</td>\n",
       "      <td>0.409065</td>\n",
       "      <td>0.590935</td>\n",
       "      <td>2.258246</td>\n",
       "      <td>7.561991</td>\n",
       "      <td>1.774518</td>\n",
       "      <td>5.392416</td>\n",
       "      <td>8.579517</td>\n",
       "      <td>0.25</td>\n",
       "      <td>2.585510</td>\n",
       "      <td>7.994901</td>\n",
       "      <td>5.409391</td>\n",
       "      <td>6.129123</td>\n",
       "      <td>3.802762</td>\n",
       "      <td>5.780924</td>\n",
       "      <td>Bimodal</td>\n",
       "      <td>0.95</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>gene3</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>2.123630</td>\n",
       "      <td>-1.315039</td>\n",
       "      <td>0.036836</td>\n",
       "      <td>5.724870</td>\n",
       "      <td>8.076334</td>\n",
       "      <td>10.811316</td>\n",
       "      <td>0.400338</td>\n",
       "      <td>0.599662</td>\n",
       "      <td>2.237126</td>\n",
       "      <td>7.701644</td>\n",
       "      <td>1.589570</td>\n",
       "      <td>5.513990</td>\n",
       "      <td>8.763508</td>\n",
       "      <td>0.25</td>\n",
       "      <td>2.536712</td>\n",
       "      <td>8.106769</td>\n",
       "      <td>5.570057</td>\n",
       "      <td>6.452676</td>\n",
       "      <td>3.963904</td>\n",
       "      <td>5.713590</td>\n",
       "      <td>Bimodal</td>\n",
       "      <td>0.95</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>gene4</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>2.161170</td>\n",
       "      <td>-1.344687</td>\n",
       "      <td>0.018116</td>\n",
       "      <td>6.559400</td>\n",
       "      <td>9.543728</td>\n",
       "      <td>12.260558</td>\n",
       "      <td>0.403231</td>\n",
       "      <td>0.596769</td>\n",
       "      <td>2.734372</td>\n",
       "      <td>8.944811</td>\n",
       "      <td>1.987133</td>\n",
       "      <td>6.440570</td>\n",
       "      <td>11.275156</td>\n",
       "      <td>0.25</td>\n",
       "      <td>3.109483</td>\n",
       "      <td>9.439844</td>\n",
       "      <td>6.330361</td>\n",
       "      <td>7.471017</td>\n",
       "      <td>4.771986</td>\n",
       "      <td>6.660780</td>\n",
       "      <td>Bimodal</td>\n",
       "      <td>0.95</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>gene5</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>2.167416</td>\n",
       "      <td>-1.386505</td>\n",
       "      <td>0.038647</td>\n",
       "      <td>5.661639</td>\n",
       "      <td>8.154579</td>\n",
       "      <td>10.498523</td>\n",
       "      <td>0.436231</td>\n",
       "      <td>0.563769</td>\n",
       "      <td>2.322133</td>\n",
       "      <td>7.857554</td>\n",
       "      <td>1.604112</td>\n",
       "      <td>5.442832</td>\n",
       "      <td>9.145257</td>\n",
       "      <td>0.25</td>\n",
       "      <td>2.480269</td>\n",
       "      <td>8.141719</td>\n",
       "      <td>5.661451</td>\n",
       "      <td>6.272476</td>\n",
       "      <td>4.156733</td>\n",
       "      <td>5.870370</td>\n",
       "      <td>Bimodal</td>\n",
       "      <td>0.95</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>gene6</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>2.355317</td>\n",
       "      <td>-1.505761</td>\n",
       "      <td>0.034420</td>\n",
       "      <td>5.745117</td>\n",
       "      <td>8.722360</td>\n",
       "      <td>11.401288</td>\n",
       "      <td>0.433551</td>\n",
       "      <td>0.566449</td>\n",
       "      <td>2.015794</td>\n",
       "      <td>8.250379</td>\n",
       "      <td>1.720748</td>\n",
       "      <td>5.547368</td>\n",
       "      <td>11.273438</td>\n",
       "      <td>0.25</td>\n",
       "      <td>2.068712</td>\n",
       "      <td>8.581835</td>\n",
       "      <td>6.513123</td>\n",
       "      <td>6.533317</td>\n",
       "      <td>4.226884</td>\n",
       "      <td>5.879037</td>\n",
       "      <td>Bimodal</td>\n",
       "      <td>0.95</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>gene7</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>2.307954</td>\n",
       "      <td>-0.861903</td>\n",
       "      <td>0.005435</td>\n",
       "      <td>6.800339</td>\n",
       "      <td>8.655035</td>\n",
       "      <td>10.931065</td>\n",
       "      <td>0.303185</td>\n",
       "      <td>0.696815</td>\n",
       "      <td>2.875043</td>\n",
       "      <td>8.455204</td>\n",
       "      <td>1.234993</td>\n",
       "      <td>6.763381</td>\n",
       "      <td>7.818094</td>\n",
       "      <td>0.25</td>\n",
       "      <td>4.108445</td>\n",
       "      <td>8.919568</td>\n",
       "      <td>4.811123</td>\n",
       "      <td>7.855844</td>\n",
       "      <td>4.817005</td>\n",
       "      <td>6.135923</td>\n",
       "      <td>Bimodal</td>\n",
       "      <td>0.95</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>gene8</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>2.296122</td>\n",
       "      <td>-1.483351</td>\n",
       "      <td>0.043478</td>\n",
       "      <td>5.821394</td>\n",
       "      <td>8.961177</td>\n",
       "      <td>11.437198</td>\n",
       "      <td>0.489454</td>\n",
       "      <td>0.510546</td>\n",
       "      <td>2.422383</td>\n",
       "      <td>8.584236</td>\n",
       "      <td>1.799614</td>\n",
       "      <td>5.568290</td>\n",
       "      <td>11.294318</td>\n",
       "      <td>0.25</td>\n",
       "      <td>2.438392</td>\n",
       "      <td>8.763361</td>\n",
       "      <td>6.324968</td>\n",
       "      <td>5.836308</td>\n",
       "      <td>4.631891</td>\n",
       "      <td>6.354470</td>\n",
       "      <td>Bimodal</td>\n",
       "      <td>0.95</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>gene9</th>\n",
       "      <td>0.001038</td>\n",
       "      <td>1.593895</td>\n",
       "      <td>0.246388</td>\n",
       "      <td>0.035628</td>\n",
       "      <td>4.116075</td>\n",
       "      <td>3.382096</td>\n",
       "      <td>10.321775</td>\n",
       "      <td>0.813953</td>\n",
       "      <td>0.186047</td>\n",
       "      <td>3.118572</td>\n",
       "      <td>7.691919</td>\n",
       "      <td>1.246723</td>\n",
       "      <td>3.969428</td>\n",
       "      <td>4.416692</td>\n",
       "      <td>0.25</td>\n",
       "      <td>2.683685</td>\n",
       "      <td>4.560455</td>\n",
       "      <td>1.876770</td>\n",
       "      <td>3.506317</td>\n",
       "      <td>4.993969</td>\n",
       "      <td>6.611602</td>\n",
       "      <td>Bimodal</td>\n",
       "      <td>0.95</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>gene10</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>2.252029</td>\n",
       "      <td>-1.216598</td>\n",
       "      <td>0.006039</td>\n",
       "      <td>6.281443</td>\n",
       "      <td>8.383876</td>\n",
       "      <td>10.891227</td>\n",
       "      <td>0.365452</td>\n",
       "      <td>0.634548</td>\n",
       "      <td>2.963747</td>\n",
       "      <td>8.132413</td>\n",
       "      <td>1.221529</td>\n",
       "      <td>6.243511</td>\n",
       "      <td>7.421162</td>\n",
       "      <td>0.25</td>\n",
       "      <td>3.464664</td>\n",
       "      <td>8.508720</td>\n",
       "      <td>5.044056</td>\n",
       "      <td>7.298677</td>\n",
       "      <td>4.705917</td>\n",
       "      <td>6.140867</td>\n",
       "      <td>Bimodal</td>\n",
       "      <td>0.95</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "             Dip        BI  Kurtosis  DropOutRate    MeanNZ   DenPeak  \\\n",
       "tf      0.000250  1.982430 -1.118390     0.027778  4.454848  2.231509   \n",
       "gene1   0.000000  2.171544 -1.148314     0.021135  6.223047  8.243204   \n",
       "gene2   0.000000  1.957530 -1.273249     0.044082  5.641087  8.029818   \n",
       "gene3   0.000000  2.123630 -1.315039     0.036836  5.724870  8.076334   \n",
       "gene4   0.000000  2.161170 -1.344687     0.018116  6.559400  9.543728   \n",
       "gene5   0.000000  2.167416 -1.386505     0.038647  5.661639  8.154579   \n",
       "gene6   0.000000  2.355317 -1.505761     0.034420  5.745117  8.722360   \n",
       "gene7   0.000000  2.307954 -0.861903     0.005435  6.800339  8.655035   \n",
       "gene8   0.000000  2.296122 -1.483351     0.043478  5.821394  8.961177   \n",
       "gene9   0.001038  1.593895  0.246388     0.035628  4.116075  3.382096   \n",
       "gene10  0.000000  2.252029 -1.216598     0.006039  6.281443  8.383876   \n",
       "\n",
       "        Amplitude  gaussian_prob1  gaussian_prob2  gaussian_mean1  \\\n",
       "tf      10.458465        0.634321        0.365679        2.449688   \n",
       "gene1   11.193184        0.363906        0.636094        2.579439   \n",
       "gene2   10.794851        0.409065        0.590935        2.258246   \n",
       "gene3   10.811316        0.400338        0.599662        2.237126   \n",
       "gene4   12.260558        0.403231        0.596769        2.734372   \n",
       "gene5   10.498523        0.436231        0.563769        2.322133   \n",
       "gene6   11.401288        0.433551        0.566449        2.015794   \n",
       "gene7   10.931065        0.303185        0.696815        2.875043   \n",
       "gene8   11.437198        0.489454        0.510546        2.422383   \n",
       "gene9   10.321775        0.813953        0.186047        3.118572   \n",
       "gene10  10.891227        0.365452        0.634548        2.963747   \n",
       "\n",
       "        gaussian_mean2  gaussian_variance      mean   variance  \\\n",
       "tf            7.594680           1.562365  4.331102   7.707162   \n",
       "gene1         8.100760           1.496439  6.091521   8.558219   \n",
       "gene2         7.561991           1.774518  5.392416   8.579517   \n",
       "gene3         7.701644           1.589570  5.513990   8.763508   \n",
       "gene4         8.944811           1.987133  6.440570  11.275156   \n",
       "gene5         7.857554           1.604112  5.442832   9.145257   \n",
       "gene6         8.250379           1.720748  5.547368  11.273438   \n",
       "gene7         8.455204           1.234993  6.763381   7.818094   \n",
       "gene8         8.584236           1.799614  5.568290  11.294318   \n",
       "gene9         7.691919           1.246723  3.969428   4.416692   \n",
       "gene10        8.132413           1.221529  6.243511   7.421162   \n",
       "\n",
       "        unimodal_margin_quantile  unimodal_low_quantile  \\\n",
       "tf                          0.25               2.070391   \n",
       "gene1                       0.25               3.183970   \n",
       "gene2                       0.25               2.585510   \n",
       "gene3                       0.25               2.536712   \n",
       "gene4                       0.25               3.109483   \n",
       "gene5                       0.25               2.480269   \n",
       "gene6                       0.25               2.068712   \n",
       "gene7                       0.25               4.108445   \n",
       "gene8                       0.25               2.438392   \n",
       "gene9                       0.25               2.683685   \n",
       "gene10                      0.25               3.464664   \n",
       "\n",
       "        unimodal_high_quantile       IQR       q50  bim_thresh_down  \\\n",
       "tf                    6.920502  4.850112  3.321480         4.277351   \n",
       "gene1                 8.483865  5.299895  7.282884         4.384796   \n",
       "gene2                 7.994901  5.409391  6.129123         3.802762   \n",
       "gene3                 8.106769  5.570057  6.452676         3.963904   \n",
       "gene4                 9.439844  6.330361  7.471017         4.771986   \n",
       "gene5                 8.141719  5.661451  6.272476         4.156733   \n",
       "gene6                 8.581835  6.513123  6.533317         4.226884   \n",
       "gene7                 8.919568  4.811123  7.855844         4.817005   \n",
       "gene8                 8.763361  6.324968  5.836308         4.631891   \n",
       "gene9                 4.560455  1.876770  3.506317         4.993969   \n",
       "gene10                8.508720  5.044056  7.298677         4.705917   \n",
       "\n",
       "        bim_thresh_up Category  dor_threshold  \n",
       "tf           6.081385  Bimodal           0.95  \n",
       "gene1        5.990926  Bimodal           0.95  \n",
       "gene2        5.780924  Bimodal           0.95  \n",
       "gene3        5.713590  Bimodal           0.95  \n",
       "gene4        6.660780  Bimodal           0.95  \n",
       "gene5        5.870370  Bimodal           0.95  \n",
       "gene6        5.879037  Bimodal           0.95  \n",
       "gene7        6.135923  Bimodal           0.95  \n",
       "gene8        6.354470  Bimodal           0.95  \n",
       "gene9        6.611602  Bimodal           0.95  \n",
       "gene10       6.140867  Bimodal           0.95  "
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Draw one criteria row per node of the Boolean model, keeping only bimodal\n",
    "# genes whose DropOutRate is below 0.05, then relabel the sampled rows with\n",
    "# the node names (the columns of `random_walk_df`).\n",
    "# The sample size is derived from the model instead of being hard-coded, so\n",
    "# the cell keeps working if the number of nodes changes.\n",
    "random_criteria = (\n",
    "    criteria[(criteria.Category == \"Bimodal\") & (criteria.DropOutRate < 0.05)]\n",
    "    .sample(len(random_walk_df.columns), random_state=_rng_seed)\n",
    "    .set_index(random_walk_df.columns)\n",
    ")\n",
    "random_criteria"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4ac21fef",
   "metadata": {},
   "source": [
    "## Generate synthetic RNA-Seq data\n",
    "\n",
    "We instantiate scBoolSeq with the simulation criteria, whose row names match the column names of the generated Boolean matrix."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "1239447f",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "scBoolSeq(has_data=False, can_binarize=False, can_simulate=True)"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Only simulation criteria are supplied (no reference expression data), so the\n",
    "# instance can simulate but cannot binarize, as its repr shows.\n",
    "scbool = scBoolSeq(simulation_criteria=random_criteria)\n",
    "scbool"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "18e159e5",
   "metadata": {},
   "source": [
    "Then, we generate 300 samples per Boolean state using the `.simulate` method:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "f627ea46",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Number of synthetic cells drawn for each row (Boolean state) of the random walk.\n",
    "n_samples = 300"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "163cdddb",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>tf</th>\n",
       "      <th>gene1</th>\n",
       "      <th>gene2</th>\n",
       "      <th>gene3</th>\n",
       "      <th>gene4</th>\n",
       "      <th>gene5</th>\n",
       "      <th>gene6</th>\n",
       "      <th>gene7</th>\n",
       "      <th>gene8</th>\n",
       "      <th>gene9</th>\n",
       "      <th>gene10</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>7.343997</td>\n",
       "      <td>4.423748</td>\n",
       "      <td>1.455764</td>\n",
       "      <td>2.666266</td>\n",
       "      <td>3.622463</td>\n",
       "      <td>2.339183</td>\n",
       "      <td>3.145557</td>\n",
       "      <td>2.355520</td>\n",
       "      <td>4.226817</td>\n",
       "      <td>2.450700</td>\n",
       "      <td>4.954449</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>6.503459</td>\n",
       "      <td>4.869516</td>\n",
       "      <td>0.997902</td>\n",
       "      <td>2.490826</td>\n",
       "      <td>3.831914</td>\n",
       "      <td>2.904771</td>\n",
       "      <td>1.355320</td>\n",
       "      <td>2.121728</td>\n",
       "      <td>9.014141</td>\n",
       "      <td>4.112613</td>\n",
       "      <td>2.857409</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>7.194568</td>\n",
       "      <td>8.060726</td>\n",
       "      <td>1.803331</td>\n",
       "      <td>3.395802</td>\n",
       "      <td>2.964298</td>\n",
       "      <td>2.039039</td>\n",
       "      <td>2.856274</td>\n",
       "      <td>3.078402</td>\n",
       "      <td>10.793265</td>\n",
       "      <td>2.710345</td>\n",
       "      <td>1.965232</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>5.265355</td>\n",
       "      <td>7.646248</td>\n",
       "      <td>1.637069</td>\n",
       "      <td>6.712579</td>\n",
       "      <td>3.382125</td>\n",
       "      <td>2.334264</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>1.165221</td>\n",
       "      <td>9.365778</td>\n",
       "      <td>2.665426</td>\n",
       "      <td>6.474557</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>7.519443</td>\n",
       "      <td>8.754864</td>\n",
       "      <td>2.471689</td>\n",
       "      <td>7.591701</td>\n",
       "      <td>3.109703</td>\n",
       "      <td>2.208982</td>\n",
       "      <td>7.996878</td>\n",
       "      <td>1.137108</td>\n",
       "      <td>10.456507</td>\n",
       "      <td>2.603011</td>\n",
       "      <td>4.516670</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         tf     gene1     gene2     gene3     gene4     gene5     gene6  \\\n",
       "0  7.343997  4.423748  1.455764  2.666266  3.622463  2.339183  3.145557   \n",
       "1  6.503459  4.869516  0.997902  2.490826  3.831914  2.904771  1.355320   \n",
       "2  7.194568  8.060726  1.803331  3.395802  2.964298  2.039039  2.856274   \n",
       "3  5.265355  7.646248  1.637069  6.712579  3.382125  2.334264  0.000000   \n",
       "4  7.519443  8.754864  2.471689  7.591701  3.109703  2.208982  7.996878   \n",
       "\n",
       "      gene7      gene8     gene9    gene10  \n",
       "0  2.355520   4.226817  2.450700  4.954449  \n",
       "1  2.121728   9.014141  4.112613  2.857409  \n",
       "2  3.078402  10.793265  2.710345  1.965232  \n",
       "3  1.165221   9.365778  2.665426  6.474557  \n",
       "4  1.137108  10.456507  2.603011  4.516670  "
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Sample `n_samples` synthetic expression profiles for every Boolean state of the walk.\n",
    "counts = scbool.simulate(random_walk_df, n_samples=n_samples)\n",
    "counts.head(5)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a95920eb",
   "metadata": {},
   "source": [
    "To ease post-analysis with STREAM, we generate unique identifiers for each simulated row (cell):"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "9ace2312",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>tf</th>\n",
       "      <th>gene1</th>\n",
       "      <th>gene2</th>\n",
       "      <th>gene3</th>\n",
       "      <th>gene4</th>\n",
       "      <th>gene5</th>\n",
       "      <th>gene6</th>\n",
       "      <th>gene7</th>\n",
       "      <th>gene8</th>\n",
       "      <th>gene9</th>\n",
       "      <th>gene10</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>cellID</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>step0_0</th>\n",
       "      <td>7.343997</td>\n",
       "      <td>4.423748</td>\n",
       "      <td>1.455764</td>\n",
       "      <td>2.666266</td>\n",
       "      <td>3.622463</td>\n",
       "      <td>2.339183</td>\n",
       "      <td>3.145557</td>\n",
       "      <td>2.355520</td>\n",
       "      <td>4.226817</td>\n",
       "      <td>2.450700</td>\n",
       "      <td>4.954449</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>step1_0</th>\n",
       "      <td>6.503459</td>\n",
       "      <td>4.869516</td>\n",
       "      <td>0.997902</td>\n",
       "      <td>2.490826</td>\n",
       "      <td>3.831914</td>\n",
       "      <td>2.904771</td>\n",
       "      <td>1.355320</td>\n",
       "      <td>2.121728</td>\n",
       "      <td>9.014141</td>\n",
       "      <td>4.112613</td>\n",
       "      <td>2.857409</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>step2_0</th>\n",
       "      <td>7.194568</td>\n",
       "      <td>8.060726</td>\n",
       "      <td>1.803331</td>\n",
       "      <td>3.395802</td>\n",
       "      <td>2.964298</td>\n",
       "      <td>2.039039</td>\n",
       "      <td>2.856274</td>\n",
       "      <td>3.078402</td>\n",
       "      <td>10.793265</td>\n",
       "      <td>2.710345</td>\n",
       "      <td>1.965232</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>step3_0</th>\n",
       "      <td>5.265355</td>\n",
       "      <td>7.646248</td>\n",
       "      <td>1.637069</td>\n",
       "      <td>6.712579</td>\n",
       "      <td>3.382125</td>\n",
       "      <td>2.334264</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>1.165221</td>\n",
       "      <td>9.365778</td>\n",
       "      <td>2.665426</td>\n",
       "      <td>6.474557</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>step4_0</th>\n",
       "      <td>7.519443</td>\n",
       "      <td>8.754864</td>\n",
       "      <td>2.471689</td>\n",
       "      <td>7.591701</td>\n",
       "      <td>3.109703</td>\n",
       "      <td>2.208982</td>\n",
       "      <td>7.996878</td>\n",
       "      <td>1.137108</td>\n",
       "      <td>10.456507</td>\n",
       "      <td>2.603011</td>\n",
       "      <td>4.516670</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>step6_299</th>\n",
       "      <td>7.019498</td>\n",
       "      <td>11.833026</td>\n",
       "      <td>4.191677</td>\n",
       "      <td>7.636880</td>\n",
       "      <td>4.569541</td>\n",
       "      <td>2.178149</td>\n",
       "      <td>8.812244</td>\n",
       "      <td>2.681519</td>\n",
       "      <td>8.103503</td>\n",
       "      <td>8.350228</td>\n",
       "      <td>8.577708</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>step7_299</th>\n",
       "      <td>6.869362</td>\n",
       "      <td>8.494998</td>\n",
       "      <td>0.987528</td>\n",
       "      <td>6.191569</td>\n",
       "      <td>8.565168</td>\n",
       "      <td>1.742142</td>\n",
       "      <td>7.830821</td>\n",
       "      <td>0.221380</td>\n",
       "      <td>10.301637</td>\n",
       "      <td>4.230131</td>\n",
       "      <td>7.762939</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>step8_299</th>\n",
       "      <td>9.318526</td>\n",
       "      <td>8.263307</td>\n",
       "      <td>5.660365</td>\n",
       "      <td>6.102051</td>\n",
       "      <td>8.717069</td>\n",
       "      <td>4.543780</td>\n",
       "      <td>6.495120</td>\n",
       "      <td>2.918222</td>\n",
       "      <td>8.388695</td>\n",
       "      <td>6.171941</td>\n",
       "      <td>4.913572</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>step9_299</th>\n",
       "      <td>7.768477</td>\n",
       "      <td>8.415240</td>\n",
       "      <td>8.041082</td>\n",
       "      <td>5.957655</td>\n",
       "      <td>9.119277</td>\n",
       "      <td>2.507091</td>\n",
       "      <td>10.057915</td>\n",
       "      <td>8.073952</td>\n",
       "      <td>8.294528</td>\n",
       "      <td>5.635296</td>\n",
       "      <td>8.548555</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>step10_299</th>\n",
       "      <td>9.363183</td>\n",
       "      <td>7.610790</td>\n",
       "      <td>6.505688</td>\n",
       "      <td>6.694060</td>\n",
       "      <td>6.872659</td>\n",
       "      <td>7.633121</td>\n",
       "      <td>7.101126</td>\n",
       "      <td>7.106677</td>\n",
       "      <td>8.166303</td>\n",
       "      <td>7.856577</td>\n",
       "      <td>8.372333</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>3300 rows × 11 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                  tf      gene1     gene2     gene3     gene4     gene5  \\\n",
       "cellID                                                                    \n",
       "step0_0     7.343997   4.423748  1.455764  2.666266  3.622463  2.339183   \n",
       "step1_0     6.503459   4.869516  0.997902  2.490826  3.831914  2.904771   \n",
       "step2_0     7.194568   8.060726  1.803331  3.395802  2.964298  2.039039   \n",
       "step3_0     5.265355   7.646248  1.637069  6.712579  3.382125  2.334264   \n",
       "step4_0     7.519443   8.754864  2.471689  7.591701  3.109703  2.208982   \n",
       "...              ...        ...       ...       ...       ...       ...   \n",
       "step6_299   7.019498  11.833026  4.191677  7.636880  4.569541  2.178149   \n",
       "step7_299   6.869362   8.494998  0.987528  6.191569  8.565168  1.742142   \n",
       "step8_299   9.318526   8.263307  5.660365  6.102051  8.717069  4.543780   \n",
       "step9_299   7.768477   8.415240  8.041082  5.957655  9.119277  2.507091   \n",
       "step10_299  9.363183   7.610790  6.505688  6.694060  6.872659  7.633121   \n",
       "\n",
       "                gene6     gene7      gene8     gene9    gene10  \n",
       "cellID                                                          \n",
       "step0_0      3.145557  2.355520   4.226817  2.450700  4.954449  \n",
       "step1_0      1.355320  2.121728   9.014141  4.112613  2.857409  \n",
       "step2_0      2.856274  3.078402  10.793265  2.710345  1.965232  \n",
       "step3_0      0.000000  1.165221   9.365778  2.665426  6.474557  \n",
       "step4_0      7.996878  1.137108  10.456507  2.603011  4.516670  \n",
       "...               ...       ...        ...       ...       ...  \n",
       "step6_299    8.812244  2.681519   8.103503  8.350228  8.577708  \n",
       "step7_299    7.830821  0.221380  10.301637  4.230131  7.762939  \n",
       "step8_299    6.495120  2.918222   8.388695  6.171941  4.913572  \n",
       "step9_299   10.057915  8.073952   8.294528  5.635296  8.548555  \n",
       "step10_299   7.101126  7.106677   8.166303  7.856577  8.372333  \n",
       "\n",
       "[3300 rows x 11 columns]"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Label every simulated row as step<walk step>_<replicate>. The replicate index\n",
    "# varies slowest, i.e. the rows of `counts` are assumed to be `n_samples`\n",
    "# consecutive copies of the whole walk.\n",
    "ids = [\n",
    "    f\"step{step}_{replicate}\"\n",
    "    for replicate in range(n_samples)\n",
    "    for step in random_walk_df.index\n",
    "]\n",
    "counts.index = pd.Index(ids, name=\"cellID\")\n",
    "counts"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "56f449c4",
   "metadata": {},
   "source": [
    "We write the result as a TSV file:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "edb10f46",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Transpose before writing so that the file has one row per gene and one column per cell.\n",
    "counts.transpose().to_csv(\"synthetic_data_star_counts.tsv\", sep=\"\\t\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "962ad85f",
   "metadata": {},
   "source": [
    "Then, we generate metadata to validate the trajectory reconstruction with STREAM:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "5ec64c76",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{0: '#A2F37E',\n",
       " 1: '#36873F',\n",
       " 2: '#81D278',\n",
       " 3: '#8CA2D4',\n",
       " 4: '#D0327B',\n",
       " 5: '#CDEF47',\n",
       " 6: '#CB6896',\n",
       " 7: '#590605',\n",
       " 8: '#3C27AB',\n",
       " 9: '#4A7BBC',\n",
       " 10: '#F0F94B'}"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Label of each step of the walk: number of active nodes minus one. The\n",
    "# subtraction presumably discounts `tf` itself, i.e. it assumes that `tf` is\n",
    "# active in every state of the walk -- TODO confirm.\n",
    "nb_active_genes = (random_walk_df.sum(axis=1) - 1).tolist()\n",
    "_RGB_values = list(\"0123456789ABCDEF\")\n",
    "# One random hex colour per distinct label. The labels are visited in sorted\n",
    "# order so that, for a given state of `_rng`, the colour assigned to each label\n",
    "# does not depend on the iteration order of a set.\n",
    "color_map = {\n",
    "    nb: \"#\" + \"\".join(_rng.choice(_RGB_values) for _ in range(6))\n",
    "    for nb in sorted(set(nb_active_genes))\n",
    "}\n",
    "color_map"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "e5827936",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>label</th>\n",
       "      <th>label_color</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>cellID</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>step0_0</th>\n",
       "      <td>0</td>\n",
       "      <td>#A2F37E</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>step1_0</th>\n",
       "      <td>1</td>\n",
       "      <td>#36873F</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>step2_0</th>\n",
       "      <td>2</td>\n",
       "      <td>#81D278</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>step3_0</th>\n",
       "      <td>3</td>\n",
       "      <td>#8CA2D4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>step4_0</th>\n",
       "      <td>4</td>\n",
       "      <td>#D0327B</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>step6_299</th>\n",
       "      <td>6</td>\n",
       "      <td>#CB6896</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>step7_299</th>\n",
       "      <td>7</td>\n",
       "      <td>#590605</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>step8_299</th>\n",
       "      <td>8</td>\n",
       "      <td>#3C27AB</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>step9_299</th>\n",
       "      <td>9</td>\n",
       "      <td>#4A7BBC</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>step10_299</th>\n",
       "      <td>10</td>\n",
       "      <td>#F0F94B</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>3300 rows × 2 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "            label label_color\n",
       "cellID                       \n",
       "step0_0         0     #A2F37E\n",
       "step1_0         1     #36873F\n",
       "step2_0         2     #81D278\n",
       "step3_0         3     #8CA2D4\n",
       "step4_0         4     #D0327B\n",
       "...           ...         ...\n",
       "step6_299       6     #CB6896\n",
       "step7_299       7     #590605\n",
       "step8_299       8     #3C27AB\n",
       "step9_299       9     #4A7BBC\n",
       "step10_299     10     #F0F94B\n",
       "\n",
       "[3300 rows x 2 columns]"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# One (label, colour) pair per simulated cell: the per-step labels are repeated\n",
    "# `n_samples` times, in the same order as the cell identifiers of `counts`.\n",
    "metadata = pd.DataFrame(\n",
    "    {\n",
    "        \"label\": nb_active_genes * n_samples,\n",
    "        \"label_color\": [color_map[nb] for nb in nb_active_genes] * n_samples,\n",
    "    },\n",
    "    index=counts.index,\n",
    ")\n",
    "metadata"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "8887d91d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Save the per-cell labels and colours as a tab-separated file indexed by cellID.\n",
    "metadata.to_csv(\"synthetic_data_star_metadata.tsv\", sep=\"\\t\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "76d0f0c6",
   "metadata": {},
   "source": [
    "STREAM analysis is performed in a separate notebook: [3.1 - STREAM - Trajectory reconstruction for star network synthetic scRNA data](3.1%20-%20STREAM%20-%20Trajectory%20reconstruction%20for%20star%20network%20synthetic%20scRNA%20data.ipynb). Note that its execution should be performed in an appropriate software environment (e.g., the STREAM Docker image)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f81a6be2",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}