{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Please see https://ipywidgets.readthedocs.io/en/latest/examples/Widget%20Basics.html for introduction\n",
    "import ipywidgets as widgets\n",
    "from IPython.display import display, SVG\n",
    "from tempfile import TemporaryDirectory"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# so that we can live-edit Python modules\n",
    "%load_ext autoreload\n",
    "%autoreload 2"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Choose the repository\n",
    "\n",
    "Some repositories to try:\n",
    "- https://github.com/krassowski/multi-omics-state-of-the-field\n",
    "- (please add more)\n",
    "\n",
    "Repositories from 3 of the 10 most recent eLife papers:\n",
    "\n",
    "- https://github.com/amphilli/pleiotropy-dynamics\n",
    "  - article: https://elifesciences.org/articles/70918\n",
    "- https://github.com/BiyuHeLab/eLife_Podvalny2021\n",
    "  - article: https://elifesciences.org/articles/68265\n",
    "- https://github.com/vsbuffalo/paradox_variation/\n",
    "  - article: https://elifesciences.org/articles/67509"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "from retrieval import find_repository_urls, fetch_repository"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pathlib import Path"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "from nbpipeline.visualization.static_graph import static_graph\n",
    "from nbpipeline.graph import RulesGraph\n",
    "from nbpipeline.rules import Rule, Group, NotebookRule, is_tracked_in_version_control\n",
    "# from nbpipeline.rules import discover_notebooks\n",
    "from os import system, walk, sep"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 82,
   "metadata": {},
   "outputs": [],
   "source": [
    "def discover_notebooks(\n",
    "    root_path=Path('.'), ignore=None, ignored_dirs=None, only_tracked_in_git=False,\n",
    "    ignore_prefixes=('__', '.')\n",
    "):\n",
    "    \"\"\"Find the notebooks below `root_path` and create a `NotebookRule` for each.\n",
    "\n",
    "    Useful when working with input/output auto-detection.\n",
    "\n",
    "    Directories are judged by their path relative to `root_path`: hidden or\n",
    "    ignored directories are skipped together with everything below them, and\n",
    "    group identifiers do not include the location of the root on disk.\n",
    "\n",
    "    Args:\n",
    "        root_path: directory to search (made absolute before walking).\n",
    "        ignore: full notebook paths (as strings) to skip.\n",
    "        ignored_dirs: names of directories to skip.\n",
    "        only_tracked_in_git: when true, skip files for which\n",
    "            `is_tracked_in_version_control` returns a falsy value.\n",
    "        ignore_prefixes: skip files whose name starts with any of these.\n",
    "\n",
    "    Returns:\n",
    "        dict with two keys: 'rules', the list of created rules, and 'groups',\n",
    "        mapping a group id (directory path relative to the root) to a `Group`\n",
    "        for each sub-directory that directly contains a registered notebook.\n",
    "    \"\"\"\n",
    "    ignored_dirs = ignored_dirs or set()\n",
    "    ignore = ignore or set()\n",
    "    names = {}\n",
    "    rules = []\n",
    "\n",
    "    groups: dict[str, Group] = {}\n",
    "    root_path = Path(root_path).absolute()\n",
    "    print(root_path)\n",
    "\n",
    "    for dirpath, subdirs, files in walk(root_path):\n",
    "        # Prune in place so that `walk` never descends into hidden or ignored\n",
    "        # directories; sorting keeps the traversal order deterministic.\n",
    "        subdirs[:] = sorted(\n",
    "            subdir for subdir in subdirs\n",
    "            if not subdir.startswith('.') and subdir not in ignored_dirs\n",
    "        )\n",
    "        # Components of the path below the root; empty for the root itself, so\n",
    "        # the absolute location of the root never affects filtering or grouping.\n",
    "        dirs = Path(dirpath).relative_to(root_path).parts\n",
    "        group_id = sep.join(dirs) if dirs else None\n",
    "\n",
    "        for file in sorted(files):\n",
    "            if file.startswith(tuple(ignore_prefixes)):\n",
    "                continue\n",
    "            if not file.endswith('.ipynb'):\n",
    "                continue\n",
    "            # NOTE(review): only the bare file name is passed here, not the full\n",
    "            # path - confirm this is what `is_tracked_in_version_control` expects.\n",
    "            if only_tracked_in_git and not is_tracked_in_version_control(file):\n",
    "                continue\n",
    "            path = Path(dirpath) / file\n",
    "\n",
    "            if str(path) in ignore:\n",
    "                continue\n",
    "            name = file[:-len('.ipynb')]\n",
    "            # the first character is kept as-is, later underscores become spaces;\n",
    "            # slicing (rather than indexing) tolerates an empty stem\n",
    "            name = name[:1] + name[1:].replace('_', ' ')\n",
    "            if name in names:\n",
    "                print(name, 'already registered', path, names[name])\n",
    "                continue\n",
    "\n",
    "            names[name] = path\n",
    "            try:\n",
    "                rule = NotebookRule(name, notebook=path, group=group_id)\n",
    "            except Exception as e:\n",
    "                # best effort: report the problem and carry on with other notebooks\n",
    "                print(e)\n",
    "                continue\n",
    "            rules.append(rule)\n",
    "            if group_id and group_id not in groups:\n",
    "                groups[group_id] = Group(id=group_id, name=dirs[-1], parent=sep.join(dirs[:-1]))\n",
    "    return {\n",
    "        'rules': rules,\n",
    "        'groups': groups\n",
    "    }"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 85,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Collecting data_vault\n",
      "  Downloading data_vault-0.4.4-py3-none-any.whl (17 kB)\n",
      "Requirement already satisfied: IPython in /srv/conda/envs/notebook/lib/python3.9/site-packages (from data_vault) (7.27.0)\n",
      "Requirement already satisfied: pandas in /srv/conda/envs/notebook/lib/python3.9/site-packages (from data_vault) (1.3.3)\n",
      "Requirement already satisfied: matplotlib-inline in /srv/conda/envs/notebook/lib/python3.9/site-packages (from IPython->data_vault) (0.1.3)\n",
      "Requirement already satisfied: pickleshare in /srv/conda/envs/notebook/lib/python3.9/site-packages (from IPython->data_vault) (0.7.5)\n",
      "Requirement already satisfied: traitlets>=4.2 in /srv/conda/envs/notebook/lib/python3.9/site-packages (from IPython->data_vault) (5.1.0)\n",
      "Requirement already satisfied: decorator in /srv/conda/envs/notebook/lib/python3.9/site-packages (from IPython->data_vault) (5.1.0)\n",
      "Requirement already satisfied: jedi>=0.16 in /srv/conda/envs/notebook/lib/python3.9/site-packages (from IPython->data_vault) (0.18.0)\n",
      "Requirement already satisfied: prompt-toolkit!=3.0.0,!=3.0.1,<3.1.0,>=2.0.0 in /srv/conda/envs/notebook/lib/python3.9/site-packages (from IPython->data_vault) (3.0.20)\n",
      "Requirement already satisfied: backcall in /srv/conda/envs/notebook/lib/python3.9/site-packages (from IPython->data_vault) (0.2.0)\n",
      "Requirement already satisfied: setuptools>=18.5 in /srv/conda/envs/notebook/lib/python3.9/site-packages (from IPython->data_vault) (58.0.4)\n",
      "Requirement already satisfied: pexpect>4.3 in /srv/conda/envs/notebook/lib/python3.9/site-packages (from IPython->data_vault) (4.8.0)\n",
      "Requirement already satisfied: pygments in /srv/conda/envs/notebook/lib/python3.9/site-packages (from IPython->data_vault) (2.10.0)\n",
      "Requirement already satisfied: parso<0.9.0,>=0.8.0 in /srv/conda/envs/notebook/lib/python3.9/site-packages (from jedi>=0.16->IPython->data_vault) (0.8.2)\n",
      "Requirement already satisfied: ptyprocess>=0.5 in /srv/conda/envs/notebook/lib/python3.9/site-packages (from pexpect>4.3->IPython->data_vault) (0.7.0)\n",
      "Requirement already satisfied: wcwidth in /srv/conda/envs/notebook/lib/python3.9/site-packages (from prompt-toolkit!=3.0.0,!=3.0.1,<3.1.0,>=2.0.0->IPython->data_vault) (0.2.5)\n",
      "Requirement already satisfied: pytz>=2017.3 in /srv/conda/envs/notebook/lib/python3.9/site-packages (from pandas->data_vault) (2021.1)\n",
      "Requirement already satisfied: numpy>=1.17.3 in /srv/conda/envs/notebook/lib/python3.9/site-packages (from pandas->data_vault) (1.21.2)\n",
      "Requirement already satisfied: python-dateutil>=2.7.3 in /srv/conda/envs/notebook/lib/python3.9/site-packages (from pandas->data_vault) (2.8.2)\n",
      "Requirement already satisfied: six>=1.5 in /srv/conda/envs/notebook/lib/python3.9/site-packages (from python-dateutil>=2.7.3->pandas->data_vault) (1.16.0)\n",
      "Installing collected packages: data-vault\n",
      "Successfully installed data-vault-0.4.4\n"
     ]
    }
   ],
   "source": [
    "%pip install data_vault==0.4.4"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 86,
   "metadata": {},
   "outputs": [],
   "source": [
    "def generate_dag(repository_url):\n",
    "    \"\"\"Render the rules currently held in `Rule.rules` as a static graph.\n",
    "\n",
    "    `Rule` is set up with a temporary and a cache directory below\n",
    "    `/tmp/nbpipeline` (both created when missing), `repository_url` is\n",
    "    assigned to every registered rule, and the graph built by `RulesGraph`\n",
    "    is handed to `static_graph` with the `rankdir: LR` option.\n",
    "\n",
    "    Returns the value produced by `static_graph`.\n",
    "    \"\"\"\n",
    "    tmp_dir = Path('/tmp/nbpipeline')\n",
    "    cache_dir = tmp_dir / 'cache'\n",
    "\n",
    "    Rule.setup(tmp_dir=tmp_dir, cache_dir=cache_dir)\n",
    "\n",
    "    for directory in (tmp_dir, cache_dir):\n",
    "        directory.mkdir(exist_ok=True, parents=True)\n",
    "\n",
    "    registered = Rule.rules\n",
    "    for rule in registered.values():\n",
    "        rule.repository_url = repository_url\n",
    "\n",
    "    return static_graph(\n",
    "        RulesGraph(registered).graph,\n",
    "        options='{\"graph\": {\"rankdir\": \"LR\"}}'\n",
    "    )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 87,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "a1f18608a7674d95bd6e2122e42a0973",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Text(value='https://github.com/krassowski/multi-omics-state-of-the-field')"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "bac7d7e18d794108a52ad081c38199fc",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Button(description='Analyze', style=ButtonStyle())"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "0b1d240288c54c8ab946ea15b1e1e5bb",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Output()"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# The user interface: an address field, a button starting the analysis,\n",
    "# and an output area for messages and the rendered graph.\n",
    "repository_field = widgets.Text(\n",
    "    value='https://github.com/krassowski/multi-omics-state-of-the-field'\n",
    ")\n",
    "analyze_button = widgets.Button(description='Analyze')\n",
    "output = widgets.Output()\n",
    "\n",
    "display(repository_field, analyze_button, output)\n",
    "\n",
    "\n",
    "def on_download_button_clicked(b):\n",
    "    \"\"\"Handle a click on the analysis button.\n",
    "\n",
    "    Looks up repository addresses for the text in `repository_field`, takes\n",
    "    the first candidate, discovers notebooks and displays the graph returned\n",
    "    by `generate_dag`. Everything printed or displayed inside the handler is\n",
    "    captured by the `output` widget.\n",
    "\n",
    "    Args:\n",
    "        b: argument supplied by the button's click callback (unused).\n",
    "    \"\"\"\n",
    "    with output:\n",
    "        if not repository_field.value:\n",
    "            print('Please provide a repository address')\n",
    "            return\n",
    "\n",
    "        candidates = find_repository_urls(repository_field.value)\n",
    "        if len(candidates) == 0:\n",
    "            # without this guard `next()` below would raise `StopIteration`\n",
    "            print('No repository address found')\n",
    "            return\n",
    "        if len(candidates) > 1:\n",
    "            # TODO: offer choice\n",
    "            print('More than one candidate address found')\n",
    "\n",
    "        # for now just take the first one\n",
    "        address = next(iter(candidates))\n",
    "\n",
    "        # NOTE(review): downloading is currently disabled (see the commented-out\n",
    "        # lines); notebooks are read from a fixed local directory instead.\n",
    "        # with TemporaryDirectory() as temp_dir:\n",
    "        temp_dir = 'tmp/test'\n",
    "        print(f\"Downloading: {address}\")\n",
    "        # fetch_repository(address=address, temp_dir=temp_dir)\n",
    "        print('Downloaded')\n",
    "\n",
    "        # start from an empty registry; `generate_dag` reads `Rule.rules`\n",
    "        Rule.rules = {}\n",
    "        discover_notebooks(\n",
    "            root_path=Path(temp_dir),\n",
    "            # ignored_dirs={'backlog', 'archive'},\n",
    "            # ignore={'notebook_setup.ipynb'},\n",
    "            # only_tracked_in_git=True\n",
    "        )\n",
    "        svg_graph = generate_dag(repository_url=address)\n",
    "\n",
    "        display(SVG(data=svg_graph))\n",
    "\n",
    "\n",
    "analyze_button.on_click(on_download_button_clicked)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}