{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "# Please see https://ipywidgets.readthedocs.io/en/latest/examples/Widget%20Basics.html for introduction\n", "import ipywidgets as widgets\n", "from IPython.display import display, SVG\n", "from tempfile import TemporaryDirectory" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# so that we can live-edit Python modules\n", "%load_ext autoreload\n", "%autoreload 2" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Choose the repository\n", "\n", "Some repositories to try:\n", "- https://github.com/krassowski/multi-omics-state-of-the-field\n", "- (please add more)\n", "\n", "3 repositories out of 10 most recent eLife papers:\n", "\n", "- https://github.com/amphilli/pleiotropy-dynamics\n", " - article: https://elifesciences.org/articles/70918\n", "- https://github.com/BiyuHeLab/eLife_Podvalny2021\n", " - article: https://elifesciences.org/articles/68265\n", "- https://github.com/vsbuffalo/paradox_variation/\n", " - article: https://elifesciences.org/articles/67509" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "from retrieval import find_repository_urls, fetch_repository" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "from pathlib import Path" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "from nbpipeline.visualization.static_graph import static_graph\n", "from nbpipeline.graph import RulesGraph\n", "from nbpipeline.rules import Rule, Group, NotebookRule, is_tracked_in_version_control\n", "# from nbpipeline.rules import discover_notebooks\n", "from os import system, walk, sep" ] }, { "cell_type": "code", "execution_count": 82, "metadata": {}, "outputs": [], "source": [ "def discover_notebooks(\n", " root_path=Path('.'), ignore=None, ignored_dirs=None, only_tracked_in_git=False,\n", " ignore_prefixes=('__', '.')\n", "):\n", " \"\"\"Useful when working with input/output auto-detection\"\"\"\n", " ignored_dirs = ignored_dirs or set()\n", " ignore = ignore or set()\n", " names = {}\n", " rules = []\n", "\n", " groups: dict[str, Group] = {}\n", " root_path = root_path.absolute()\n", " print(root_path)\n", "\n", " for dirpath, _, files in walk(root_path):\n", "\n", " dirs = dirpath.split(sep)[1:]\n", " if any(dir.startswith('.') or dir in ignored_dirs for dir in dirs):\n", " continue\n", " for file in files:\n", " if any(file.startswith(prefix) for prefix in ignore_prefixes):\n", " continue\n", " if not file.endswith('.ipynb'):\n", " continue\n", " if only_tracked_in_git and not is_tracked_in_version_control(file):\n", " continue\n", " path = Path(dirpath) / file\n", "\n", " if str(path) in ignore:\n", " continue\n", " name = file[:-6]\n", " name = name[0] + name[1:].replace('_', ' ')\n", " if name in names:\n", " print(name, 'already registered', path, names[name])\n", " else:\n", " names[name] = path\n", " group_id = sep.join(dirs) if dirs else None\n", " try:\n", " rule = NotebookRule(name, notebook=path, group=group_id)\n", " except Exception as e:\n", " print(e)\n", " continue\n", " rules.append(rule)\n", " if group_id and group_id not in groups:\n", " groups[group_id] = Group(id=group_id, name=dirs[-1], parent=sep.join(dirs[:-1]))\n", " return {\n", " 'rules': rules,\n", " 'groups': groups\n", " }" ] }, { "cell_type": "code", "execution_count": 85, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Collecting data_vault\n", " Downloading data_vault-0.4.4-py3-none-any.whl (17 kB)\n", "Requirement already satisfied: IPython in /srv/conda/envs/notebook/lib/python3.9/site-packages (from data_vault) (7.27.0)\n", "Requirement already satisfied: pandas in /srv/conda/envs/notebook/lib/python3.9/site-packages (from data_vault) (1.3.3)\n", "Requirement already satisfied: matplotlib-inline in /srv/conda/envs/notebook/lib/python3.9/site-packages (from IPython->data_vault) (0.1.3)\n", "Requirement already satisfied: pickleshare in /srv/conda/envs/notebook/lib/python3.9/site-packages (from IPython->data_vault) (0.7.5)\n", "Requirement already satisfied: traitlets>=4.2 in /srv/conda/envs/notebook/lib/python3.9/site-packages (from IPython->data_vault) (5.1.0)\n", "Requirement already satisfied: decorator in /srv/conda/envs/notebook/lib/python3.9/site-packages (from IPython->data_vault) (5.1.0)\n", "Requirement already satisfied: jedi>=0.16 in /srv/conda/envs/notebook/lib/python3.9/site-packages (from IPython->data_vault) (0.18.0)\n", "Requirement already satisfied: prompt-toolkit!=3.0.0,!=3.0.1,<3.1.0,>=2.0.0 in /srv/conda/envs/notebook/lib/python3.9/site-packages (from IPython->data_vault) (3.0.20)\n", "Requirement already satisfied: backcall in /srv/conda/envs/notebook/lib/python3.9/site-packages (from IPython->data_vault) (0.2.0)\n", "Requirement already satisfied: setuptools>=18.5 in /srv/conda/envs/notebook/lib/python3.9/site-packages (from IPython->data_vault) (58.0.4)\n", "Requirement already satisfied: pexpect>4.3 in /srv/conda/envs/notebook/lib/python3.9/site-packages (from IPython->data_vault) (4.8.0)\n", "Requirement already satisfied: pygments in /srv/conda/envs/notebook/lib/python3.9/site-packages (from IPython->data_vault) (2.10.0)\n", "Requirement already satisfied: parso<0.9.0,>=0.8.0 in /srv/conda/envs/notebook/lib/python3.9/site-packages (from jedi>=0.16->IPython->data_vault) (0.8.2)\n", "Requirement already satisfied: ptyprocess>=0.5 in /srv/conda/envs/notebook/lib/python3.9/site-packages (from pexpect>4.3->IPython->data_vault) (0.7.0)\n", "Requirement already satisfied: wcwidth in /srv/conda/envs/notebook/lib/python3.9/site-packages (from prompt-toolkit!=3.0.0,!=3.0.1,<3.1.0,>=2.0.0->IPython->data_vault) (0.2.5)\n", "Requirement already satisfied: pytz>=2017.3 in /srv/conda/envs/notebook/lib/python3.9/site-packages (from pandas->data_vault) (2021.1)\n", "Requirement already satisfied: numpy>=1.17.3 in /srv/conda/envs/notebook/lib/python3.9/site-packages (from pandas->data_vault) (1.21.2)\n", "Requirement already satisfied: python-dateutil>=2.7.3 in /srv/conda/envs/notebook/lib/python3.9/site-packages (from pandas->data_vault) (2.8.2)\n", "Requirement already satisfied: six>=1.5 in /srv/conda/envs/notebook/lib/python3.9/site-packages (from python-dateutil>=2.7.3->pandas->data_vault) (1.16.0)\n", "Installing collected packages: data-vault\n", "Successfully installed data-vault-0.4.4\n" ] } ], "source": [ "!pip install data_vault" ] }, { "cell_type": "code", "execution_count": 86, "metadata": {}, "outputs": [], "source": [ "def generate_dag(repository_url):\n", " tmp_dir = Path('/tmp/nbpipeline')\n", " cache_dir = Path('/tmp/nbpipeline/cache')\n", "\n", " Rule.setup(tmp_dir=tmp_dir, cache_dir=cache_dir)\n", "\n", " tmp_dir.mkdir(exist_ok=True, parents=True)\n", " cache_dir.mkdir(exist_ok=True, parents=True)\n", "\n", " rules = Rule.rules\n", " # Rule.pipeline_config = self\n", "\n", " for rule in rules.values():\n", " rule.repository_url = repository_url\n", "\n", " dag = RulesGraph(rules).graph\n", " graph_svg = static_graph(dag, options='{\"graph\": {\"rankdir\": \"LR\"}}')\n", " return graph_svg" ] }, { "cell_type": "code", "execution_count": 87, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "a1f18608a7674d95bd6e2122e42a0973", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Text(value='https://github.com/krassowski/multi-omics-state-of-the-field')" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "bac7d7e18d794108a52ad081c38199fc", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Button(description='Analyze', style=ButtonStyle())" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "0b1d240288c54c8ab946ea15b1e1e5bb", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Output()" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "repository_field = widgets.Text(value='https://github.com/krassowski/multi-omics-state-of-the-field')\n", "analyze_button = widgets.Button(description=\"Analyze\")\n", "output = widgets.Output()\n", "\n", "display(\n", " repository_field, analyze_button, output\n", ")\n", "\n", "\n", "def on_download_button_clicked(b):\n", " with output:\n", " if not repository_field.value:\n", " print('Please provide a repository address')\n", " return\n", "\n", " candidates = find_repository_urls(repository_field.value)\n", " if len(candidates) > 2:\n", " # TODO: offer choice\n", " print('More than one candidate address found')\n", "\n", " # for now just take the first one\n", " address = next(iter(candidates))\n", "\n", " # with TemporaryDirectory() as temp_dir:\n", " temp_dir = 'tmp/test'\n", " print(f\"Downloading: {address}\")\n", " # fetch_repository(address=address, temp_dir=temp_dir)\n", " print('Downloaded')\n", " Rule.rules = {}\n", " discover_notebooks(\n", " root_path=Path(temp_dir),\n", " # ignored_dirs={'backlog', 'archive'},\n", " # ignore={'notebook_setup.ipynb'},\n", " # only_tracked_in_git=True\n", " )\n", " svg_graph = generate_dag(repository_url=address)\n", "\n", " display(SVG(data=svg_graph))\n", "\n", "\n", "analyze_button.on_click(on_download_button_clicked)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.7" } }, "nbformat": 4, "nbformat_minor": 4 }