In [1]:
# See https://ipywidgets.readthedocs.io/en/latest/examples/Widget%20Basics.html for an introduction to ipywidgets
import ipywidgets as widgets
from IPython.display import display, SVG
from tempfile import TemporaryDirectory

In [2]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# Python modules are picked up without restarting the kernel (live-editing).
%load_ext autoreload
%autoreload 2

## Choose the repository

Some repositories to try:
- https://github.com/krassowski/multi-omics-state-of-the-field
- (please add more)

Three repositories found among the 10 most recent eLife papers:

- https://github.com/amphilli/pleiotropy-dynamics
 - article: https://elifesciences.org/articles/70918
- https://github.com/BiyuHeLab/eLife_Podvalny2021
 - article: https://elifesciences.org/articles/68265
- https://github.com/vsbuffalo/paradox_variation/
 - article: https://elifesciences.org/articles/67509

In [3]:
from retrieval import find_repository_urls, fetch_repository

In [4]:
from pathlib import Path

In [14]:
from nbpipeline.visualization.static_graph import static_graph
from nbpipeline.graph import RulesGraph
from nbpipeline.rules import Rule, Group, NotebookRule, is_tracked_in_version_control
# from nbpipeline.rules import discover_notebooks
from os import system, walk, sep

In [82]:
def discover_notebooks(
    root_path=Path('.'), ignore=None, ignored_dirs=None, only_tracked_in_git=False,
    ignore_prefixes=('__', '.')
):
    """Find notebooks below `root_path` and create a `NotebookRule` for each.

    Useful when working with input/output auto-detection.

    Args:
        root_path: directory to search (a `Path` or a string).
        ignore: notebook paths to skip; an entry matches either the path
            relative to `root_path` (e.g. 'analysis/Setup.ipynb') or the
            absolute path of the notebook.
        ignored_dirs: directory names which should not be searched.
        only_tracked_in_git: when true, skip notebooks for which
            `is_tracked_in_version_control` returns a false value.
        ignore_prefixes: notebooks whose file name starts with any of these
            prefixes are skipped.

    Returns:
        A dict with two keys: 'rules' (list of the created rules) and
        'groups' (dict mapping group id to `Group`). A group id is the
        directory of the notebook relative to `root_path`; notebooks placed
        directly in `root_path` have no group.
    """
    ignored_dirs = ignored_dirs or set()
    ignore = ignore or set()
    names = {}
    rules = []

    groups: dict[str, Group] = {}
    root_path = Path(root_path).absolute()

    for dirpath, dirnames, files in walk(root_path):
        # Prune hidden and ignored directories in place, so that `walk`
        # does not descend into them at all (e.g. `.git`).
        dirnames[:] = [
            dirname for dirname in dirnames
            if not dirname.startswith('.') and dirname not in ignored_dirs
        ]
        # Components relative to the searched root: directories *above* the
        # root must influence neither the filtering nor the group ids.
        dirs = list(Path(dirpath).relative_to(root_path).parts)

        for file in files:
            if any(file.startswith(prefix) for prefix in ignore_prefixes):
                continue
            if not file.endswith('.ipynb'):
                continue
            # NOTE(review): only the bare file name is passed here; confirm that
            # the helper handles notebooks located in sub-directories correctly.
            if only_tracked_in_git and not is_tracked_in_version_control(file):
                continue
            path = Path(dirpath) / file
            relative_path = sep.join(dirs + [file])

            if relative_path in ignore or str(path) in ignore:
                continue

            # Strip the '.ipynb' extension and turn underscores into spaces,
            # keeping the first character untouched.
            name = file[:-6]
            name = name[:1] + name[1:].replace('_', ' ')

            if name in names:
                print(name, 'already registered', path, names[name])
                continue
            names[name] = path

            group_id = sep.join(dirs) if dirs else None
            try:
                rule = NotebookRule(name, notebook=path, group=group_id)
            except Exception as e:
                # Best effort: report the notebook which could not be turned
                # into a rule and carry on with the remaining ones.
                print(e)
                continue
            rules.append(rule)
            if group_id and group_id not in groups:
                groups[group_id] = Group(id=group_id, name=dirs[-1], parent=sep.join(dirs[:-1]))
    return {
        'rules': rules,
        'groups': groups
    }

In [85]:
!pip install data_vault

Collecting data_vault
 Downloading data_vault-0.4.4-py3-none-any.whl (17 kB)
Installing collected packages: data-vault
Successfully installed data-vault-0.4.4


In [86]:
def generate_dag(repository_url, tmp_dir=None, cache_dir=None):
    """Render the graph of the currently registered rules.

    Every rule in `Rule.rules` gets its `repository_url` attribute set to
    the given address before the graph is built.

    Args:
        repository_url: address of the repository the rules originate from.
        tmp_dir: working directory passed to `Rule.setup`;
            defaults to '/tmp/nbpipeline'.
        cache_dir: cache directory passed to `Rule.setup`;
            defaults to the 'cache' sub-directory of `tmp_dir`.

    Returns:
        The result of `static_graph` for the rules graph laid out from left
        to right (presumably SVG markup - the caller wraps it in `SVG`).
    """
    tmp_dir = Path('/tmp/nbpipeline') if tmp_dir is None else Path(tmp_dir)
    cache_dir = tmp_dir / 'cache' if cache_dir is None else Path(cache_dir)

    Rule.setup(tmp_dir=tmp_dir, cache_dir=cache_dir)

    tmp_dir.mkdir(exist_ok=True, parents=True)
    cache_dir.mkdir(exist_ok=True, parents=True)

    rules = Rule.rules

    for rule in rules.values():
        rule.repository_url = repository_url

    dag = RulesGraph(rules).graph
    # "rankdir": "LR" requests a left-to-right layout of the graph
    return static_graph(dag, options='{"graph": {"rankdir": "LR"}}')

In [87]:
# Widgets of the small user interface: an address field pre-filled with an
# example repository, a button starting the analysis, and an output area
# which receives the messages and the rendered graph.
output = widgets.Output()
analyze_button = widgets.Button(description='Analyze')
repository_field = widgets.Text(
    value='https://github.com/krassowski/multi-omics-state-of-the-field'
)

display(repository_field, analyze_button, output)


def on_download_button_clicked(b):
    """Analyse the repository given in `repository_field` and show its graph.

    Everything printed or displayed here is captured by the `output` widget.

    Args:
        b: the clicked button (passed by the `on_click` callback, unused).
    """
    with output:
        query = repository_field.value.strip()
        if not query:
            print('Please provide a repository address')
            return

        candidates = find_repository_urls(query)
        if not candidates:
            # nothing to take the first element from
            print('No repository address found')
            return
        if len(candidates) > 1:
            # TODO: offer choice
            print('More than one candidate address found')

        # for now just take the first one
        address = next(iter(candidates))

        # NOTE(review): fetching is currently disabled (see the commented-out
        # lines), so the messages below are printed without any download and
        # the notebooks are read from the pre-existing 'tmp/test' directory.
        # with TemporaryDirectory() as temp_dir:
        temp_dir = 'tmp/test'
        print(f"Downloading: {address}")
        # fetch_repository(address=address, temp_dir=temp_dir)
        print('Downloaded')

        # Start from an empty registry so that rules from a previous click are
        # dropped; the return value of the discovery is not used here.
        # NOTE(review): presumably `NotebookRule` adds itself to `Rule.rules`,
        # which `generate_dag` reads afterwards - confirm.
        Rule.rules = {}
        discover_notebooks(
            root_path=Path(temp_dir),
            # ignored_dirs={'backlog', 'archive'},
            # ignore={'notebook_setup.ipynb'},
            # only_tracked_in_git=True
        )
        svg_graph = generate_dag(repository_url=address)

        display(SVG(data=svg_graph))


# Run the analysis callback each time the "Analyze" button is clicked.
analyze_button.on_click(on_download_button_clicked)

Text(value='https://github.com/krassowski/multi-omics-state-of-the-field')

Button(description='Analyze', style=ButtonStyle())

Output()