#!/usr/bin/env sos-runner
#fileformat=SOS1.0

# Build, launch and manage the docker based tutorial servers and sandboxes
# of the statisticalgenetics course images.

[global]
# List of tutorial to launch
# see https://hub.docker.com/u/statisticalgenetics/ for a list of options
parameter: tutorial = []
import getpass
# User name used to label containers, networks and folders (defaults to the login name)
parameter: my_name = getpass.getuser()
# Set to True to force `--platform linux/amd64` for docker commands
parameter: mac_chip = False
# All resource names derived from the user name are lower case
my_name = my_name.lower()
# base notebook version
parameter: version = '1.0.0'
# tutorial tag
parameter: tag = 'latest'
import os
import random
import secrets
import socket

# Turn on the amd64 platform flag automatically on arm64 machines
if os.uname().machine == 'arm64' and not mac_chip:
    print("Mac CPU chip detected.")
    mac_chip = True

def is_port_in_use(port):
    """Return True if a TCP connection to ``localhost:port`` succeeds."""
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        return s.connect_ex(('localhost', port)) == 0

def get_ports(num):
    """Return ``num`` distinct ports from 1001-8887 that are currently free.

    Candidates are probed in random order with ``is_port_in_use``.
    An empty list is returned when ``num`` is not positive.

    Raises:
        RuntimeError: if fewer than ``num`` free ports are found, so that
            callers indexing the result do not fail with an obscure
            ``IndexError`` later on.
    """
    candidates = list(range(1001, 8888))
    random.shuffle(candidates)
    ports = []
    for candidate in candidates:
        if len(ports) >= num:
            break
        if not is_port_in_use(candidate):
            ports.append(candidate)
    if len(ports) < num:
        raise RuntimeError(f'Only {len(ports)} free port(s) found in range 1001-8887 but {num} requested.')
    return ports

[build]
# Build the docker image of each tutorial and try to push it

# Directory in which `git pull` and `docker build` are executed
parameter: root_dir = path('./')
input: None, for_each = 'tutorial', concurrent = False
bash: expand = True, workdir = root_dir
    git pull
    docker build {"--platform linux/amd64" if mac_chip else ""} --build-arg DUMMY=`date +%s` -t statisticalgenetics/{_tutorial}:{tag} -f docker/{_tutorial}.dockerfile docker && \
    (docker push statisticalgenetics/{_tutorial}:{tag} || true)

[launch_1]
# Pull the base notebook image and download the JupyterHub helper scripts
output: "~/.cache/cull_idle_servers.py", "~/.cache/jupyterhub_config.py"
bash: expand = True
    docker pull gaow/base-notebook:{version}
    mkdir -p $HOME/.cache
    curl -fsSL https://raw.githubusercontent.com/statgenetics/statgen-courses/master/src/cull_idle_servers.py -o $HOME/.cache/cull_idle_servers.py
    curl -fsSL https://raw.githubusercontent.com/statgenetics/statgen-courses/master/src/jupyterhub_config.py -o $HOME/.cache/jupyterhub_config.py

[launch_2]
# Build one JupyterHub image per tutorial, configured for this user
fail_if(len(tutorial) == 0, msg = 'Please specify a list of tutorial to launch, via ``--tutorial`` option.')
fail_if(len(my_name) == 0, msg = 'Please assign yourself a username, via ``--my-name`` option.')
fail_if(not all((a.isalnum() or a == '_') and not a.isspace() for a in my_name),
        msg = 'Please use only alphanumeric letters and no space for ``--my-name`` input.')
import os
# Escape the slashes of the home directory because it is used inside a sed expression
home = os.path.expanduser("~").replace('/', '\\/')
input: None, for_each = 'tutorial', concurrent = False
script: interpreter=f'docker build . --tag {_tutorial}_{my_name} {"--platform linux/amd64" if mac_chip else ""} -f', expand = '$[ ]', workdir = '~/.cache'
    FROM gaow/base-notebook:$[version]
    USER root
    RUN mkdir -p /srv/jupyterhub/
    COPY jupyterhub_config.py /srv/jupyterhub/jupyterhub_config.py
    COPY cull_idle_servers.py /srv/jupyterhub/cull_idle_servers.py
    # modify the script to create a unique instance
    RUN sed -i "s/CONTAINER_NAME/$[_tutorial]_$[my_name]/g; s/NETWORK_NAME/$[my_name]/g; s/IMAGE_NAME/statisticalgenetics\/$[_tutorial]:$[tag]/g; s/HOST_DIR/$[home]\/$[_tutorial]_$[my_name]/g" /srv/jupyterhub/jupyterhub_config.py
    EXPOSE 8000
    WORKDIR /srv/jupyterhub/
    LABEL org.jupyter.service="jupyterhub"
    CMD ["jupyterhub"]

[launch_3]
# Start one JupyterHub container per tutorial, each on its own free port

# Set to True to also create a web shortcut under /var/www/html
parameter: unify_link = False
ports = get_ports(len(tutorial))
#ip = get_output("hostname -I | awk '{print $1}'").strip()
ip = get_output("curl -so - http://checkip.amazonaws.com/").strip()
input: None, for_each = 'tutorial', concurrent = False
bash: expand = True, workdir = '~/'
    set -e
    # Get the relevant image
    docker pull statisticalgenetics/{_tutorial}:{tag}
    # Stop currently running servers to start from scratch if they are the same name; but don't stop any running instances (line below commented out)
    # docker container stop $(docker container ls -q --filter ancestor=statisticalgenetics/{_tutorial}) &> /dev/null || true
    docker container stop $(docker container ls -q --filter name={_tutorial}_{my_name}) &> /dev/null || true
    # Create a user folder for notebook instance and set the permission
    mkdir -p $HOME/{_tutorial}_{my_name} && (chmod g=u $HOME/{_tutorial}_{my_name} || true)
    # We create a network for per student user, and add different tutorials to it.
    docker network inspect {my_name} &>/dev/null || docker network create {my_name}
    echo -e "To track the server status, please open a terminal on your computer outside of the Jupyter Lab sever, and run command:\n\ndocker logs -f \\" > $HOME/{_tutorial}_{my_name}/{_tutorial}_{my_name}.server_status.txt
    # user ID is hard-coded in https://github.com/jupyter/docker-stacks/blob/f3079808ca8cf7a0520a0845fb954e1866913942/base-notebook/Dockerfile
    docker run --rm -it -d {"--platform linux/amd64" if mac_chip else ""} -v /tmp:/tmp -v /var/run/docker.sock:/var/run/docker.sock --net {my_name} --name {_tutorial}_{my_name} -p {ports[_index]}:8000 -e NB_UID=$UID --user root {_tutorial}_{my_name} >> $HOME/{_tutorial}_{my_name}/{_tutorial}_{my_name}.server_status.txt
    # Create a shortcut to access from browser
    ({"true" if unify_link else "false"} && mkdir -p /var/www/html/{my_name}/{_tutorial} && echo '' 2> /dev/null 1> /var/www/html/{my_name}/{_tutorial}/index.html && printf '\nDone! Users can now access {_tutorial} tutorial at http://{ip}/{my_name}/{_tutorial}\n\n') || printf '\nDone! Users with proper web service set up can now access {_tutorial} tutorial at http://{ip}:{ports[_index]} (remote) or http://localhost:{ports[_index]} (local). Alternatively you can find the instance in your Docker Desktop GUI and run it from there.\n\n'

bash: expand = True, workdir = '~/'
    if [ $(command -v wslview) ] ; then wslview http://{ip}:{ports[_index]} ; fi

[serve]
# Serve the course material, with or without docker

# Set to False to run serve_without_docker instead of serve_with_docker
parameter: docker = True
if docker:
    sos_run("serve_with_docker")
else:
    sos_run("serve_without_docker")

[serve_with_docker]
# Run the course notebook server from a docker image in the foreground

# Host port that is mapped to port 8888 of the container
port = 8888
bash: expand = True, workdir = '~/'
    set -e
    # Get the relevant image
    docker pull yiweizh/rockefeller-jupyter:latest
    # Stop a currently running server of the same name to start from scratch
    docker container stop $(docker container ls -q --filter name=statgen_course_{my_name}) &> /dev/null || true
    # Create a user folder for notebook instance and set the permission
    mkdir -p $HOME/statgen_course_{my_name} && (chmod g=u $HOME/statgen_course_{my_name} || true)
    docker run --rm -it {"--platform linux/amd64" if mac_chip else ""} -p {port}:8888 -v /tmp:/tmp -v $HOME/statgen_course_{my_name}:/home/jovyan/handson-tutorial --name statgen_course_{my_name} -e NB_UID=1000 yiweizh/rockefeller-jupyter

[serve_without_docker]
# Clone the tutorials, download notebooks and data, then start jupyter-lab locally

# Git repository that holds the hands-on tutorials
parameter: repo_url = "https://github.com/cumc/handson-tutorials.git"
depends: executable("synapse"), executable("parallel")
bash: expand = "$[ ]"
    repo_dir="handson-tutorials"
    git clone $[repo_url]
    # Download external resources
    mkdir -p $repo_dir/contents/ldpred2
    curl -o "$repo_dir/contents/ldpred2/ldpred.ipynb" https://raw.githubusercontent.com/cumc/bioworkflows/master/ldpred/ldpred.ipynb
    curl -o "$repo_dir/contents/ldpred2/ldpred2_example.ipynb" https://raw.githubusercontent.com/cumc/bioworkflows/master/ldpred/ldpred2_example.ipynb
    mkdir -p $repo_dir/contents/archive/clumping
    curl -o "$repo_dir/contents/archive/clumping/clumping.ipynb" https://raw.githubusercontent.com/cumc/bioworkflows/master/GWAS/LD_Clumping.ipynb
    mkdir -p $repo_dir/contents/ngs_qc_annotation/
    mkdir -p $repo_dir/contents/ngs_qc_annotation/pipelines
    curl -o "$repo_dir/contents/ngs_qc_annotation/pipelines/VCF_QC.ipynb" https://raw.githubusercontent.com/cumc/xqtl-protocol/main/code/data_preprocessing/genotype/VCF_QC.ipynb
    curl -o "$repo_dir/contents/ngs_qc_annotation/pipelines/genotype_formatting.ipynb" https://raw.githubusercontent.com/cumc/xqtl-protocol/main/code/data_preprocessing/genotype/genotype_formatting.ipynb
    curl -o "$repo_dir/contents/ngs_qc_annotation/pipelines/GWAS_QC.ipynb" https://raw.githubusercontent.com/cumc/xqtl-protocol/main/code/data_preprocessing/genotype/GWAS_QC.ipynb
    curl -o "$repo_dir/contents/ngs_qc_annotation/pipelines/PCA.ipynb" https://raw.githubusercontent.com/cumc/xqtl-protocol/main/code/data_preprocessing/genotype/PCA.ipynb
    curl -o "$repo_dir/contents/ngs_qc_annotation/pipelines/annovar.ipynb" https://raw.githubusercontent.com/cumc/bioworkflows/master/variant-annotation/annovar.ipynb
    # Remove outputs from notebooks
    mkdir -p ~/.parallel && touch ~/.parallel/will-cite
    find "$repo_dir/contents" -type f -name "*.ipynb" | parallel -j $(nproc) "jupyter nbconvert --ClearOutputPreprocessor.enabled=True --inplace {}"
    # Download data command
    curl https://raw.githubusercontent.com/cumc/handson-tutorials/main/setup/.synapseConfig > ~/.synapseConfig
    synapse get -r syn18700992 --downloadLocation $repo_dir/contents
    # start server
    cd $repo_dir/contents && jupyter-lab

[login]
# Log in to the sandbox container of one tutorial, starting it if needed

# use --restart to force restart the container
parameter: restart = False
# Host folder mounted as annotation_db into the gemini container when it exists
parameter: gemini_data_dir = path('/opt/annotation_db')
fail_if(len(tutorial) != 1, msg = 'Please specify a tutorial to launch, via ``--tutorial`` option.')
fail_if(len(my_name) == 0, msg = 'Please assign yourself a username, via ``--my-name`` option.')
fail_if(not all((a.isalnum() or a == '_') and not a.isspace() for a in my_name), msg = 'Please use only alphanumeric letters and no space for ``--my-name`` input.')
tutorial = tutorial[0]
# Only the igv tutorial is served over a port, so only it needs a port and an IP
port = ip = None
if tutorial == 'igv':
    port = get_ports(1)[0]
    # ip = get_output("hostname -I | awk '{print $1}'").strip()
    ip = get_output("curl -so - http://checkip.amazonaws.com/").strip()
extra_docker_args = ''
if tutorial == 'gemini' and gemini_data_dir.is_dir():
    extra_docker_args = f'-v {gemini_data_dir:a}:/home/jovyan/annotation_db'
bash: expand = '$[ ]', workdir = '~/'
    if [ "$[tutorial]" == "igv" ]
    then
        # Stop currently running instances to start from scratch
        docker stop sandbox_$[tutorial]_$[my_name] &> /dev/null || true
        # Run it in the background, `-d` option, force it to run under amd64
        docker run -d $["--platform linux/amd64" if mac_chip else ""] --rm --security-opt label:disable -t --name sandbox_$[tutorial]_$[my_name] -h $[tutorial] -p $[port]:8080 -e NB_UID=1000 --user root statisticalgenetics/$[tutorial]:$[tag] &> /dev/null
        printf '\nPlease access the IGV server at http://$[ip]:$[port]'
        printf '\nor, if you run this server locally (on your computer), at http://localhost:$[port]\n\n'
    else
        mkdir -p $HOME/$[tutorial]_$[my_name] && chmod g=u $HOME/$[tutorial]_$[my_name]
        cmd="docker exec --user jovyan -it sandbox_$[tutorial]_$[my_name] bash"
        echo "INFO: Logging in via '$cmd'"
        echo "INFO: '~/work' folder is mounted to '$HOME/$[tutorial]_$[my_name]' on your machine"
        # Stop currently running instances to start a new container
        if [ "$[restart]" == "True" ]
        then
            docker stop sandbox_$[tutorial]_$[my_name] &> /dev/null || true
        fi
        {
            $cmd 2> /dev/null
        } || {
            # Run it in the background, `-d` option, force it to run under amd64
            docker run -d $["--platform linux/amd64" if mac_chip else ""] --rm --security-opt label:disable -v $HOME/$[tutorial]_$[my_name]:/home/jovyan/work -v /tmp:/tmp $[extra_docker_args] -t --name sandbox_$[tutorial]_$[my_name] -e NB_UID=1000 --user root -h $[tutorial] statisticalgenetics/$[tutorial]:$[tag] &> /dev/null
            # Then login
            echo "Started a new container at `date`"
            $cmd
        }
    fi

[update]
# Pull the latest image of each tutorial
input: None, for_each = 'tutorial', concurrent = False
bash: expand = True
    docker pull statisticalgenetics/{_tutorial}:{tag}

[useradd]
# Create numbered system accounts, named after --my-name, with random passwords

# my_name is lower-cased in [global], so compare it with the lower-cased login
# name; otherwise a login name with capital letters slips through this check.
fail_if(len(my_name) == 0 or my_name == getpass.getuser().lower(), msg = 'Please assign a username prefix, via ``--my-name`` option.')
# Number of accounts to create
parameter: num_users = 1
# Five digit password suffixes, drawn from the OS entropy source rather than `random`
passwd = [10000 + secrets.randbelow(90000) for _ in range(num_users)]
input: None, for_each = 'passwd', concurrent = False
bash: expand = True
    set -e
    if [ `id -u {my_name}_{_index+1} 2>/dev/null || echo -1` -ge 0 ]; then
        userdel -r {my_name}_{_index+1} 2>/dev/null
    fi
    # Group `users` have group ID 100 which agrees with group ID in our docker images
    useradd -g users {my_name}_{_index+1} -d /home/{my_name}_{_index+1} -m -s /bin/bash
    # Add group `docker` to the user
    usermod -aG docker {my_name}_{_index+1}
    # Use `-m` for chpasswd option to avoid PAM's check for simple passwords
    echo "{my_name}_{_index+1}:{my_name}_{_passwd}" | chpasswd -m
    printf "New user created: {my_name}_{_index+1}:{my_name}_{_passwd}\n"

[clean]
# Prune unused docker resources and, unless --prune-only is set, stop and remove all containers

# to only prune unused resource (eg IP) and leave servers running
parameter: prune_only = False
import os
fail_if(os.geteuid() != 0, msg = 'This command is only meant for root user (eg, use ``sudo`` to run it)')
bash:
    # claim unused resources
    docker system prune -f

stop_if(prune_only)
bash:
    docker stop $(docker ps -a -q) || true
    docker rm $(docker ps -a -q) || true
    docker rmi $(docker images -f "dangling=true" -q) || true

# install gemini database to server
[gemini-db]
# Host folder that receives the gemini annotation data
parameter: gemini_data_dir = path('/opt/annotation_db')
bash: expand = True
    mkdir -p {gemini_data_dir:a}

bash: container='statisticalgenetics/gemini', volumes = [f"{gemini_data_dir:a}:/home/jovyan/annotation_db"]
    source activate py2 \
    && gemini update --dataonly --extra cadd_score \
    && gemini update --dataonly --extra gerp_bp
    # gemini update command is flawed, without running the line below the downloaded data will not be usable.
    mv annotation_db/gemini/data/* annotation_db/ && rm -rf annotation_db/gemini