{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<!--NOTEBOOK_HEADER-->\n",
    "*This notebook contains material from [PyRosetta](https://RosettaCommons.github.io/PyRosetta.notebooks);\n",
    "content is available [on Github](https://github.com/RosettaCommons/PyRosetta.notebooks.git).*"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<!--NAVIGATION-->\n",
    "< [Examples Using the `dask` Module](http://nbviewer.jupyter.org/github/RosettaCommons/PyRosetta.notebooks/blob/master/notebooks/16.04-dask.delayed-Via-Slurm.ipynb) | [Contents](toc.ipynb) | [Index](index.ipynb) | [PyRosettaCluster Tutorial 1A. Simple protocol](http://nbviewer.jupyter.org/github/RosettaCommons/PyRosetta.notebooks/blob/master/notebooks/16.06-PyRosettaCluster-Simple-protocol.ipynb) ><p><a href=\"https://colab.research.google.com/github/RosettaCommons/PyRosetta.notebooks/blob/master/notebooks/16.05-Ligand-Docking-dask.ipynb\"><img align=\"left\" src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open in Colab\" title=\"Open in Google Colaboratory\"></a>"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Part I: Parallelized Global Ligand Docking with `pyrosetta.distributed`"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "*Warning*: This notebook uses `pyrosetta.distributed.viewer` code, which runs in `jupyter notebook` and might not run if you're using `jupyterlab`."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "*Note:* This Jupyter notebook uses parallelization and is **not** meant to be executed within a Google Colab environment."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "*Note:* This Jupyter notebook requires the PyRosetta distributed layer, which is obtained by building PyRosetta with the `--serialization` flag or installing PyRosetta from the RosettaCommons conda channel.\n",
    "\n",
    "**Please see the setup instructions in Chapter 16.00**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys\n",
    "\n",
    "# Check for Google Colab before the heavier imports below: if one of them is missing\n",
    "# there, the ImportError would be raised before this message could be shown.\n",
    "if 'google.colab' in sys.modules:\n",
    "    print(\"This Jupyter notebook uses parallelization and is therefore not set up for the Google Colab environment.\")\n",
    "    sys.exit(0)\n",
    "\n",
    "import logging\n",
    "logging.basicConfig(level=logging.INFO)\n",
    "import json\n",
    "import matplotlib\n",
    "import matplotlib.pyplot as plt\n",
    "%matplotlib inline\n",
    "import numpy as np\n",
    "import os\n",
    "import pandas as pd\n",
    "import pyrosetta\n",
    "import pyrosetta.distributed.dask\n",
    "import pyrosetta.distributed.io as io\n",
    "import pyrosetta.distributed.packed_pose as packed_pose\n",
    "import pyrosetta.distributed.tasks.rosetta_scripts as rosetta_scripts\n",
    "import pyrosetta.distributed.tasks.score as score\n",
    "import pyrosetta.distributed.viewer as viewer\n",
    "import seaborn\n",
    "seaborn.set()\n",
    "\n",
    "from dask_jobqueue import SLURMCluster\n",
    "from dask.distributed import Client, progress, as_completed\n",
    "from IPython import display"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Set up PyRosetta command-line flags:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Ligand parameter file handed to Rosetta through -extra_res_fa\n",
    "ligand_params = \"inputs/TPA.am1-bcc.fa.params\"\n",
    "\n",
    "# One command-line flag per line, wrapped in a leading and a trailing newline\n",
    "flag_lines = [\n",
    "    f\"-extra_res_fa {ligand_params}\",\n",
    "    \"-ignore_unrecognized_res 1\",\n",
    "    \"-out:level 200\",\n",
    "]\n",
    "flags = \"\\n\" + \"\\n\".join(flag_lines) + \"\\n\"\n",
    "pyrosetta.distributed.init(flags)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Set up `dask` workers to run ligand docking simulations:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "if os.getenv(\"DEBUG\"):\n",
    "    # DEBUG mode: no cluster or client is created; the docking cells are guarded on the same variable\n",
    "    cluster, client = None, None\n",
    "else:\n",
    "    scratch_dir = os.path.join(\"/net/scratch\", os.environ[\"USER\"]) # Change to your scratch directory\n",
    "    # %j is left in the file name as-is and passed to SLURM through the -o option\n",
    "    slurm_log = os.path.join(scratch_dir, \"slurm-%j.out\")\n",
    "    cluster = SLURMCluster(\n",
    "        cores=1,\n",
    "        processes=1,\n",
    "        job_cpu=1,\n",
    "        memory=\"3GB\",\n",
    "        queue=\"short\",\n",
    "        walltime=\"02:59:00\",\n",
    "        local_directory=scratch_dir,\n",
    "        job_extra=[f\"-o {slurm_log}\"],\n",
    "        extra=pyrosetta.distributed.dask.worker_extra(init_flags=flags),\n",
    "    )\n",
    "    n_workers = 20\n",
    "    cluster.scale(n_workers)\n",
    "    client = Client(cluster)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "client"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Set up the global ligand docking RosettaScripts protocol within `pyrosetta.distributed`:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "xml = \"\"\"\n",
    "<ROSETTASCRIPTS>\n",
    "  <SCOREFXNS>\n",
    "    <ScoreFunction name=\"fa_standard\" weights=\"ref2015.wts\"/>\n",
    "  </SCOREFXNS>\n",
    "  <RESIDUE_SELECTORS>\n",
    "    <Chain name=\"chX\" chains=\"X\"/>\n",
    "  </RESIDUE_SELECTORS>\n",
    "  <SIMPLE_METRICS>\n",
    "    <RMSDMetric name=\"rmsd_chX\" residue_selector=\"chX\" reference_name=\"store_native\" residue_selector_ref=\"chX\" robust=\"true\" rmsd_type=\"rmsd_all\" />\n",
    "  </SIMPLE_METRICS>\n",
    "  <SCORINGGRIDS ligand_chain=\"X\" width=\"25\">\n",
    "    <ClassicGrid grid_name=\"vdw\" weight=\"1.0\"/>\n",
    "  </SCORINGGRIDS>\n",
    "  <LIGAND_AREAS>\n",
    "    <LigandArea name=\"docking_sidechain_X\" chain=\"X\" cutoff=\"6.0\" add_nbr_radius=\"true\" all_atom_mode=\"true\" minimize_ligand=\"10\"/>\n",
    "    <LigandArea name=\"final_sidechain_X\" chain=\"X\" cutoff=\"6.0\" add_nbr_radius=\"true\" all_atom_mode=\"true\"/>\n",
    "    <LigandArea name=\"final_backbone_X\" chain=\"X\" cutoff=\"7.0\" add_nbr_radius=\"false\" all_atom_mode=\"true\" Calpha_restraints=\"0.3\"/>\n",
    "  </LIGAND_AREAS>\n",
    "  <INTERFACE_BUILDERS>\n",
    "    <InterfaceBuilder name=\"side_chain_for_docking\" ligand_areas=\"docking_sidechain_X\"/>\n",
    "    <InterfaceBuilder name=\"side_chain_for_final\" ligand_areas=\"final_sidechain_X\"/>\n",
    "    <InterfaceBuilder name=\"backbone\" ligand_areas=\"final_backbone_X\" extension_window=\"3\"/>\n",
    "  </INTERFACE_BUILDERS>\n",
    "  <MOVEMAP_BUILDERS>\n",
    "    <MoveMapBuilder name=\"docking\" sc_interface=\"side_chain_for_docking\" minimize_water=\"true\"/>\n",
    "    <MoveMapBuilder name=\"final\" sc_interface=\"side_chain_for_final\" bb_interface=\"backbone\" minimize_water=\"true\"/>\n",
    "  </MOVEMAP_BUILDERS>\n",
    "  <MOVERS>\n",
    "    <SavePoseMover name=\"spm\" restore_pose=\"0\" reference_name=\"store_native\"/>\n",
    "    <Transform name=\"transform\" chain=\"X\" box_size=\"20.0\" move_distance=\"10\" angle=\"360\" initial_perturb=\"2\" cycles=\"500\" repeats=\"5\" temperature=\"1000\"/>\n",
    "    <HighResDocker name=\"high_res_docker\" cycles=\"9\" repack_every_Nth=\"3\" scorefxn=\"fa_standard\" movemap_builder=\"docking\"/>\n",
    "    <FinalMinimizer name=\"final\" scorefxn=\"fa_standard\" movemap_builder=\"final\"/>\n",
    "  </MOVERS>\n",
    "  <FILTERS>\n",
    "      <LigInterfaceEnergy name=\"interfE\" scorefxn=\"fa_standard\" energy_cutoff=\"0.0\" confidence=\"0\"/>\n",
    "      <SimpleMetricFilter name=\"rmsd_chX\" metric=\"rmsd_chX\" cutoff=\"999999.\" comparison_type=\"lt\" confidence=\"0\"/>\n",
    "  </FILTERS>\n",
    "  <PROTOCOLS>\n",
    "    <Add mover=\"spm\"/>\n",
    "    <Add mover=\"transform\"/>\n",
    "    <Add mover=\"high_res_docker\"/>\n",
    "    <Add mover=\"final\"/>\n",
    "    <Add filter=\"interfE\"/>\n",
    "    <Add filter=\"rmsd_chX\"/>\n",
    "  </PROTOCOLS>\n",
    "</ROSETTASCRIPTS>\n",
    "\"\"\"\n",
    "xml_obj = rosetta_scripts.SingleoutputRosettaScriptsTask(xml)\n",
    "xml_obj.setup()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Set up the input pose as a `PackedPose` object:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pose_obj = io.pose_from_file(filename=\"inputs/test_lig.pdb\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Submit 100 global ligand docking trajectories, very similar to using the command-line `-nstruct` flag:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "if not os.getenv(\"DEBUG\"):\n",
    "    # pure=False gives every submission its own task key. Docking is stochastic, so the\n",
    "    # 100 identical (task, pose) calls must not be deduplicated into a single computation.\n",
    "    futures = [client.submit(xml_obj, pose_obj, pure=False) for _ in range(100)]\n",
    "    # Block until each trajectory has finished and collect the results in submission order\n",
    "    results = [future.result() for future in futures]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "As results accumulate, you may wish to keep an eye on the progress bar in the `dask` dashboard.\n",
    "\n",
    "Calling `future.result()` transfers the `PackedPose` objects back to this Jupyter session, so we can inspect the scores in memory!"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "if os.getenv(\"DEBUG\"):\n",
    "    # Nothing was docked in DEBUG mode, so fall back to an empty table\n",
    "    df = pd.DataFrame()\n",
    "else:\n",
    "    # One record per docking result\n",
    "    score_records = packed_pose.to_dict(results)\n",
    "    df = pd.DataFrame.from_records(score_records)\n",
    "df.head(10)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Now plot the ligand binding energy landscape:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "if not os.getenv(\"DEBUG\"):\n",
    "    # Enlarge the default figure, then scatter the interfE column against the rmsd_chX column\n",
    "    matplotlib.rcParams.update({\"figure.figsize\": [12.0, 8.0]})\n",
    "    seaborn.scatterplot(data=df, x=\"rmsd_chX\", y=\"interfE\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Let's look at the lowest energy model according to `interfE`!"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "if not os.getenv(\"DEBUG\"):\n",
    "    # sort_values() sorts ascending, so the lowest interfE is the FIRST entry (index[0]);\n",
    "    # index[-1] would select the highest-energy model instead.\n",
    "    lowest_energy_df = df[\"interfE\"].sort_values()\n",
    "    lowest_energy_index = lowest_energy_df.index[0]\n",
    "    # df rows were built from `results` in order, so the row label indexes into `results`\n",
    "    lowest_energy_pose = results[lowest_energy_index]\n",
    "\n",
    "    view = viewer.init(lowest_energy_pose)\n",
    "    view.add(viewer.setStyle())\n",
    "    view.add(viewer.setStyle(command=({\"hetflag\": True}, {\"stick\": {\"colorscheme\": \"brownCarbon\", \"radius\": 0.2}})))\n",
    "    view.add(viewer.setHydrogenBonds())\n",
    "    view.add(viewer.setZoomTo(residue_selector=pyrosetta.rosetta.core.select.residue_selector.ChainSelector(\"X\")))\n",
    "    view()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "View the five lowest energy poses according to `interfE`:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "if not os.getenv(\"DEBUG\"):\n",
    "    # Take the five rows with the smallest interfE and pass them through packed_pose.dict_to_pose\n",
    "    top_five = df.sort_values(by=\"interfE\").head(5)\n",
    "    lowest_energy_poses = list(packed_pose.dict_to_pose(top_five.to_dict()).values())\n",
    "\n",
    "    view = viewer.init(lowest_energy_poses)\n",
    "    ligand_chain = pyrosetta.rosetta.core.select.residue_selector.ChainSelector(\"X\")\n",
    "    # Modules are added to the viewer in the order listed here\n",
    "    for module in (\n",
    "        viewer.setStyle(),\n",
    "        viewer.setStyle(command=({\"hetflag\": True}, {\"stick\": {\"colorscheme\": \"brownCarbon\", \"radius\": 0.2}})),\n",
    "        viewer.setHydrogenBonds(),\n",
    "        viewer.setZoomTo(residue_selector=ligand_chain),\n",
    "    ):\n",
    "        view.add(module)\n",
    "    view()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "If you wish to save any `PackedPose` objects as `.pdb` files:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# for i, p in enumerate(results):\n",
    "#     with open(\"outputs/RESULT_%i.pdb\" % i, \"w\") as f:\n",
    "#         f.write(io.to_pdbstring(p))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "If you wish to save a scorefile:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# with open(os.path.join(\"outputs\", \"ligand_docking_scores.fasc\"), \"w\") as f:\n",
    "#     for result in results:\n",
    "#         json.dump(result.scores, f)\n",
    "#         f.write(\"\\n\")  # one JSON record per line; without it the records run together"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Part II: Parallelized Global Ligand Docking with `dask.distributed.as_completed` and `pyrosetta.distributed`"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Example using `dask.distributed.as_completed()` function:\n",
    "\n",
    "\"Give me global ligand docks, in batches of `n_workers`, until at least one of them has a ligand RMSD of at most 0.4 Angstroms from the input ligand coordinates.\":"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from IPython import display\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "# Keep docking until the lowest ligand RMSD (Angstroms) is at or below this value\n",
    "rmsd_cutoff = 0.4\n",
    "\n",
    "if not os.getenv(\"DEBUG\"):\n",
    "    with seaborn.color_palette(\"Blues_d\", n_colors=1):\n",
    "        nstruct = n_workers\n",
    "        # pure=False gives every submission its own task key. Docking is stochastic, so\n",
    "        # identical (task, pose) calls must not be deduplicated into a single computation.\n",
    "        futures = [client.submit(xml_obj, pose_obj, pure=False) for _ in range(nstruct)]\n",
    "        seq = as_completed(futures, with_results=True)\n",
    "\n",
    "        results = []\n",
    "        for i, (future, result) in enumerate(seq, start=1):\n",
    "\n",
    "            # Update dataset\n",
    "            results.append(result)\n",
    "            df = pd.DataFrame.from_records(packed_pose.to_dict(results))\n",
    "            lowest_rmsd_chX = df[\"rmsd_chX\"].sort_values().values[0]\n",
    "\n",
    "            # Update display; clear the figure first so the full dataset is not drawn\n",
    "            # again on top of the points left over from the previous iteration\n",
    "            display.clear_output(wait=True)\n",
    "            print(f\"After {i} dock(s), the lowest rmsd_chX is {lowest_rmsd_chX}\")\n",
    "            plt.clf()\n",
    "            seaborn.scatterplot(x=\"rmsd_chX\", y=\"interfE\", data=df)\n",
    "            display.display(plt.gcf())\n",
    "\n",
    "            # Submit another batch once the current one is exhausted and the cutoff is still unmet\n",
    "            if (i >= nstruct) and (not lowest_rmsd_chX <= rmsd_cutoff):\n",
    "                nstruct += n_workers\n",
    "                for _ in range(n_workers):\n",
    "                    seq.add(client.submit(xml_obj, pose_obj, pure=False))\n",
    "\n",
    "        # The last frame is already shown above; close the figure so the inline backend\n",
    "        # does not render it a second time when the cell finishes\n",
    "        plt.close()\n",
    "else:\n",
    "    df = pd.DataFrame()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "View resulting scores in the order they completed:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<!--NAVIGATION-->\n",
    "< [Examples Using the `dask` Module](http://nbviewer.jupyter.org/github/RosettaCommons/PyRosetta.notebooks/blob/master/notebooks/16.04-dask.delayed-Via-Slurm.ipynb) | [Contents](toc.ipynb) | [Index](index.ipynb) | [PyRosettaCluster Tutorial 1A. Simple protocol](http://nbviewer.jupyter.org/github/RosettaCommons/PyRosetta.notebooks/blob/master/notebooks/16.06-PyRosettaCluster-Simple-protocol.ipynb) ><p><a href=\"https://colab.research.google.com/github/RosettaCommons/PyRosetta.notebooks/blob/master/notebooks/16.05-Ligand-Docking-dask.ipynb\"><img align=\"left\" src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open in Colab\" title=\"Open in Google Colaboratory\"></a>"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python [conda env:PyRosetta.notebooks]",
   "language": "python",
   "name": "conda-env-PyRosetta.notebooks-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.6"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}