{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": { "slideshow": { "slide_type": "skip" } }, "outputs": [], "source": [ "from htmd.ui import *\n", "config(viewer='ngl')" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "# Adaptive sampling" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "-" } }, "source": [ "## Stefan Doerr\n", "Universitat Pompeu Fabra & Acellera" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "We demonstrate how to use the HTMD code to run adaptive sampling.\n", "\n", "You can download the generators from the following link:\n", "\n", "* [Generators](http://pub.htmd.org/tutorials/adaptive-sampling/generators.tar.gz).\n", "\n", "Alternatively, you can download the generators using `wget`." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import os\n", "os.system('wget -q http://pub.htmd.org/tutorials/adaptive-sampling/generators.tar.gz; tar -xf generators.tar.gz && rm generators.tar.gz')" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "## Generators folder structure" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "generators\r\n", "|-- ntl9_1ns_0\r\n", "| |-- input\r\n", "| |-- input.coor\r\n", "| |-- input.xsc\r\n", "| |-- parameters\r\n", "| |-- run.sh\r\n", "| |-- structure.pdb\r\n", "| `-- structure.psf\r\n", "|-- ntl9_1ns_1\r\n", "| |-- input\r\n", "| |-- input.coor\r\n", "| |-- input.xsc\r\n", "| |-- parameters\r\n", "| |-- run.sh\r\n", "| |-- structure.pdb\r\n", "| `-- structure.psf\r\n", "`-- ntl9_1ns_2\r\n", " |-- input\r\n", " |-- input.coor\r\n" ] } ], "source": [ "!tree generators | head -20" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "## 
Adaptive classes"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "-"
    }
   },
   "source": [
    "* AdaptiveMD (free exploration)\n",
    "* AdaptiveGoal (exploration + exploitation)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Create a directory for each type of adaptive and copy the generators into them:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import shutil\n",
    "\n",
    "# One working directory per adaptive flavour, each with its own copy of the generators.\n",
    "for workdir in ('./adaptivemd', './adaptivegoal'):\n",
    "    os.makedirs(workdir, exist_ok=True)\n",
    "    target = os.path.join(workdir, 'generators')\n",
    "    # copytree refuses to write into an existing directory, so skip the copy on re-runs.\n",
    "    if not os.path.isdir(target):\n",
    "        shutil.copytree('./generators', target)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "source": [
    "## AdaptiveMD"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "os.chdir('./adaptivemd')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "* Set up the queue that will be used for simulations. 
\n", "* Tell it to store completed trajectories in the data folder as this is where `AdaptiveMD` expects them to be by default" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "queue = LocalGPUQueue()\n", "queue.datadir = './data'" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": true }, "outputs": [], "source": [ "ad = AdaptiveMD()\n", "ad.app = queue" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "* Set the `nmin`, `nmax` and `nepochs`" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": true }, "outputs": [], "source": [ "ad.nmin = 1\n", "ad.nmax = 3\n", "ad.nepochs = 3" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "* Choose what projection to use for the construction of the Markov model" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": true }, "outputs": [], "source": [ "protsel = 'protein and name CA'\n", "ad.projection = MetricSelfDistance(protsel)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "* Set the `updateperiod` of the Adaptive to define how often it will poll for completed simulations and redo the analysis" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": true }, "outputs": [], "source": [ "ad.updateperiod = 300 # execute every 5 minutes" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "Launch the `AdaptiveMD` run:" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "ad.run()" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": true }, "outputs": [], "source": [ "os.chdir('../adaptivegoal')" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "## AdaptiveGoal" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": true }, 
"outputs": [], "source": [ "os.chdir('../adaptivegoal')" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "-" } }, "source": [ "* Most of the class arguments are identical to AdaptiveMD" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": [ "adg = AdaptiveGoal()\n", "adg.app = queue\n", "adg.nmin = 1\n", "adg.nmax = 3\n", "adg.nepochs = 2\n", "adg.generatorspath = './generators'\n", "adg.projection = MetricSelfDistance('protein and name CA')\n", "adg.updateperiod = 300 # execute every 5 minutes\n", "adg.goalfunction = None # set to None just as an example" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "* It requires the `goalfunction` argument which defines a goal\n", "* We can define a variety of different goal functions" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "## The goal function" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The goal function will:\n", "* take as input a `Molecule` object of a simulation and \n", "* produce as output a score for each frame of that simulation. \n", "* The higher the score, the more desirable that simulation frame for being respawned." ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "## RMSD goal function" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "For this goal function, we will use a crystal structure of NTL9.\n", "\n", "You can download the structure from the following link and save it on the `adaptivegoal` directory:\n", "\n", "* [NTL9 crystal structure](http://pub.htmd.org/tutorials/adaptive-sampling/ntl9_crystal.pdb).\n", "\n", "Alternatively, you can download the structure using `wget`." 
] },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "os.system('wget -q http://pub.htmd.org/tutorials/adaptive-sampling/ntl9_crystal.pdb')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "source": [
    "We can define a simple goal function that uses the RMSD between the conformation sampled and a reference (in this case, the crystal structure), and returns a score to be evaluated by the `AdaptiveGoal` algorithm:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "ref = Molecule('./ntl9_crystal.pdb')\n",
    "\n",
    "def mygoalfunction(mol):\n",
    "    \"\"\"Goal function: negated CA RMSD projection of `mol` against `ref`.\n",
    "\n",
    "    The sign is flipped so that a lower RMSD yields a higher score.\n",
    "    \"\"\"\n",
    "    rmsd_metric = MetricRmsd(ref, 'protein and name CA')\n",
    "    return -rmsd_metric.project(mol)  # 1/RMSD would also work\n",
    "\n",
    "adg.goalfunction = mygoalfunction"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "`AdaptiveGoal` ranks conformations from high to low score. For the case of RMSD, since we want a lower RMSD to give a higher score, the negative of the RMSD is returned instead (the inverse would also work)."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "source": [
    "Launch the `AdaptiveGoal` run:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "adg.run()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "source": [
    "## Functions with multiple arguments"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The goal function can also take multiple arguments. This allows flexibility and on-the-fly comparisons to non-static conformations (i.e. compare with different references as the run progresses). 
Here, we redefine the previous goal function with multiple arguments:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "def newgoalfunction(mol, crystal):\n",
    "    \"\"\"Return the negated CA RMSD projection of `mol` against `crystal`.\n",
    "\n",
    "    The reference structure is passed in as an argument instead of being\n",
    "    read from a global variable.\n",
    "    \"\"\"\n",
    "    rmsd = MetricRmsd(crystal, 'protein and name CA').project(mol)\n",
    "    return -rmsd  # or even 1/rmsd"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "source": [
    "Now we clean the previous `AdaptiveGoal` run, and start a new one with the new goal function:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "slideshow": {
     "slide_type": "-"
    }
   },
   "outputs": [],
   "source": [
    "# clean previous run\n",
    "shutil.rmtree('./input')\n",
    "shutil.rmtree('./data')\n",
    "shutil.rmtree('./filtered')\n",
    "\n",
    "# run with new goal\n",
    "ref = Molecule('./ntl9_crystal.pdb')\n",
    "adg.goalfunction = (newgoalfunction, (ref,))\n",
    "adg.run()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "source": [
    "# Other goal function examples"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "source": [
    "## Secondary structure goal function"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "ref = Molecule('./ntl9_crystal.pdb')\n",
    "\n",
    "def ssGoal(mol, crystal):\n",
    "    \"\"\"Score `mol` by agreement of its secondary-structure projection with `crystal`.\n",
    "\n",
    "    Returns, for each row of the projection of `mol`, the fraction of\n",
    "    entries equal to the first row of the projection of `crystal`.\n",
    "    \"\"\"\n",
    "    crystalSS = MetricSecondaryStructure().project(crystal)[0]\n",
    "    proj = MetricSecondaryStructure().project(mol)\n",
    "    # How many crystal SS match with simulation SS\n",
    "    ss_score = np.sum(proj == crystalSS, axis=1) / proj.shape[1]\n",
    "    return ss_score\n",
    "\n",
    "adg.goalfunction = (ssGoal, (ref,))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "source": [
    "## Contacts goal function"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "ref = Molecule('./ntl9_crystal.pdb')\n",
    "\n",
    "def contactGoal(mol, crystal):\n",
    "    \"\"\"Score `mol` by the fraction of crystal contacts it reproduces.\n",
    "\n",
    "    Contacts come from `MetricSelfDistance` on 'protein and name CA' with\n",
    "    `metric='contacts'` and `threshold=10`. Returns one score per row of\n",
    "    the projection of `mol`.\n",
    "    \"\"\"\n",
    "    crystalCO = MetricSelfDistance('protein and name CA', pbc=False,\n",
    "                                   metric='contacts',\n",
    "                                   threshold=10).project(crystal)\n",
    "    # Use the first row as a 1D boolean column mask, mirroring the `[0]` in ssGoal.\n",
    "    # NOTE(review): assumes project() yields one row per frame - confirm;\n",
    "    # atleast_2d keeps this working for both 1D and (1, n) results.\n",
    "    crystalCO = np.atleast_2d(crystalCO)[0].astype(bool)\n",
    "    proj = MetricSelfDistance('protein and name CA',\n",
    "                              metric='contacts',\n",
    "                              threshold=10).project(mol)\n",
    "    # How many crystal contacts are seen?\n",
    "    co_score = np.sum(proj[:, crystalCO] == 1, axis=1)\n",
    "    # Plain division instead of `/=`: the counts are integers, and an in-place\n",
    "    # true division cannot store its float result back into them.\n",
    "    co_score = co_score / np.sum(crystalCO)\n",
    "    return co_score\n",
    "\n",
    "adg.goalfunction = (contactGoal, (ref,))"
   ]
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "celltoolbar": "Slideshow",
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}