{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Introduction"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "This notebook demonstrates pymatgen's functionality in terms of creating and editing molecules, as well as its integration with OpenBabel. For the latter, please note that you will need to have openbabel with python bindings installed. Please refer to pymatgen's documentation for installation details."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Molecules"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Molecule Summary (H4 C1)\n",
      "Reduced Formula: H4C\n",
      "Charge = 0, Spin Mult = 1\n",
      "Sites (5)\n",
      "1 C     0.000000     0.000000     0.000000\n",
      "2 H     0.000000     0.000000     1.089000\n",
      "3 H     1.026719     0.000000    -0.363000\n",
      "4 H    -0.513360    -0.889165    -0.363000\n",
      "5 H    -0.513360     0.889165    -0.363000\n"
     ]
    }
   ],
   "source": [
    "from pymatgen import Molecule\n",
    "# Create a methane molecule.\n",
    "coords = [[0.000000, 0.000000, 0.000000],\n",
    "          [0.000000, 0.000000, 1.089000],\n",
    "          [1.026719, 0.000000, -0.363000],\n",
    "          [-0.513360, -0.889165, -0.363000],\n",
    "          [-0.513360, 0.889165, -0.363000]]\n",
    "mol = Molecule([\"C\", \"H\", \"H\", \"H\", \"H\"], coords)\n",
    "print(mol)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[0.0, 0.0, 0.0] C\n",
      "[0.0, 0.0, 1.089] H\n"
     ]
    }
   ],
   "source": [
    "# A Molecule is simply a list of Sites.\n",
    "print(mol[0])\n",
    "print(mol[1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Molecule Summary (H3 C1)\n",
      "Reduced Formula: H3C\n",
      "Charge = 0, Spin Mult = 2\n",
      "Sites (4)\n",
      "1 C     0.000000     0.000000     0.000000\n",
      "2 H     1.026719     0.000000    -0.363000\n",
      "3 H    -0.513360    -0.889165    -0.363000\n",
      "4 H    -0.513360     0.889165    -0.363000\n",
      "Molecule Summary (H1)\n",
      "Reduced Formula: H2\n",
      "Charge = 0, Spin Mult = 2\n",
      "Sites (1)\n",
      "1 H     0.000000     0.000000     1.089000\n"
     ]
    }
   ],
   "source": [
    "# Break a Molecule into two by breaking a bond.\n",
    "for frag in mol.break_bond(0, 1):\n",
    "    print(frag)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[(Site: H (0.0000, 0.0000, 1.0890), 1.089), (Site: H (1.0267, 0.0000, -0.3630), 1.0889999563640946), (Site: H (-0.5134, -0.8892, -0.3630), 1.0890004071739368), (Site: H (-0.5134, 0.8892, -0.3630), 1.0890004071739368)]\n"
     ]
    }
   ],
   "source": [
    "# Getting neighbors that are within 3 angstroms from C atom.\n",
    "print(mol.get_neighbors(mol[0], 3))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[Covalent bond between [0.0, 0.0, 0.0] C and [0.0, 0.0, 1.089] H, Covalent bond between [0.0, 0.0, 0.0] C and [1.026719, 0.0, -0.363] H, Covalent bond between [0.0, 0.0, 0.0] C and [-0.51336, -0.889165, -0.363] H, Covalent bond between [0.0, 0.0, 0.0] C and [-0.51336, 0.889165, -0.363] H]\n"
     ]
    }
   ],
   "source": [
    "#Detecting bonds\n",
    "print(mol.get_covalent_bonds())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Structure Summary (H4 C1)\n",
      "Reduced Formula: H4C\n",
      "abc   :  10.000000  10.000000  10.000000\n",
      "angles:  90.000000  90.000000  90.000000\n",
      "Sites (5)\n",
      "1 H     0.500000     0.500000     0.608900\n",
      "2 H     0.602672     0.500000     0.463700\n",
      "3 H     0.448664     0.411083     0.463700\n",
      "4 H     0.448664     0.588917     0.463700\n",
      "5 C     0.500000     0.500000     0.500000\n"
     ]
    }
   ],
   "source": [
    "# If you need to run the molecule in a box with a periodic boundary condition\n",
    "# code, you can generate the boxed structure as follows (in a 10Ax10Ax10A box)\n",
    "structure = mol.get_boxed_structure(10, 10, 10)\n",
    "print(structure)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Writing to XYZ files (easy to open with many molecule file viewers)\n",
    "from pymatgen.io.xyzio import XYZ\n",
    "xyz = XYZ(mol)\n",
    "xyz.write_file(\"methane.xyz\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Openbabel interface"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "This section demonstrates pymatgen's integration with openbabel."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Canonical SMILES = C\t\n",
      "\n",
      "Inchi= InChI=1S/CH4/h1H4\n",
      "\n"
     ]
    }
   ],
   "source": [
    "from pymatgen.io.babelio import BabelMolAdaptor\n",
    "import pybel as pb\n",
    "a = BabelMolAdaptor(mol)\n",
    "# Create a pybel.Molecule, which simplifies a lot of access\n",
    "pm = pb.Molecule(a.openbabel_mol)\n",
    "# Print canonical SMILES representation (unique and comparable).\n",
    "print(\"Canonical SMILES = {}\".format(pm.write(\"can\")))\n",
    "# Print Inchi representation\n",
    "print(\"Inchi= {}\".format(pm.write(\"inchi\")))\n",
    "# pb.outformats provides a listing of available formats. \n",
    "# Let's do a write to the commonly used PDB file.\n",
    "pm.write(\"pdb\", filename=\"methane.pdb\", overwrite=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "image/svg+xml": [
       "<svg height=\"200px\" id=\"topsvg\" version=\"1.1\" viewBox=\"0 0 100 100\" width=\"200px\" x=\"0\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:cml=\"http://www.xml-cml.org/schema\" xmlns:xlink=\"http://www.w3.org/1999/xlink\" y=\"0\">\n",
       "<title>OBDepict</title>\n",
       "<rect fill=\"white\" height=\"100\" width=\"100\" x=\"0\" y=\"0\"/>\n",
       "<text fill=\"black\" font-family=\"sans-serif\" font-size=\"6\" text-anchor=\"middle\" x=\"50\" y=\"98\"/>\n",
       "<g transform=\"translate(0,0)\">\n",
       "<svg font-family=\"sans-serif\" height=\"100\" stroke=\"rgb(0,0,0)\" stroke-linecap=\"round\" stroke-width=\"2\" viewBox=\"0 0 221.89 187.549\" width=\"100\" x=\"0\" y=\"0\">\n",
       "<line stroke=\"rgb(0,0,0)\" stroke-width=\"2.0\" x1=\"79.0\" x2=\"53.3\" y1=\"120.7\" y2=\"129.0\"/>\n",
       "<line stroke=\"rgb(0,0,0)\" stroke-width=\"2.0\" x1=\"77.1\" x2=\"51.4\" y1=\"115.0\" y2=\"123.3\"/>\n",
       "<line stroke=\"rgb(0,0,0)\" stroke-width=\"2.0\" x1=\"130.4\" x2=\"155.1\" y1=\"79.8\" y2=\"68.8\"/>\n",
       "<line stroke=\"rgb(0,0,0)\" stroke-width=\"2.0\" x1=\"130.4\" x2=\"133.2\" y1=\"79.8\" y2=\"52.9\"/>\n",
       "<line stroke=\"rgb(0,0,0)\" stroke-width=\"2.0\" x1=\"142.8\" x2=\"160.8\" y1=\"117.8\" y2=\"137.9\"/>\n",
       "<line stroke=\"rgb(0,0,0)\" stroke-width=\"2.0\" x1=\"142.8\" x2=\"169.2\" y1=\"117.8\" y2=\"112.2\"/>\n",
       "<line stroke=\"rgb(0,0,0)\" stroke-width=\"2.0\" x1=\"130.4\" x2=\"142.8\" y1=\"79.8\" y2=\"117.8\"/>\n",
       "<line stroke=\"rgb(0,0,0)\" stroke-width=\"2.0\" x1=\"142.8\" x2=\"120.9\" y1=\"117.8\" y2=\"133.7\"/>\n",
       "<line stroke=\"rgb(0,0,0)\" stroke-width=\"2.0\" x1=\"99.9\" x2=\"78.0\" y1=\"133.7\" y2=\"117.8\"/>\n",
       "<line stroke=\"rgb(0,0,0)\" stroke-width=\"2.0\" x1=\"78.0\" x2=\"86.4\" y1=\"117.8\" y2=\"92.1\"/>\n",
       "<line stroke=\"rgb(0,0,0)\" stroke-width=\"2.0\" x1=\"103.4\" x2=\"130.4\" y1=\"79.8\" y2=\"79.8\"/>\n",
       "<text fill=\"rgb(255,12,12)\" font-size=\"16\" stroke=\"rgb(255,12,12)\" stroke-width=\"1\" x=\"104.402940\" y=\"149.334547\">O</text>\n",
       "<text fill=\"rgb(255,12,12)\" font-size=\"16\" stroke=\"rgb(255,12,12)\" stroke-width=\"1\" x=\"34.000000\" y=\"138.183816\">O</text>\n",
       "<text fill=\"rgb(255,12,12)\" font-size=\"16\" stroke=\"rgb(255,12,12)\" stroke-width=\"1\" x=\"84.402940\" y=\"87.780876\">O</text>\n",
       "<text fill=\"rgb(191,191,191)\" font-size=\"16\" stroke=\"rgb(191,191,191)\" stroke-width=\"1\" x=\"160.944759\" y=\"71.511410\">H</text>\n",
       "<text fill=\"rgb(191,191,191)\" font-size=\"16\" stroke=\"rgb(191,191,191)\" stroke-width=\"1\" x=\"128.584079\" y=\"48.000000\">H</text>\n",
       "<text fill=\"rgb(191,191,191)\" font-size=\"16\" stroke=\"rgb(191,191,191)\" stroke-width=\"1\" x=\"163.528844\" y=\"155.548929\">H</text>\n",
       "<text fill=\"rgb(191,191,191)\" font-size=\"16\" stroke=\"rgb(191,191,191)\" stroke-width=\"1\" x=\"175.889524\" y=\"117.506669\">H</text>\n",
       "</svg>\n",
       "</g>\n",
       "</svg>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Generating ethylene carbonate (SMILES obtained from Wikipedia)\n",
    "# And displaying the svg.\n",
    "ec = pb.readstring(\"smi\", \"C1COC(=O)O1\")\n",
    "ec.make3D()\n",
    "from IPython.core.display import SVG, display_svg\n",
    "svg = SVG(ec.write(\"svg\"))\n",
    "display_svg(svg)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Input/Output"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Pymatgen has built-in support for the XYZ and Gaussian, NWchem file formats. It also has support for most other file formats if you have openbabel with Python bindings installed."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "5\n",
      "H4 C1\n",
      "C 0.000000 0.000000 0.000000\n",
      "H 0.000000 0.000000 1.089000\n",
      "H 1.026719 0.000000 -0.363000\n",
      "H -0.513360 -0.889165 -0.363000\n",
      "H -0.513360 0.889165 -0.363000\n",
      "#P HF/6-31G(d)  Test\n",
      "\n",
      "H4 C1\n",
      "\n",
      "0 1\n",
      "C\n",
      "H 1 B1\n",
      "H 1 B2 2 A2\n",
      "H 1 B3 2 A3 3 D3\n",
      "H 1 B4 2 A4 4 D4\n",
      "\n",
      "B1=1.089000\n",
      "B2=1.089000\n",
      "A2=109.471221\n",
      "B3=1.089000\n",
      "A3=109.471213\n",
      "D3=120.000017\n",
      "B4=1.089000\n",
      "A4=109.471213\n",
      "D4=119.999966\n",
      "\n",
      "\n",
      "\n",
      "COMPND    UNNAMED\n",
      "AUTHOR    GENERATED BY OPEN BABEL 2.3.2\n",
      "HETATM    1  C   LIG     1       0.000   0.000   0.000  1.00  0.00           C  \n",
      "HETATM    2  H   LIG     1       0.000   0.000   1.089  1.00  0.00           H  \n",
      "HETATM    3  H   LIG     1       1.027   0.000  -0.363  1.00  0.00           H  \n",
      "HETATM    4  H   LIG     1      -0.513  -0.889  -0.363  1.00  0.00           H  \n",
      "HETATM    5  H   LIG     1      -0.513   0.889  -0.363  1.00  0.00           H  \n",
      "CONECT    1    3    4    5    2                                       \n",
      "CONECT    1                                                           \n",
      "CONECT    2    1                                                      \n",
      "CONECT    3    1                                                      \n",
      "CONECT    4    1                                                      \n",
      "CONECT    5    1                                                      \n",
      "MASTER        0    0    0    0    0    0    0    0    5    0    5    0\n",
      "END\n",
      "\n",
      "Molecule Summary (H4 C1)\n",
      "Reduced Formula: H4C\n",
      "Charge = 0, Spin Mult = 1\n",
      "Sites (5)\n",
      "1 C     0.000000     0.000000     0.000000\n",
      "2 H     0.000000     0.000000     1.089000\n",
      "3 H     1.027000     0.000000    -0.363000\n",
      "4 H    -0.513000    -0.889000    -0.363000\n",
      "5 H    -0.513000     0.889000    -0.363000\n"
     ]
    }
   ],
   "source": [
    "print(mol.to(fmt=\"xyz\"))\n",
    "print(mol.to(fmt=\"g09\"))\n",
    "print(mol.to(fmt=\"pdb\")) #Needs Openbabel.\n",
    "\n",
    "mol.to(filename=\"methane.xyz\")\n",
    "mol.to(filename=\"methane.pdb\") #Needs Openbabel.\n",
    "\n",
    "print(Molecule.from_file(\"methane.pdb\"))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "For more fine-grained control over output, you can use the underlying IO classes Gaussian and Nwchem, two commonly used computational chemistry programs."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "%mem=1000MW\n",
      "#P B3LYP/6-31G(d) Opt SCF=Tight Test\n",
      "\n",
      "methane\n",
      "\n",
      "0 1\n",
      "C\n",
      "H 1 B1\n",
      "H 1 B2 2 A2\n",
      "H 1 B3 2 A3 3 D3\n",
      "H 1 B4 2 A4 4 D4\n",
      "\n",
      "B1=1.089000\n",
      "B2=1.089000\n",
      "A2=109.471221\n",
      "B3=1.089000\n",
      "A3=109.471213\n",
      "D3=120.000017\n",
      "B4=1.089000\n",
      "A4=109.471213\n",
      "D4=119.999966\n",
      "\n",
      "\n",
      "\n"
     ]
    }
   ],
   "source": [
    "from pymatgen.io.gaussianio import GaussianInput\n",
    "gau = GaussianInput(mol, charge=0, spin_multiplicity=1, title=\"methane\", \n",
    "                    functional=\"B3LYP\", basis_set=\"6-31G(d)\", \n",
    "                    route_parameters={'Opt': \"\", \"SCF\": \"Tight\"},\n",
    "                    link0_parameters={\"%mem\": \"1000MW\"})\n",
    "print(gau)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "geometry units angstroms\n",
      " C 0.0 0.0 0.0\n",
      " H 0.0 0.0 1.089\n",
      " H 1.026719 0.0 -0.363\n",
      " H -0.51336 -0.889165 -0.363\n",
      " H -0.51336 0.889165 -0.363\n",
      "end\n",
      "\n",
      "title \"H4C1 dft optimize\"\n",
      "charge 0\n",
      "basis\n",
      " C library \"6-31G\"\n",
      " H library \"6-31G\"\n",
      "end\n",
      "dft\n",
      " mult 1\n",
      " xc b3lyp\n",
      "end\n",
      "task dft optimize\n",
      "\n",
      "title \"H4C1 dft freq\"\n",
      "charge 0\n",
      "basis\n",
      " C library \"6-31G\"\n",
      " H library \"6-31G\"\n",
      "end\n",
      "dft\n",
      " mult 1\n",
      " xc b3lyp\n",
      "end\n",
      "task dft freq\n",
      "\n",
      "title \"H4C1 dft energy\"\n",
      "charge 0\n",
      "basis\n",
      " C library \"6-311G\"\n",
      " H library \"6-311G\"\n",
      "end\n",
      "dft\n",
      " mult 1\n",
      " xc b3lyp\n",
      "end\n",
      "task dft energy\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# A standard relaxation + SCF energy nwchem calculation input file for methane.\n",
    "from pymatgen.io.nwchemio import NwTask, NwInput\n",
    "tasks = [\n",
    "    NwTask.dft_task(mol, operation=\"optimize\", xc=\"b3lyp\",\n",
    "                    basis_set=\"6-31G\"),\n",
    "    NwTask.dft_task(mol, operation=\"freq\", xc=\"b3lyp\",\n",
    "                    basis_set=\"6-31G\"),\n",
    "    NwTask.dft_task(mol, operation=\"energy\", xc=\"b3lyp\",\n",
    "                    basis_set=\"6-311G\"),\n",
    "]\n",
    "nwi = NwInput(mol, tasks, geometry_options=[\"units\", \"angstroms\"])\n",
    "print(nwi)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## This concludes the demo on pymatgen's basic capabilities for molecules."
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}