{ "cells": [ { "cell_type": "code", "execution_count": null, "id": "5fefb8f1", "metadata": {}, "outputs": [], "source": [ "import psi4\n", "import rdkit\n", "from rdkit.Chem import AllChem\n", "import numpy as np\n", "from matplotlib import pyplot as plt\n", "\n", "# If you get \"kernel died\" errors, try changing the order of psi4 and rdkit inputs.\n", "\n", "n2 = rdkit.Chem.MolFromSmiles(\"N#N\")\n", "AllChem.EmbedMolecule(n2)\n", "n2" ] }, { "cell_type": "code", "execution_count": null, "id": "24a2a335", "metadata": {}, "outputs": [], "source": [ "print(rdkit.Chem.MolToXYZBlock(n2))" ] }, { "cell_type": "code", "execution_count": null, "id": "1352e391", "metadata": {}, "outputs": [], "source": [ "psi4.set_memory('4096 MB')\n", "psi4.core.set_output_file('n2.txt',False)\n", "\n", "n2_p4 = psi4.geometry(rdkit.Chem.MolToXYZBlock(n2))" ] }, { "cell_type": "code", "execution_count": null, "id": "acf848f0", "metadata": {}, "outputs": [], "source": [ "#compute the electronic energy for N2\n", "psi4.energy('B3LYP/6-311G**',molecule = n2_p4)" ] }, { "cell_type": "markdown", "id": "2af709b3", "metadata": {}, "source": [ "In this cell, we're specifying the geometry of the molecule using a [Z-matrix](https://gaussian.com/zmat/)." ] }, { "cell_type": "code", "execution_count": null, "id": "207861e9", "metadata": {}, "outputs": [], "source": [ "x = np.linspace(0.6,5,30)\n", "hfegy = []\n", "calc = 'HF/3-21G'\n", "\n", "psi4.core.set_output_file('n2-pec-hf-321g.txt',False)\n", "count = 1\n", "for R in x:\n", " print(f'Performing calculation {count}/{len(x)}')\n", " count +=1\n", " n2_p4 = psi4.geometry(f\"\"\"\n", " N\n", " N 1 {R}\n", " \"\"\")\n", " \n", " hfegy.append(psi4.energy(calc,molecule=n2_p4))\n", " \n", "fig, ax = plt.subplots()\n", "ax.plot(x,hfegy,marker='o',linestyle='solid')\n", "ax.set_xlabel(r'N-N Bond length ($\\AA$)')\n", "ax.set_ylabel('Energy (Hartree)')" ] }, { "cell_type": "code", "execution_count": null, "id": "7177c60a", "metadata": {}, "outputs": [], "source": [ "x = np.linspace(0.6,5,30)\n", "b3lypegy = []\n", "calc = 'B3LYP/3-21G'\n", "\n", "psi4.core.set_output_file('n2-pec-b3lyp-321g.txt',False)\n", "count = 1\n", "for R in x:\n", " print(f'Performing calculation {count}/{len(x)}')\n", " count +=1\n", " n2_p4 = psi4.geometry(f\"\"\"\n", " N\n", " N 1 {R}\n", " \"\"\")\n", " \n", " b3lypegy.append(psi4.energy(calc,molecule=n2_p4))\n", " \n", "fig, ax = plt.subplots()\n", "ax.plot(x,b3lypegy,marker='o',linestyle='solid',label='B3LYP/3-21G')\n", "ax.plot(x,hfegy,marker='o',linestyle='solid',label='HF/3-21G')\n", "ax.set_xlabel(r'N-N Bond length ($\\AA$)')\n", "ax.set_ylabel('Energy (Hartree)')\n", "ax.legend()" ] }, { "cell_type": "code", "execution_count": null, "id": "8a15cadf", "metadata": {}, "outputs": [], "source": [ "x = np.linspace(0.6,5,30)\n", "b3lypegy2 = []\n", "calc = 'B3LYP/6-31G*'\n", "\n", "psi4.core.set_output_file('n2-pec-b3lyp-631gs.txt',False)\n", "count = 1\n", "for R in x:\n", " print(f'Performing calculation {count}/{len(x)}')\n", " count +=1\n", " n2_p4 = psi4.geometry(f\"\"\"\n", " N\n", " N 1 {R}\n", " \"\"\")\n", " \n", " b3lypegy2.append(psi4.energy(calc,molecule=n2_p4))\n", " \n", "fig, ax = plt.subplots()\n", "ax.plot(x,b3lypegy,marker='o',linestyle='solid',label='B3LYP/3-21G')\n", "ax.plot(x,hfegy,marker='o',linestyle='solid',label='HF/3-21G')\n", "ax.plot(x,b3lypegy2,marker='o',linestyle='solid',label='B3LYP/6-31G*')\n", "ax.set_xlabel(r'N-N Bond length ($\\AA$)')\n", "ax.set_ylabel('Energy (Hartree)')\n", "ax.legend()" ] }, { "cell_type": "markdown", "id": "eb591606", "metadata": {}, "source": [ "How do we find out what the optimized structure is?" ] }, { "cell_type": "code", "execution_count": null, "id": "a4df3806", "metadata": {}, "outputs": [], "source": [ "calc = 'B3LYP/6-31G*'\n", "psi4.core.set_output_file('n2-opt-b3lyp-631gs.txt',False)\n", "\n", "#pick a starting geometry (use rdkit value)\n", "n2_geo = psi4.geometry(rdkit.Chem.MolToXYZBlock(n2))\n", "\n", "#perform optimization with psi4.optimize\n", "egy = psi4.optimize(calc,molecule=n2_geo)\n", "\n", "#these functions will print things to the output file\n", "n2_geo.print_distances()\n", "n2_geo.print_bond_angles()\n", "\n", "#this returns an acual matrix in units of Bohr\n", "print(n2_geo.distance_matrix().to_array())\n", "\n", "#convert to Angstrom\n", "print(n2_geo.distance_matrix().to_array()*psi4.constants.bohr2angstroms)" ] }, { "cell_type": "code", "execution_count": null, "id": "d2b1f6b7", "metadata": {}, "outputs": [], "source": [ "egy" ] }, { "cell_type": "code", "execution_count": null, "id": "20e1acb7", "metadata": { "tags": [ "raises-exception" ] }, "outputs": [], "source": [ "psi4.core.set_output_file('n2-freq-b3lyp-631gs.txt',False)\n", "\n", "# The results of the frequency calculation can be seen in the output file.\n", "psi4.frequency(calc,molecule=n2_geo)" ] }, { "cell_type": "code", "execution_count": null, "id": "7d6d419b-9ca0-4e90-88fd-806761bbf0aa", "metadata": {}, "outputs": [], "source": [ "help(psi4.frequency)" ] }, { "cell_type": "code", "execution_count": null, "id": "dccc815c", "metadata": {}, "outputs": [], "source": [ "calc = 'HF/6-31G*'\n", "psi4.core.set_output_file('n2-opt-hf-631gs.txt',False)\n", "\n", "#A frequency calculation must start from an optimized geometry at the same level of theory.\n", "n2_geo = psi4.geometry(rdkit.Chem.MolToXYZBlock(n2))\n", "egy = psi4.optimize(calc,molecule=n2_geo)\n", "\n", "#Hartree-Fock frequency calculations do work\n", "#Prof. Wang reported this problem as a bug, and it should be fixed in an upcoming release.\n", "psi4.frequency(calc,molecule=n2_geo)" ] }, { "cell_type": "code", "execution_count": null, "id": "5a8cb626", "metadata": {}, "outputs": [], "source": [ "# Now let's look at a larger molecule, bonzoquinone\n", "m1 = rdkit.Chem.MolFromSmiles('C1=CC(=O)C=CC1=O')\n", "m1" ] }, { "cell_type": "code", "execution_count": null, "id": "e6f24488", "metadata": {}, "outputs": [], "source": [ "#add H atoms, embed, visualize\n", "m1 = rdkit.Chem.AddHs(m1)\n", "AllChem.EmbedMolecule(m1)\n", "m1" ] }, { "cell_type": "code", "execution_count": null, "id": "830d635a", "metadata": {}, "outputs": [], "source": [ "import nglview\n", "nglview.show_rdkit(m1)" ] }, { "cell_type": "code", "execution_count": null, "id": "54960655", "metadata": {}, "outputs": [], "source": [ "bq = psi4.geometry(rdkit.Chem.MolToXYZBlock(m1))\n", "psi4.core.set_output_file('bq-egy-b3lyp-631gs.txt')\n", "calc = 'B3LYP/6-31G*'\n", "egy = psi4.energy(calc,molecule=bq)\n", "egy" ] }, { "cell_type": "code", "execution_count": null, "id": "36bb7f2d", "metadata": {}, "outputs": [], "source": [ "psi4.core.set_output_file('bq-opt-b3lyp-631gs.txt')\n", "calc = 'B3LYP/6-31G*'\n", "optegy = psi4.optimize(calc,molecule=bq)\n", "optegy" ] }, { "cell_type": "code", "execution_count": null, "id": "516f303e", "metadata": {}, "outputs": [], "source": [ "print(abs(egy-optegy)*psi4.constants.hartree2kcalmol)" ] }, { "cell_type": "code", "execution_count": null, "id": "d8af704d", "metadata": {}, "outputs": [], "source": [ "#get optimized structure into rdkit... not as straightforward as it might seem!\n", "#the xyz coordinates don't contain bonding information, which rdkit requires to construct the molecular graph\n", "#however, as long as we initialize the psi4 calculation with an rdkit XYZBlock\n", "#we can simply replace the coordinates with the optimized ones, because the atoms will be in the same order\n", "\n", "#get geometry, and reshape it into an Nx3 array\n", "psi4_geom = bq.to_dict()['geom'].reshape(-1,3)\n", "\n", "from rdkit.Chem import rdchem\n", "\n", "#create a new version of the molecule\n", "m2 = rdkit.Chem.MolFromSmiles('C1=CC(=O)C=CC1=O')\n", "m2 = rdkit.Chem.AddHs(m2)\n", "AllChem.EmbedMolecule(m2)\n", "conf = m2.GetConformer(0)\n", "for i in range(m2.GetNumAtoms()):\n", " conf.SetAtomPosition(i,psi4_geom[i])\n", " \n", "#add this as a second conformer to the molecule, then remove the original\n", "m2.AddConformer(conf,assignId=True)\n", "m2.RemoveConformer(0)\n", "nglview.show_rdkit(m2)" ] }, { "cell_type": "code", "execution_count": null, "id": "e384644e", "metadata": {}, "outputs": [], "source": [ "#Compute RMSD of the optimized structure with the conformer generated by rdkit\n", "rdkit.Chem.rdMolAlign.AlignMol(m1, m2)" ] }, { "cell_type": "code", "execution_count": null, "id": "d3de11be", "metadata": {}, "outputs": [], "source": [ "# Now let's try to calculate what the energy of this structure would be in water\n", "# This is done using a polarizable continuum model -- approximating\n", "# a solvation environment by surrounding the molecule with charges that resemble\n", "# the charge distribution of water molecules, but not using a quantum mechanical treatment\n", "# of water molecules themselves.\n", "\n", "# https://pubs.acs.org/doi/abs/10.1021/cr9904009\n", "\n", "# Set implicit solvent options. Due to a quirk, this cell\n", "# may only be called once. If you want to use a different solvent, you have to restart the kernel!\n", "# The details of the \"Cavity\" are fairly standard values that should only be \n", "# adjusted if you have more expertise\n", "pcm_string = \"\"\"\n", "Units = Angstrom\n", "Medium {\n", " SolverType = IEFPCM\n", " Solvent = Water\n", "}\n", "Cavity {\n", " RadiiSet = UFF\n", " Type = GePol\n", " Scaling = False\n", " Area = 0.3\n", " Mode = Implicit\n", "}\n", "\"\"\"\n", "psi4.pcm_helper(pcm_string)\n", "psi4.set_options({'pcm':True, 'pcm_scf_type':'total'})" ] }, { "cell_type": "code", "execution_count": null, "id": "3465016f", "metadata": {}, "outputs": [], "source": [ "# Compute implicit solvent energy.\n", "psi4.core.set_output_file('bq-solvation-b3lyp-631gs.txt',False)\n", "solvegy = psi4.energy('b3lyp/6-31g*',molecule = bq)" ] }, { "cell_type": "code", "execution_count": null, "id": "3d440c55", "metadata": {}, "outputs": [], "source": [ "# The energy of solvation is approximately the difference between the gas-phase energy and the solvated energy\n", "# Ideally one would optimize the molecule's geometry inside the solvent, but that is very challenging as\n", "# a standard method is not readily available. It is possible to do, but the details are well beyond the scope\n", "# of this course\n", "print((solvegy-optegy)*psi4.constants.hartree2kcalmol)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.18" } }, "nbformat": 4, "nbformat_minor": 5 }