{
 "metadata": {
  "name": ""
 },
 "nbformat": 3,
 "nbformat_minor": 0,
 "worksheets": [
  {
   "cells": [
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "# clogSw: estimating water solubility with RDKit\n",
      "\n",
      "This notebook defines `clogSw`, a linear estimate of water solubility computed from four RDKit-derived descriptors (clogP, molecular weight, rotatable-bond count and aromatic proportion). It then applies the estimate to every molecule in `Delaney_SupplData.smi` and writes it, together with the measured and ESOL-predicted solubilities read from that file, to `ESOL_Results.csv`."
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "import csv\n",
      "import sys\n",
      "from collections import OrderedDict\n",
      "\n",
      "from rdkit import Chem\n",
      "from rdkit.Chem import Descriptors"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Input: tab-delimited SMILES file with a title line.\n",
      "INPUT_SMILES_FILE = \"Delaney_SupplData.smi\"\n",
      "# Output: one CSV row per molecule id.\n",
      "OUTPUT_CSV_FILE = \"ESOL_Results.csv\"\n",
      "\n",
      "# Names of the molecule properties read from the input file.\n",
      "MEASURED_PROP = \"measured log(solubility:mol/L)\"\n",
      "ESOL_PROP = \"ESOL predicted log(solubility:mol/L)\""
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "## Solubility estimate"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "def clogSw(molObj):\n",
      "    \"\"\"\n",
      "    Calculate an estimation of water solubility.\n",
      "\n",
      "    The estimate is a linear combination of clogP, molecular weight,\n",
      "    number of rotatable bonds and aromatic proportion (aromatic atoms\n",
      "    divided by the total number of atoms in the molecule object).\n",
      "\n",
      "    Input:  RDKit molecule object\n",
      "\n",
      "    Output: clogSw estimation (float)\n",
      "    \"\"\"\n",
      "    # calculate MolWeight\n",
      "    MolWeight = Descriptors.MolWt(molObj)\n",
      "    # calculate clogP\n",
      "    clogP = Descriptors.MolLogP(molObj)\n",
      "    # calculate RotBonds\n",
      "    RotBonds = Descriptors.NumRotatableBonds(molObj)\n",
      "\n",
      "    # Count aromatic atoms directly instead of matching the SMARTS \"[a]\":\n",
      "    # GetSubstructMatches stops at its maxMatches limit, and the pattern\n",
      "    # would have to be re-parsed on every call.\n",
      "    aromaticHeavyatoms = sum(1 for atom in molObj.GetAtoms()\n",
      "                             if atom.GetIsAromatic())\n",
      "    # calculate total number of atoms in the molecule\n",
      "    numAtoms = molObj.GetNumAtoms()\n",
      "    # calculate Aromatic Proportion; a molecule without atoms gets 0.0\n",
      "    # instead of raising ZeroDivisionError\n",
      "    if numAtoms:\n",
      "        AromProp = float(aromaticHeavyatoms) / numAtoms\n",
      "    else:\n",
      "        AromProp = 0.0\n",
      "\n",
      "    # Previous coefficients, kept for reference:\n",
      "    #   0.16 - 0.63 * clogP - 0.0062 * MolWeight\n",
      "    #        + 0.066 * RotBonds - 0.74 * AromProp\n",
      "\n",
      "    # New clogSw with coefficients calculated using Ridge Regression\n",
      "    # with Crossvalidation\n",
      "    clogSw_value = (0.233743817233\n",
      "                    - 0.74253027 * clogP\n",
      "                    - 0.00676305 * MolWeight\n",
      "                    + 0.01580559 * RotBonds\n",
      "                    - 0.35483585 * AromProp)\n",
      "\n",
      "    return clogSw_value"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "## Score the input molecules"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "logSw = OrderedDict()  # keyed by molecule id, kept in input order\n",
      "n_skipped = 0\n",
      "supplier = Chem.SmilesMolSupplier(INPUT_SMILES_FILE, delimiter=\"\\t\", titleLine=True)\n",
      "for mol in supplier:\n",
      "    # The supplier yields None for entries RDKit could not parse.\n",
      "    if mol is None:\n",
      "        n_skipped += 1\n",
      "        continue\n",
      "\n",
      "    mol_id = \"ESOL_\" + mol.GetProp(\"_Name\")\n",
      "    mol.SetProp(\"_Name\", mol_id)\n",
      "\n",
      "    # RDKit clogSw\n",
      "    predicted = clogSw(mol)\n",
      "    mol.SetProp(\"RDKit_clogSw\", str(predicted))\n",
      "\n",
      "    logSw[mol_id] = {\n",
      "        \"CANSMILES\": Chem.MolToSmiles(mol),\n",
      "        \"RDKit_clogSw\": predicted,\n",
      "        # ESOL\n",
      "        ESOL_PROP: mol.GetProp(ESOL_PROP),\n",
      "        # Measured Solubility\n",
      "        MEASURED_PROP: mol.GetProp(MEASURED_PROP),\n",
      "    }\n",
      "\n",
      "print(\"%d molecules stored, %d unparsable entries skipped\" % (len(logSw), n_skipped))"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "## Write the results to CSV"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "def open_csv_for_writing(path):\n",
      "    \"\"\"\n",
      "    Open `path` for use with csv.writer on the running Python version.\n",
      "\n",
      "    Python 2's csv module writes to a binary-mode file, whereas Python 3's\n",
      "    needs a text-mode file opened with newline=\"\".\n",
      "\n",
      "    Input:  path of the file to create\n",
      "\n",
      "    Output: open file object (usable as a context manager)\n",
      "    \"\"\"\n",
      "    if sys.version_info[0] >= 3:\n",
      "        return open(path, \"w\", newline=\"\")\n",
      "    return open(path, \"wb\")"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Dump logSw in csv; the with-block closes the file even if a row fails.\n",
      "with open_csv_for_writing(OUTPUT_CSV_FILE) as csv_file:\n",
      "    writer = csv.writer(csv_file)\n",
      "    writer.writerow([\"Canonical SMILES\", \"Mol_Id\", MEASURED_PROP, ESOL_PROP, \"RDKit_clogSw\"])\n",
      "    for mol_id, record in logSw.items():\n",
      "        writer.writerow([record[\"CANSMILES\"],\n",
      "                         mol_id,\n",
      "                         record[MEASURED_PROP],\n",
      "                         record[ESOL_PROP],\n",
      "                         record[\"RDKit_clogSw\"]])"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    }
   ],
   "metadata": {}
  }
 ]
}