{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "from rdkit import Chem\n",
    "from rdkit.Chem import rdFMCS\n",
    "from rdkit.Chem import rdFingerprintGenerator\n",
    "from rdkit.Chem import AllChem\n",
    "from rdkit.Chem import DataStructs\n",
    "from rdkit.Chem import rdRGroupDecomposition\n",
    "from rdkit.Chem import rdmolops\n",
    "from rdkit.Chem import RDConfig\n",
    "from rdkit.Chem import PandasTools\n",
    "from rdkit.Chem.Draw import IPythonConsole\n",
    "from rdkit.Chem.Scaffolds import MurckoScaffold\n",
    "\n",
    "from collections import defaultdict\n",
    "from itertools import islice, product\n",
    "import igraph\n",
    "from rdkit.Chem import Draw\n",
    "from rdkit.Chem.Draw import IPythonConsole\n",
    "import re\n",
    "IPythonConsole.ipython_useSVG = True"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "def gengraph(mols, fpgen, threshold=0.7):\n",
    "    \"\"\"Build an undirected similarity graph with one vertex per molecule.\n",
    "\n",
    "    Vertex ``k`` corresponds to ``mols[k]``. An edge joins every pair of\n",
    "    molecules whose fingerprint Tanimoto similarity is >= ``threshold``.\n",
    "\n",
    "    Args:\n",
    "        mols: sequence of RDKit molecules.\n",
    "        fpgen: fingerprint generator exposing ``GetFingerprint(mol)``.\n",
    "        threshold: inclusive similarity cut-off for creating an edge.\n",
    "\n",
    "    Returns:\n",
    "        igraph.Graph with ``len(mols)`` vertices.\n",
    "    \"\"\"\n",
    "    fps = [fpgen.GetFingerprint(m) for m in mols]\n",
    "    graph = igraph.Graph()\n",
    "    graph.add_vertices(len(fps))\n",
    "    edges = []\n",
    "    for i in range(1, len(fps)):\n",
    "        # One bulk call per row instead of one Python-level call per pair.\n",
    "        sims = DataStructs.BulkTanimotoSimilarity(fps[i], fps[:i])\n",
    "        edges.extend((i, j) for j, sim in enumerate(sims) if sim >= threshold)\n",
    "    # Insert all edges in a single call; repeated add_edge calls are much\n",
    "    # slower in igraph than one batched add_edges.\n",
    "    graph.add_edges(edges)\n",
    "    return graph"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "def makebond(target, chain):\n",
    "    \"\"\"Join ``target`` and ``chain`` at placeholders sharing an atom-map number.\n",
    "\n",
    "    The two molecules are combined into one. For every non-zero atom-map\n",
    "    number carried by exactly two atoms (the attachment placeholders), the\n",
    "    atoms those placeholders are bonded to are connected with a single bond\n",
    "    and both placeholders are deleted. Map numbers carried by any other\n",
    "    number of atoms are left untouched.\n",
    "\n",
    "    Args:\n",
    "        target: molecule carrying mapped placeholder atoms (e.g. a core).\n",
    "        chain: fragment carrying a placeholder with a matching map number.\n",
    "\n",
    "    Returns:\n",
    "        Chem.Mol: the joined molecule. No sanitization is performed here.\n",
    "\n",
    "    Raises:\n",
    "        IndexError: if a paired placeholder has no bond.\n",
    "    \"\"\"\n",
    "    newmol = Chem.RWMol(rdmolops.CombineMols(target, chain))\n",
    "    mapper = defaultdict(list)\n",
    "    for atom in newmol.GetAtoms():\n",
    "        map_num = atom.GetAtomMapNum()\n",
    "        # 0 means \"no atom map\". Grouping unmapped atoms would treat two\n",
    "        # ordinary atoms as attachment points whenever exactly two exist.\n",
    "        if map_num:\n",
    "            mapper[map_num].append(atom.GetIdx())\n",
    "\n",
    "    placeholders = []\n",
    "    for pair in mapper.values():\n",
    "        if len(pair) != 2:\n",
    "            continue\n",
    "        anchors = []\n",
    "        for dummy_idx in pair:\n",
    "            dummy = newmol.GetAtomWithIdx(dummy_idx)\n",
    "            # Atom at the other end of the placeholder's first bond.\n",
    "            anchor = dummy.GetBonds()[0].GetOtherAtom(dummy)\n",
    "            # The joined atoms end up without an atom-map number.\n",
    "            anchor.SetAtomMapNum(0)\n",
    "            anchors.append(anchor.GetIdx())\n",
    "        newmol.AddBond(anchors[0], anchors[1], order=Chem.rdchem.BondType.SINGLE)\n",
    "        placeholders.extend(pair)\n",
    "\n",
    "    # Remove placeholders only after all bonds exist, from the highest index\n",
    "    # down, so a removal never shifts an index that is still to be used.\n",
    "    for idx in sorted(placeholders, reverse=True):\n",
    "        newmol.RemoveAtom(idx)\n",
    "    return newmol.GetMol()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "def enumeratemol(core, rg, maxmol=10000):\n",
    "    \"\"\"Attach every combination of decomposed R-groups to ``core``.\n",
    "\n",
    "    Each ``R<n>`` column of ``rg.GetRGroupsAsColumns()`` contributes one\n",
    "    fragment per generated molecule; combinations are produced in\n",
    "    ``itertools.product`` order over the row indices.\n",
    "\n",
    "    Args:\n",
    "        core: core molecule whose placeholders carry the R-group map numbers.\n",
    "        rg: R-group decomposition object providing ``GetRGroupsAsColumns()``.\n",
    "        maxmol: maximum number of molecules to generate; ``None`` removes\n",
    "            the limit.\n",
    "\n",
    "    Returns:\n",
    "        list of Chem.Mol with 2D coordinates computed and hydrogens removed.\n",
    "        Empty when the decomposition has no ``R<n>`` column.\n",
    "    \"\"\"\n",
    "    dataset = rg.GetRGroupsAsColumns()\n",
    "    # Raw string: \"\\d\" in a plain string literal is an invalid escape sequence.\n",
    "    pat = re.compile(r\"R\\d+\")\n",
    "    rgroup_columns = [dataset[label] for label in dataset if pat.match(label)]\n",
    "    if not rgroup_columns:\n",
    "        return []\n",
    "    n_rows = len(rgroup_columns[0])\n",
    "    # product() is consumed lazily and capped by maxmol, so the full\n",
    "    # n_rows ** n_columns index list is never held in memory.\n",
    "    combos = islice(product(range(n_rows), repeat=len(rgroup_columns)), maxmol)\n",
    "    res = []\n",
    "    for combo in combos:\n",
    "        mol = core\n",
    "        for column, row in zip(rgroup_columns, combo):\n",
    "            mol = makebond(mol, column[row])\n",
    "        AllChem.Compute2DCoords(mol)\n",
    "        res.append(Chem.RemoveHs(mol))\n",
    "    return res"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "fpgen = rdFingerprintGenerator.GetMorganGenerator(radius=2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "sdf_path = os.path.join(RDConfig.RDDocsDir, 'Book/data/cdk2.sdf')\n",
    "# SDMolSupplier yields None for records it cannot parse; drop them so the\n",
    "# calls below never receive None.\n",
    "mols = [m for m in Chem.SDMolSupplier(sdf_path) if m is not None]\n",
    "for mol in mols:\n",
    "    AllChem.Compute2DCoords(mol)\n",
    "fps = [fpgen.GetFingerprint(m) for m in mols]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "graph = gengraph(mols, fpgen, 0.4)\n",
    "blks = graph.blocks()\n",
    "# Vertex-index groups returned by blocks(), largest first.\n",
    "simmols_idx = sorted(blks, key=len, reverse=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Molecules of the largest group and their Murcko scaffolds.\n",
    "simmols = [mols[i] for i in simmols_idx[0]]\n",
    "scaff = [MurckoScaffold.GetScaffoldForMol(m) for m in simmols]"
   ]
  },
  {
   "cell_type":
"code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "mcs1 = rdFMCS.FindMCS(scaff, threshold=0.7)\n", "mcs2 = rdFMCS.FindMCS(scaff, threshold=0.7, \n", " completeRingsOnly=True,\n", " matchValences=True,\n", " bondCompare=rdFMCS.BondCompare.CompareOrderExact,\n", " atomCompare=rdFMCS.AtomCompare.CompareElements,\n", " )\n", "mcs3 = rdFMCS.FindMCS(scaff, threshold=0.7,\n", " ringMatchesRingOnly=True,\n", " completeRingsOnly=True,\n", " atomCompare=rdFMCS.AtomCompare.CompareAny)\n" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAcIAAACWCAIAAADCEh9HAAAABmJLR0QA/wD/AP+gvaeTAAAUcUlEQVR4nO3de1CVdf4H8I8HuSiYCGoqeUnloklqXsBaxQtWKOVYUbM/pZx0SFtlZ7chndnZIad2ZaydZWptJZ1tWNxKa6bxkJoDeFk1EUUyFUFM8gIqCl4Q1AOcz++PL/twQji373Oe5/Dwfv1Vh3Oe833gOe/z/X6+3+9jD2YmAABwl0nvBgAAdG2IUQAAKYhRAAApiFEAACmIUQAAKYhRAAApiFEAACmIUQAAKYhRAAApiFEAACmIUQAAKYhRAAApiFEAACmIUQAAKYhRAAApiFEAACmIUQAAKYhRAAApiFEAACmIUQAAKYhRAAApiFEAACmIUQAAKYhRAAApiFEAACmIUQAAKYhRAAApiFEAACmIUQAAKYhRAAApiFEAACmIUQAAKYhRAAApiFEAACmIUQAAKYhRAAApiFEAACmIUQAAKT31bgAY19mzVFREP/9M4eE0cSKNGaN3gzR16tSpysrKF154Qe+GgMehNwoewExpaRQZSa+/Tv/8Jy1eTGPH0ttvk8Wid8u0s2rVqhdffDE+Pv706dN6t8VZBQV0+PCvHqmtpYICamjQqUHq8eypcRdRV1d369Ytq9Wqd0PACX/4AxPx8uV88yYz8+3bvHIlE/Gbb+rdMo20tLRkZmYGBwcTkZ+fX1pa2p07d/RulGNETMT79rU9smMHE/GpU/q1SSUePbUuEKN1dXWrV68OCgqaPn36lClTCgsL9W6RmhobG9evX5+Xl2c2m/Vui0qqq9nfnxMS2j8+fz77+PAvv+jRJn3U1NQsW7bMZDIR0dq1a/VujmMia6Ki+MGD1kecyZrt2zVomiz3Ts3Zg6twDI+xWCyffPJJaGgoEfn4+DzyyCPiP5YvX37jxg29W6cCs9n8+OOPE9HAgQOJaM6cOacM8L3/n/8wEe/e3f7xXbuYiL/+Wo82qe/YsWNFRUVffvllS0uL/WcePXp04cKF9fX12jRMBhHPmcNErGS+w6z54Qdetkyb1klx49RcOLgKx/CMvLy8cePGicrDrFmzSkpK7t69m56e7ufnR0T9+vXLzMxsamrSu5luOnz4cGxsrDi7CRMmrFq1qssNADv1/vtMxJWV7R+vqGAi/tvfdGiSqqqqqlJSUnx8fIYNG0ZEkyZNOnTokN6NUgcRr1/Pixaxvz+fPcvsRNYUFvK1a5o10H1unJoLB1fhGGo7c+bM/PnzRcSEh4dv27bN9qfl5eUJCQnip1FRUbsf7vV4t8uXL6ekpIiBXv/+/TMzM5ubm5m5trY2NTXVx8eHiAYPHpyVleWwm+Ol1q/vOEaPHWMi/vxz7Vukl
oaGhrVr1wYGBhKRv7//ggULhgwZQkQmk+nNN9+8evWq84fKz8//+OOPva0fILKmpoZDQ3nOHGZ3s0YZOHsPtU6t44OrcAz1iCjp2bMnEQUHB2dkZNy/f7/DZ5rN5pEjR4owTUxM/KUrVNwaGhoyMjKCgoJErzM1NfX27dvtnlNcXPzMM8+I85o8efLhw4d1aao7LBbOyuIZM/jrr5mIv/22/RP+8Q8m4oMHecmSDn7q3axW67Zt20aMGKFccufOnWPmhoaG9PT0gIAAIgoMDExPT3/gRIRYLJaoqCgiio6O3rt3r8db7wTRapE1zPz550zEOTnuZE1xMQ8cyJmZ7FXdAFVOrdODq3AMNVgsFmVms2fPnikpKdccDRUePHiQmZkpUql3797p6en37t3TprWuEh9CMQYUH8Lz58/bf/LQoUOJqEePHsnJyQ5/FfozmzkysrWMv2UL9+3L06axbVervp4jIjgykrdvb33a3Ll85ox+LXZBUVGR8t321FNP7bOd7mVm5oqKiqSkJPGEiIiInTt3Ojxmu35A5cOdd62UlXFSUmt9U8kaZp49mwcM4H//m4m4pIRjYjgzk5ubHR/wd79r/QvHxnJRkQdb7oxLl/j0aebOT804MWo2m0eNGiUuqfj4+J9++sn5116+fDk5OblHjx5ENGrUqHYVAG9w+PDhadOmKR3MAwcOOPMqUQj29/dXOubOdHN0UFrK8+a1fm4iIlj8/j/8kIl4+nT+7js+fZrNZp46lf38eMcObmnh7GweMICJuGdPTk3lW7f0PodOXbp0Sbm6hgwZkpWV1dx5kOTl5Y0dO9aZb0rh3r1777//fu/evYlozJgx2i/mu3GDV67knj2ZiENC+ObNX2XN2bMcEMBDhzJRa7mbiCdPZocDJKuVc3J4yBAmYpOJly7la9ecSF8P2LOHAwM5Joat1k5PzQgxWlxcHBcXJ668yMjI3Nxc946zd+/eJ598UhzHe+a7L1y4oHwIw8LC3Ch3nj17VikTR0ZGfv/99x5qqjtu3ODUVPbxYSLu148zMti2ApOdzcHBrR8+Ih45km3nYWpr214bGup1I8D/fY316tWLiHr16rV69Wpn5v3EoEosKRGvcjhBf/HixVdfffVrbRcwiAJM//6tSZeczKKua5s1zPyXv7T+9U6dYrOZhw9nIu7Rg5OS+OJFB29x9y6np7O/PwcHWxMSkj14MnbbEBbGSUl85469U5OnW4xWV1eL6U4iCgkJkZ92b2pqysrK6t+/PxH5+vp2WHnUTH19vVIy6927tzMfJzvy8vJEKU33AWAri4UzM7lvXyZiX19OSeGamg6eZrVyRQUXFHBncy8lJTx9euvlPGkS//CDR1vtpJaWluzs7MGDB4uiSlJSkqu/8Orqatuvz+zsbM+01E15efzEE62/9Tlz+MSJth8R8Ycftv2vxcLR0UzEpaXMzA0NnJ7OAQFMxIGBnJ7OncxctCkv5xUrvtuzZ48HzsMptbWt/2H/1CTpEKONjY0ZGRniG1vk3U2x10UNus93iw/ho48+qnwIL1y4IH9Y0c3p06eP6Oakp6c3NjbKH9YdZjOPGtX6KYyP55MnVTjgsGGt/RylX6STvXv3Tpw4UXxjTZky5eDBg24fqqioKCYmRhxq5syZLpWq7t+//8EHH6jeDygt5YSE9gUYV507x0lJrQcJD+cdO9Rto/sqK3UrxWoao2LyRCw4Fx2riooKT7yRXvPd+fn5Sm0hJibmB7W7V1VVVUo357HHHtO6m1NczDNmtG0HUfEDJPo5/v5MxEFBnJ6u/ZIZ22ki8buVL1aK71Sxt0JMnF6/ft2ZF65bt46IQkNDMzMzVekHXL/eVkQJCeGMDNlfcH5+W5c2MZF//tmF196+fXvdunXq9gMOHuSAAI6KYotFxaM6S7sYLSoq+s1vfiMu04kTJ3p6nYfG893l5eXKh3Do0KGqfAg7s3///vHjx4v3mjVr1kn5/qBDVVWckvKrU
qYnFjxWVHBiYltnyYn5blXcvHlz9erVYjZPLFpy+Am3M9H0sLq6OmUZn6hfOXz58ePHlQ/LpEmTZL6PHzzgzEx+5BEHBRg3iNKOOLKfH6emspO7Rt555x3V+wEWC0dF8eLFrN7I1gVaxKjtgnMx0HbpKpShwXy32PIv9lYFBQVps+5KdHMGDBggujmpqam3PDPffb+hgdeu5cBAJmJ/f373XfZ0xTkvj8eObevnOJrvlmHbWzSZTMnJydXV1Q5fJRaWFBcXu/ReZ86cefbZZ5VVUw7LBVarNScnR1nef/z4cZfe7n9N5ZEj2wownph5ra7mlBQ2mZiIw8I4O5sd9h+OHDkydepUpR/gUrnDjrt3VTmMOzwbo84sONeAh+a7LRZLVlaWyDLxIXRpH4s80c0RhWAVB4CCsuB81+TJrYl27pxaB3fg/n1et46Dgpjo1qRJHioE5+XlKRWYmTNnOpNThYWFytq1FStWuPGmZrN5+PDhSt38oqMJb9EPmD9/vqtvdPTo/WeeaQ3Q8eO5oMCNxrr0dhwb2/p2cXG/mrbqUHNz86effhoSEiImSFzdPlNezvrNWnXAUzHq0oJzbag7352Xl/fEE08oS6x+/PFHlZrpspKSkunTpysDQFX2dx86dEjpL/xfYiL/97/yx3TZ5cu8aFHy2LFENHz4cBWXBJWVlSkVmGHDhjkztLx69eqiRYtEVXrQoEGbN292+xvr4Y1PnW3VU7hUIBKDv7FjJ5tMLaIA43Dsp8qXb0sLZ2W1Lgh+/HHLqlW/r6urs/8S0Q94/fXXnX+Xurq6f/3rhK8vP/YYNzTItVg9HolR2/tuOL/gXAOqzHeXlpbOmzdPnF1ERISXLPg3m83iS0sUgt3uF7u04FwDMvPdD6utrW1XgXEYYUJNTU3fvn1VHFGdO3dOifLw8PAdaszXia6rWM8fEBCQkVHisFjZ1MRZWRwRwWrdMa2ujlet4tjYD4iof//+ziyVcf5LorKyMjQ0dODAQZMnNy9bxo5SWjsqx6j8gnMNuD3ffePGDWUQ3a9fPztb/nXh3v5uhXsLzjXQrhDs/Hy3LVGBEcuK3avA5Obmqn7rhvz8fGVMk5iY+LNLE942xOBPlAucP1RubtsO3k8+ce+dO1ZWVvbcc88p88kqdqTi4uJmz55dUaHCIkIVqRaj6i4414BL891i/37fvn1FKSclJaVGrflOtbmxv1vk1KBBg8jdBecacGO+W9GuAnPCYemOuUGrEaPtxifR23X12+vIkSNPP/20UtXZv3+/w5eUlfHcua0BGhnJHrpjuNlsFjdzUXEBtYemUiWpEKMeWnCuASfnu23vIhEfH+8lO03tc35/9549eyZMmCCeKbngXAOuzneXlpYqt1WMjIx0pgIjRlSxsbFabnIXm/rEahax8cmZd7948WK7CoyTg7/jx9lk6mAHr+oaGxtdLQQrampqdu3a5cHGqUc2Rj294FwDdua7i4uLZ8yYIc4uKipKlQKWZhzu7/bEgnNt2M53JycnX7ly5eHnXL9+XfmzhoSEOLPW7datW2lpacoC0tPi1kAaOnr0qDKpEBcXZ6fXLCowtoM/V/uwOTmq1UMdEgV3cV6jR4925tYZVVVVwcHBQUFBzixB051UjL700kviVzNq1KhvvvlGrTbpot189/bt25Ut/yJbve0Ou07qcH+3GwvOvY2d+W5RgVF2GztZS83JyVHWri1ZsqSqqsrDZ9Axq9Xabilru/LRwxUYZyq23vDnLSgoUP49i/j4+FJHu9kXLFiQkJDQJW4lLBWja9as0WzBuQasVuuWLVvCwsLEJ5OI/P393333XR1vcaKWAwcOKFvFx40bJ9brmUympUuXdtiV6yoenu92uwKzYcMG7xlRie85saLAthBsW4GZOnWqM4vbjh079tvf/n7pUq8YZzQ1NdnOMdgvBHehr3apGL1z547XzrS4rb6+fs2aNRs2bHjttdfcnjn1QsqOndGjR4tZNfc2x
nihnTt3RkREkI3x48cXuLjivKmpye37NHrIyZMnZ82aJc4oOjpauaXkiBEjtm7d6rACU1VV9cYbb4h66/PPf+M9nQGx4kU0bMiQIdnZ2VVVVVu3btW7Xe7zits2g2Zqa2vLysq8ZyWvWh48ePDee+8FBgYGBQVt2rTJ4SR+bW2tFy7F65CY7zaZTH369BHlC4eDP3ETNbE+WnT6vHCCu7CwcMqUKeKLwd/f38/Pz0M3KtIAYhQM4tq1a0Q0cOBA+09TtvBu3rxZm4bJa2ho+Pbbb3fv3u1kBWbjxo0inl5++WVvHlGJQvCIESOSkpIWLlzYJcqgHerBzATQ9V29enXw4MGDBg26cuVKZ8/Jzc1NS0srLy8nosWLF+fk5GjYQO00NTUlJye//fbbyjoTbyaGBWJFcBfVhZsOYMtqtRKRqLh1KCUlZdOmTUQUGRn50UcfJSYmatc4bfn6+n711Vd6t8JZYj1Ml9bpNQfQtTiM0Xnz5oktvCdOnDBShu7bt++LL77QuxXdGnqjYBB2YjQ/P//8+fPx8fGVlZVitY2RxMTEzJ0795VXXhELpEB76I2CQdiJ0Y0bN7711lslJSXGy1Ai6tWr186dO5GhOkKMgkHYiVGH4/2uTmzZAr0Y9sKC7qY7xyjoCxcWGARiFPSCCwsMAjEKesFMPRiE2Egi7mXVzvPPPx8WFqb8y2AA6kKMgkHY6XKuXLlS8+ZAN4JhDhgERu6gF1xzYBCIUdALrjkwCMQo6AXXHBgEYhT0gmsODMJOjObm5n722Wd2bqAHIAMz9WAY48aMaQgLa3r4B19+ObW83Dc62nfwYO1bBcaHGAWDaG7udeYMBQd38KMLFx49fpxwg3LwEAzqwSCsViKiDkujdn4EIA9XFhgEYhT0gisLDAIxCnrBlQUGgRgFvWCKCQzCTlZ++inV11N4uMYtgu4CMQoGYSdGY2I0bgt0LxjngEGI9Uwd3ScPwLMQo2AQKICCXnDRgUEgRkEvuOjAIBCjoBdcdGAQdmI0NZVefZWuXtW4RdBdYKYeDMJOjH78scZtge4FvVEwCAzqQS+46MAgEKOgF1x0YBCIUdALLjowCMQo6KUH42a2YAiXLlFhIQ0dSrGxejcFuhnEKHR5BQXUuzdNm9b2SG0t/fgjxcZSYKB+zYJuA0Mg6PLi4+npp2n//rZHjhyh+Hj65RfdmgTdCmIUDGL5crJY9G4EdEuIUTCCOXOorIwyMvRuB3RLiFEwgueeo0WL6K9/pYoKvZsC3Q9iFAzi73+noCBasULvdkD3gxgFgxgwgD76iAoKaMsWvZsC3QxiFIxjyRKaPZv++EeqrdW7KdCdIEbBUDZupPp6+tOf9G4HdCeIUTCU8HD685/p0iW92wHdCWIUjMD2X7JLS6PoaCLsrwetYDMoAIAUfF8DAEhBjAIASEGMAgBIQYwCAEhBjAIASEGMAgBIQYwCAEhBjAIASEGMAgBIQYwCAEhBjAIASEGMAgBIQYwCAEhBjAIASEGMAgBIQYwCAEhBjAIASEGMAgBIQYwCAEhBjAIASEGMAgBIQYwCAEhBjAIASEGMAgBIQYwCAEhBjAIASEGMAgBIQYwCAEhBjAIASEGMAgBIQYwCAEhBjAIASEGMAgBIQYwCAEhBjAIASEGMAgBI+X+/X7ZUeY4FYgAAAABJRU5ErkJggg==\n", "image/svg+xml": [ "\n", "\n", " \n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "N\n", "N\n", "N\n", "O\n", "\n" ], "text/plain": [ "" ] }, "execution_count": 10, "metadata": {}, 
"output_type": "execute_result" } ], "source": [ "Chem.MolFromSmarts(mcs1.smartsString)" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "image/svg+xml": [ "\n", "\n", " \n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "N\n", "N\n", "N\n", "N\n", "\n" ], "text/plain": [ "" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "Chem.MolFromSmarts(mcs2.smartsString)" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "image/svg+xml": [ "\n", "\n", " \n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "N\n", "N\n", "NH\n", "N\n", "\n" ], "text/plain": [ "" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "mols_has_core = []\n", "core = Chem.MolFromSmarts(mcs2.smartsString)\n", "for mol in mols:\n", " if mol.HasSubstructMatch(core):\n", " AllChem.Compute2DCoords(mol)\n", " mols_has_core.append(mol)\n", "def getMCSSmiles(mol, mcs):\n", " mcsp = Chem.MolFromSmarts(mcs.smartsString)\n", " match = mol.GetSubstructMatch(mcsp)\n", " smi = Chem.MolFragmentToSmiles(mol, atomsToUse=match)\n", " return smi\n", "mcs_smi = getMCSSmiles(mols_has_core[0], mcs2)\n", "core = Chem.MolFromSmiles(mcs_smi)\n", "core" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": [ "rgp = rdRGroupDecomposition.RGroupDecompositionParameters()\n", "rgp.removeHydrogensPostMatch = True\n", "rgp.alignment =True\n", "rgp.removeAllHydrogenRGroups=True\n", "rg = rdRGroupDecomposition.RGroupDecomposition(core, rgp)\n", "for mol in mols_has_core:\n", " rg.Add(mol)" ] }, { "cell_type": "code", 
"execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "True" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "rg.Process()" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [], "source": [ "frame = pd.DataFrame(rg.GetRGroupsAsColumns())\n", "frame[\"Smiles\"] = [Chem.MolToSmiles(mol) for mol in mols_has_core]\n", "PandasTools.AddMoleculeColumnToFrame(frame)" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/html": [ "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ROMolSmilesCoreR1R2R3
0\"Mol\"/CC(C)C(=O)COc1nc(N)nc2[nH]cnc12\"Mol\"/\"Mol\"/\"Mol\"/\"Mol\"/
1\"Mol\"/Nc1nc(OCC2CCCO2)c2nc[nH]c2n1\"Mol\"/\"Mol\"/\"Mol\"/\"Mol\"/
" ], "text/plain": [ " ROMol Smiles Core R1 R2 R3\n", "0 \"Mol\"/ CC(C)C(=O)COc1nc(N)nc2[nH]cnc12 \"Mol\"/ \"Mol\"/ \"Mol\"/ \"Mol\"/\n", "1 \"Mol\"/ Nc1nc(OCC2CCCO2)c2nc[nH]c2n1 \"Mol\"/ \"Mol\"/ \"Mol\"/ \"Mol\"/" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "frame = frame[[\"ROMol\", \"Smiles\", \"Core\", \"R1\", \"R2\", \"R3\"]]\n", "frame['Core']=frame['Core'].apply(Chem.RemoveHs)\n", "frame.head(2)" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [], "source": [ "dataset = rg.GetRGroupsAsColumns()\n", "core = Chem.RemoveHs(dataset[\"Core\"][0])" ] }, { "cell_type": "code", "execution_count": 55, "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "image/svg+xml": [ "\n", "\n", " \n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "N\n", "N\n", "N\n", "N\n", "R1:1\n", "R2:2\n", "R3:3\n", "\n" ], "text/plain": [ "" ] }, "execution_count": 55, "metadata": {}, "output_type": "execute_result" } ], "source": [ "core" ] }, { "cell_type": "code", "execution_count": 56, "metadata": {}, "outputs": [], "source": [ "res = enumeratemol(core,rg)" ] }, { "cell_type": "code", "execution_count": 59, "metadata": {}, "outputs": [ { "data": { "image/svg+xml": [ "\n", " \n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "N\n", "N\n", "NH\n", "N\n", "O\n", "O\n", "NH2\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "N\n", "N\n", "NH\n", "N\n", "O\n", "O\n", "NH2\n", "\n", "\n", "\n", "\n", 
"\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "N\n", "N\n", "NH\n", "N\n", "O\n", "O\n", "NH2\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "N\n", "N\n", "NH\n", "N\n", "O\n", "O\n", "NH2\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "N\n", "N\n", "NH\n", "N\n", "O\n", "O\n", "NH2\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "N\n", "N\n", "NH\n", "N\n", "O\n", "O\n", "NH\n", "OH\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "N\n", "N\n", "NH\n", "N\n", "O\n", "O\n", "OH\n", "NH\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "N\n", "N\n", "NH\n", "N\n", "O\n", "O\n", "N\n", "OH\n", "HO\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", 
"\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "N\n", "N\n", "NH\n", "N\n", "O\n", "O\n", "NH\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "N\n", "N\n", "NH\n", "N\n", "O\n", "O\n", "NH\n", "S\n", "O\n", "O\n", "NH2\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "N\n", "N\n", "NH\n", "N\n", "O\n", "O\n", "OH\n", "NH\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "N\n", "N\n", "NH\n", "N\n", "O\n", "O\n", "OH\n", "NH\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "N\n", "N\n", "NH\n", "N\n", "O\n", "O\n", "NH\n", "NH3+\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "N\n", 
"N\n", "NH\n", "N\n", "O\n", "O\n", "NH\n", "NH3+\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "N\n", "N\n", "NH\n", "N\n", "O\n", "O\n", "NH2\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "N\n", "N\n", "NH\n", "N\n", "O\n", "O\n", "NH2\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "N\n", "N\n", "NH\n", "N\n", "O\n", "O\n", "NH2\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "N\n", "N\n", "NH\n", "N\n", "O\n", "O\n", "NH2\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "N\n", "N\n", "NH\n", "N\n", "O\n", "O\n", "NH2\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "N\n", "N\n", "NH\n", "N\n", "O\n", "O\n", "NH\n", "OH\n", "" ], "text/plain": [ "" ] }, "execution_count": 59, "metadata": {}, "output_type": "execute_result" } ], "source": [ "Draw.MolsToGridImage(res[:20])" ] }, { "cell_type": "code", 
"execution_count": 62, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "2744 2744\n" ] } ], "source": [ "print(14**3, len(res))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.6" } }, "nbformat": 4, "nbformat_minor": 2 }