{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Reading and writing molecules 1\n", "\n", "This is a short overview of creating molecules from and writing molecules to various file formats. It is intended to be a complement to, not a replacement for, the contents of the [main RDKit documentation](http://rdkit.org/docs/index.html).\n", "\n", "@TAGS: #basics #molecule_input " ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": false }, "outputs": [], "source": [ "from rdkit import Chem\n", "from rdkit.Chem.Draw import IPythonConsole\n", "from rdkit.Chem import Draw\n", "# uncomment this if you try the tutorial and end up with low-quality images\n", "# IPythonConsole.ipython_useSVG=True" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Sun Oct 9 07:11:37 2016\n" ] } ], "source": [ "import time\n", "print(time.asctime()) # doctest: IGNORE" ] }, { "cell_type": "markdown", "metadata": { "collapsed": false }, "source": [ "## Working with SMILES" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "If you have a SMILES string, the easiest thing to use is `Chem.MolFromSmiles`:" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false, "scrolled": true }, "outputs": [ { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAcIAAACWCAIAAADCEh9HAAAABmJLR0QA/wD/AP+gvaeTAAAgAElE\nQVR4nO3dd1xTV/8H8G9CgDBFIILiLMpWnLQ4KlUQ63hqrdQ64mOtxY040VaNW6wr1jp4Wm1RUUv7\nahW1PlZ4LOVXV7UORmUIVEFApgyBkOT7++PSCGHIuLknSc/75R+ac8n9EMOXk3PPOZeHiEBRFEW1\nFZ90AIqiKN1GyyhFUVS70DJKURTVLrSMUhRFtQstoxRFUe1CyyhFUVS70DJKURTLzp8/379//6ys\nLNJBOELLKEVRLIuMjLx///6KFStIB+EIj06/pyiKXdnZ2S4uLuXl5b/88svIkSNJx9E42hulKIpl\nDg4OISEhABAcHKxQKEjH0TjaG6Uoin1VVVVubm4ZGRlhYWGBgYGk42gWLaMURWnE999/HxAQIBKJ\nUlJSrKysSMfRIPqhnqIojZgyZYqPj09+fv6WLVtIZ9Es2hulKEpTEhMT+/fvz+Px4uPjnZ2dScfR\nFNobpShKU9zd3efOnVtTU7N8+XLSWTSI9kYpitKgoqIiJyenwsLCixcvjhs3jnQcjaC9UYqiNMja\n2nrdunUAsHTp0urqatJxNIKWUYqiNGvx4sUeHh5paWkHDx4knUUj6Id6iqI0Ljo62s/Pz9LSMjk5\n2d7ennQcltHeKEVRGufr6zthwoTS0lKJREI6C/toGaUoigs7d+4U2dp6pKRAfDzpLCyjH+opiuJI\n1YoVwr17YdQoiIkhnYVNtIxSFMWVsjJwdoacHDhzBqZOJZ2GNfRDPUVRXLGwgK1bAQBWroSKCtJp\nWEPLKEVRHJo9G7y8ICsLdu0iHYU19EM9RVHcun4dhg0DoRCSkqBnT9JpWEB7oxRFccvbG6ZPh8pK\nCAkhHYUdtDdKURTnsrPBxQXKy+HqVfDxIZ2mvWhvlKIozjk4wJo1AACLF4NcTjpNe9EySlEUCStX\nQp8+kJgIX31FOkp70Q/1FEUR8sMP8N57YG0NKSlgY0M6TdvR3ihFUYRMngxjxkBREWzeTDpKu9De\nKEVR5CQlQf/+gAh//AF9+5JO00a0N0pRFDlubjBvHsjlsGwZ6ShtR8soRwoLC8vKygoKCkgHoSgt\ns2EDWFmBuTlUVpKO0ka0jGpcdXX1xo0be/To8d577/Xo0WPjxo1VVVWkQ1GU1hCJIDERzp4FExPS\nUdqIjo1qVnR0dHBwcGJiIgB07do1KysLAJydnffs2TN+/HjS6SiKYgHtjWpKQkKCr6+vn59fYmJi\n3759Y2Jinjx5cvXqVU9Pz+Tk5AkTJvj4+Ny7d490TKo1EhJg3DiwsAALCxg3Tv+2HyZJp19bpNiW\nn58fGBgoEAgAwNbWNiwsrKamRtWqUCjCw8Pt7OwAgM/ni8XinJwcgmmplkpNRZEI9+zBnBzMycG9\ne1EkwtRU0rH0go6/trSMskkul0ulUmtrawAwMDAICgoqLCxs9Mji4uKQkBBjY2MAMDMzk0gkVVVV\nHKdtMwD090elUv1BPTdjBkok9R7ZsAFnzuTs/JWVlZydi2ukX9t20vv3PndiY2P79+/P9PH9/f2T\nkpJe+SUpKSkBAQHMl/Tu3TsyMpKDnO0HgNOm4YED6g/qOTs7TEur90haGtrbc3DmgoKCmTNnenl5\nKRQKDk5HALnXlhV6/97nQkZGxoQJE5hq2KtXr6ioqFZ9eXR0dN+/Jx6PGjXq/v37GsrJFgAsLkZn\nZ/zzz3oP6jkDA1TrD754gQIBB2eurKzs1asXAISFhXFwOgLIvbas0Pv3vmaVlZWFhISYmJgAgIWF\nRWho6IsXL9rwPDU1NWFhYSKRSDVgmpeXx3patjAV8+pVHDwYZbJ6D+qzTp0I9pi+++47ABCJRMXF\nxdyckVNEX9v20/v3vqYolcrw8HAHBwcA4PF4YrE4Kyurnc9ZV
FQUEhJiZGQEAFZWVqGhoVo1YCqT\n4eXLWF39smKuWoVr19b+XQ/LqFKJp06h6vLgtGmNjN/NmMFZHB8fHwBYtmwZZ2fkDunXtp30773P\nhRs3bnh5eTEfw994442bN2+28AurqqoOHTpUXV3dzDEPHz5UTSl1cnJq7RAB654/x/BwnDABTUwQ\nAM+de1kxq6tx0CCMi0PUyzJ68iQC4OjRtf98+BBtbXHv3pdXk21tMTmZszgJCQkCgUAgECQkJHB2\nUo6Qfm3bSf/e+5qVm5srFov5fD4A2NnZhYeHt2rUf9u2bQDQtWvX8PBwpdql7vquXLni7u7OFFNf\nX9/4+Ph2Z2+digr87jucNg0tLBCg9s/gwXjhQr2KmZiIrq5YWqp3ZfT5c7SzQwA8efLlg/fv49ix\naGaGZmbo74/37nEcasGCBcz7gePzatzs2SiVkn1t20PP3vsaVFVVFRoaamVlBQDGxsYSiaSsrKy1\nTxIdHe3m5sYURz8/v+a7FTKZLCwszNbWFgAEAkFgYGB+fn47voMWyc1FqRSHDUMDg9rSKRDghAkY\nHo6qk6tVzP378d//1rsyunYtAuDw4eoTu4gqLCy0sbEBgAsXLpDOwp7vvkMAFImwtJR0lDbSs/e+\nply4cMHJyYkpfxMmTEhJSWnzUzHT7+3t7VWDqs1Pvy8sLAwKCjIwMAAAa2trqVRadzI/W7Kz1aun\noWFt9SwoUD9YrWIqlejvjwBYUIB6cv0jJQWNjJDPx9u31ZsePcIvvkAN/Be00P79+wHA0dFRq8bN\n2668HLt2RQA8epR0lLajZfQVEhMTfX19mQLq7u5+5coVVp62vLxcIpEIhULV9PvmJ1cnJSWNHTuW\nieHi4nLx4kVWYuTl5R05ovTzQ4Ggtnry+ThiBO7fj629YHbrFnbrhu+9x0ou0iZPRgCcNavJpjVr\nOM9Uq6amhpket2vXLlIZ2LRpEwLgkCGoy1NiaRltUklJSVBQkKGhIQB06NBBKpXKVLN7WJKamqqa\nfu/o6PjK6fdRUVGOjo6qAdPExMS2nTclJUUikTDDCx4epQBoZFTb9ywqattT4uPHaG2NAPj55218\nBm3x888IgJaWmJur3vTf/yIAduiARKejRUdHMxPsnj59SjAGC/76C01MkMfDa9dIR2kXWkYbwazp\nZAahmDWdBQ0/2bInJiamX79+THF86623mp9+L5PJpFKppaUlABgaGgYFBZWUlLTwRE+ePNm/f/+I\nESOYS2R/D7leOXKEnbLw/fcIgMbG+McfLDwbGXI5enoiAG7dqt4kk6GrKwLgzp0kktXzzjvvAMBH\nH31EOkj7zJhRuyROx9Eyqi4uLm7gwIFMlRkxYsSdO3c4OCkzYNqpU6cWTr/Pz89XDZja2NhIpVK5\nXN7UwUlJSaq+J8PCwkIsFkdFRVVUVLD7jcydiwDYp4/OXi0IC0MA7NlTfVENIh44UPu9NTtfjRuP\nHj0yNjbm8/ktn2yndeLikMdDMzN88oR0lPaiZfSlzMzMgIAAHo8HAD169OB+wmZrp9/fuXPnzTff\nZCrjgAEDfvnll7qtiYmJatWzQ4cOTPXU3CYXFRW1PTad7CcVF6NIhADYcHSloAA7dkQA/PFHEska\nERISAgDe3t7Nz5zTUgoFDhqEAOqz7nUTLaOIiJWVlRKJxNTUFABMTEwkEgnr3bSWY3YjVU2/P3/+\nfPPHR0VFMQuumVkE3377bVBQ0Guvvaaqnp06dQoKCrpy5Qo313bj42sn6kdEcHA2Vi1fjgA4cmQj\nTUuWIACOGcN1pKaVlZV16dIFACJ074VGPH4cAbBHD2zT4mltQ8soIuLOnTtVZShZO9ZOtGr6fVlZ\n2SeffMJc92eGTQGgY8eOs2bNOnfuHPcbrDEffzt0wPR0js/cdsr0NDQ2Rj4fb91Sb3vwAAUCFAiw\nrdf0NOTrr78GAAcHh/Lyc
tJZWqOsDB0cEABPnSIdhR20jGJiYqKVlVWHDh0uXbpEOks9daffGxoa\nvnL6fXp6eteuXQFg7Nixly5dYn1eQau8807tPBaiKVrhUerkgs+GK1YsaqRt9GgEwIULOQ/1Ckql\n8vXXXweA9evXk87SGuvWIQC++SbpHKyhZRTj4+MBwMPDg3SQxjHT75m99F85/f7tt98GgJ9++omz\neE+f4oIFjVyPKSrC7t0RAD/5hLMsbff8+aXbt+Hu3Q41NQ2u7J0/jwBoY4NN7MBN1vXr13k8nlAo\nzMjIIJ2lZTIyUChEPl+X53Ooo2VU28so49atW97e3syn9SFDhvz222+NHjZu3DgAYGtyfksMHYoA\nuKixPtyvv6KBAfL5yNKSBU1RKmUJCa63b0Nu7mfqbVVV6OiIACiVkojWImKxGACmTJlCOkjLTJ2K\nADhnDukcbKJlFBMSEpgVSqSDvJpq+n1TH+K4L6OqC0qNDnOtX48AaGfXyEx27ZGX9/nt25CQ4KRU\nqs9kUnx9WHsmOTUlJyeHGRC/evUq6SyvEhurDesXWEfvDKpLJk6c+ODBg+3bt69atarRA5jZWsjh\nTbM9PGDXLgCAhQshI0O9dcMGGDYM8vLgww9BO+/kLZcXPH0qAQAHh108nlH9psJ4z/WFoSPw871g\nZNTEE5Bnb2/PTH5avHixXC4nHadJiBj51VcKe3tYtQo6dSIdh020jDZZemQyWXFxcUVFBYlQTTI1\nNV27dq2FhQXpIC8tWgTvvgslJfDBB1BTU69JIIDjx8HSEi5dgm++KSEUsDlPn25UKIotLf2trP6l\n1pSd/YlcWVD8ngVv7AQi2Vpu5cqVffr0SUxM/PLLL0lnadLJkyennjjhZWWFK1aQzsIyWkabLKOn\nT5+2trZetGgRiVBtxH1vlHHsGPTsCbduwaZN6k2vvQZHjqCPT8ySJd3jtezm45WV8QUFR3g8w27d\n9qk1vXhxp6DgKx7PsGvXvUSytYqRkREzaW/dunWFhYWk4zTi+fPnK1euBID1O3bwhELScVhGy2iT\nSJUkXWRlBd9+C4aGsGMHREert06bxnN2/q6iomzy5MmlpaUkAjbuyZNgRIWtbaBQ6Fq/BZ88WQqg\n7NRpsVDoTCZcK7377rtjxowpKiravHkz6SyN2L59+7Nnz8aMGTNp0iTSWdhHy6helUuC34uXF0gk\noFTCzJmQl6feKpVKPT0909LSlixZwn22RpWURJWV/U8gsHFw2KLWVFz8Q3n5bwKBqHPnDUSytc2+\nffsMDQ0PHTqkbb3+lJQUqVQqEAj27VPv9esHWkabpIvllWzmtWvB17fxC0pCoTAiIsLU1PT48eMn\nTpwgEq8uxOqsrOUA0LmzxMCgY90mpbIqO3sVAHTpstnAwIpMvjZxc3ObN2+eXC5ftmwZ6Sz1rF69\nWiaTffTRR3V3eNAntIzqZLnUTnw+HDsGNjaQnl759ddn1Vrd3d337t0LAAsXLkxOTiYR8KVnzw5U\nVz8yMfEQiRY0aNpbXZ1hYuJpa/sxkWztsXnzZltb25iYmKioKNJZal2+fPncuXNWVlZbtqj3+vUG\nLaNNllFdLK/EM3frBidPZmdm2i1ePJ2ZkFvXvHnzpk2bVl5ePmPGjOrqaiIJGTU1TwH4Dg47eDyB\n2uM5OTsAoFu3fTyeAaF0bdexY8eNGzcCwLJly8i+wgy5XL58+XIAWL9+vUgkIh1HU2gZpVg2dqzD\n4sWBlZWV7777bllZmVrroUOHevXqdefOnbVr1xKJx+jada+7e1KHDuozmbKzP1Eqy62s3rWweItI\nsPabP39+v3790tPTpVIp6SwQFhaWlJTk7OysPWPimkDLKO2Nsm/Hjh2vv/56Wlra0qVL1ZqsrKzO\nnDljaGgolUrJfvBseAm+ouJWYeFxPl/YtetuIpFYYWBgwFzJ2bp169OnTwkmKSwsXL9+PQD
s3r2b\nuRmPvqJllGKfoaFhRESEpaXl119/HRERodbq5eW1adMmRJw7dy7Zn/P6mElO2KlTsLHxa68+XIuN\nGjVq8uTJ5eXln376KcEYmzdvLi4u9vf3V+2fq69oGaW9UY1wdHT84osvAGD+/PkpKSlqrSEhIX5+\nfvn5+dOnT1coFCQCqlMqK4VCZ0NDO3v7NaSzsGD37t1CoTA8PPzmzZtEAiQkJBw6dMjQ0FBfJznV\nRctoLVpGWScWi8ViMXNBSSaT1W3i8/knTpywt7ePjY0NDQ3lMpVC8Tw7e21CgtPdu6b37lk/ejSp\nrCwGAPh80549v3F3f2hg0IHLPBrSq1ev5cuXI2JwcDCR90NwcLBcLg8MDHR1dX310TqOltHa0kNp\nwuHDh52dnW/fvs2MkdVlZ2f3zTff8Hi8jRs3/vbbb5xFSk+fKpcX9ulzsX//Eg+PZGvr6Tk5Lyfi\n6NZE0eZ9+umn3bt3v3HjxsmTJzk+9YULF2JiYmxsbPR4klNdglcfou/oh3rNMTMzO3XqlLe39+7d\nu0ePHj1mzJi6rf7+/osXLz5w4MCkSZM8PT1b9fssOPg1J6f0VoZ5o0uXLeXlv/brl8N0OQUCUceO\n73fs+H6rnkdXmJqabty4cc6cOR9//PGsWbM4PjuPx1u+fHnHjh1ffajuo2WU0qyBAwdu2rRp7dq1\np06dUiujADBnzpzDhw8bGRnFxMS06mkDA31KS39p1ZfweMYAYGHh89dfgZ06BZmaDuTzTVr1DDrH\n2toaAPh8Ah86EbGkRBv39NIEWkZpb1TjVq9e7ejoOGXKFLXHX7x4MWPGDLlc7uXl1dqdtHr1MhSJ\nal59XB0CgS0A9Op1Ji9v9+PHi6qrkw0Nu1hZTe7ceZ1+jIeqqaqqYlaF7tmzZ8EC9cVaGnX//v1B\ngwZJpdKPPvrI2Vk39nZpD1pGKY3j8/kBAQENH1+5cmVSUpKrqyuz3J6bMAYGll26bO7SZTMAVlYm\n5eXtysiY0bv3BW7OzqV9+/ZlZGT069cvMDCQ41N7enrOnTs3LCxs+fLlFy9e5Pjs3KOXmGhvlIyz\nZ88ePnxYKBRGRkZyVkPr45mYuHfrJi0v/5XE2TXr6dOn27dvBwCpVGpgQGBV6/bt221sbH766aef\nfvqJ+7NzjJZRXSo9r6Qr38vjx4/nzJkDADt27PDw8ODy1CkpPsXFkTU1eYg11dUZ2dlrzM2HcxmA\nG59++ml5efmkSZPeeovMqlZra+t169YBwNKlS7Vhdb9G0TLaJF0pSTpHoVCIxeLi4uKJEyc2XC2q\naZ07bygqOpOY6HrvnmVq6milUi4QiHJzt3McQ6Nu3bp1/PhxY2PjPXv2EIyxePFiDw+PtLS0gwcP\nEozBATo2Sj/Uc23Xrl2//vpr586djx49yv2kXQuLURYWo1T/LCz8JjPzQx7PwMzMW3e3I6mLmXKv\nVCqXLl362muNrGpVKpV3795l96QuLi5mZmZqDzL7NPv5+W3cuHH69On29vbsnlR70DJKcerGjRvr\n16/n8/mnTp3Shp3TrK2nZ2evranJTU+f6uZ219DQgXSi9jpz5sz169c7d+7MfKZuqKqqavDgweye\n9Pr162+88UbDx319fSdOnHj+/PkNGzb85z//Yfek2oOWUdob5U5paSkzw2nVqlU+Pj6k4wAA8HhG\nItH8p083yuX5jx5NcXb+lcfT4b2IKioqVq9eDQBbt25t6vaxfD5/0KBB7J63YVdUZd++fT///PPR\no0c//vjjIUOGsHteLUHLqLaXHn2yZMmS9PT0119/fdu2baSzvCQSLcjNDVUqqyoqbjx9usHBYQfp\nRG23e/furKysIUOGzJ49u6ljhELh7du3OYvk6OgYFBS0a9eu4ODg//u//9PLtdf0ElOTdLG8anPm\niIiI48ePW1paRkREaNXukwJBJ9V60NzcnSUl58jmabO
//vpr586dPB5PKpUSWbnUlPXr13fu3Pna\ntWvffvst6SwaoUWvNSnaXHr0Rmpq6vz58wHgwIEDjo6OpOOo69Qp+O+/YmbmhzJZJsEwbbZ27drK\nysr3339/6NChpLPUY2FhsXXrVgBYtWpVRUUF6Tjso2W0SbpYXrUzs0IB8+bNLy8vnzJlCvd7ZLSE\nqekAM7PaKyQKRXFm5oeIWrELasvFxcWdOXPG1NR0925t3Lp/9uzZXl5eWVlZu3btIp2FfbSMamnp\naRvt/F62bYO8vNBhw/y0+Vqtnd1y1d/Lyn7JziZ5q6jWUiqVzL6iK1as6Nq1K+k4jeDz+VKplMfj\nffbZZ5mZmaTjsIyW0SZLz1tvvXX79m29/OXJpf/9DzZtgrS0IVLpz9q8bZqV1btGRt1V/8zL211S\non6PaK114sSJP/74o3v37mRvFNg8b2/v6dOnV1ZWhoSEkM7CMlpGm2RlZTVo0CBtG8grKytbt25d\ndnZ2o63a1hstLIRZs0CphI0bge2piizj8QQi0cI6D2Bm5pzq6gxigVqstLSUKUzbtm0zMdHqrf92\n7txpbm4eGRkZGxtLOgubaBnVutLTFEQMDw93dnbetm3bmjU6cL8gRPj3vyE7G3x9QSf6HyJRIJ//\ncv6jQlGckfEBoqyZL9EGoaGheXl5w4cPnzFjBuksr+Dg4MBU/ODgYC25BxcraBnVDZcvX3Z3d589\ne3ZOTo6/v39Td3zUql8JR47AxYtgawvh4aBN02+aZGDQ0dp6at1HSkpuZWdLSOVpibS0tD179qhG\nHknHebWVK1f26tXr3r17R48eJZ2FNbrw7tYwrSo9DWVkZEycOHHs2LF//vlnr169oqKi/vvf/7q4\nuJDO9QoPHsDy5cDjwfHj0KUL6TQt1qlTEPMXRLh4ESZMABeXnT/9pL07Zq5Zs0Ymk82YMYP1hUka\nIhQKmesN69at05/t8bEdUlJSfvjhh/Y8gzY4e/YsAJiYmNy8eZN0lnqYMS9mtMvCwkIqlVZXVzdz\n/L1793r06GFpadmtWzeJRHL79m3Ooqp58QLd3REAFy8mFaHtkpN9Tp2Cfv1e/ozY2to+efKEdK5G\nXL16FQDMzc2zsrJIZ2kdPz8/AFi2bBnpIOxoVxkdPXo0AAwZMuS3335jKxCXMjMzmV3ZmSUfPB5P\nLBZrwztSqVSGh4c7ODioUmVnZzdzfEFBwfz585ndeZnb7zAGDBiwefPmhIQEzpIzFi5EAPT0xKoq\njs/MguLiH9zc1Hsbw4cPr6mpIR2tHrlc7unpCQBbtmwhnaXVEhISBAKBQCCIj48nnYUFbS+jSqXy\n0KFDtra2AGBgYDB//vz8/HwWk2lUSUlJUFAQsyTRzs7u8OHDq1evNjY2BgAjI6OgoKDnz5+Tynb9\n+nXVDg7e3t63bt1q5uCqqiqJRMJsDCEUCiUSSX5+flRUlFgs7tDh5f2FunXrFhgYGBUVxUEtOHsW\nAdDUFBMTNX0qjVAq5ebmjVzvXrNmDelo9YSFhQFAz549KysrSWdpC+b2UL6+vqSDsKBdvVFELC8v\nl0gkQqEQAExNTSUSiZb/p8rlcqlUamNjAwDGxsYhISHFxcVMU0pKiuqWQZ07dw4LC1MoFFxmy83N\nFYvFzFitvb19eHi4Uqls5vjIyMjevXszgcVi8ePHj+u2yuXyuLi4oKCgzp07q2qBjY2NWCyOiopq\nfnygzf76Czt2RAA8fFgTT8+RvXv3NiyjPB5Pe4awiouLmW0GIyMjSWdpo8LCQubH8Pz586SztFd7\nyyjj8ePHYrFY1fF55c8/KbGxsQMGDGByjh8/Pjk5ueExMTEx/f4eGBs0aFBcXBwHwep2Ko2NjSUS\nSXl5eTPHJyQkMCMqANC3b9+YmJhmDq6urr506VJgYGCnTp1URUEkEgUHv7h0CWUy1r4LuRzffBMB\ncOpU1p6TiNLSUkt
Ly4aV1MrKKj09nXQ6RMQVK1YAwMiRI0kHaZf9+/cDgKOjY5Uujv7UwU4ZZVy9\nerV///7MG27kyJF3795l8cnbKT09fcKECUw2Nze3K1euNHOwQqEIDw9nig6PxwsICMjIyNBctqio\nqD59+jDZAgICUlNTmzm47nCESCQKCwtr1ef0hIQEiUTi6urq5DQFAAHQxAQnTMDwcGz/MMa2bQiA\nPXrg3/17HcZspNLQkCFDNNSRb7mHDx8aGRkZGBho1Y9YG9TU1PTt2xcAPvvsM9JZ2oXNMop/FyA7\nOzsA4PP5YrE4NzeX3VO0Vt3r3ZaWllKpVNayDlhxcXFISAgzYGpqahoSElJWVsZutoSEBF9fX+bn\n08PDIzo6upmD6w5HCASCoKCgwsLCNp/6jz/y165FFxdkiikAWljgBx/gd99hs/3gZp4QjYyQz8dm\nf0PpjJSUlKb2miM+SMp0CObOnUs2BitiYmIAwMLC4unTp6SztB3LZZRRVlYmkUiYAmRmZiaRSIh0\n2hUKRVhYWJcuXZiaHhgY2Ib/quTkZNWAqYODA1vjFXU7lba2tq/sVMbFxQ0cOJCJ4e/v/+eff7Y/\nAyMzE6VS9PVFgaC2nhoY4LBhKJViTk4rnkcmQ4kEJRK2cpH39ttvN1pGeTzejz/+SCrVpUuXAKBD\nhw55eXmkMrDrnXfeAYA5c+aQDtJ2GimjjPj4eFVXy93d/enVq5o7VyPu358xeTJz9v79+8fGxrbn\nyaKjo5lPH9DuCV51O5UGBgav7FQys7KY607M9Ps2n7p5GRm4dy8OH458fm09NTbG8eNRYyfUdkzB\nalTHjh2JDJLKZDJXV1c9+BRc16NHj4RCIZ/P17aJ2y2nwTLKYAqQ2NUV+XwcNQrv39f0GTEnB8Vi\n5PG+9fbu3r17ZGQkK/3HmpqasLAw5vIoM2D6119/tfZJ7ty54+3tzfwoenl5Xb9+vZmDKysrJRKJ\nqakptGz6PVuKijAyEsViNDdHANyxAwHQ3x/VXkXV0o1G13C0b2GHVlAqlc2sFhs8eDD3g6QHDhwA\nACcnJ+Ljs+xi9ojw9vbWzkvTr8TFm726ujrv4EG0tEQANDLCFSuwpEQjZ6qpwc8/r51xY2ioDA4u\nKSpi9wxFRUUhISFGRkbQpglezIqpnj17vrJTGR4e3q1bN2jZ9HsNKSrC48cxMxMBcNo0PHCgXqve\nl1H8u2w1ZcGCBVyGyc/PZ3YaPHv2LJfn5UBZWRkz+BYREUE6S1tw+GYvKjj/aG8AAAs/SURBVMKQ\nEDQyQgC0ssLQUGT3N2pkJPbuXftxNCAA09LYfPL6kpOTVdf9WzvB6/Dhw6Wlpc0c8ODBg7feqr1h\n+iun33MDAIuL0dkZ6w7J/hPKaFlZWd1VDA0dUPvdokmLFy8GAH9/f87OyKVvvvkGABwcHJqf6qed\nOH+zP3yI48fXFjsnJ2Rl5m18PI4eXfucfftis5MoWXTlyhUPDw/mx4mVCV75+fmBgYECgQBaNv2e\nM0xNvHoVBw9+Oc/0n1BGEXHJkiXNlFF7e3tuYjx48MDAwEAgECTq6OKwV1Eqla+//joArF+/nnSW\nVuMhkZ2NoqMhOBgSEwEAfH1h3z74ux61TkkJSCRw+DDU1ICtLWzbBnPmgIC7u0bL5fJjx46tW7cu\nPz+fz+fPmDFj165dzHyvVlEoFF988cXmzZuLioqMjY3XrFmzatWqZu79zTEeD5i3yerVIBDA9u31\nHmxqezZt3TOrdVJSUlxdXZVKZaOt77zzTqNLnlg3e/bsuLi4hQsXHjx4kIPTEXHjxo2hQ4caGRlt\n2LCBWWXOGU9PT6aItxGxAi6ToVSKVlbMOCYGBmKrluTL5SiVorV17QydoCBsxyTKdqo7YNqGCV6x\nsbGqZQsBAQEpKSmai9o2qrdJdTUOGoTMwq5/SG8UESdOnMjKz2p7iEQioVDY1CSnh
w8fcvyaaMjg\nwYO7kNhXceXKle2JTag3qlJUBJs2wcGDoFCAtTVs2ACLFr26OxkXB0uXwt27AABvvgn798PfZYig\n5OTkFStWXLx4EQB69+69fft21YTTpmRmZi5ZsuTChQsA4O7uLpVKVVPEtIqq4wkASUkwZQrcvAmW\nli97ow3fRI0+qKMuX748duxY1T/5fL6ZmZm1tTWzpRYHEDE3N7eysjI6Olq1CFhl0aJFhw8f/vnn\nn7XzzaMmJyfnzJkzixYtYroddeXl5Tk5OZWWlv7rX/+yt7fnMpWfn9+UKVPa/vXs/BJpp5s30du7\ndnBz8GC8dq3JI4uLcc6c2pmNnTvjN9+oT8Mh7cqVK+7u7sxrO2rUqPtNTPAqKytTra3q0KFDy9dW\nEaH2Ntm/H//9739Qb7TuzKdhw4b9/vvv3GfYs2cPALi4uDR8nzCjCo02aaHZs2cDwKJFixo2MTff\nnjx5Mvep2kmb3uxRUejoiAB44gQiYnw8vv02mpujuTm+/TY+eICIKJOhszOam2NoKL54QTZvU2Qy\nWVhYGDO4w6yIrftZjNlLlLkLLjP9vqCggGDallCriUol+vv/g8ooIt67d2/p0qWnT58mddFPJpMx\npXzv3r1qTTU1Ncxv7oZN2iYuLo7H45mamqrtRoaIN27c4PF4QqFQSzZ/aRUte7NXVODBg6hQYGoq\nikS4Zw/m5GBODu7diyIRMnt2XLuGzW7eoSUKCwuDgoKYy+5WVlahoaFVVVX37t0bOXIk068ZMWLE\nH3/8QTompTOuXLkCAJaWljkNVuk206Q9FAoFc6eTTz/9VK1JqVS+8cYbALB27Voi2dpJy8qoyowZ\n6suzN2zAmTPJhGmH27dvjxgxgqmbXbp0YRbR29vbHzt2jOPNTCk9MH78eAD4+OOPm2qaN28e96la\n6Pjx48xPQcMtfk6ePNlUk07Q1jJqZ6c+fz4tDbmao8c6ZsDUz8/P3Nw8NDS0oqKCdCJKJ6WlpRkb\nG/P5/Ibjs6mpqUwTwXtwNaOsrIy5KU54eLhaU3l5OdP09ddfk4jGAm0towYGqLbI8sULFAgIpWFB\nZWVlYWEhkTWdlD5ZtWoVAAwdOrThKC2zl/OwYcO0ZNVGXevXrwcALy+vhtk2bNjANOnu5zNtLaOd\nOulTb5Si2FJaWsrcFeb06dMNm5h5Qtp2Z5GMjAwTExMej9dwLx5V07Vm5udoPW29T/3o0XDiRL1H\njh+HBjPmKOqfxsLCYtu2bQCwatWqiooKtabNmzcDwIoVK168eEEmX2PWrFlTWVn5wQcfMNeRGm1S\n7Xymk0jX8SY8fIi2trh378sr9ba22Nitkyjqn0ahUHh5eQHAhg0b1Jrkcnm/fv0EAuEXXzS3ByOX\nYmNjAcDMzOzJkyctb9It2lpGEfH+fRw7Fs3M0MwM/f3x3j3SgShKW1y7do3H45mYmDS8S1hs7N3e\nvavMzVEbxuEVCgVzE0lJg/siNNOkc7S4jFIU1bSZM2cCQEBAQMOmDz5AAPzgA+5DqTt27BgA9OjR\n40WDxTJHjx5tqknnkF5TT1FUm2RnZ7u4uJSXl1+9etXHx6duU1YWuLhARQX88gv8vdqDgNLSUicn\np7y8vIiIiOnTp9dtev78uZOT07Nnzxo26SJtvcREUVSzHBwcVq5caWBg9OWX5Wrb+HXtCiEhAADB\nwaBQEEkHALBjx468vLzhw4dPmzatYdOzZ89GjBjRsEkX0d4oRemqysrKt9+Wx8ZafPklzJ1br6mq\nCtzcICMDjhyBefMIZEtLS3N3d5fL5b///rvqpraM1NRUDw+PRpt0FO2NUpSuMjExCQqyAICQECgs\nrNckFMKuXQAAn3yi3sSNNWvWyGSymTNnNiyUISEhMplMLBbrRw0F0NoJTxRFtcyYMQiAS5Y02RQU\nxHWkq1evAoC5uXlWVpZa0+XLlwHA0tIyNzeX6
1gaQ3ujFKXb9u0DgQAOHYL4+EaaDA0bb9IchUIR\nHBwMACEhIcxieRW5XL5s2TKmqQ332tFatIxSlG5zc4PAQFAoIDi4kaZ580Aub6RJc44dO3b//v2e\nPXuuXLlSremrr75KSkrq3bt3wyadRi8xUZTOKyoCJycoLIQff4RJk+o1FRSAkxOIRPDbb8DBbeJU\nM5m+/fbb999/v37Ioj59+hQVFX3//ffvvfeexqNwiPZGKUrnMbcxA4AVK6Cqql6TrS3ExEB8PBc1\nFAC2bt367NmzN998U62GMk1FRUU+Pj56VkOB9kYpSj/I5TBwIMTHw7Zt8MknZDKkpKT07dtXLpff\nvn2bWeipkpiY2L9/f0S8c+eOp6cnmXwaQ3ujFKUPBAKQSgEAtm2Dx4/JZFi9erVMJps9e7ZaDWWa\n5HL5Rx99pH81FGhvlKL0yaRJcO4czJypvs0kB2JjY318fMzMzJKTk9Uu0F+6dGncuHFWVlYpKSki\nkYjrZJpHyyhF6Y/0dHBzA5kMYmPh73uAceT58+dbtmwRiUQhzELUv9XU1PTr1+/hw4e7d+9m9ufX\nP7SMUpReWbMGdu6EgQPh99+BrwWDdvv37w8ODnZycoqPjzcyMiIdRyNoGaUovVJWBs7OkJMDx47B\nhx8SDlNQUODk5FRcXBwVFTVx4kTCaTRGC35bURTFHgsL2LgRACAignASAJBIJMXFxWPHjtXjGgq0\nN0pR+kehgK++gg8/BLKfoePj4wcMGMDn8+/fv+/q6koyioYJSAegKIplBgZkNsdTExwcrFAoFixY\noN81FGhvlKL0Eo8H/v5w6RLwePUe5OzH/dy5c5MmTbKxsUlNTe3YsSNHZyWEjo1SlH6ytoaDB8mc\nurq6evny5QAgkUj0voYC7Y1SlF7i8aC4GN54A86eBReXlw8iwn/+A+nprX22nUVFrfiahISEa9eu\nOTs7x8fHGxoatu5kOoiOjVKUfrKygiNHQCyGa9egbik7fRp++aV1T+XhcS4h4XqrvmTq1KmzZs36\nJ9RQoL1RitJLqmHQ1atBIIDt218+ePp0qxfdi0Tn5PK8Vn3J5MmTbbnZVEoL0DJKUXpIVUZlMhg6\nFKRSGD6c00tM/yi0jFKUHqpbMZOSYMoUuHkTLC1pGdUIeqWeovScmxvMnw9LlpDOob9oGaUo/bdk\nCeTmkg6hv+iHeoqiqHahvVGKoqh2+X+fHpAHGRcgDgAAAABJRU5ErkJggg==\n", "text/plain": [ "" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "m = Chem.MolFromSmiles('COc1ccc2c(c1)[nH]c(n2)[S@@](=O)Cc1ncc(c(c1C)OC)C')\n", "m" ] }, { "cell_type": "markdown", "metadata": { "collapsed": false }, "source": [ "Note that the coordinates used for the drawing are not present in the molecule, the RDKit generates them when the molecule is drawn." 
] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Reading Mol file data" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": false }, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAcIAAACWCAIAAADCEh9HAAAABmJLR0QA/wD/AP+gvaeTAAAQ9ElE\nQVR4nO3da1BTd/oH8CchgFxERLxgUVsuliL1Akio2qqFtozAtnaavguvdtNx2sad2bZxHWeO+qJi\nx+mEFzvbvHBnM860M3Fmt0aL3WJBqyCCtypixeANUARBkTsh+f1fHP9IEW3knJNzTvL9TF9ohCdP\nZfgm58d5HjWMMQIAgKnSyt0AAIC6IUYBFMBup9xcio6m6GjKzaV///t3f6rRTP5ZT3sc/EsndwMA\nQW/TJjp5kkpLac0a0miouposFjp1iv75T7k7A59ocDYKIKcDB+jvf6e6OoqOfvxgXx+tXEm7d9Of\n/kREpNHQpN+nT3sc/AsX9QCy+uYb4rjfZSgRRUcTx9E338jUEzwfxChMRUdHR0lJyfvvv7948eKy\nsjKPxyN3R6pVX0/r10/y+Lp1VF/v925gKnBRD8/H6/Xu3bt327ZtHR0dWq3W6/USkV6vLysr0+v1\ncnenQjodDQ2R7omfUrjdFBlJbjfRM3+UhO9fBcC7UXgO5eXl6enpJpOpo6Njw4YNjY2NTqfzpZde\nOnXqVG5ubnFx8fXr1+XuUW1iY6m7e5LHu7spNvbxbxmb5D9QBsQo+KSxsfGtt94qLCy8cuVKenp6\nRUXFDz/88PLLLxcXF1+6dKm0tHT69OmHDh1KS0vbvHlzb2+v3P2qR3Y2VVVN8nhVFWVn+70bmBIG\n8EwPHjwwm82hoaFENGPGDKvVOjIy8uSHtbW1mUwmrVZLRPPnz7fZbB6Px//dqs9//sPS01lf3+8e\n7Otj6ensv/999NunfZ/i+1cZ8GWApxodHbVarbNmzSKikJAQs9l87969Z39KXV3dqlWr+Ffo7Ozs\n6upq/7Sqbn/+M8vMZP/7H+vrY/39rKKCZWayv/zl8QcgRpUNXwaY3LFjx1asWMEH4uuvv37mzBkf\nP9Hr9TocjoULFxKRRqMxGAw3b96UtFW1unSJPXz46Nf/+hfLyWGRkSwykuXksL17f/eRiFFlw5cB\nJrp27VpRUREfoC+++KLT6ZxCkb6+Po7jpk2bRkSRkZEcxw0ODoreqlq1tzOjkWm17G9/k7sVEAFi\nFB57+PChxWKJiIggoujo6NLS0oGBASEFb926ZTQa+UResGCB3W4Xq1W1crvZP/7BZs1iREynY3/9\nq9wNgQgQo8AYY16v1263v/DCC/yVuNFobG1tFat4ZWXlsmXL+DBdt27d+fPnxaqsMg4HS01lRIyI\nGQysqUnuhkAciFFgNTU1K1eu5GPutddeq6urE/0pPB6P3W6fM2cOEWm1WqPRePfuXdGfRbkuXGB5\neY8CNCODHTkid0MgJsRoULtz547RaNRoNEQ0b948u93u9Xqle7r79+9bLJawsDAiio2NLS0tHR4e\nlu7pFKGzk5lMTKdjRCw+ntlszO2WuycQGWI0SA0ODnIcFxUVRUTTpk3jOK5vwq2Lkrly5UphYSH/\n5nfx4sUHDx70z/P6m9vNrFYWF/foGNRsZl1dcvcEkkCMBiOHw5GcnMwHmcFguHr1qv97qKioWLJk\nCd9Dfn5+Q0OD/3uQUHk5S0t7dBVfUMAuX5a7IZAQYjS4/Prrr+v/f5/Q0qVLf/75ZxmbGRkZsVqt\nM2bMIKLQ0FCz2fzgwQMZ+xGHy8WKih4FaFISm9LtYqAuiNFgcffuXaPRyA9rzp492263K2RY
8969\ne2azOSQkhIhmzZpltVpHR0flbmpKenqY2cxCQxkRi4lhVisL+JNfYIwhRoPB8PBwaWnpzJkziSgs\nLMxisXR3d8vd1ERnz5594403+LfJy5cvP3bsmNwdPY/RUWazsYQERsS0WmYysdu35e4J/AcxGuD4\nPUx8PG3YsOG3336Tu6Nn4dfu8d0WFRVdu3ZN7o58cPw4y8p6dBW/Zg07fVruhsDfEKMB69KlS/n5\n+Xwk8avtxKp89OjR5cuXV1VViVVwvIGBAX7tHhFFRERYLJaHY4PnSnPjBjMYmEbDiNiiRczhYFLe\nLgaKhRgNQD6utpuCtra2sftMCwsLRan5h0+kxLV7vb3MYmGRkYyIRUQwjmP9/XL3BLJBjAaUKay2\n89HDhw/NZjN/5/z06dOFj9v7YvzavZUrVypk7Z7T6WwtLn50FV9UhJlOQIwGjimvtns2j8djs9nm\nz5/Pz3GaTKa2tjZRKvtCUWv3Tp48yf97U28sWOB5/XVWWytXJ6AoiNFAIMpqu0lVV1ePjduvWrVK\ninF7X4xfuxcVFeX/tXtdXV2ffvqpTqcjopkzZ5aVlYl1TgIBADGqbqKvthtz8+ZNg8HAn04uXLjQ\n4XBIOm7vC1nW7g0NDY1NzYaHh3Mc19vb64fnBRVBjKqVdKvtZBy390VlZeXSpUv5MF2/fr2ka/cc\nDkdqaurY1GwTjkFhMohRVZJutZ3dbl+wYAFf2Wg03rp1S6zKIvLD2r0LFy7k5eXxfw8ZGRlHsNoO\nng4xqjLSrbabMG5fWVkpSlnpdHd3S7F2r6Ojw2Qy8ceg8fHxNpvNjdV28EyIUdWQ7lp7/Lj9nDlz\nlDNu74sJa/cOHTo05VJut9tqtcbFxRGRTqczm81dWG0HPkCMqoNEq+1UMW7vi4qKivT0dCFr98rL\ny9PS0vgKBQUFl7HaDnyGGFU66VbbqWvc/g9Nee2ey+Uau10sOTlZxNvFIEggRpWro6NDotV20o3b\ny+651u719PSMTc3GxMSIODULQQUxqkTSXWtLN26vKOPX7q1YseLJtXujo6M2my0hIYGfmjWZTLex\n2g6mCjGqOBJda0s3bq9YT1u7d/z48czMTP7xNWvWnMZqOxAGMaog0l1r//LLL2PBIeK4vfJNWLu3\nadOmgoIC/u9h0aJFShjNggCAGFUKt9u9c+dOIoqMjNyxY4dYM53Xr1+XaNxeRZqbmzdu3MjfxsSf\nk3z22Wc9PT1y9wUBAjGqFPX19UQ0d+7clpYWUQr29vZKNG6vUt999x3/oyTMdIK4dARKkpiYmJiY\nKLAIY2zfvn1bt25ta2vjx+137drFT98HM3516YwZM8bG5AFEgRgNNLW1tZs3b66rqyOi3NzcsrKy\nnJwcuZsCCGRauRsA0bS3t5eUlPBbQflx+5qaGmQogNTwbjQQDA8P79q1a8+ePf39/eHh4Vu2bPn8\n88/56XsAkBpiVPX279+/detWl8tFRAaD4csvv0xJSZG7KYAgghhVsdbW1k8++eTAgQNElJKSsmfP\nnnfffVfupgCCDs5GVSwqKurEiROzZ8+22WyXL19GhgLIAu9GVWzmzJnffvvt0qVL582bJ3cvAMEL\nMapub7/9ttwtAAQ7XNSDuqWmpiYnJzPG5G4EghfejYK6NTc38wN5/L9PBeB/eDcKACAIYhQAQBDE\nKACAIIhRAABBEKMAAIIgRgEABEGMAgAIghgFABAEMQoAIAhiFABAEMQoAIAgiFEAAEEQowAAgiBG\nAQAEQYwCAAiCGAUAEAQxCgAgCGIUAEAQxCgAgCCIUQAAQRCjAACCIEYBAARBjAIACIIYBQAQBDEK\nACAIYhQAQBDEKACAIIhRAABBEKMAAIIgRgEABEGMAgAIghgFABAEMQoAIAhiFABAEMQoAIAgiFEA\nAEEQowAAgiBGAQAEQYwCAAiCGAUAEAQxCgAgCGIUAEAQ
xCgAgCCIUQAAQRCjAACC6ORuAECQ5ORk\nr9er0WjkbgSCF2IU1O3q1atytwDBDhf16vbTTz+1t7fL3QVAUEOMqtidO3c++OCDpKSkLVu29PX1\nyd0OQJBCjKqYx+N58803BwcHd+/evWLFigMHDsjdEUAwQoyqWGJi4vfff3/q1Knc3FyXy/Xee+/p\n9fra2lq5+wIILohR1cvJyampqbHb7fPmzaurq1u1alVJSQkOTAH8BjEaCDQaTUlJicvl4jguLCxs\n3759KSkp27dvHx4elrs1gMCHGA0cUVFR27dvb2hoMBgM/f39O3bsyMjI2L9/v9x9AQQ4xGigSUlJ\ncTgcR44cefXVV10u14cffpiXl3fx4kW5+wIIWIhRZWltbW1tbRVeJy8v7+zZszabbfbs2ZWVlZmZ\nmR999FFnZ6fwyupVU1MjdwsQoBgog9vt3rlzJxFFRkbu2LFjYGBAlLJdXV1ms1mn0xFRXFyc1Wp1\nu92iVFaR5ubmjRs3ElFYWFhdXZ3c7UCgQYwqyO3bt00mk1arJaKEhASbzTY6OipK5cuXLxcUFPAv\nnGlpaeXl5aKUVb7Ozk6TycS/isTHx9tstpGREbmbgkCDGFWc06dPr1mzho+8rKys48ePi1XZ6XQm\nJyfzlYuKilwul1iVFWh0dNRqtcbFxRFRSEiI2Wzu6uqSuykITIhRJfJ6vQ6HY9GiRUSk0WgMBsON\nGzdEqTwyMmK1WmNiYogoNDTUbDb39PSIUllRDh8+/Morr/AvGO+8805jY6PcHUEgQ4wqV39/P8dx\nERER/IGpxWLp7e0VpfKTpwcej0eUyrJrbm4uKiriAzQpKcnpdMrdEQQ+xKjStbS0GI1Gfp9mYmKi\n3W73er2iVD59+vTq1aulOD2QRU9Pj9lsDgsLI6KYmBir1To8PCx3UxAUEKPqUFtbq9fr+cjT6/Un\nT54Upax0pwf+5PF4bDZbQkICEWm1WpPJ1NbWJndTEEQQo6rh8XjsdvvcuXP5sDAaje3t7aJUnnB6\nwHGcWLdb+cGJEyeysrL4F5jVq1fX19fL3REEHcSoyvT29nIcFx4eTkTR0dEcxw0NDYlSWbrTA4nc\nuHHDYDDwDS9cuNDhcCi8YQhUiFFVampqMhgM/Fuw1NRUh8MhVuWjR48uX76cr7x27dpz586JVVlE\nAwMDHMdFRkYS0bRp0ziO6+/vl7spCF6IURU7cuRIRkYGH3n84LwoZaU7PRDO6/Xa7fbExET+MNdo\nNLa0tMjdFAQ7xKi6ud1um80WHx9PRDqdzmQydXZ2ilL5/v37FouFPz2IjY0tLS0V6/Rgys6dO7d2\n7Vr+ZWPZsmVHjx6Vtx8AHmI0EEg3ON/U1DR2G2Zqaqpct2G2t7cbjUb+Rte5c+fa7faAudEVAgBi\nNHBINzhfUVGxZMkSvnJ+fn5DQ4NYlf/Q0NBQaWlpbGwsEYWHh1sslvv37/vt2QF8gRgNNE6nMykp\nSfTBeX6KlI8zforUD3HmdDpTU1PH/l+ampqkfkaAKUCMBqDh4WGJBufb2trGbooqLCwUpeakLl68\nmJ+fzwfokiVLKioqpHsuAIEQowFLurV7jY2NBQUFtbW1olSbgF9tFxoaitV2oBaI0QCnosF5rLYD\nlUKMBj5VDM7/+OOPWG0HKoUYDRbSrd0TCKvtQO0Qo8FFUYPz41fbTZ8+HavtQKUQo8Fo/OC8Xq+X\n6IdFz8Cvtps/fz5W20EAQIwGKRkH50+cOJGdnY3VdhAwEKNBTbq1e5O6efMmVttB4EGMgoRr98YM\nDg5itR0EKsQoPCLR2r0nV9vdunVLlMoACoEYhcdEX7t3/vz58avtqqqqROoUQEEQozCRKGv3xq+2\nmzNnDlbbQQBDjMLkJqzdO3z4sI+fiNV2EGwQo/Asz7t27+DBg+NX2125csU/fQLICDEKf8DHtXsN\nDQ1YbQfBCTEKPnly
7d7YWef41XaxsbFWqxWr7SCoaBhjBOCbM2fObN68ubq6moiysrK+/vrrc+fO\n7dy5s7u7OyQk5OOPP+Y4jt90BxA8EKPwfLxe7969e7dt29bR0aHVar1eLxHp9fqysjK9Xi93dwAy\nQIzCVAwMDHz11VctLS1NTU1ffPFFcXGx3B0ByAYxCgAgiFbuBgAA1A0xCgAgCGIUAEAQxCgAgCCI\nUQAAQf4PC9kr8BbZ8O0AAAAASUVORK5CYII=\n", "text/plain": [ "" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "molblock = \"\"\"phenol\n", " Mrv1682210081607082D \n", "\n", " 7 7 0 0 0 0 999 V2000\n", " -0.6473 1.0929 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n", " -1.3618 0.6804 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n", " -1.3618 -0.1447 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n", " -0.6473 -0.5572 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n", " 0.0671 -0.1447 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n", " 0.0671 0.6804 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n", " 0.7816 1.0929 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n", " 1 2 1 0 0 0 0\n", " 2 3 2 0 0 0 0\n", " 3 4 1 0 0 0 0\n", " 4 5 2 0 0 0 0\n", " 5 6 1 0 0 0 0\n", " 1 6 2 0 0 0 0\n", " 6 7 1 0 0 0 0\n", "M END\n", "\"\"\"\n", "m = Chem.MolFromMolBlock(molblock)\n", "m" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Here the molecule has coordinates that were read in from the Mol block. 
We can see this because the molecule has a conformer:" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "1" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "m.GetNumConformers()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The conformer that is present is 2D (we can see that from the coordinates above):" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "False" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "m.GetConformer().Is3D()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Here's another Mol block and the corresponding molecule:" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": false }, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAcIAAACWCAIAAADCEh9HAAAABmJLR0QA/wD/AP+gvaeTAAAXbElE\nQVR4nO3daVRU5xkH8AcIOxhmABHFuERNSmrVaBQEQeJKxLhlZFgGbXKi8QvantSlJxbb05xjbD+A\nOY2trZ4QQQeniYqGGFdWkSgwmFAEURGRnRlggIFhuf1wDaWICMxy5975/04+yJ3tYTn/vO8zd+5j\nxTAMAQDAWFlzXQAAAL8hRgEA9IIYBQDQC2IUAEAviFGwRO3t7Q8ePFCr1T09PVzXArz3EtcFAJha\nWVnZqVOnDhw40H/EwcHB0dHRwcFB9LOBXz7vJnd3d3t7e+6+DzAXVjjhCSzNb37zm/j4eDc3Nysr\nq9bW1t7e3rE9j42Nzbhx49zc3Fx+5ubm5urq6uLi4urq6urq+uxN/bca9jsCbiFGweLMmTPnzp07\nWVlZgYGB7BGtVtvZ2anVatU/G+bL/n83NTXpdLoxlzHkaveFi2IvLy8bGxsD/STAMBCjYFkaGxvH\njx/v5OSkUqns7Oz0fLZB+TuS8O3/cswvygbrqMJXJBJ5enra2trq+f3CkNAbBcuSkZHBMMzixYv1\nz1AicnR0dHR0FIlEEydOHO1jxxa+LS0tnZ2dNTU1Y6gWLWAjQYyCZUlPTyeipUuXclzHzxE85vwd\nVfhqtdqGhobOzk52FTyGFB5t+LL/ZhvQo30t3kGMgmUxnxgdMzZ/x/DA4VvAz7uJbQGzS2B98lfA\nLWD0RsGC1NXVeXt7Ozs7q1QqNApHqLu7u62tTa1Wt/2spaWltbVVo9GwXw66qaWlpf/LMb+ovb39\npk2bkpOTDfiNGA9Wo2BBMjMzGYYJCAhAho6cra0tu04c28NH2wJWqVRqtbqrq+vixYtarXZs624T\nQ4yCBRHAjp53RtUCViqVUqm0trbW2dn50KFDvMhQwodBwaIgRs1ZQkKCv79/aWnp3Llz8/PzP/jg\nA64rGin0RsFS1NbWTpw40dXVtamp6aWXsA8zI2q1+oMP
Pjhz5gwRxcbGfvbZZw4ODlwXNQr4YwJL\nwZ4xGhAQgAw1K7m5uVKptLKyUiQSHT9+fP369VxXNGrY1IOlYHf0wcHBXBcCT/X19R04cCAoKKiy\nsnLx4sVKpZKPGUpYjYLlYGM0JCSE60KAiKiurk4mk12+fNna2jouLu6TTz7h7y4BvVGwCNXV1ZMm\nTRo3bhwao+YgMzMzKiqqqqrKy8srKSlp+fLlXFekF2zqwSJkZmYSUWBgIDKUW729vQcOHHj77ber\nqqqCgoJu377N9wwlbOrBQuBUJ3NQXV0dHR19/fp1GxubuLi4/fv38+gTn8NAjIJFuH79OqExyqnv\nvvtuy5YtDQ0NEydOTEpKEtLvAr1REL4nT574+Pi4ubk1NjYKY/nDL93d3R9//PHnn3/OMExoaGhi\nYqKnpyfXRRkSVqMgfBkZGUQUGBiIDDW9hw8fSqXSH374wdbW9i9/+UtsbKzwLp2HGAXhQ2OUKwqF\n4sMPP2xpaZk2bZpcLl+4cCHXFRkF3qkH4UNj1PR0Ot3OnTvDw8NbWlokEklhYaFQM5TQGwXBq6ys\nnDJlikgkamxstLbGusEUysvLpVJpfn6+vb39Z599JsiN/EDY1IPAsWeMLlmyBBlqGqdOndq+fbtG\no5kxY0ZKSsqbb77JdUVGhz8sEDg0Rk2mo6MjJiYmMjJSo9FEREQUFBRYQoYSVqMgeIhR0/jxxx/D\nw8NLSkqcnJyOHDkSExPDdUWmg94oCNmjR4+mTp3q7u5eX1+PTb3xJCQk7Nu3T6vVzp49OyUl5Re/\n+AXXFZkU/rBAyNgzRtEYNZ62traYmJhdu3ZptdrY2Ni8vDxLy1DCph6EDTt6o1IqleHh4WVlZS4u\nLl988YVMJuO6Im7gf9EgZIhR40lISPDz8ysrK2NHJ1lshhJ6oyBgFRUV06ZN8/DwqK+vF/Z5iyam\nVqvff//9s2fPElFsbOyhQ4fs7e25LopL2NSDYLFL0aCgIGSoAd24cSMiIoLXo5MMDpt6ECzs6A2L\nHZ0UHBzM99FJBofVKAjWtWvXCDFqILW1tTKZ7MqVKwIYnWRw6I2CMD18+HD69Omenp51dXXY1Osp\nMzMzMjLyyZMnEyZMOHHihADGfhgWNvUgTOxVnYKDg5Gh+ugfnfTkyZMVK1YolUpk6LOwLAdhwlR6\n/VVXV0dFRaWnpwtsdJLBIUZBmHCNUT0NHJ2UnJyMFvMwsKkHAbp//z47A93X15frWvinu7t7586d\na9asaWhoCA0NVSqVyNDhYTUKAoQzRsfMEkYnGRxiFAQIZ4yOzcDRSSkpKW+99RbXFfEDNvUgQOwZ\no2iMjlxXV9f27ds3b97cPzoJGTpyOG8UhObevXuzZs2aMGFCdXU1NqQjUV5eHh4eXlBQYG9vf/jw\n4W3btnFdEc9gUw9C03+qEzJ0JE6ePPnRRx9Z1Ogkg8OmHoQGjdERam9vj4mJiYqK0mg0kZGRljM6\nyeCwGgWhwRmjI9E/OsnZ2fmLL76wqNFJBofVKAhKaWlpTU3NxIkTX3vtNa5rMV8JCQmLFi0qKSmZ\nPXv2rVu3kKF6QoyCoOAzoMNj34i38NFJBodNPQgKGqPDKCwslEqlZWVlbm5u//znP9977z2uKxII\nrEZBOBiGQYwOiWGYhIQEf3//srKyefPm5eXlIUMNCDEKwlFaWlpbWztp0qRZs2ZxXYsZUalUGzZs\n2LVrl06ni42Nzc3Nxc/HsLCpB+Fg36PHUnSgnJycyMjIyspKsVh8/PjxdevWcV2RAGE1CsKBHf1A\n7OikpUuXVlZWBgQEFBYWIkONBB8GBYFgGGbChAn19fX37t2bMWMG1+VwbODopP3792N0klHhJwsC\nUVJSUl9f/8orryBDL1++LJPJ6urqMDrJNLCpB4HoH77EdSFcYkcnhYaG1tXVrVy5EqOTTAOrURAI\nNEYxOokr6I2CEDAM
4+Xl1dDQcP/+/enTp3NdDgfS0tK2bt2K0UmcwKYehKC4uLihoWHKlCkWmKE6\nnW7nzp1hYWENDQ3vvPNOUVERMtTEsKkHIbDYxuiDBw+kUumtW7fs7OwOHTqE0UmcQIyCEGRkZJDl\nNUZPnz69bdu2lpaW6dOny+VyjP3gCnqjwHt9fX3jx49vamp6+PDh1KlTuS7HFLq6umJjY48ePUpE\nmzdvPnr06Msvv8x1UZYLq1HgvZ9++qmpqWnq1KkWkqH37t2TSqUFBQWOjo5///vfcbVQzuEtJuA9\nizrVKTk5ef78+QUFBTNnzszJyUGGmgPEKPCehcQoOzopOjpao9FERUXl5+fPmzeP66KACL1R4Lu+\nvj5PT0+VSlVZWTl58mSuyzGWO3fuhIeH3717F6OTzBBWo8Bvd+7cUalU06dPF3CGsqOT7t69+6tf\n/er27dvIUHODGAV+E/aOvrm5+b333tu1a1dnZyc7Oun111/nuigYDO/UA78JOEbz8vKkUmlFRYWb\nm9u//vWvTZs2cV0RDA29UeCx3t5eDw+P5ubmx48f+/j4cF2OwTAMc/jw4T179nR1dS1atEgul1vI\nuVw8hU098FhRUVFzc/OMGTOElKGDRidlZGQgQ80cNvXAY8Lb0efk5ERERDx+/Bijk3gEq1HgMSHF\naP/opMePHwcEBCiVSmQoX6A3CnzV29vr7u7e0tJSXV3t7e3NdTl6qa2tjY6Ovnr1KkYn8RF+VcBX\nSqWypaVl5syZfM/QS5cuyWSy+vr6CRMmJCUlLVu2jOuKYHSwqQe+Yq8xGhISwnUhY9fb27t3797V\nq1fX19evXLmyqKgIGcpHWI0CX/G9MfrkyZOoqKiMjAwbG5tPP/109+7duOIyT6E3CrzU29srFotb\nW1t52hhNS0vbsmVLY2PjpEmTkpOTLfC6/UKCTT3wUkFBQWtr62uvvca7DO0fndTY2LhmzRqlUokM\n5Tts6oGXeNoYxegkQUKMAi/xsTGakpKybdu21tZWjE4SGPRGgX+6u7vFYnF7e3ttbe348eO5LufF\ntFrt9u3bT5w4QUTh4eFHjx4dN24c10WBwfCzN/q8fdCg44mJ5OdHLi7k4kJ+fvTll8avDEyhoKCg\nra3t9ddf50WG3rt3LyAg4MSJE46OjomJiXK5HBkqMMLd1O/YQbm5dPAgBQaSlRXl5NCePZSXR0eO\ncF0Z6IttjPJiR5+cnLxjxw6NRjNz5syUlBSM/RAkfq5GX+jcOcrIoOxsWr2aXFzI2ZlWrqSsLEpP\np9RUrosDffFiKv3A0UnR0dEYnSRg/OyNWlnRkGX3Hw8Npa1bKTx88B3kcvrqK0pLM3qFYDS8aIxi\ndJJFEehq9NYtGvJUmKVL6dYtk1cDhnT79u22tjZfX1+zzVCMTrI0vI1RK6sh/uvX3Exi8RCPcnen\n5maT1QjGYM6nOmF0kmXi7VtMz9vUs9zcSKWiZ1crKhW5uRm3MDAys43RmzdvRkREsKOTjh07tnHj\nRq4rAhPh7Wp0eAsW0PXrQxy/fp0WLCAiunGD/vpXevTIxHWBnnQ6XXZ2tpWVlVl9gJJhmISEhKVL\nl1ZUVPj5+RUWFiJDLQvDR88ru//4N98wvr5MW9v/3drWxvj6MmfOMAzDbNnCEDFEjK8vExfHlJQY\ns1wwmOzsbCKaPXs214X8T1NT07vvvktEVlZWe/bs0el0XFcEpibQ1eiGDbR4MQUF0aVL1N5OHR10\n5QoFBVFAAK1fT0S0bh2tW0cODvSf/9Af/0i+vrR4McXHU1UV16XDcMxtR5+dnT137tzU1FSxWHz2\n7NmDBw/a2tpyXRSYHNc5PiYvXI2yjh9nFi5knJwYJydm4ULm2LHB99dqmdRURiZjXF2fLk7716el\npUapHPSzfPlyIvr666+5LoTp7e2Ni4tjR30EBgZWVlZyXRFwhp/njRpcZyddvkwKBZ
09SxrN04O+\nviSRUHQ0zZjBaXHwVFdXl1gs7uzsbGhoEA95Joap1NTUyGQyjE4CFmL0/2m1dOUKKRR05gy1tT09\nyOapTEavvsppcZYuKysrKChozpw5SqWSwzK+//77mJiY+vp6b2/vpKSkt99+m8NiwBwItDc6Zo6O\ntHYtffUV1ddTairJZOTi8rR/OmMGvfEGHThADx5wXaWF4rwxyo5OCg0Nra+vX7VqlVKpRIYCEU97\no6bU0fG0f+rs/L/+6fz5THw8U1XFdXGWhc2sM+y5FiZXVVXFnmVla2sbHx/f19fHSRlghrCpH7H+\n/f4331B7OxGRtTX5+5NEQhIJTZzIdX0C19nZKRKJdDodJ43Rb7/9duvWrezopJMnTwYFBZm4ADBn\niNHR6+igq1eHztPNm4lvo4H4IjMzMzg4eO7cuYWFhaZ8XZ1O97vf/e7zzz9nGGbNmjVffvmlh4eH\nKQsA84fe6Og5Of1f/1QiIVtbysmhXbvIx4cCAykhgWprua5SaDgZvnT//v2AgIDDhw+zG/nz588j\nQ+FZWI0aQksLnTtHCgVdukQ6HRGRjQ35+ZFEQlIpeXlxXZ8QhISEpKennzt3jv3IkAn0j0569dVX\n5XL5AvZjxADPQIwaVHMzpaYOnacREUNcKgVGRqvVikSinp6epqaml19+2dgv19HR8dFHH2F0EowQ\nYtQ4hsnTyEjy9OS6Pp5JT08PCQl588038/Pzjf1axcXF4eHhxcXFTk5OR44cwdVC4YXQGzUONzeK\niaHz56m2lhITKSyMbGye9k+9vZ/2Txsbua6SN9gzRk3QGE1KSvLz8ysuLn7jjTd++OEHZCiMBFaj\npqJW0/nzpFDQ999TdzcRkb09rVhBEgmtW0fG36jyWnBwcGZm5vnz58PCwoz0Eu3t7Tt27GA38tHR\n0UeOHHFxcTHSa4HAIEZNTqWiCxeGztP16wk9uGeYoDGK0UmgD8Qod57NUwcHWr6cJBLasIFcXbmu\nz1xcu3Zt2bJlCxYsuGWcOVoJCQl79+7t7OycM2eOXC7H2A8YLfRGuSMWD+6f9vTQhQu0ZQuNH//0\n1NT+y01ZMON9lL65uXnTpk39o5Nu3ryJDIUxwGrUnDQ10bffkkJBFy9STw/RgPXpxo1kqa26oKCg\nrKysCxcurFmzxoBPe/PmTalU+ujRI4xOAj0hRs3Ss3nq6EjLlpFEQps2kbMz1/WZTnt7u1gs7uvr\nU6lUrgZqdDAMc+jQof3793d3d/v5+cnl8ilTphjkmcEyIUbNW2MjpaVZcp5euXJlxYoVCxcuzMvL\nM8gTNjU1vf/++6mpqVZWVrt37/7zn/+MKy6DntAbNW8eHk/7pzU1lJhIy5dTV9fg/mlHB9dVGpFh\nG6PZ2dnz5s1LTU11d3c/d+7cwYMHkaGgP6xG+eb+fVIo6PRp6r/QkafnH6OiAsPCli5damNjw2lx\nhhcYGJiTk5OWlhYaGqrP8/T19f3pT3/69NNPe3p6AgMDT548OXnyZEMVCRYOMcpbVVX09dekUGg0\nmnF37hCRm5vb2rVrJRLJqlWr7OzsuK7PANrb20UiERGpVCp9ToavqamJjo6+du0aRieBMSBGea+m\nvPx4SopCoSgqKmKPeHl5bdq0SSKRLFmyhNfr00uXLq1atcrPzy83N3fMT4LRSWB0pr/gPhhJZWVl\nfHx8QECAlZUV+8sViUQymSw1NbWrq4vr6sZi3759RLR3796xPVyn08XGxrI/jVWrVtXV1Rm2PAAW\nYlSAHj169Lw81el0XFc3Cv7+/kR08eLFMTy2qqqKHfWB0UlgbNjUC1llZeWZM2cUCsWNGzfYX7RI\nJAoLC5NIJKtXr7a1teW6wOFoNBqxWGxlZTWGxuiFCxd+/etfY3QSmAjXOQ6mUFFRMWh9KhaLzXx9\n+t133xGRv7//qB7V1dXVv5EPCwtraGgwUnkA/R
CjloVHebp3714i+v3vfz/yh5SXl7OjPuzs7LCR\nB5PBpt5CVVRUnDt3TqFQ5OTksEfc3d3feecdiUQSGhpqDucD+fn55eXlXbp0acWKFSO5v1wu3759\nO0YngekhRi2deeZpa2uru7u7tbW1Wq12cnIa/s4DRydJpdJ//OMfGJ0EJsX1chjMxYMHD9j9fv/f\nhoeHB7vf7+7uNnExaWlpRBQQEPDCe/7444++vr5E5OTklJiYaILaAAZBjMJgbJ7Onz//2Tzt6ekx\nTQ27d+8mok8++WT4ux09etTR0ZGIfvnLXxYXF5umNoBBEKPwXD/99FNcXBy71mNNmjQpNjY2Kyur\nt7fXqC/91ltvEdGVK1eedweNRiOTydiqPvzww46ODqPWAzAM9EbhxYqLixUKxenTp0tKStgjPj4+\nGzdulEgkA9/0N5Tm5mYPD4+XXnpJrVazi81BioqKwsPDS0tLXVxc/va3v2F0EnCM6xwHPmHXpwMn\nbfj4+LDrUwOeXXThwgUiWrJkyZC3xsfHOzg4ENGcOXPu3r1rqBcFGDPEKIzFs3k6efJkQ+Xpxx9/\nTER/+MMfBh1XqVQbNmxgXy42Nlar1er5QgAGgU096IXd78vl8tLSUvbI5MmTN2zYoM9+f8GCBfn5\n+deuXQsJCek/mJubGxER8ejRI5FIdOzYsf48BeAcYhQMg83TU6dOlZWVsUdeeeWV9evXjzZP2cao\nra2tWq1mN+8Dr7js7+9/6tQpjE4C88L1chiEht3vz5o1q/9vbMqUKSPf76emphJRcHAw+2VjY+Pa\ntWuJyNraOi4uzvRnsAK8EGIUjIXN05kzZ44qT3/7298SUVxcHMMwmZmZPj4+ROTu7p6ammq60gFG\nA5t6MDp2v5+cnFxeXs4emTp16rvvviuRSAIDAwfdef78+QUFBVevXs3MzGQ38kuWLDl58iSbpwBm\nCDEKpsPmaVJS0v3799kjg/JUrVZ7eHjY2dktWrQoIyODHZ20f/9+Xo9CAcFDjAIHns3TadOmrV27\n1tvbe9++fba2tt3d3d7e3snJyQPfrAcwT4hR4Exvb296erpCofjmm28aGhoG3oSNPPAIYhS419fX\nd+PGDYVC8e9//zs4OFgsFsfHx5vDNU8BRgIxCmakp6cH6Qm8gxgFANCLNdcFAADwG2IUAEAviFEA\nAL0gRgEA9PJfr3ZrGz4eCZ0AAAAASUVORK5CYII=\n", "text/plain": [ "" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "molblock2 ='''cyclopropanol-3D\n", " Mrv1682210081607153D \n", "\n", " 10 10 0 0 0 0 999 V2000\n", " 0.9159 1.2233 0.2286 C 0 0 1 0 0 0 0 0 0 0 0 0\n", " -0.0738 1.2748 -0.9428 C 0 0 2 0 0 0 0 0 0 0 0 0\n", " -0.3167 0.3104 0.2232 C 0 0 2 0 0 0 0 0 0 0 0 0\n", " -1.3658 0.6623 1.1190 O 0 0 0 0 0 0 0 0 0 0 0 0\n", " 1.8730 0.7413 0.0155 H 0 0 0 0 0 0 0 0 0 0 0 0\n", " 0.8835 2.0739 0.9139 H 0 0 0 0 0 0 0 0 0 0 0 0\n", " 0.2747 0.8266 -1.8737 H 0 0 0 0 0 0 0 0 0 0 0 0\n", " -0.7163 2.1557 -0.9817 H 0 0 0 0 0 0 0 0 0 0 0 0\n", " -0.1264 -0.7446 0.0137 H 0 0 0 0 0 0 0 0 0 0 0 0\n", " -1.1210 0.2486 1.9735 H 0 0 0 0 0 0 0 0 0 0 0 
0\n", " 1 2 1 0 0 0 0\n", " 2 3 1 0 0 0 0\n", " 1 3 1 0 0 0 0\n", " 3 4 1 0 0 0 0\n", " 1 5 1 0 0 0 0\n", " 1 6 1 0 0 0 0\n", " 2 7 1 0 0 0 0\n", " 2 8 1 0 0 0 0\n", " 3 9 1 0 0 0 0\n", " 4 10 1 0 0 0 0\n", "M END\n", "'''\n", "m2 = Chem.MolFromMolBlock(molblock2)\n", "m2" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The coordinates in the drawing look odd because the molecule has a 3D conformer:" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "True" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "m2.GetConformer().Is3D()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Notice that no Hs were drawn even though there are clearly H atoms in the Mol block. The default behavior for the RDKit Mol block reader is to remove Hs from the input molecule. If we want to keep the Hs and their coordinates, this behavior can be turned off:" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": false }, "outputs": [ { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAcIAAACWCAIAAADCEh9HAAAABmJLR0QA/wD/AP+gvaeTAAASVUlE\nQVR4nO3de1BUdRsH8GeX28IuCwTirFqgkOa6sIWXdAYcNXOosck0LqLSRUOycXOmi+aMruOrhTpj\npBbi5R1xvCA0vr2kZb1TkCRo4CSCKKEIiYCAcltYFmHP+8cpMuK2u+fsYXe/n+mP7ey5PIzLl9/l\n7PmJGIYhAAAwl1joAgAAbBtiFIiIRCKRSdsBoBdiFADAIohRAACLIEYBACyCGAWHgMFf4I+z0AXA\nSIFAATAPYhT+0O8dxMhWgCGhUw8AYBHEKACARdCpB0eBAQrgCVqjMDSDwSB0CRxg+iN0UWAPEKNA\nNMD8EhHdv39/0aJFixYtQuIADAQxCoNpbm7Oyck5d+5cSkqK0LUAjFAitDJgcKdPn16yZImbm1te\nXl5YWJjQ5ZhJJOr/oz7QdoDhQ2sUhrB48eLExESDwRATE9Pa2ip0OQAjDmIUhvbpp5+q1eqbN2+u\nXbtW6FrM9GiTc9GiRc8//7xOp6OBB4UBhg89GhiW0tLS6dOnd3R0HD16dMWKFUKXYxGFQlFXV1dT\nU6NQKISuBewBWqMwLEqlcvfu3US0Zs2asrIyocuxiIeHBxF1dHQIXQjYCcQoDNfq1atjY2N1Ot2y\nZcu6urqELsd8bIzq9XqhCwE7gRgFE6SkpAQGBl6+fHnjxo1C12I+tEaBW4hRMIG3t/epU6dcXFx2\n79799ddfC12Omdzd3QkxCtxBjIJpZsyYodVqGYZZuXJlTU2N0OWYA61R4BZiFEz20UcfPffccw0N\nDcuWLevp6RG6HJNhbBS4hRgFk4nF4rS0ND8/v5ycnF27dgldjsnQqQduIUbBHGPHjj169KhIJNq0\naVN+fr7Q5ZgGnXrgFmIUzPTCCy9oNJru7u7Y2NimpiahyzEBYhS4hRgF8+3YsSMsLOz3339PSEgQ\nuhYTYGwUuIUYBfO5ubmdOnXK09Pzyy+/PHz4sNDlDBc7NooYBa4gRsEiwcHBe/bsISKNRnP9+nWh\nyxkWTDEBtxCjYKnXX399+fLlHR0d0dHRNtHEw9gocAsxChxISUmZOHFiSUnJhx9+KHQtQ0OMArcQ\no8ABmUx24sQJV1fXffv2ffXVV0KXMwRMMQG3EKPAjalTp27fvp2I3nzzzaqqKqHLGQzGRoFbiFHg\nzHvvvbdw4cKmpqYVK1aM5C+JolMP3EKMAmdEItHhw4cVCkVubu62bduELmdA6NQDtxCjwCV/f//j\nx4+LxeKtW7dmZ2cLXU7/0BoFbiFGgWNz5859//33jUZjfHz8/fv3hS6nHxgbBW4hRoF727dvnzVr\nVnV19WuvvTYC10xEaxS4hRgF7jk7Ox87dszLy+vs2bMpKSlCl9MXxkaBW1hgGfiSmZkZHR3t5uZ2\n6dIltVotdDl/6ezsdHd3d3Nz6+zsFLoWsAdojQJfoqKi3njjDYPBEBcXN6J60BKJxMnJyWAwjOS7\nssCGIEaBR3v37p08eXJpaem6deuEruVv8JAn4BBiFHgklUozMjLc3d0PHjx48uRJocv5CybrgUOI\nUeCXSqVKSkoiorfffvv27dtCl/MHTNYDhxCjwLu1a9e+/PLLLS0tMTExDx8+FLocIkzWA6echS4A\n7J9IJDpw4MClS5cKCgqmTJkSGhoq+5OPj0/vay8vL7lczr729PT09vYWiUQ8lYTWKHAIMQrW4O/v\nv3r16h07dpSXl5eXlw//QIlE4uPj4+7uzr7o83qgt/z8/FxdXQc5LcZGgUOIUbCS+/fvs/dpSqXS\nzZs3u7u763S6lpaW1tZWnU6n0+na2tqam5t1f2pubiaizs7O2tpaU6/l7Ozs6ek5SFOX/ZaqTqfj\n/McEB4Tb78FKQkJCSkpK5syZk5OTo1ar8/Pz2Sbh4PR6fVNTU
2dnJ/uiz+uB3mpsbBxyENbb21si\nkWzYsCExMdHNzY2jnxIcEWIUrOHevXsKhUIikdy5c2f27NmlpaUrV648dOgQT5fr6upi27NtbW1s\n27a1tZVt+WZlZZ0/f55hGJlMxrZGJ02atGnTptjYWCcnJ57qATvHAPAvIyODiObNm8cwzI0bN+Ry\nOREdOnTImjXcvHlz6tSpROTq6pqcnNzd3Z2RkfHUU0+xvwgBAQGpqakPHz60ZklgHxCjYA1r1qwh\noq1bt7L/m56eTkQSiaSgoMA6BZw8eZLN7uDg4MLCwt7tPT09GRkZkyZNQpiC2RCjYA1KpZKIcnNz\ne7doNBoieuKJJxoaGni9dHt7+4oVK9iUXLp0aUtLyz/36ROmgYGBCFMYPsQo8K62tlYkEkmlUoPB\n0Luxq6srIiKCiObPn9/d3c3TpYuLi9kE9/DwSEtLG3xnNkwnTpyIMAWTIEaBd2wXfv78+X2219bW\njhkzhoi2bNnCx3VTU1PZmwFUKtW1a9eGeVSfMB0/fjzCFAaHGAXeJSYmEtG2bdv++daFCxdcXFzE\nYvHZs2c5vGJbW1tvRz4hIaGjo8PUM7Bh+uSTTz4apvy1msGmIUaBd+xs+IULF/p9d9euXUTk4+Nz\n69YtTi535coVdpRTJpMN2ZEfXFdXV1paWnBwMBumkydPTktLQ5hCH4hR4Nfdu3eJSCqVdnV19buD\n0WiMiooiIrVabUazsY/k5GSJRMKe7caNGxaejYUwhcEhRoFfJ06cIKIFCxYMsk9bWxs7EbRy5Uqz\nL/TgwYNXXnmFTTqNRqPX680+Vb/YMA0KCmIvoVQqEabAQowCvxISEojo448/Hnw3C+/Jz8vLCwgI\nYAcHTp8+bValw4IwhX9CjAK/2Cnv/Pz8IfdMS0uTyWTp6ekmnb+np0er1To7OxPRrFmzKisrza3U\nBGyYTpgwgQ3TKVOmpKWl9fT0WOHSMAIhRoFH1dXVROTp6TnMG4Zqa2tNOn9jY+NLL71ERGKxWKvV\nWvm2JIPBkJqaOm7cODZMVSoVwtQxIUaBR8eOHSOiyMhIPk5+/vx5NsJ8fX2zsrL4uMRwsGE6duzY\n3jDNyMgwGo1C1QPWh0VEgEc5OTlENGfOHJOOGuih973bjUbjli1b5s2bV11dHRERceXKFbZNKghX\nV9eEhISKigo2TEtKSqKjo9VqdWZmJoPHpzkIoXMc7Bl7k9ClS5dMOmqgjyW7vaamZu7cufRnR35E\nTe+wLVP2q1lEFBISgpapI0CMAl+qqqqISC6XmzpkOUiMfvvtt/7+/kSkUCh+/PFHLsrkXp8wDQ0N\nRZjaN3TqgS8//fQTEYWHh7PT6BZin2b/4osv1tfXR0ZGFhUVsW3SEejRbr5Cobh69Wp0dPQzzzyD\nbr69QowCX8wbGO1XZWUl+zgoZ2fn5OTkb775ZtSoUZaflldubm4JCQk3btzYunWrj49PUVFRdHT0\n3LlzGxoahhz8BduCRUSAL0FBQRUVFQUFBdOmTTPpwEHS5OLFi88++6zFpVlbe3v7oUOHkpKSRo8e\n/euvv4rF4n5/70Qi/D7aJPyzAS+qqqoCAwO9vb0bGxtNXeOoN026uro++OCDvXv3MgyzcOHCM2fO\n2PTHtaWl5c6dOyqVaqC4RIzaKCywDLxge/Th4eFmrxN369at2NjYwsJCV1fXnTt3ajQasdi2x6C8\nvLy8vLyErgK4Z9ufS0eUlkYzZ5JMRjIZzZxJR44IXVD/LBwYTU9PDwsLKywsDAoKysvLe/fddzFu\nCCMWOhE25e23KT+fkpIoPJxEIrpwgdavp5kzKSVF6Mr6Gj9+fGVl5eXLl8PCwkw6sKOjQyqVsq9j\nY2NTU1PZR5aQHfV5B/mTYB8/oKNBa9R2/Pe/9NNP9PPPFBlJMhlJpbRgAeXmUk4OZWUJXdzfVFZW\nVlZW+vj4PP300yYdWFJSM
n36dPpz6aTe5TztT7+3HwpdFJgJMWo79u8nrZZksr9tlMlIq6X9+wWq\nqX/Z2dlEFBERYdJo5sGDB2fMmFFaWqpSqQoKCuLj4/vsgKCBkQkxajsKCqjfG87nzKGCAqtXMxhT\nB0Z1Ol18fHxCQoJer3/rrbd++eUX9inOADYBM/W2o7mZHnusn+2+vtTcTES0fDkFBlJ0NIWGWrm0\nPtjW6DC/ZVRUVBQTE1NWViaTyT7//PN/NkIBRjg7GbN3CH5+VFpK/v59t9+7RyoVFRbS+PHE/mtO\nmkTR0RQVRSEh1i+zoqIiKCjI19e3vr5+yE79Z599tmHDhs7OTrVaferUKXYpOruH+0btDDr1tmPa\nNMrO7md7djZNm0YBAVRcTFotTZpEZWX0r39RaCgFBNC779LPP5MVfzmHOTDa1NS0ZMmSdevWdXZ2\najSaixcvOkiG0sCDvMhQW8Xnc0+AU6dPM0olo9P9baNOxyiVzH/+87eNJSWMVstMnMgQ/fFfQACj\n0TC5uQz/zxlavnw5ESUnJw+yT35+vnWWTgKwAsSoTVm1igkLY777jtHpmPZ25n//Y8LCmLfeGnB/\nNk+ffNKaeco+kb6oqKjfd41GY1JSkpWXTgLgFWLU1vz738yMGYyHB+PhwcyYwRw+zFRUMFFRTE3N\nYEexeRoc/FeeBgb+kaecKi8vJyI/P79+H68p7NJJADxBjNq+V15hiJj4+GHtzOZpUBBPeXrgwAEi\nWrx48T/fys3NHQlLJwFwDjFq+6qqGKmUIWKys4d7SHc388MPTGIiM2pUb552h4d/8sknN2/etKSW\nuLg4ItqzZ8+jGx9dAzkiIuLOnTuWXAJgpEGM2oXNmysef/yD6GhzVvctKWHWr2fGjr0ZHs7OOk6Y\nMGH9+vXXrl0zoxCFQkFExcXFvVtqamrmzZuHjjzYMcSoPTDq9ZOfeoqI9u/fb+YpHj4s+vHHVatW\n+fr69t7FMW3atJ07d96+fXuY5ygrKyMif3//3oHRc+fO9S6d9MMPP5hZG8DIhhi1E2fOnGHvH6qv\nr7fkPD09Pbm5uRqNpndFNiJSKpVarfb69euDH7t//34ievXVVxmG6erq0mg07KOMIiMj7927Z0lV\nACMZYtR+LFiwQKFQ5Ofnc3K23jxl++nDydPY2Fgi2rdvX3V19ezZs4nIxcUlOTkZi2KCfcOXz+zH\n3bt35XK5p6cnt6c1Go15eXmZmZkZGRl1dXXsRqVSGRUVtXTp0t6vHjEMM2bMmLq6ui+++GLz5s2N\njY2BgYHp6em2uHQSgEkQozBcg+RpXFyc0WicPHmyh4eHXq9nGGbhwoVHjhx5dKQVwF4hRu2KdZ55\nYTAYvv/++4yMjKysrNbWViISi8UBAQG3b98mImdnZ61Wu3HjRltfOglgmBCjdsXKjw7q6enJz8/P\nzMxMT0+vr6/39PR0cXH57rvvTF1RGcCmIUbtilBPYOvs7Fy1atXx48ffeeedffv28XchgBEI3S7g\ngEQiiYmJIaLffvtN6FoArA0xCtwIDQ0loqtXrwpdCIC1oVNvVwRcuZdhGF9f36amprq6utGjR/N6\nLYARBa1Re9Pv7cFWuK5IJAoJCSE0SMHxIEaBM+jXg2NCjAJn0BoFx4QYBc6gNQqOCVNMdkXYlXvb\n29vlcrmLi0tbW5uLiwvflwMYIdAatSsDZaV1/lhKpdIJEyYYDAb2waMADgIxClxSq9WEfj04GMQo\ncImdZSouLha6EADrQYwCl9hZpqKiIqELAbAexChwCZP14IAwUw9cYhjG29u7tbW1oaHBz89P6HIA\nrAGtUeCSSCRSqVSE4VFwJIhR4Bj69eBoEKPAMXwlFBwNYhQ4htYoOBpMMQHHWltbvb29JRJJW1ub\nk5OT0OUA8A6tUeCYXC4PDAzU6/Xl5eVC1wJgDYhR4B769eBQEKPAPTZGcc8TOAjEKHCPnaz
HV0LB\nQSBGgXvo1INDwUw9cK+np0cul+v1+gcPHnh7ewtdDgC/0BoF7jk5OalUKoZhMDwKjgAxCrxAvx4c\nB2IUeIGvhILjQIwCL9AaBceBKSbgxYMHD3x9faVSaWtrq1iMv9Zgz/D5Bl489thjjz/+eHt7e0VF\nhdC1APALMQp8Qb8eHARiFPiCGAUHgRgFvmCyHhwEYhT4olarCTEKDgAz9cCX7u5uT09Pg8HQ3Nws\nl8uFLgeAL2iNAl+cnZ2VSiXDMNeuXRO6FgAeIUaBR5hlAkeAGAUeYZYJHAFiFHiE1ig4AkwxAY8a\nGhr8/f29vLyamppEIpHQ5QDwwlnoAsCejRo1Ki4ubty4cQaDQSKRCF0OAC/QGgUAsAjGRgEALIIY\nBb4MNBiKQVKwM4hRAACLIEYBACyCGAUAsAhiFADAIrhvFHiE2SRwBIhR4FG/dyUjW8HOoFMPAGAR\nxCgAgEUQowAAFkGMAgBY5P9XswKfyLvCHAAAAABJRU5ErkJggg==\n", "text/plain": [ "" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "m2_h = Chem.MolFromMolBlock(molblock2,removeHs=False)\n", "m2_h" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Reading PDB data" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "This time we'll read from a file instead of embedding the text in the notebook. We also won't try to display the molecule itself, since that isn't particularly informative for a large molecule." 
] }, { "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "327" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pdb_path = '../data/1CRN.pdb'\n", "crn = Chem.MolFromPDBFile(pdb_path)\n", "# the reader reports a parse failure by returning None, so fail here with a clear message\n", "assert crn is not None, 'could not parse ' + pdb_path\n", "crn.GetNumAtoms()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "In order to be able to show something useful for the protein, let's get its amino-acid sequence:" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "[(1, 'THR'),\n", " (1, 'THR'),\n", " (1, 'THR'),\n", " (1, 'THR'),\n", " (1, 'THR'),\n", " (1, 'THR'),\n", " (1, 'THR'),\n", " (2, 'THR'),\n", " (2, 'THR'),\n", " (2, 'THR')]" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# get the residue information for each atom (one entry per atom):\n", "atom_residue_info = [atom.GetPDBResidueInfo() for atom in crn.GetAtoms()]\n", "# and now the number and name of the residue each atom belongs to\n", "resinf = [(info.GetResidueNumber(), info.GetResidueName()) for info in atom_residue_info]\n", "# take a look at those:\n", "resinf[:10]" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "46" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# collapse the per-atom entries to one (number, name) entry per residue, ordered by residue number.\n", "# NOTE: the key does not include the chain ID, so this is only suitable for single-chain structures.\n", "residues = sorted(set(resinf))\n", "len(residues)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "[(1, 'THR'), (2, 'THR'), (3, 'CYS'), (4, 'CYS'), (5, 'PRO')]" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "residues[:5]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Reading other sequence formats" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We can also create molecules from a few sequence formats:" ] 
}, { "cell_type": "code", "execution_count": 14, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "327" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# plain one-letter sequence (written here in groups of ten residues)\n", "seq = 'TTCCPSIVAR SNFNVCRLPG TPEAICATYT GCIIIPGATC PGDYAN'\n", "m_seq = Chem.MolFromSequence(seq)\n", "m_seq.GetNumAtoms()" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "327" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# the same peptide written in HELM notation\n", "helm = 'PEPTIDE1{T.T.C.C.P.S.I.V.A.R.S.N.F.N.V.C.R.L.P.G.T.P.E.A.I.C.A.T.Y.T.G.C.I.I.I.P.G.A.T.C.P.G.D.Y.A.N}$$$$'\n", "m_helm = Chem.MolFromHELM(helm)\n", "m_helm.GetNumAtoms()" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "327" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# and once more as a FASTA record (header line starting with '>', then the sequence)\n", "fasta = \"\"\">\n", "TTCCPSIVARSNFNVCRLPGTPEAICATYTGCIIIPGATCPGDYAN\n", "\"\"\"\n", "m_fasta = Chem.MolFromFASTA(fasta)\n", "m_fasta.GetNumAtoms()" ] } ], "metadata": { "anaconda-cloud": {}, "hide_input": false, "kernelspec": { "display_name": "Python [default]", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.1" } }, "nbformat": 4, "nbformat_minor": 1 }