{ "metadata": { "name": "" }, "nbformat": 3, "nbformat_minor": 0, "worksheets": [ { "cells": [ { "cell_type": "code", "collapsed": false, "input": [ "from rdkit import Chem\n", "from rdkit.Chem import AllChem\n", "from rdkit import DataStructs\n", "\n", "m1 = Chem.MolFromSmiles('O=C1CN(N=Cc2ccc([N+](=O)[O-])o2)C(=O)N1')\n", "m2 = Chem.MolFromSmiles('CCCC1COC(Cn2cncn2)(c2ccc(Cl)cc2Cl)O1')\n", "m3 = Chem.MolFromSmiles('CCCCCC=O')\n", "# similar to m1\n", "m4 = Chem.MolFromSmiles('CCCC1COC(Cn2cncn2)(c2ccc(Cl)cc2O)O1')\n", "\n", "mols = [m1, m2, m3, m4]" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 14 }, { "cell_type": "code", "collapsed": false, "input": [ "fps = [AllChem.GetMorganFingerprintAsBitVect(m, 2, nBits=1024, useFeatures=False) for m in mols]\n", "indx_fps = dict()\n", "for indx, fp in enumerate(fps):\n", " indx_fps[indx] = fp" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 15 }, { "cell_type": "code", "collapsed": false, "input": [ "def diversity(mols_fp, threshold):\n", " diverse = []\n", " similar = []\n", " \n", " for m1 in mols_fp:\n", " m1_fp = mols_fp[m1]\n", " sim_vals = []\n", " for m2 in mols_fp:\n", " m2_fp = mols_fp[m2]\n", " # if different molecules else skip\n", " if m1 != m2:\n", " sim_vals.append(DataStructs.FingerprintSimilarity(m1_fp, m2_fp, metric=DataStructs.TanimotoSimilarity))\n", " # end if\n", " # end for\n", " # Get Maximum similarity\n", " sim_vals = max(sim_vals)\n", " if sim_vals >= threshold:\n", " similar.append(m1)\n", " else:\n", " diverse.append(m1)\n", " # end for\n", " \n", " return (diverse, similar)\n", " " ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 16 }, { "cell_type": "code", "collapsed": false, "input": [ "(diverse, similar) = diversity(indx_fps, 0.8)\n", "print (diverse, similar)" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "([0, 2], [1, 3])\n" ] } ], "prompt_number": 17 }, { "cell_type": "code", "collapsed": false, "input": [], "language": "python", "metadata": {}, "outputs": [] } ], "metadata": {} } ] }