{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# RDKit C++ API basic examples with cling and xeus-cling kernel\n", "\n", "\n", "\n", "- CERN's cling C++ interpreter https://github.com/root-project/cling\n", "- QuantStack xeus-cling jupyter notebook kernel https://github.com/QuantStack/xeus-cling" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Add RDKit to include and library paths" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "#pragma cling add_library_path(\"/opt/conda/lib\")\n", "#pragma cling add_include_path(\"/opt/conda/include/rdkit\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Load methotrexate from SMILES and count its atoms\n" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "33 atoms\n" ] } ], "source": [ "#include \n", "#include \n", "\n", "#pragma cling load(\"libRDKitSmilesParse.so\")\n", "#pragma cling load(\"libRDKitGraphMol.so\")\n", "\n", "std::string methotrexate_smi = \"CN(Cc1cnc2nc(N)nc(N)c2n1)c1ccc(C(=O)N[C@@H](CCC(=O)O)C(=O)O)cc1\";\n", "std::unique_ptr methotrexate(RDKit::SmilesToMol(methotrexate_smi));\n", "std::cout << methotrexate->getNumAtoms() << \" atoms\" << std::endl;" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Calculate some properties" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "logP: 0.2684\n", "molecular weight: 454.171\n" ] } ], "source": [ "#include \n", "#pragma cling load(\"libRDKitDescriptors.so\")\n", "\n", "double logp = RDKit::Descriptors::calcClogP(*methotrexate);\n", "std::cout << \"logP: \" << logp << std::endl;\n", "\n", "double e_mwt = RDKit::Descriptors::calcExactMW(*methotrexate);\n", "std::cout << \"molecular weight: \" << e_mwt << std::endl;\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Calculate inchi 
and inchi key" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "InChI: InChI=1S/C20H22N8O5/c1-28(9-11-8-23-17-15(24-11)16(21)26-20(22)27-17)12-4-2-10(3-5-12)18(31)25-13(19(32)33)6-7-14(29)30/h2-5,8,13H,6-7,9H2,1H3,(H,25,31)(H,29,30)(H,32,33)(H4,21,22,23,26,27)/t13-/m0/s1\n", "InChIKey: FBOZXECLQNJBKD-ZDUSSCGKSA-N\n" ] } ], "source": [ "#include \n", "\n", "#pragma cling load(\"libRDKitInchi.so\")\n", "#pragma cling load(\"libRDKitRDInchiLib.so\")\n", "\n", "RDKit::ExtraInchiReturnValues tmp;\n", "std::string inchi = RDKit::MolToInchi(*methotrexate, tmp);\n", "std::cout << \"InChI: \" << inchi << std::endl;\n", "\n", "std::string key = RDKit::InchiToInchiKey(inchi);\n", "std::cout << \"InChIKey: \" << key << std::endl;" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 2D coord generation and depiction" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "#include \n", "#include \n", "#include \n", "#include \n", "#include \"nlohmann/json.hpp\"\n", "\n", "#pragma cling load(\"libRDKitMolDraw2D.so\")\n", "#pragma cling load(\"libRDKitDepictor.so\")\n", "\n", "namespace ht\n", "{\n", " struct html\n", " { \n", " inline html(const std::string& content)\n", " {\n", " m_content = content;\n", " }\n", " std::string m_content;\n", " };\n", "\n", " nl::json mime_bundle_repr(const html& a)\n", " {\n", " auto bundle = nl::json::object();\n", " bundle[\"text/html\"] = a.m_content;\n", " return bundle;\n", " }\n", "}" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", " \n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", 
"\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "// generate the 2D coordinates:\n", "RDDepict::compute2DCoords(*methotrexate);\n", "\n", "// // generate SVG as string\n", "RDKit::MolDraw2DSVG drawer(400, 400);\n", "drawer.drawMolecule(*methotrexate);\n", "drawer.finishDrawing();\n", "std::string svgs = drawer.getDrawingText();\n", "\n", "// show the molecule\n", "ht::html mol_svg(svgs);\n", "mol_svg" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Morgan fingerprint and Tanimoto similarity" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Similarity between aspirin and paracetamol: 0.27451\n" ] } ], "source": [ "#include \n", "#include \n", "#include \n", "#include \n", "\n", "#pragma cling load(\"libRDKitFingerprints.so\")\n", "#pragma cling load(\"libRDKitDataStructs.so\")\n", "\n", "\n", "std::string aspirin_smi = \"CC(=O)Oc1ccccc1C(=O)O\";\n", "std::string paracetamol_smi = \"CC(=O)Nc1ccc(O)cc1\";\n", "\n", "std::unique_ptr aspirin(RDKit::SmilesToMol(aspirin_smi));\n", "std::unique_ptr paracetamol(RDKit::SmilesToMol(paracetamol_smi));\n", "\n", "// unhashed fps\n", "RDKit::SparseIntVect *fp_aspirin, *fp_paracetamol;\n", "\n", "fp_aspirin = RDKit::MorganFingerprints::getFingerprint(*aspirin, 2);\n", "fp_paracetamol = RDKit::MorganFingerprints::getFingerprint(*paracetamol, 2);\n", "\n", "double tani = TanimotoSimilarity(*fp_aspirin, *fp_paracetamol);\n", "std::cout << \"Similarity between aspirin and paracetamol: \" << tani << std::endl;\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Substructure search" ] }, { "cell_type": "code", 
"execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "SMARTS 1 match\n" ] }, { "data": { "text/plain": [ "{ {0 , 1}, {1 , 2}, {2 , 3}, {3 , 4} }" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#include \n", "#pragma cling load(\"libRDKitSubstructMatch.so\")\n", "\n", "std::unique_ptr mol_ss(RDKit::SmilesToMol(\"CC[C@H](F)Cl\"));\n", "std::unique_ptr patt(RDKit::SmartsToMol(\"C[C@H](F)Cl\"));\n", "\n", "RDKit::MatchVectType res;\n", "\n", "if( RDKit::SubstructMatch(*mol_ss , *patt , res) ) {\n", " std::cout << \"SMARTS 1 match\" << std::endl;\n", " } else {\n", " std::cout << \"Not SMARTS 1 match\" << std::endl;\n", "}\n", "\n", "res" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Maximum Common Substructure" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "\"[#6]1:[#6]:[#6]:[#6]:[#6]:[#6]:1\"" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#include \"GraphMol/FMCS/FMCS.h\"\n", "#pragma cling load(\"libRDKitFMCS.so\")\n", "\n", "std::vector mols;\n", "\n", "const std::string smi[] = {\n", " aspirin_smi,\n", " paracetamol_smi,\n", " methotrexate_smi\n", "};\n", "\n", "for (auto& i : smi) {\n", " mols.emplace_back(RDKit::SmilesToMol(i));\n", "}\n", "\n", "RDKit::MCSResult res = RDKit::findMCS(mols);\n", "res.SmartsString" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Murcko scaffold" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "c1ccc(NCc2cnc3ncncc3n2)cc1\n", "*c1ccc(N(*)Cc2cnc3nc(*)nc(*)c3n2)cc1\n" ] } ], "source": [ "#include \"GraphMol/MolHash/MolHash.h\"\n", "#pragma cling load(\"libRDKitMolHash.so\")\n", "\n", "std::unique_ptr murcko_mol(RDKit::SmilesToMol(methotrexate_smi));\n", "auto scaffold = RDKit::MolHash::MolHash(murcko_mol.get(), 
RDKit::MolHash::HashFunction::MurckoScaffold);\n", "std::cout << scaffold << std::endl;\n", "\n", "std::unique_ptr extended_murcko_mol(RDKit::SmilesToMol(methotrexate_smi));\n", "auto extended_murcko = RDKit::MolHash::MolHash(extended_murcko_mol.get(), RDKit::MolHash::HashFunction::ExtendedMurcko);\n", "std::cout << extended_murcko << std::endl;\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "C++14", "language": "C++14", "name": "xcpp14" }, "language_info": { "codemirror_mode": "text/x-c++src", "file_extension": ".cpp", "mimetype": "text/x-c++src", "name": "c++", "version": "14" } }, "nbformat": 4, "nbformat_minor": 2 }