{ "cells": [ { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [], "source": [ "%matplotlib inline\n", "import matplotlib.pyplot as plt\n", "import os\n", "from rdkit import Chem\n", "from rdkit import RDConfig\n", "\n", "import pandas as pd\n", "import janitor\n", "from janitor import chemistry\n", "\n", "from rdkit.Chem import PandasTools\n", "from rdkit.Chem.Draw import IPythonConsole\n", "\n", "from sklearn.decomposition import PCA\n", "plt.style.use('ggplot')" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [], "source": [ "path = os.path.join(RDConfig.RDDocsDir,'Book/data/cdk2.sdf')" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [], "source": [ "df = PandasTools.LoadSDF(path)" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ClusterIDMODEL.CCRATIOMODEL.SOURCEROMolb_mmffld_Minimization_Converged-OPLS_2005idr_mmffld_Potential_Energy-OPLS_2005r_mmffld_RMS_Derivative-OPLS_2005s_st_Chirality_1s_st_Chirality_2s_st_Chirality_3
01ZINC038144571CORINA 3.44 0027 09.01.2008\"Mol\"/1ZINC03814457-78.64540.000213629NaNNaNNaN
12ZINC038144591CORINA 3.44 0027 09.01.2008\"Mol\"/1ZINC03814459-67.47059.48919e-0513_S_17_12_14_24NaNNaN
" ], "text/plain": [ " Cluster ID MODEL.CCRATIO MODEL.SOURCE ROMol b_mmffld_Minimization_Converged-OPLS_2005 id r_mmffld_Potential_Energy-OPLS_2005 r_mmffld_RMS_Derivative-OPLS_2005 s_st_Chirality_1 s_st_Chirality_2 s_st_Chirality_3\n", "0 1 ZINC03814457 1 CORINA 3.44 0027 09.01.2008 \"Mol\"/ 1 ZINC03814457 -78.6454 0.000213629 NaN NaN NaN\n", "1 2 ZINC03814459 1 CORINA 3.44 0027 09.01.2008 \"Mol\"/ 1 ZINC03814459 -67.4705 9.48919e-05 13_S_17_12_14_24 NaN NaN" ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.head(2)" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [], "source": [ "fp1=chemistry.morgan_fingerprint(df, mols_col='ROMol', radius=2, nbits=512, kind='bits')" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "pandas.core.frame.DataFrame" ] }, "execution_count": 32, "metadata": {}, "output_type": "execute_result" } ], "source": [ "type(fp1)" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511
00.01.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.01.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.01.00.00.00.00.00.00.00.00.00.01.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.01.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.01.00.00.00.00.00.00.00.00.00.01.00.00.00.00.00.00.00.00.01.00.00.00.00.00.00.00.00.01.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.01.00.00.00.00.00.00.01.00.00.00.00.01.00.00.00.00.00.00.00.00.00.00.00.00.01.00.00.00.00.00.00.00.00.00.00.00.00.00.00.01.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.01.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.01.00.00.00.00.00.00.00.00.00.00.01.01.00.00.01.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.01.00.01.01.00.00.01.00.00.00.00.00.00.00.00.00.00.00.01.00.00.00.00.00.00.00.00.01.00.00.00.00.00.00.00.00.00.01.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.01.00.00.01.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.01.00.00.00.00.00.00.00.01.00.00.00.00.00.00.00.00.00.00.00.00.01.00.00.00.00.00.01.00.00.00.00.01.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.01.00.00.00.00.00.00.00.00.00.00.00.00.00.00.01.00.00.01.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.0
10.00.00.00.01.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.01.00.00.00.00.00.00.00.00.00.01.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.01.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.01.00.00.00.00.00.00.00.00.00.01.00.00.00.00.00.01.00.00.01.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.01.00.00.00.00.00.00.01.00.00.00.00.01.00.00.00.00.00.00.00.00.00.00.00.00.01.00.00.00.00.00.00.00.00.00.00.00.00.00.00.01.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.01.00.01.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.01.00.00.00.01.00.00.00.01.00.00.01.00.00.00.00.00.00.00.01.00.00.00.00.00.00.00.00.00.01.00.01.01.00.00.00.00.00.00.00.00.00.00.00.00.00.00.01.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.01.00.00.00.00.01.00.00.00.00.00.01.00.00.00.00.00.00.00.00.00.00.01.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.01.01.00.00.00.00.00.00.00.01.00.00.00.00.01.00.00.00.00.00.00.00.01.00.00.00.00.00.01.00.00.00.00.01.00.00.00.00.01.00.00.00.00.00.01.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.01.00.00.00.00.00.00.01.00.00.00.00.00.00.00.00.00.00.00.01.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.01.00.00.00.00.0
" ], "text/plain": [ " 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511\n", "0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 
0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 1.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 1.0 1.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 
0.0 0.0 0.0 0.0\n", "1 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 1.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 
0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0" ] }, "execution_count": 33, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fp1.head(2)" ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(47, 512)" ] }, "execution_count": 34, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fp1.shape" ] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(47, 512)" ] }, "execution_count": 35, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Count-based Morgan fingerprints (same radius and size as fp1); these are the PCA input below.\n", "fp2 = chemistry.morgan_fingerprint(df, mols_col='ROMol', radius=2, nbits=512, kind='counts')\n", "fp2.shape" ] }, { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [], "source": [ "# The input is 47 x 512: with more than 500 features PCA's default 'auto' solver can\n", "# select randomized SVD, so pin the seed to keep the projection reproducible across runs.\n", "pca = PCA(n_components=3, random_state=0)" ] }, { "cell_type": "code", "execution_count": 45, "metadata": {}, "outputs": [], "source": [ "# Fit on the count fingerprints and project each molecule onto the 3 components.\n", "pca_res = pca.fit_transform(fp2)" ] }, { "cell_type": "code", "execution_count": 46, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 46, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAXYAAAD8CAYAAABjAo9vAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAAIABJREFUeJzt3Xl8VPW9//HXmZlMksmemZCNBEggIMgi+yIoELCiVqstRateta0PRa/dXO+1aq/lSmvRXrdqr0iV3lq1FW39uUYKqMi+ryFA2LIxmeyTzHp+fyCBMBMSMsuZTD7Pv5zvnDnz9gzzyXe+53u+R1FVVUUIIUTU0GkdQAghRHBJYRdCiCgjhV0IIaKMFHYhhIgyUtiFECLKSGEXQogoI4VdCCGijBR2IYSIMoZg7OSDDz5g5cqVKIpCXl4eCxcuxGg0BmPXQgghLlDAPXabzcZHH33E4sWLWbJkCV6vl7Vr1wYjmxBCiB4ISo/d6/XidDrR6/U4nU7S0tK6fE1FRUWnz1ksFqxWazCiBZ1k67lIzifZei6S80VbtpycnG5tpwRjrZgPP/yQN998E6PRyOjRo7nvvvt8tikpKaGkpASAxYsX43Q6O92fwWDA7XYHGiskJFvPRXI+ydZzkZwv2rJ1d4g74MLe3NzMkiVL+NnPfobJZOKZZ55h8uTJzJgx47yvkx578EVyNojsfJKt5yI5X7Rl626PPeAx9p07d9KvXz+Sk5MxGAxMmjSJ0tLSQHcrhBCihwIu7BaLhQMHDuBwOFBVlZ07d5KbmxuMbEIIIXog4JOnQ4YMYfLkyTz00EPo9XoGDhxIcXFxMLIJIYTogaDMipk/fz7z588Pxq6EEGHm9pbTpv4BlVoU0olT7sKgK9A6lghAUAq7EKJ38niP06L+BJUzkxla1H0keJ/DoBuoXTAREFlSQIg+rI2XOxR1AJVK2tRXNEokgkEKuxB9mFet9duuYgtzEhFMUtiF6MN0SlYn7f3CnEQEkxR2IfqweGUhOvI7tOnIJ457NEokgkFOngrRh+mUDBJ1r9CmvoxHrUKnZBKv3IVOydA6mgiAFHYh+jidYsGkPKp1DBFEMhQjhBBRRgq7EEJEGSnsQggRZaSwCyFElJHCLoQQUUYKuxBCRBkp7EIIEWWksAshRJSRwi6EEFFGCrsQQkQZKexCCBFlpLALIUSUkcIuhBBRRgq7EEJEGVm2N4xaHC4+2HyAY7WNTCnqz5SiXHSKonUsIUSUCUphb2lp4eWXX+bYsWMoisLdd99NUVFRMHYdNSrrm3n8nS84VtsIwBf7jzF6QCaP33Apep38cBJCBE9QCvuyZcsYM2YMv/jFL3C73TgcjmDsNqq8/NmW9qIO4HR72VZezZp9x5g5fICGyYQQ0SbgrqLdbmfv3r3MmjULAIPBQEJCQsDBok11o92nzeXx8nXpCQ3SCCGiWcA99pqaGpKTk3nppZc4cuQIBQUF3HbbbcTFxQUjX9SIi9H7bbckxYc5iRAi2imqqqqB7ODgwYP853/+J08++SRDhgxh2bJlxMfHs2DBgg7blZSUUFJSAsDixYtxOp2d7tNgMOB2uwOJFTI9zfbXL7bzh4/XYXeceW1OehJL770BS3JwfuFE8nGDyM4n2XoukvNFWzaj0di9ffck0NnMZjNms5khQ4YAMHnyZN577z2f7YqLiykuLm5/bLVaO92nxWI57/Na6mm24otyaWgawardR7E7XViS4rnj8tHgbMVqbdU0W7hEcj7J1nORnC/asuXk5HRru4ALe2pqKmazmYqKCnJycti5cyf9+/cPdLdR6YaJw7hh4jCtYwgholxQZsXccccdPPfcc7jdbvr168fChQuDsVshhBA9EJTCPnDgQBYvXhyMXQkhhAiQXBkjhBBRRgq7EEJEGSnsQggRZaSwCyFElJHCLoQQUUYKew/UtNqpc7RpHUMIIfyS9dgvwIF6G3d9uZJjDfX
odQqDklL41YRppMbGah1NCCHaSY+9m9xeL7/avI4dJ6upczqwtrWx8WQ1j238SutoQgjRgRT2blpfXcnRpkaf9sONDdjagrPWixBCBIMMxXST3e3G7WchTI+q4vB6NUgkosH2mhr+um8/bR43w9PN3DpiOLEG+VqKwMi/oG6ampVDrimRE/bmDu1ZpgSy4k1dvt7p9lBaaSMhNoaBGSkocq/TPu+z8nKe27KV+m/uOLapqpodJ0/y+1kz5XaJIiBS2LspISaGH110Ma+V7uFYUyN6RSE/MYkHx4zvskiv2XuU5V/soqK+mTiDngEZKTz6nWmkJ8pNNvqyt/eXthf10/bU1rL62HFmDcjXKJWIBlLYL8AV+YO49uLRvLdrO/GGGKZl5WDoomfV1ObktVU7qG5oAcDudLP3RC1Pf7CepxZcHobUIhKpqkp9m++UWafXy9aaainsIiBS2C9QotHIt/IHdXv7lbvK24v62U7UNmF3uDDFxgQznuglFEUhJTaWKnvHe+EadDoutlg0SiWihQzkhZius2EaBWSYvW+7dvBgks+51dnQtDRmDxigUSIRLaTHHmKzRgzg3Y37qarv2GvPMycRb5Teel92zeBCzPHx/L20FIfHw+DUVO4cParL4T0huiKFPcQS4oz8eNYYXl+9k8r6ZmJj9AywpPDA1ZO1jiYiwNTcHKbmdu8+lkJ0lxT2MJha1J+JhTkcrK4jITaG/uZkrSMJIaKYFPYwMeh1DM0xax1DCNEHSGEXQogQK7Ht4vP6Xdg9TlINJm7sN4VLQzj7SQq7EEKE0Bf1+/hz9Ze0eE9djFbhrON/jn/C4MwBISvAcvpdCCFCqKRuV3tRP83qbmL5kVUhe8+gFXav18uDDz7I4sWLg7VLIYTo9dq8Lr/t9S7fCxeDJWiF/cMPPyQ3NzdYuxNCiKjQz+g7C04BLknt/hXsFyoohb22tpYtW7Ywe/bsYOxOCCGixm2ZM8iLTW9/rEfHcFN/rs2ZGLL3DMrY/Z/+9CduvvlmWlvlhhNCCHE2szGJRYO+z8e27Rx11DI6IZ/pqcOI0YVu7krAe968eTMpKSkUFBSwe/fuTrcrKSmhpKQEgMWLF2M5z1Qfg8Fw3ue1JNl6LpLzSbaei+R8kZTtx5kdrzAOZTZFVf3cFugC/OUvf2HNmjXo9XqcTietra1MnDiR++6777yvq6io6PQ5i8WC1WoNJFbISLaei+R8kq3nIjlftGXLyene8hMB99hvuukmbrrpJgB2797NP//5zy6LuhBCiNCReexCCBFlgjp6P2LECEaMGBHMXQohhLhA0mMXQogoI4VdCCGijBR2IYSIMlLYhRAiysiyvUIEma21jfcPlgFwbeFg0uPjNE4k+hop7EIE0SeHy/njjh3U2O0AfHDwED8aOZIrC0K34JMQ55KhGCGCxOnx8Mae3e1FHaDGbmf5nj04PR4Nk4m+phcVdhUDe4ljJTrqtA4jhI/Sujoqmn3X2K5obmafzaZBItFX9YqhGIVmUpX/IIYydIodt5pBmzqXZn6sdTQh2iUbjcQbDDQ5nR3aTTEGUmNjNUol+qJe0WNPVp4lVtmBTjn1E9egnMSkvI+BvRonE+KM/ORkClNTfdoLU1PJT/a92YIQodIrCruBgz5tOqUZE+9pkEaIzi26dBrTcnLISUggOyGBaTk5LLr0Uq1jiT6mVwzFgN5vq4oxzDmEOL/k2FgWXzYDt9cLgEHXK/pOIsr0isLuVEdi4BCKcmbpeI9qxs73NUzVUbPbydHWenLikkiNidc6Tq9VW9nAey+vpr6mmfTsZL5z92WkZiRpHeuCSUEXWuoVhb2Je9DRQIy6Gx0teMigRf0eHvprHQ2Al8s3sLr2MFanndSYOMal5PLg4OnoFEXraL1KzfE6nr3nTWqOn5n1VLbtGA+8cnOvLO5CaKVXFHaIoUF9HIUGdNTjIZdIib6mtpz3q/bS6nUDcNJpp+TkQfrHJ3Nz/zEap+t
d3n1xVYeiDlB1xMaKl1Zz++NXa5RKiN6nV/1eVEnBwwAipagDfFxT2l7UT3PjZUPdcY0S9V62ygb/7VX+24UQ/kVOhQyhg23lfNG0Ho/q4WLTMCYkjEGnBOdvWmd3jA3oRrJ90OFdFRwvq/H7XIoMwwhxQaK+sH/RuI5PG1ZjV1sB2N92kLK2w/zAckNQ9l+cUciWxgoc3jOXjOtRGJuSHZT99xV/feYzHK0un/bUfolcd/cMDRIJ0Xv1qqGYC+VRPXzdvLm9qAN48LCvtYwqp//e4YWaZSlgXr8iMowmANJi4plhHsiteZcEZf99gdvlwVbd6Pe5kVMHY8n2vehHCNG5qO6xN3qaaPb6rt1hV1spaztMlrFfwO+hKAo/KZjKrf0v4aDdRl58CpmxiQHvty/R6XUY42J82hUdDBiWpUGi4Gvy2Pm0biM2dyMTki5itKkQRWZNiRCJ6sKeqE8gXomjBXuH9jjFSH9jTlDfK80Yz3hjblD32VfodArDJw3i5PE6PG5ve3vWADPTrhmlYbLgKG+r5OWqf3DSXQ/AlpZSRpoKuTvrWinuIiSiurDHKDGMMA1lbdNGXJyZuZJnzGVAbGTMgRen3PiLOej1CrvXl+Nqc2HJTeXmh67w25Pvbd6u/Vd7UQdwqm522g+xx17OiARZp10EX1QXdoBrUueSqk9mZ+tePKqXfGMuV6UWS08pwuj0Ohb8Yq7WMULC5mryaXOqLjY075XCLkIi4MJutVp58cUXqa+vR1EUiouLmTdvXjCyBYWiKMxInsKM5ClaRxF9VJzO/6+OzJj0MCcRfUXAhV2v13PLLbdQUFBAa2srDz/8MKNGjaJ/fxnqEAJgbGIR1bY6nGcNB2bFpDMzVWZOidAIuLCnpaWRlpYGQHx8PLm5udhsNinsQnzjmrRpKOjY2lKKw+vCYkjhxozZxOvk5hsiNBRV7ezayQtXU1PD448/zpIlSzCZTB2eKykpoaSkBIDFixfjPOcuM2czGAy43e5Onw+lPbU17Ku3MjEzl/6JKT7Pa5mtK5GcDSI7n2TruUjOF23ZjMbuLVUetMLe1tbG448/zvXXX8+kSZO63L6ioqLT5ywWC1arNRixus3h8fDQllXsrrfS7HaRboxjakYO/zFySocTrVpk665IzgaRnU+y9Vwk54u2bDk53ZumHZQrT91uN0uWLGH69OndKuqR6KX9W1hvraTZfeqydpuzjc8qy/m0slzLWL3egWobn+w+RGW978wQIURoBDzGrqoqL7/8Mrm5uVx9de9dWnV3fa1Pm8PrpaTyCFfkyJS0C+V0e3j8n1+yr6qWFoeL1PhYpg8bxL0zRslUUyFCLODCvn//ftasWUN+fj4PPPAAADfeeCNjx44NOFw46XX+i41BilCPLP1yO5uPVLU/rm918MnOUi7ql0LxRQO1CyZEHxBwYR82bBhvv/12MLJoalpGLvsaanF6z1zSnmQw8r0BwzRM1Xvtq7L5tDndXtaUHpPCLkSIRf2Vp911S8EIqtvsrD9ZQaPLSXpsHNf0H8xYc6bW0XolXWe/gPRRvaCoEBFBCvs3FEXhgRETsbtd1DpayYxLwKjXax2r15pckENptQ2X58wvoOS4WK4dPUTDVEL0DVLYz2EyxGAy9P6Fp7Q2f9wwrE12NpRX0dTmIN0Ux3cmjmR0XuBLJQshzk8KuwgJRVG4Z+Y47nC6sNnb6JdkIjszM2LnFAsRTaSwn0d1Uwtvb99Hq8vNNcMHc1GmWetIvU68MYZco/wCEiKcpLB34l9lR3n5621YW07dVm/t4RPMG17If1w9W+NkQghxfjJFwQ+vqvKXLXvaizpAk9NFSWk5NU2+t9oTQohIIoXdj5ome4eiflqtvY01B8rDH0gIIS6AFHY/kuKMxMf4jlIZ9Try03xXfBRCiEgihd2PBGMMyXG+y2OqKhTJCVQhRISTwn4BXF4v/7dhh9YxhBDivKSwd6LN7fHbfry+McxJhBDiwkh
h70S/RJNPW4xOx6yhBRqkiX7ONheO1s7vqiWE6D6Zx96Ju6aM4YlPv+JEQzNwqqiP65/J7GEF2Gp9124XPdPS2MrSx/7J8YM1qF6VzPx07njiGtIzk7WOJkSvJYW9EwXmVJ7/TjF/276fyqYWLh2Yy4zCPHSyPntQ/eGhd9m7obz9sa2qkRfv/xuPvnG73JBDiB6Swn4eKXGx/HDSKK1jRC1bVSMnyk76tFccsnJoVwWFI3M1SCVE7yeFXWjG3tyG0+HyaXc6XLQ0nLlArNHdxt+qt1LlaGJUUg5XmC8iRidLKgvRGSnsQjM5gyyYs1N8eu39+qcxdFw+ACfa6nn84IeccDQAsLb+EGvqylg0+Bop7kJ0QmbFCM3o9DpuuHcmGbmp7W3pWcnMu30qsfGnLhD73+Nr24s6gAeV3c1VfGjdE/a8QvQW0mMXmho9fQhDxuSx9oMduJweLv32KJLSEtqfP+lq9nmNF5UdTSe4tt/IcEYVoteQwi40Z0qKo/jGiX6fi9f5Lu0AYDEm+G0XQshQjIhw8ywXkaiP7dCWZUzm+1ljNUokROQLSo9927ZtLFu2DK/Xy+zZs7nuuuuCsVshmGUeihf4yLqHFo8Tc0wCP8ydTHqM9NijWUtjK3vWHybZnEjRJXlyTcMFCriwe71eli5dyqOPPorZbOaRRx5h/Pjx9O/fPxj5hKDYPJRi81CtY4gw+fTP6yj56yZqKxswxhnIKczgvmfmk2JJ1DparxHwUExZWRlZWVlkZmZiMBiYOnUqGzduDEY2IUQfU1vZwMfL11NbeWomlLPNTfnuSl771QcaJ+tdAi7sNpsNs/nMGuVmsxmbzRboboUQfdCqv22hweo7E6rqSC1ej1eDRL1TwEMxqqr6tPkbDyspKaGkpASAxYsXY7FYOg9lMJz3eS1Jtp6L5HySreeCmS/V7H/xt5gYA5YMCzrdhfVFI/nYhTJbwIXdbDZTe9Zqh7W1taSlpflsV1xcTHFxcftjq9Xa6T4tFst5n9dSX83mVd2UO1dR49lNvC6docZvE6e7sNsE9tVjF6hIzgbBzTfxyov45C9ftw/FnJZX1K9HIwGRfOx6ki0nJ6db2wVc2AsLC6msrKSmpob09HTWrl3LfffdF+huo0qzp5FVTR/R4LERpzNxaeIcMmO69wFFAq/q4Sv7bznp2Qec+jlc6drK5PifkGoYoG04EVWS0kzc+MAc3vvDamxVjRjjYhgwLIvbHrtK62i9SsCFXa/Xc8cdd7Bo0SK8Xi8zZ84kLy8vGNmigt3bwpu2P2L1VLe3VbqOcX3qreQY8zVM1n1HXV9g9ezndFEHsKs17HK8yaWGh7ULJqLSJZcNZfT0ImqO2jAlx5GcLlNbL1RQ5rGPHTuWsWPlghF/1jZ/3qGoAzR5G/ii+TO+n/7D9rb9x2vZUlbF4Jw0xg3ORqeLnHm71e6dqPjeKrDVW6dBGtEX6HQKWQPlxvE9JUsKhFid2//dluzeU2f+PV4vv3lnLTsO1dDicBEbo6cgK40nbp6BKTYmnFE7laDr57c9RokPcxIhRHfIkgIhlm7wf9bbpDt1scWnWw6zYX8FLd+sS+5wedh7zMqyT7eFLWNXimKvIlHJ6tAWg4kBxsu1CSSEOC8p7CE2NXE2Fn1mh7ZkXSozEq8AYFPpCTxe3ymjh6rqw5KvO4xKIlNMvyBLP4ZkXT5mfREj437AICnsQkQkGYoJsXidiZvS72J180fUu23EfzMrJiPmVA/YGOP/I4gxRNZNJJL02UxNuF/rGKKbXE43lYetpJgTe9Wl+NaqBt5b+jUNtS1YspK57kdTSZGTpxdMCnsYJOgTmZfyPb/PfWfKUHYerqHB7mhvi4vRM+NimVkkeuZf72ym5K8bsVU3YkqMpXBkLj9edB0xxsj+ulcdtfH7B9/jZMWZOeylOyp46PnvkZxm0jBZ7yNDMRor6m/mtjmjGJSZSnpiHHkZyVw3ZSjzJgzROlrE2NtczZM
HP+Y/D/yT10+sx+F1ax0pYlUcsvKPP35BVXktzlYX9Seb2fyv/bz5u0+1jtald/93bYeiDqeK/XuvrdUoUe8V2X/C+4jiSwqYNXoQdoeLeKMBvV7+3p62pq6MPxz7knr3qZtbb2k6zs7mCn5TdC16RY7TuT77ywYabS0dG1U4uOO4NoEuQMO5ub9hrWgMc5LeTwp7kK3bd4I/f7GTclczXp2KOTGeOcMGcsu4EeddU1qnU0iM93+3oL5sRfX29qJ+WmnLSdbUHWRmuvyqOZfH7Xu9AYC3F6yflZjif/psWkbvOUcQKaTLE0S7j9Tw/D83sM/RQKvOgwMvFc0tvLl1L3/eLDdf7olzizqACw/bmiK/B6qFWfPHk+CnQOYPzfSzdWS57odTMGcmdWjrl5vKdT+cqlGi3ksKexD97cu9WHGinvM7yOnx8uXh4zR5ailt+5qTrqPaBOyFkg1xPm16FIYnZPnZWgwcnk3xgvGYs08t0GZKimXYhIHc/PC3NE7WtbzCDO77zbWMnV7IkFG5TJhVxM+XXC899h6QoZggaXO62Xu0FtW3DgEqOUWbea9xJa1qI0biyTAM5IqkuzAoMvxyPldljKDq+DoaPW3tbYWmDGabizRMFdm+fecMZn1/AmXbj2PJSaH/YP9XDkei/gUZ3LPo21rH6PWksAfJ0k+20uJwoVfBE0uH30J5/Wvol3OCVvXUQKeTVk6497LevoJpCd/XJnAvMdd8EakGE/+o2YnD62ZgfDq35U7GoETWPP9Ik5gSz5gZcg6ir5LCHiQHK08tiKV3gt7RsbgXFdjQ633PXtW4y8MXsBebmDKAiSnBWR64oqKBOpudgkIL8fGRsRaPEMEmhT1I9N/c2UUBjA3giQFPPAxIT+KS3GyOnbPCI4BOTnGETavdye9//y/Ky23Y7U769UukuHgYV84boXU0IYJOCnuQjB+STVllHW6PFwUwuCA9xsj9xZNIMRVR03wAh3pmnq4OPXnG4BaVbXuOs/y99TgcbvKzU1hw9WhMceHplaqqBze16ElBp8SG5T0vxGuvrWPPnqr2xzU1zXzwwS5Gj+lPTs6F3QlKiEgnhT1I5s8Yjq25jS1lVbS0OUlLjOPK8YMZ2t8MmLkk7lvsc3yF3dtArC6B/oaLuCTuyqC9/9qtR1n27hbqG09ND9xdVsOBI7X810+KQ77uTIPnA+o9f8dDHQoJJOgmkqG/77zz9sPtyBHf5ZMbG9v45JM93H77FA0SCRE6UtiDRFEU7r5qHG1ON/UtbViSTRjOuoJ0VHwxI+Iuo8lrw6RLwaj4nT7TYx+s2tde1E87dLyOVesPMWda6E6itXlLsXpexcvp1SjrafB+gF5Jx6y/JWTve6E6+yOjk6tXRRSSf9VBFmc0kJWW2KGon6ZXYkjVZwa9qAM0NTt82rxelX2HQ3sj3zrPW2cV9dNc2D1fh/R9L9SgAt+78aSkxvOtKy/SIE3voaoqquq7rLSIbNJjjxLJibFUWZs7tOl1CsMLQzuHWcXZSXtkLdR1++1TaG5yUF5eS0uLk4yMRK64YjiZmclaR4tIzjYXrz/+Hod3HMfj9pJdmMHtv76+Vy0B3JdJYY8S18wcxmt/30LdWcMxBXnpXDZxUEjfN0k3C7tnAyodfzEYdYND+r4XKjbWwP0PFGO1NtPY0Eb/vDSMRpkL35k/3v82W0rOLINx8piN5+5ezqNv3xVR506Ef1LYo8TkMfkMzMvijXfX0epwM7B/KvO/NdLvkFAwJeoup0VdT4t3HV7qUYgnVhlCP/29IX3fnrJYErFIr/O8mupaOLzTdy2eE2XVlG4qZ+iE0HYWROCksEeRi4fm8OCPZ4T1PRVFIcvwMA7vMVrVDRgpJF43Wnp1vZi9oRVHm8un3dnqoq66wc8rRKQJqLAvX76czZs3YzAYyMzMZOHChSQkyG2s+qJYXR6xyF2fokFGXjrpmcnYGzrOskrPTuXiabJGT28Q0O/0UaNGsWT
JEn73u9+RnZ3NihUrgpVLCKERnV7HtffOxpyT2t6WbE5g5oKJJMot6nqFgHrso0ePbv/voqIi1q1bF3AgIYT2xs0dweCxA/j8/77GYXcy88ZJZA20aB1LdFPQxthXrlzJ1KmyIL4Q0SLFksj1P5mjdQzRA4raxdUHTz75JPX1516AAgsWLGDChAkAvPvuuxw8eJD777+/05NmJSUllJSUALB48WKcTv/znwEMBgNud2TNgz5NsvVcJOeTbD0XyfmiLZvR2L37N3RZ2LuyatUqPvvsMx577DFiY7u/+FNFRUWnz1ksFqzW0F4x2VOSreciOZ9k67lIzhdt2XJycrq1XUBDMdu2beP999/nV7/61QUV9WixpaKad3bvp9XtoTA9hR+OHYkpRtb4FkJoK6DCvnTpUtxuN08++SQAQ4YM4c477wxKsEhXcvAIL6zfSr3j1BWX26pq2HvSxnPzZmHQyRI8QgjtBFTYn3/++WDl6HX+vqe0vaiftt9q49OD5cwbUqBRKiGEkCtPe6zB4buaokdV2VF1Ugq7iCiqqvLPP/yLzZ/spq3FQVpWCt/9xVwGXxKc2w2KyCNjBj2UEud7TkGvUxid1XvuCC/6ho9e/YIP/7iGY/urOHm8jtJN5fzxgXdoOGc1UBE9pLD30PeGF5F6TnEfZk5nTqH0gkRk2fzJLpznrP1iPV7HR6+u0SiRCDUZiumhWQUDsJji+euu/bS53Qwxp/FvY0bIiVMRcfwt6AXQcLIpzElEuEhhD8CorH6MkqEXcY7qykZa7U7yBqajD/Gyyd1hyU2loqymQ1tMbAwTrhypUSIRalLYhQiSpsY2Xnj6c44frcPldGPpl8QNN41j3KSBmub6waPXYD3+BhWHToIKsSYjI6YNZsysYZrmEqEjhb0PUFWVFR/sZNuuCtweLwP6p3HrgvHEx/WOi6kcHjdOr5ekGCOgovAvdHwExOBlASqjtI4IwB+fW83+PVXtjyuO1/PX1zcw7OJsEhICu4DP6/Gyc/U+ak/YGDt3JKmZKd1+bUZeOr98ZyGr3tpA5aGTTLhyJMOnFMqa+VFMCnsf8Kc3N7LyizLcbi8Ah4/YqKpp4rEH5kT0l9vhcbNo71eiTcmSAAAPo0lEQVTsabTi8nrIikvk6VEbSDd+jkIbAAob8HA7Kj/QNmubi4pjdT7tJ6ubWLu6jDnzRvR439YTNp64bgkVpVW4HG7+8T+fMmPBZK6/f1639xFrMnLF7Zf2OIPoXbQfABQh5XS62b67sr2on1Z+1Mb+AzWdvCoy/Pfer/i8ppzKtmaszlZqHIfQsaq9qAMoNKDnPejkptrhoqrQ2aJLXk9AyzHx3N2vcmTncVyOUwtG1VU1sHL5V1SUVXXxStFXSWGPck3NDux236LX5nBz9ITvqp2RwuHxsKex4wJJY1IqSDO2+NnaCnS+qFw4xMXHkJ3jOzxizkhk2uVDerxfVVU5UVbp095U28zKN77q8X5FdJPCHuXSUuNJTYnzaU9OimXk8GwNEnWPW/XgUjv+yjjckk6jy3fZUpUkQPubQPz4vssoLMogLj4GnQ6ycpL57g/GkZgU2Pi6waD3225Kjg9ovyJ6yRh7FDtUXsvXm44wMC+d+oY2mppPLYNgNOq5ZFQu2ZnJGifsXILBSFZcAicd9va2Ay0ZHGjOZmzqEU6fGlAxoDIJSNQm6FlS00w8+t/XcOyIDXuLk4IhGRiNgX3FFEXhoilFHD9Q2WGsx5KXzpw7wnvjctF7SGGPQqqq8odla9m8/Th2uwudDizmRIYOzkBRFKZNGsjEsflax+zS/UVTeGz3ao7ZG/ACFmM8OxsfZkza5yjsAfSoTMbLj7SO2k5RFPIHmoO6z3ueu53G+ibKNpfjbHWSnp3KDQ/OIyld+z9mIjJJYY9CO/dUsnHLMdq+Odnm9ULNyWZys1N48N8v1zbcBRiclMafJl5DSfVhbM42rsgqICPWhJfxWkcLq5jYGBa++G+0NrV
hb2olPTs1omczCe1JYY9CX6w73F7Uz1Zd0/suITfq9MzLHqx1jIgQnxRHfJLv+RIhziUnT6NQcicn64xG/yfhgs3pcGNv9l3WWAgRHtJjj0LXXDGCDVuOYa09MzXQYNAx5uLckL6v0+HmtWdWcqi0BrfLg7lfEjcvnM6AwRkhfV8hREfSY49CqSnx3HXbFIYUWDCnm8jJSuaKWUOZf93okL7va8/+iw1rDmKtaqK+1s7BvdW88psSnH6GhYQQoSM99ig1YlgW//XIt3C5POj1OnS60J5sczndHN5f7dNefaKedasOMOOKi0L6/kKIM6SwR7mYmPCMq7tdXtwur0+7qkJjfWunr1NVD1bPepq9ZaTrx5GsGy4zPoQIkBR2ERTxCUbMmYnU1Xa85D8l3cTUWUV+X+NWW9jleIJm72FUXFS6PyJFdzHDYx9GUcLzB0mIaCRj7CJobrl3Btl5ae3DPqnpJmZdPYL0DP8X0hxyLqXJW4rKqTv8eLBj826myv1Z2DILEY2kxy6CJm+QhSde+C4bVpfRWN/K5JlDSLN0fnVki/eon1YPNs8msmO+FbqgQkS5oBT2f/zjH/z5z3/m1VdfJTk5ctcfEf411rfy3vINnKxqJCXNxLU3TyAjq2efY4zRwLQ53bszj06J8bvWrQ65CEeIQARc2K1WKzt37sRi0X51PXHhmhpa+e1D71Nx9MxNIkp3V/Kz/7qK7Ly0kL53hn4Gzd7DeDlzcjWGVPJirg/p+woR7QIeY3/99df5wQ9+IDMZeqn3/29Th6IOYK1q4u9/Wh/y986JuZI8w3cwKfkYFTMJSiEFxjtI1BeE/L2FiGYB9dg3bdpEeno6AwcO7HLbkpISSkpKAFi8ePF5e/gGgyFifwFEW7Z6q/+piPZmV9D/P/3ls3APqno3HtWBXonTrIMQbZ9rOEVyvr6arcvC/uSTT1Jf73unnQULFrBixQoeffTRbr1RcXExxcXF7Y+tVmun21oslvM+r6Voy5aY4nvjCoC4eH3Q/z+7zufv7kjhEW2fazhFcr5oy5aTk9Ot7bos7L/85S/9th89epSamhoeeOABAGpra3nooYd46qmnSE1NvYCoQkvX3jyBfTsqqKloaG9Lz0jg2psnaJhKCBGIHg/F5Ofn8+qrr7Y/vueee3jqqadkVkwvk2ZO4BeLrubvy9ZTV9tMYnIc1948gfyCyPz5KoTomsxjF2RkJXPXI3O0jiGECJKgFfYXX3wxWLsSQggRAFlSQAghoowUdiGEiDJS2IUQIspIYRdCiCgjhV0IIaKMTHcUQeH2ePlwbSk7yqqJi43hhssvYlBOaBcRE0L4J4VdBExVVRb9aQ07yqrwfrMM7+5DNfzo22OZNipf23BC9EEyFCMCtrW0kr3lJ9uLOkB9cxvvrdmnXSgh+jDpsYtuqW9u4/01+6hrbOXSMQMYNzS7fSXGLfsrcbg8fl/j8XjR66X/IEQ4SWEXXdp35CTP/nUdNXWnVl9ct/s4Ey7K5ec3TkFRFAbnpWPQ63B7vB1elxhvlKIuhAbkWye69MZH29uLOoDD5WHz/gr2lJ8EYPqoAQzM7riipynOwOWXDAxnTCHEN6THLs5LVVVqG+w+7a0ON19uO8qIQf3Q63U8fsfl/On/beVYTSMxMTpmjStg1rhB4Q8shJDCLs5PURTijDG+7UBuRlL740STkXu/NymMyYQQnZGhGNGlicNzMRr0Hdr690tmzsRCjRIJIc5HeuyiSzfNHYlBr2PDnhM4XG4y0xP58bfHEWuUfz5CRCL5ZoouKYrC94sv5vvFF2sdRQjRDTIUI4QQUUYKuxBCRBkp7EIIEWWksAshRJSRwi6EEFFGCrsQQkQZRVVVtevNhBBC9BYR2WN/+OGHtY7QKcnWc5GcT7L1XCTn66vZIrKwCyGE6Dkp7EIIEWX0TzzxxBNah/CnoKBA6widkmw9F8n5JFvPRXK+vphNTp4KIUSUkaEYIYSIMhGxuuOzzz5
LRUUFAHa7HZPJxNNPP+2z3T333ENcXBw6nQ69Xs/ixYtDnu3tt9/m888/Jzk5GYAbb7yRsWPH+my3bds2li1bhtfrZfbs2Vx33XUhz7Z8+XI2b96MwWAgMzOThQsXkpCQ4LNduI9bV8fC5XLxwgsvcOjQIZKSkvjpT39Kv379QpoJwGq18uKLL1JfX4+iKBQXFzNv3rwO2+zevZvf/va37XkmTZrEd7/73ZBng64/J1VVWbZsGVu3biU2NpaFCxeGbZihoqKCZ599tv1xTU0N8+fP56qrrmpvC+exe+mll9iyZQspKSksWbIEgObmZp599llOnjxJRkYGP/vZz0hMTPR57apVq3j33XcBuP7667n88stDni3s31U1wrz++uvqO++84/e5hQsXqg0NDWHN89Zbb6nvv//+ebfxeDzqvffeq1ZVVakul0u9//771WPHjoU827Zt21S3262qqqouX75cXb58ud/twnncunMsPv74Y/WVV15RVVVVv/zyS/WZZ54JSzabzaYePHhQVVVVtdvt6n333eeTbdeuXepTTz0Vljzn6upz2rx5s7po0SLV6/Wq+/fvVx955JEwpjvD4/GoP/rRj9SampoO7eE8drt371YPHjyo/vznP29vW758ubpixQpVVVV1xYoVfr8PTU1N6j333KM2NTV1+O9QZwv3dzWihmJUVeXrr79m2rRpWke5IGVlZWRlZZGZmYnBYGDq1Kls3Lgx5O87evRo9PpTdzYqKirCZrOF/D270p1jsWnTpvZe0uTJk9m1axdqGE71pKWltfdw4+Pjyc3NjYhj1l2bNm1ixowZKIpCUVERLS0t1NXVhT3Hzp07ycrKIiMjI+zvfdrw4cN9euMbN27ksssuA+Cyyy7z+x3ctm0bo0aNIjExkcTEREaNGsW2bdtCni3c39WIGIo5be/evaSkpJCdnd3pNosWLQJgzpw5FBcXhyXXJ598wpo1aygoKODWW2/1+dBsNhtms7n9sdls5sCBA2HJdtrKlSuZOnVqp8+H67h151icvY1er8dkMtHU1NQ+3BUONTU1HD58mMGDB/s8V1paygMPPEBaWhq33HILeXl5Yct1vs/JZrNhsVjaH5vNZmw2G2lpaWHLB/DVV1912vnS8tg1NDS0H4u0tDQaGxt9tjn332d6enrY/7iH47satsL+5JNPUl9f79O+YMECJkyYAJz/H8zpfaSnp9PQ0MCvf/1rcnJyGD58eEizzZ07t32c8K233uKNN95g4cKFHbbz19tUFCXgXF1lO33c3n33XfR6PdOnT+90H6E4bv5051iE8nh1R1tbG0uWLOG2227DZDJ1eG7QoEG89NJLxMXFsWXLFp5++mmee+65sOTq6nPS+rgBuN1uNm/ezE033eTznJbHLhDhPIbh+q6GrbD/8pe/PO/zHo+HDRs2nPdkQXp6OgApKSlMmDCBsrKyoBSorrKdNnv2bH7zm9/4tJvNZmpra9sf19bWBq0X1VW2VatWsXnzZh577LFO/4GG6rj5051jcXobs9mMx+PBbrf7PckVCm63myVLljB9+nQmTZrk8/zZhX7s2LEsXbqUxsbGsPya6OpzMpvNWK3W9sfB/HfWXVu3bmXQoEGkpqb6PKflsYNTx62uro60tDTq6ur8vm96ejp79uxpf2yz2UL2XThXOL+rETPGvnPnTnJycjr8TDpbW1sbra2t7f+9Y8cO8vPzQ57r7DHMDRs2+P1pWVhYSGVlJTU1NbjdbtauXcv48eNDnm3btm28//77PPTQQ8TGxvrdJtzHrTvHYty4caxatQqAdevWMWLEiLD0mlRV5eWXXyY3N5err77a7zb19fXtPeOysjK8Xi9JSUkhz9adz2n8+PGsWbMGVVUpLS3FZDJF1DCMVsfutPHjx7N69WoAVq9e3f6L9mxjxoxh+/btNDc309zczPbt2xkzZkzIs4X7uxoxFyi9+OKLDBkyhLlz57a32Ww2XnnlFR555BGqq6v
53e9+B5zq3V966aVcf/31Ic/1/PPPU15ejqIoZGRkcOedd5KWltYhG8CWLVt4/fXX8Xq9zJw5MyzZ/v3f/x23293e2x0yZAh33nmn5sfN37F46623KCwsZPz48TidTl544QUOHz5MYmIiP/3pT8nMzAxpJoB9+/bx2GOPkZ+f3/6H5MYbb2zvBc+dO5ePP/6YTz/9FL1ej9Fo5NZbb2Xo0KEhz9bZ5/Tpp5+2Z1NVlaVLl7J9+3aMRiMLFy6ksLAw5NlOczgc3H333bzwwgvtvfOz84Xz2P3+979nz549NDU1kZKSwvz585kwYQLPPvssVqsVi8XCz3/+cxITEzl48CCfffYZd911F3BqjHvFihXAqemOM2fODHm2FStWhPW7GjGFXQghRHBEzFCMEEKI4JDCLoQQUUYKuxBCRBkp7EIIEWWksAshRJSRwi6EEFFGCrsQQkQZKexCCBFl/j9pTTU0nY/sOQAAAABJRU5ErkJggg==\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "plt.scatter(pca_res[:,0], pca_res[:,1], c=df.Cluster)" ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [], "source": [ "df['SMILES'] = df.ROMol.apply(Chem.MolToSmiles)\n", "df.head(2)" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "92fa73726aa1402fa227ca8abbc7b84d", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "08b535349135422badf672199068bc01", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(IntProgress(value=0, description='mols', max=47, style=ProgressStyle(description_width='initial…" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n" ] } ], "source": [ "df = chemistry.smiles2mol(df, smiles_col='SMILES', mols_col='newROMol', progressbar='notebook')" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ClusterIDMODEL.CCRATIOMODEL.SOURCEROMolb_mmffld_Minimization_Converged-OPLS_2005idr_mmffld_Potential_Energy-OPLS_2005r_mmffld_RMS_Derivative-OPLS_2005s_st_Chirality_1s_st_Chirality_2s_st_Chirality_3SMILESnewROMol
01ZINC038144571CORINA 3.44 0027 09.01.2008\"Mol\"/1ZINC03814457-78.64540.000213629NaNNaNNaNCC(C)C(=O)COc1nc(N)nc2[nH]cnc12\"Mol\"/
12ZINC038144591CORINA 3.44 0027 09.01.2008\"Mol\"/1ZINC03814459-67.47059.48919e-0513_S_17_12_14_24NaNNaNNc1nc(OCC2CCCO2)c2nc[nH]c2n1\"Mol\"/
" ], "text/plain": [ " Cluster ID MODEL.CCRATIO MODEL.SOURCE ROMol b_mmffld_Minimization_Converged-OPLS_2005 id r_mmffld_Potential_Energy-OPLS_2005 r_mmffld_RMS_Derivative-OPLS_2005 s_st_Chirality_1 s_st_Chirality_2 s_st_Chirality_3 SMILES newROMol\n", "0 1 ZINC03814457 1 CORINA 3.44 0027 09.01.2008 \"Mol\"/ 1 ZINC03814457 -78.6454 0.000213629 NaN NaN NaN CC(C)C(=O)COc1nc(N)nc2[nH]cnc12 \"Mol\"/\n", "1 2 ZINC03814459 1 CORINA 3.44 0027 09.01.2008 \"Mol\"/ 1 ZINC03814459 -67.4705 9.48919e-05 13_S_17_12_14_24 NaN NaN Nc1nc(OCC2CCCO2)c2nc[nH]c2n1 \"Mol\"/" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.head(2)" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "# add_column is a pyjanitor DataFrame method, not a native pandas one.\n", "# It refuses to add a column that already exists, so drop any stale 'NumAtm'\n", "# first; this keeps the cell safe to re-run.\n", "num_atoms = [mol.GetNumAtoms() for mol in df.ROMol]\n", "df = df.drop(columns='NumAtm', errors='ignore').add_column('NumAtm', num_atoms)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "df.head(2)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.8" } }, "nbformat": 4, "nbformat_minor": 2 }