{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Machine learning - Features extraction\n", "\n", "Runs binary and multi-class classifiers on a given dataset.\n", "The dataset is read from a Parquet file. It must contain a feature vector column named \"features\" and a classification column.\n", "\n", "## Imports" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import mltoolkit\n", "from pyspark.sql import SparkSession\n", "import numpy as np\n", "import sklearn" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Configure Spark Session" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "spark = SparkSession.builder.appName(\"datasetClassifier\").getOrCreate()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Read in data from parquet file" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Total number of data: 18491\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
structureChainIdalphabetacoilfoldTypefeatures
01LBU.A0.3615020.1079810.530516other[-0.03669819220865391, 0.13017714411934028, 0....
11LC0.A0.4103450.2758620.313793alpha+beta[0.017792403316538488, 0.06889735366958401, 0....
21LC5.A0.4281690.1577460.414084alpha+beta[0.12736012024892182, -0.0036459625095703716, ...
31LFP.A0.4279840.2345680.337449alpha+beta[0.07269115472257498, -0.010540929652990833, 0...
41LFW.A0.3226500.2735040.403846alpha+beta[-0.027897640212830196, 0.0941510383131058, 0....
\n", "
" ], "text/plain": [ " structureChainId alpha beta coil foldType \\\n", "0 1LBU.A 0.361502 0.107981 0.530516 other \n", "1 1LC0.A 0.410345 0.275862 0.313793 alpha+beta \n", "2 1LC5.A 0.428169 0.157746 0.414084 alpha+beta \n", "3 1LFP.A 0.427984 0.234568 0.337449 alpha+beta \n", "4 1LFW.A 0.322650 0.273504 0.403846 alpha+beta \n", "\n", " features \n", "0 [-0.03669819220865391, 0.13017714411934028, 0.... \n", "1 [0.017792403316538488, 0.06889735366958401, 0.... \n", "2 [0.12736012024892182, -0.0036459625095703716, ... \n", "3 [0.07269115472257498, -0.010540929652990833, 0... \n", "4 [-0.027897640212830196, 0.0941510383131058, 0.... " ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "parquetFile = './input_features/'\n", "data = spark.read.parquet(parquetFile).cache()\n", "\n", "df = data.toPandas()\n", "print(f\"Total number of data: {df.shape[0]}\")\n", "df.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Select only alpha and beta foldType" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Total number of data: 4937\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
structureChainIdalphabetacoilfoldTypefeatures
51LGH.A0.8571430.00.142857alpha[0.23627377279441464, 0.05140024884180589, 0.4...
61LGH.B0.7441860.00.255814alpha[0.07006392560221933, -0.05091538017785007, 0....
71LGH.D0.8571430.00.142857alpha[0.23627377279441464, 0.05140024884180589, 0.4...
81LGH.E0.7441860.00.255814alpha[0.07006392560221933, -0.05091538017785007, 0....
91LGH.G0.8571430.00.142857alpha[0.23627377279441464, 0.05140024884180589, 0.4...
\n", "
" ], "text/plain": [ " structureChainId alpha beta coil foldType \\\n", "5 1LGH.A 0.857143 0.0 0.142857 alpha \n", "6 1LGH.B 0.744186 0.0 0.255814 alpha \n", "7 1LGH.D 0.857143 0.0 0.142857 alpha \n", "8 1LGH.E 0.744186 0.0 0.255814 alpha \n", "9 1LGH.G 0.857143 0.0 0.142857 alpha \n", "\n", " features \n", "5 [0.23627377279441464, 0.05140024884180589, 0.4... \n", "6 [0.07006392560221933, -0.05091538017785007, 0.... \n", "7 [0.23627377279441464, 0.05140024884180589, 0.4... \n", "8 [0.07006392560221933, -0.05091538017785007, 0.... \n", "9 [0.23627377279441464, 0.05140024884180589, 0.4... " ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = df[(df.foldType == 'alpha') | (df.foldType == 'beta')]\n", "\n", "print(f\"Total number of data: {df.shape[0]}\")\n", "df.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Basic dataset information and setting" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Feature count : 50\n", "Class count : 2\n", "\n", "Dataset size (unbalanced) : 4937\n", "alpha 3684\n", "beta 1253\n", "Name: foldType, dtype: int64\n", "\n", "Dataset size (balanced) : 2506\n", "beta 1253\n", "alpha 1253\n", "Name: foldType, dtype: int64\n" ] } ], "source": [ "# Notebook-wide settings: label column, test-set fraction and random seed.\n", "label = 'foldType'\n", "testFraction = 0.1\n", "seed = 123\n", "\n", "# Feature count is taken from the first row's vector\n", "# (assumes every row has a vector of the same length -- TODO confirm).\n", "vector = df.features.iloc[0]\n", "featureCount = len(vector)\n", "print(f\"Feature count : {featureCount}\")\n", "\n", "# Address the class column through `label` so the setting above is the single source of truth.\n", "classCount = df[label].unique().size\n", "print(f\"Class count : {classCount}\\n\")\n", "\n", "print(f\"Dataset size (unbalanced) : {df.shape[0]}\")\n", "print(df[label].value_counts())\n", "\n", "# Downsample per class; the counts printed afterwards show the resulting class sizes.\n", "df = mltoolkit.downsample(df, label)\n", "print(f\"\\nDataset size (balanced) : {df.shape[0]}\")\n", "print(df[label].value_counts())" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Random Forest Classifier" ] }, { 
"cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", " Class\tTrain\tTest\n", "\n", "alpha\t1126\t127\n", "\n", "beta\t1129\t124\n", "\n", "Total time taken: 0.20052623748779297\n", "\n", "Methods\tRandomForestClassifier\n", "AUC\t0.9726631953263907\n", "F Score\t0.9032258064516129\n", "Accuracy\t0.9043824701195219\n", "Precision\t0.9032258064516129\n", "Recall\t0.9032258064516129\n", "False Positive Rate\t0.0967741935483871\n", "True Positive Rate\t0.905511811023622\n", "\t\n", "Confusion Matrix\n", "['alpha' 'beta']\n", "[[115 12]\n", " [ 12 112]]\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/opt/miniconda3/envs/mmtf-workshop-2018/lib/python3.6/site-packages/sklearn/ensemble/forest.py:248: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22.\n", " \"10 in version 0.20 to 100 in 0.22.\", FutureWarning)\n" ] }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAEWCAYAAACJ0YulAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAAIABJREFUeJzt3XeYFFXWx/HvEQQMiAjoCoggQZKIOAbMEZFV0TXBogsm1pxWV13DquuaWLMoAuYAKitiYEVfRdlVEVFACSpIBpWMIILMcN4/bs3QDBN6humu6enf53n6obuquutU09On771V55q7IyIiArBV3AGIiEjloaQgIiIFlBRERKSAkoKIiBRQUhARkQJKCiIiUkBJQZJmZr3M7N2446hMzGy1me0Rw36bmpmbWfV07zsVzGyKmR1RjufpM1nBlBQylJnNNrNfoy+lH83sGTPbPpX7dPcX3b1LKveRyMwOMrMPzGyVma00szfNrG269l9EPB+a2fmJy9x9e3efmaL9tTKzV81sSXT8X5nZ1WZWLRX7K68oObXYktdw93bu/mEp+9ksEab7M5kNlBQy24nuvj3QEdgHuCHmeMqlqF+7ZtYZeBcYATQEmgGTgI9T8cu8sv3iNrPmwGfAPGAvd68DnA7kALUreF+xHXtle98FcHfdMvAGzAaOSXh8L/B2wuOawL+AucBPwABgm4T13YGJwM/A90DXaHkd4EngB2ABcAdQLVrXB/hfdP9x4F+FYhoBXB3dbwj8G1gMzAIuT9juVmAY8EK0//OLOL7/Ao8Vsfw/wHPR/SOA+cDfgCXRe9Irmfcg4bnXAT8CzwN1gbeimJdH9xtH2/8TyAPWAquBR6PlDrSI7j8D9AfeBlYRvtSbJ8TTBfgWWAk8BnxU1LFH276Q+P9ZxPqm0b57R8e3BLgxYf3+wKfAiuj/8lGgRsJ6By4BpgOzomUPEZLQz8AXwKEJ21eL3ufvo2P7AtgN
GBO91i/R+3JmtP0JhM/XCuAToEOhz+51wFfAOqA6CZ/nKPbxURw/AfdHy+dG+1od3TqT8JmMtmkHvAcsi577t7j/VjPtFnsAupXzP27TP6LGwNfAQwnrHwDeAHYi/LJ8E7grWrd/9MV0LKG12AhoHa0bDjwBbAfsDIwD/hytK/gDBA6LvkAselwX+JWQDLaKvjRuAWoAewAzgeOibW8F1gMnR9tuU+jYtiV8AR9ZxHGfA/wQ3T8CyAXuJySAw6Mvpz2TeA/yn3tP9NxtgHrAqdH+awOvAq8n7PtDCn2Js3lSWBq9v9WBF4Gh0br60ZfcH6J1V0TvQXFJ4UfgnBL+/5tG+x4Uxb434Qu2TbR+X+DAaF9NgWnAlYXifi96b/IT5VnRe1Ad+EsUQ61o3bWEz9iegEX7q1f4PYge7wMsAg4gJJPehM9rzYTP7kRCUtkmYVn+5/lT4Ozo/vbAgYWOuXrCvvqw8TNZm5AA/wLUih4fEPffaqbdYg9At3L+x4U/otWEX20OvA/sGK0zwpdj4q/Uzmz8RfgE8EARr7lL9MWS2KLoCYyO7if+ARrhl9th0eMLgA+i+wcAcwu99g3A09H9W4ExJRxb4+iYWhexriuwPrp/BOGLfbuE9a8ANyfxHhwB/Jb/pVdMHB2B5QmPP6T0pDA4YV034Jvo/p+ATxPWGSGpFpcU1hO13opZn/8F2Thh2TigRzHbXwkMLxT3UaV8xpYDe0f3vwW6F7Nd4aTwOPCPQtt8Cxye8Nk9t4jPc35SGAPcBtQv5piLSwo9gQmp/LvLhpv68zLbye7+f2Z2OPAS4dfoCqAB4dfuF2aWv60RfrVB+IU2sojX2x3YGvgh4XlbEb68NuHubmZDCX+IY4A/Ero88l+noZmtSHhKNUKXUL7NXjPBcmADsCvwTaF1uxK6Sgq2dfdfEh7PIbRWSnsPABa7+9qClWbbEloXXQktH4DaZlbN3fNKiDfRjwn31xB+6RLFVHDM0fs3v4TXWUo41nLtz8xaEVpQOYT3oTqh9ZZok/8DM7sGOC+K1YEdCJ8pCJ+Z75OIB8L/f28zuyxhWY3odYvcdyHnAbcD35jZLOA2d38rif2WJUYphgaaqwB3/4jwK/Vf0aIlhK6cdu6+Y3Sr42FQGsIfZPMiXmoeoaVQP+F5O7h7u2J2PQQ4zcx2J7QO/p3wOrMSXmNHd6/t7t0Swy7heH4hdCGcXsTqMwitonx1zWy7hMdNgIVJvAdFxfAXQvfIAe6+A6GLDEIyKTHmJPxAaAGFFwyZqnHxm/N/hK6s8nqckFBbRsfyNzYeR76C4zGzQ4G/Et7fuu6+I6GLMf85xX1mijIP+Geh//9t3X1IUfsuzN2nu3tPQvflPcCw6P+4tPd/HqGrUraAkkLV8SBwrJnt7e4bCH3ND5jZzgBm1sjMjou2fRI4x8yONrOtonWt3f0Hwhk/95nZDtG65lFLZDPuPoHw5TsYGOXu+S2DccAqM7vOzLYxs2pm1t7M9ivD8VxP+LV5uZnVNrO6ZnYHoQvotkLb3mZmNaIvthOAV5N4D4pSm5BIVpjZTsDfC63/ifJ/6bwN7GVmJ0dn3FwC/K6E7f8OHGRm/czsd1H8LczsBTPbMYn91SaMYaw2s9bARUlsn0sYZK9uZrcQWgr5BgP/MLOWFnQws3rRusLvyyDgQjM7INp2OzP7vZklddaUmZ1lZg2i/8P8z9SGKLYNFP9/8Bawq5ldaWY1o8/NAcnsUzZSUqgi3H0x8BxhcBfC2R0zgLFm9jPhl+ee0bbjCAO2DxB+DX5EaPJD6PuuAUwldOMMo+RujJeAY6J/82PJI3w5dySceZSfOOqU4Xj+BxxHGJj9gdAttA9wiLtPT9j0xyjOhYSB3QvdPb/Lqdj3oBgPEgZtlwBjgXcKrX+I0DJabmYPJ3ss0fEsIbR87iV0DbUlnGGzrpjtvyckwKbAFDNbSWiJjSeMI5XmGkKX3irCl/TLpWw/inC83xHe67Vs
2sVzP2G85l1CsnmS8F5BGCN61sxWmNkZ7j6eMMb0KOH/Zgah7z9ZXQnHvJrwnvdw91/dfQ3hLLCPo30dmPgkd19FOHniRMLnYjpwZBn2K2w8c0Qk40RXwL7g7iV1w1RKZrYV4ZTYXu4+Ou54RPKppSCSJmZ2nJntaGY12djHPzbmsEQ2oaQgkj6dCWfHLCF0cZzs7r/GG5LIptR9JCIiBdRSEBGRAhl38Vr9+vW9adOmcYchIpJRvvjiiyXu3qC07TIuKTRt2pTx48fHHYaISEYxsznJbKfuIxERKaCkICIiBZQURESkgJKCiIgUUFIQEZECKUsKZvaUmS0ys8nFrDcze9jMZkQTkndKVSwiIpKcVLYUniFUOyzO8UDL6NaXUP9dRERilLLrFNx9jJk1LWGT7oQJ2J1Q2nhHM9s1qukvklbusH49/PILrFmT/C0v2fnYRLbA1r/9wnZrFnPo2U3ZryyzkpRDnBevNWLTeu3zo2WbJQUz60toTdCkSZO0BCeVR14e/Ppr2b6sy3Mrzxe8FZ7LTKSCHekfMJALWEkdxrUez377pXYoOCOuaHb3gcBAgJycHFXwy0Br18KyZZveli/ffFnibeXK8GW9rshpaEq21Vaw3Xaw7bab3+rXL3p5MrfE16xVC6pVKz0WkXJZsQKuvRYGD4YWLWDwA3Q6PPXnBsWZFBYQJtrO1zhaJpWUO6xeXfyXeElf8r+WUCC6enXYaSeoWzf8u+uu0K4d1KlT/Bd7abett9aveMlgeXlw0EHw7bfw17/CrbfCNtuU+rSKEGdSeAO41MyGEiZ9X6nxhPTIywu/wkv6lV7cl3xubvGvu802G7/Yd9oJmjeH/fbb+Li42/bb6wtcBIClS8MfRbVq8M9/wm67QU5OWkNIWVIwsyHAEUB9M5tPmIh8awB3HwCMBLoR5m9dQ5gzWCqQO7zzDgwcCPPnb/ySX7EirCtO7dqbfmnvtVfpX+x166bth4xI1eMOL74IV1wBd98NF1wAp5wSSyipPPuoZynrHbgkVfvPZnl58O9/w113wcSJ0LAhdOgArVqV/uW+446h60VE0mTePLjwQhg5Eg48EA4+ONZwMmKgWZLz22/wwgvhh8b06SEJPPUU9OoFNWrEHZ2IbGbIEPjzn8MvuQcfhEsvjf3sBSWFKuCXX8IJCv/6V+gm2mcfePXV0PrU2TEilVjdunDAAaGPt1mzuKMBlBQy2vLl0L8/PPQQLFkChx0GgwbBccdp4FakUsrNhQceCM36G2+Erl0r3R+skkIG+vHH8Ll6/HFYtQq6dYMbboBDDok7MhEp1qRJcN558MUXcMYZYXDZrFIlBFCV1IwyezZccgk0bRq6irp1gwkT4O23lRBEKq116+Dmm8OppfPmhb7doUMrXTLIp5ZCBpg6NQwev/RSuFK3d+9wPUvLlnFHJiKlmj4d7rkH/vhHuP9+qFcv7ohKpKRQyaxaFZLA1KkwZUpoCXzwQbhK9/LL4eqroXHjuKMUkRKtXg0jRoRT/9q3h2++gT32iDuqpCgpxCQ3N3QtTpmyMQFMmRJal/lq1oTWrUPL8/LLQ80eEank3nsP+vaFOXOgUydo0yZjEgIoKcTiq6+gT5/QCoBQWK11azj00FDzp107aNs2fI50SqlIhli+HK65Jlwc1KoVfPRRSAgZRkkhjdavD2MD//hHOD35qadCImjWTF/+IhktLy9cifzdd+FUwFtuCb/2MpCSQppMmgTnnBNaBz17wsMPqztIJOMtWbKxgN2dd0KTJqHLKIPplNQUW78ebr89nI22cCG89lo4i0gJQSSDucNzz4VuosGDw7KTT874hABqKaTU5Mlw9tmhKN0f/xhaB5X8bDQRKc2cOaFe0ahRYc6Dww6LO6IKpaSQIuPHwzHHhG7F4cPDjwgRyXAvvAAXXRRaCo88AhdfHC4eqkKUFFLg88/h2GNDV+OHH4ZuRhGpAho0CAPKTzwBu+8edzQpoaRQwfITQr16
MHq0EoJIRlu/Hu67L/x7882heF2XLpW2REVFqFrtnpiNG6eEIFJlTJgQylrfcEO4wjR/usIqnBBASaHCjBsXfkDUq6cuI5GMtnYt/O1vYYLxhQvDNIZDhlT5ZJBPSaECJLYQPvwwzLUtIhlqxoxQhvhPf4Jp0+APf4g7orRSUthCn30WEkKDBkoIIhlr9Wp4/vlwv317+PbbUHKgbt1444qBksIW+Oyz0GWkhCCSwUaNCgXHevcOLQOoNFNjxkFJoZzGjt00IaictUiGWbo0JIKuXUNt+v/+NyML2FU0nZJaDmPHhjPTdt45nGWkhCCSYfIL2M2YEeZKvummjC1gV9GUFMoov4Wwyy5KCCIZZ/HicEZItWphNrTdd4eOHeOOqlJR91EZJCYEdRmJZBB3ePrpUMBu0KCwrHt3JYQiKCkk6dNPN00IjRrFHZGIJGX27NDfe+65sNdecOSRcUdUqSkpJOGTT8Jn6ne/U0IQySjPPx9OMf30U3jssfAH3KpV3FFVahpTKMUnn4STE373uzCGoIQgkkF22SWUth4wQGUGkqSkUIL8FsKuu4YfGA0bxh2RiJRo/Xq4995wdtEtt4Q+3y5d4o4qo6j7qBj5CaFhQyUEkYzw5ZehXtFNN4UrkvML2EmZKCkU4eOPNyaE0aOVEEQqtV9/heuvh/33h59+CrNavfhi1hSwq2gpTQpm1tXMvjWzGWZ2fRHrm5jZaDObYGZfmVm3VMaTjI8/DmMISggiGWLmTLj/fujTJ5S41jSHWyRlScHMqgH9geOBtkBPM2tbaLObgFfcfR+gB/BYquJJxm+/wamnqstIpNL7+Wd45plwv107mD4dBg/OygJ2FS2VLYX9gRnuPtPdfwOGAt0LbePADtH9OsDCFMZTqtdeC63Phx4Kg8siUgmNHBlOMz3vvI0F7Kro1JhxSGVSaATMS3g8P1qW6FbgLDObD4wELivqhcysr5mNN7PxixcvTkWsADz+OOyxh05WEKmUliyBs8+G3/8eatcOfb0qYFfh4h5o7gk84+6NgW7A82a2WUzuPtDdc9w9p0GDBikJZMoUGDMG/vxn2Crud0VENpVfwG7o0HCq6ZdfwoEHxh1VlZTK6xQWAIkzDDSOliU6D+gK4O6fmlktoD6wKIVxFWnAAKhRA845J917FpFi/fRTqE9frVqYDW333aFDh7ijqtJS+Zv4c6ClmTUzsxqEgeQ3Cm0zFzgawMzaALWA1PUPFWP1anjuOTj99PD5E5GYucOTT8Kee8LAgWHZiScqIaRBypKCu+cClwKjgGmEs4ymmNntZnZStNlfgAvMbBIwBOjjnv4rToYMCSczXHRRuvcsIpuZOROOOQbOPz9UMT3mmLgjyioWw3fwFsnJyfHx48dX2Ou5w777Qm4uTJqk611EYvXss3DxxaG7qF8/uOACDfJVEDP7wt1zStsu62sfjRsHEyaEAopKCCIxa9gQjjoqnAqoCUtikfVJ4fHHYfvt4ayz4o5EJAv99hvcfTds2AC33grHHhtuEpusbpctWwYvvxwSQu3acUcjkmU+/zz03f7972EcIcO6squqrE4KzzwDa9dqgFkkrdasgWuuCdcZLF8Ob7wRTv9T/22lkLVJYcOGcG3CQQfpLDeRtJo1Cx55JAwiT5kSTjWVSiNrk8IHH4QaWmoliKTBypXw9NPhfrt2MGNG+FVWp068cclmsjYpPP441KsHp50WdyQiVdzbb4dEcP758M03Ydluu5X8HIlNViaFhQthxAg491yoVSvuaESqqMWLoVcvOOGEUNL600+hdeu4o5JSZOUpqYMHh/paf/5z3JGIVFF5eXDIIWH84LbbwsxoNWrEHZUkIamkENUuauLuM1IcT8rl5YVSKscdB82bxx2NSBXz44+w887hiuT77oOmTcPcB5IxSu0+MrPfA18D70WPO5rZ8FQHliqLF8OCBaFFKyIVZMMGeOIJaNUq/Avhj0wJIeMkM6ZwO3AAsALA3ScC
LVIZVCqtXBn+3WmneOMQqTJmzICjj4YLL4T99gvNcMlYySSF9e6+otCyjL30MD8p6Ew4kQrw9NOw115h0ptBg+D//i9MXygZK5kxhWlmdgawlZk1Ay4HxqY2rNRZEaW3HXeMNw6RKqFJk9Ay6N8fGhWebVcyUTIthUuBfYENwGvAOuCKVAaVSmopiGyBdetC4bpbbgmPjz4aXn9dCaEKSSYpHOfu17n7PtHteuD4VAeWKmopiJTTZ5+FAna33QZz56qAXRWVTFK4qYhlN1Z0IOmiloJIGf3yC1x9NXTuHP6A3norVJNUAbsqqdgxBTM7DugKNDKz+xNW7UDoSspIK1eGiZy23z7uSEQyxJw5YRaqCy8Mcx/ssEPcEUkKlTTQvAiYDKwFpiQsXwVcn8qgUmnFitBK0I8ckRKsWAHDhoV6RW3bhtNONRNaVig2Kbj7BGCCmb3o7mvTGFNKrVypriOREo0YEcoHL1oUSlW0bq2EkEWSGVNoZGZDzewrM/su/5byyFJkxQoNMosUadEi6NEDTj4ZGjSAsWNVwC4LJZMUngGeBoxw1tErwMspjCml1FIQKUJeHhx8MAwfDnfcAePHQ05O3FFJDJJJCtu6+ygAd//e3W8ig09JVVIQSbBwYahbVK0aPPQQTJgAN94IW28dd2QSk2SSwjoz2wr43swuNLMTgYyd5l7dRyKERPD446F7aMCAsKxbtzCoLFktmTIXVwHbEcpb/BOoA5ybyqBSSS0FyXrffRfmRx4zBo45Bo7P2Ia/pECpScHdP4vurgLOBjCzjLymfcOGkBTUUpCs9eSTcOmlYcrBp56CPn10frZsosTuIzPbz8xONrP60eN2ZvYc8FlJz6usVq8OV+arpSBZq2nT0DKYOhXOOUcJQTZTbFIws7uAF4FewDtmdiswGpgEtEpLdBVMJS4k66xbBzfdFG4QCti99hrsumu8cUmlVVL3UXdgb3f/1cx2AuYBe7n7zPSEVvFUDE+yyiefwHnnwTffwLnnhmayWgZSipK6j9a6+68A7r4M+C6TEwKopSBZYvVquOKKcDXymjXwzjthLEEJQZJQUlLYw8xei27DgWYJj19L5sXNrKuZfWtmM8ysyHpJZnaGmU01sylm9lJ5DiJZ+S0FJQWp0ubODfMkX3IJTJ6s6TGlTErqPjq10ONHy/LCZlYN6A8cC8wHPjezN9x9asI2LYEbgIPdfbmZ7VyWfZRVfktB3UdS5SxfDq++Cn37hmsNZs6Ehg3jjkoyUEkF8d7fwtfeH5iR3+VkZkMJ4xRTE7a5AOjv7sujfS7awn2WSN1HUiUNHw4XXwyLF8Phh8OeeyohSLklc0VzeTUiDE7nmx8tS9QKaGVmH5vZWDPrWtQLmVlfMxtvZuMXL15c7oDUfSRVyo8/wumnwx/+AL/7HYwbFxKCyBZI5ormVO+/JXAE0BgYY2Z7ufuKxI3cfSAwECAnJ6fccwCuXAk1a4brdkQyWl4eHHoozJsHd94J11yjekVSIZJOCmZW093XleG1FwC7JTxuHC1LNB/4zN3XA7Oiktwtgc/LsJ+k5U+wI5Kx5s8PXUPVqsHDD0OzZipvLRWq1O4jM9vfzL4GpkeP9zazR5J47c+BlmbWzMxqAD2ANwpt8zqhlUB01XQrIGWnvarEhWSsDRvgkUdCAnj88bDs+OOVEKTCJTOm8DBwArAUwN0nAUeW9iR3zwUuBUYB04BX3H2Kmd1uZidFm40ClprZVMLV0te6+9KyH0ZyVAxPMtI338Bhh8Hll4drD044Ie6IpApLpvtoK3efY5te+JKXzIu7+0hgZKFltyTcd+Dq6JZyKpstGWfw4FDAbttt4dln4eyzdRGapFQyLYV5ZrY/4GZWzcyuBDJyOk61FCTjNG8OJ54I06bBn/6khCApl0xL4SJCF1IT4Cfg/6JlGUcDzVLprV0Lt98e7t95Jxx5ZLiJpEkySSHX3XukPJI00ECzVGoffxwK2H37LZx/vgrY
SSyS6T763MxGmllvM8vYaTjXrw+1wdRSkEpn1Sq47LJw3cG6dTBqFAwapIQgsSg1Kbh7c+AOYF/gazN73cwyruWgEhdSac2fHwaUL7sMvv4aunSJOyLJYkmVuXD3T9z9cqAT8DNh8p2MomJ4UqksXbrxeoM2bUIBu4cegu23jzcuyXrJXLy2vZn1MrM3gXHAYuCglEdWwVT3SCoFdxg2LFQyvfzyMH4AmglNKo1kBponA28C97r7f1McT8qopSCx++GHMMfB8OGw777w7rsqYCeVTjJJYQ9335DySFJMYwoSq/wCdgsWwL33wlVXQfW461GKbK7YT6WZ3efufwH+bWabVSZ19z+kNLIKpu4jicW8edCoUShg179/KGDXqlXcUYkUq6SfKi9H/5ZpxrXKSi0FSau8vJAEbrghtAwuuUTTYkpGKGnmtXHR3TbuvkliMLNLgS2dmS2t1q8P/9asGW8ckgWmTQsXoX36aahkeuKJcUckkrRkTkk9t4hl51V0ICJVwsCB0LEjfPcdPP88vP02NGkSd1QiSStpTOFMwhwIzczstYRVtYEVRT9LJMu1bAmnnBImwNl557ijESmzksYUxhHmUGgM9E9YvgqYkMqgRDLGr7/CrbeGkhR3360CdpLxShpTmAXMIlRFFZHCxowJheumT4cLL1QBO6kSih1TMLOPon+Xm9myhNtyM1uWvhBFKpmff4aLL4bDDw9nGb3/fihZoYQgVUBJ3Uf5beD66QhEJGMsXAjPPANXXx3mPthuu7gjEqkwxbYUEq5i3g2o5u55QGfgz4D+CiS7LFkCjz0W7rduDbNmwX33KSFIlZPMKamvE6bibA48DbQEXkppVCKVhTu8/HIoYHflleFUU4Bddok3LpEUSSYpbHD39cAfgEfc/SqgUWrDEqkEFi6Ek0+GHj1g993hiy9UokKqvKSm4zSz04GzgZOjZVunLiSRSiAvDw47LBSw+9e/4IorVMBOskIyn/JzgYsJpbNnmlkzYEhqwxKJyZw50LhxKGD32GOwxx7QokXcUYmkTTLTcU4GLgfGm1lrYJ67/zPlkYmkU14e3H9/mAUtf0a0Ll2UECTrlNpSMLNDgeeBBYABvzOzs93941QHJ5IWkyeHAnbjxsEJJ4RxBJEslUz30QNAN3efCmBmbQhJIieVgYmkxYABYVrMOnXgpZfCoLIuQpMslszZRzXyEwKAu08DaqQuJJE08GjeqDZt4PTTYepU6NlTCUGyXjIthS/NbADwQvS4FyqIJ5lqzRq45ZYwkHzPPaFUxeGHxx2VSKWRTEvhQmAm8NfoNpNwVbNIZvnwQ+jQIVyJvHr1xtaCiBQosaVgZnsBzYHh7n5vekISqWArV8Jf/xomwGneHD74QOWtRYpRUpXUvxFKXPQC3jOzomZgE6n8fvgBXngBrrkGvvpKCUGkBCV1H/UCOrj76cB+wEVlfXEz62pm35rZDDO7voTtTjUzNzOd0SQVY/FieOSRcL91a5g9G/r1g223jTUskcqupKSwzt1/AXD3xaVsuxkzq0aYse14oC3Q08zaFrFdbeAK4LOyvL5IkdzDqaVt2sBf/rKxgF2DBvHGJZIhSvqi38PMXotuw4HmCY9fK+F5+fYHZrj7THf/DRgKdC9iu38A9wBryxy9SKJ58+DEE6FXr3Al8oQJKmAnUkYlDTSfWujxo2V87UbAvITH84EDEjcws07Abu7+tpldW9wLmVlfoC9AkyZNyhiGZIXcXDjiCPjxR3jgAbjssnDaqYiUSUlzNL+fyh2b2VbA/UCf0rZ194HAQICcnBydRygbzZ4Nu+0WKpg+8UQoYLfHHnFHJZKxyjROUEYLCLO25WscLctXG2gPfGhms4EDgTc02CxJyc0NJa3btNk4I9oxxyghiGyhVBaI/xxoGZXaXgD0AP6Yv9LdV5Iw/7OZfQhc4+7jUxiTVAVffRUK2I0fD927w6mFezpFpLySbimYWc2yvLC75wKXAqOAacAr
7j7FzG43s5PKFqZI5LHHYN99w7wHL78Mw4dDw4ZxRyVSZSRTOnt/4EmgDtDEzPYGznf3y0p7rruPBEYWWnZLMdsekUzAkqXcQ7G69u1DJdMHHoD69UvTy2epAAAVV0lEQVR/noiUSTLdRw8DJxCubsbdJ5mZLgmV9PjlF7jppjCQ3K9fmCLzsMPijkqkykqm+2grd59TaFleKoIR2cT778Nee8GDD8K6dSpgJ5IGySSFeVEXkptZNTO7EvguxXFJNluxAs4/P5xNVL06jBkDDz+suQ5E0iCZpHARcDXQBPiJcOpomesgiSTtp59g6FC47jqYNAkOPTTuiESyRqljCu6+iHA6qUjq5CeCK66APfcMF6VpIFkk7ZI5+2gQsFlnrrv3TUlEkl3c4cUXQzJYvRq6dYOWLZUQRGKSTPfR/wHvR7ePgZ2BdakMSrLE3Lnw+9/D2WeH1sHEiSEhiEhskuk+ejnxsZk9D/wvZRFJdsgvYLdoURhEvvhiFbATqQTKU+aiGbBLRQciWWLmTNh993BW0aBBYXrMpk3jjkpEIqV2H5nZcjNbFt1WAO8BN6Q+NKlScnPhnnugbVvo3z8sO/poJQSRSqbEloKZGbA3G6ubbnDXFURSRhMnhgJ2X34Jp5wCp58ed0QiUowSWwpRAhjp7nnRTQlByubRR2G//WDBAhg2DF57DXbdNe6oRKQYyZx9NNHM9kl5JFK15P9+6NAhTI85dapKXItkgGK7j8yselT+eh/gczP7HvgFMEIjolOaYpRMsno13HgjbL11mARHBexEMkpJYwrjgE6A5j6Q5Lz7LvTtG64/uOyyjeWuRSRjlJQUDMDdv09TLJKpli+Hq6+GZ54JF6GNGQOHHBJ3VCJSDiUlhQZmdnVxK939/hTEI5lo0aIwiHzDDXDLLVCrVtwRiUg5lZQUqgHbE7UYRDbx448wZAhcddXGAnb16sUdlYhsoZKSwg/ufnvaIpHM4A7PPReSwZo1cMIJoV6REoJIlVDSKalqIcimZs+Grl2hT59wZbIK2IlUOSW1FI5OWxRS+eXmwpFHwpIloUzFhRfCVslc5iIimaTYpODuy9IZiFRSM2ZAs2ahgN1TT8Eee4SCdiJSJemnnhRt/Xq4805o125jAbsjj1RCEKniylM6W6q6L78MBewmTgzF6848M+6IRCRN1FKQTT38MOy/fzjl9LXX4JVXYBdNnyGSLZQUJMgvYLfPPvCnP4UCdqecEm9MIpJ26j7KdqtWhSuRa9aE++6DQw8NNxHJSmopZLN33oH27eGxx0JLQdNliGQ9JYVstHQp9O4Nxx8P220HH38M99+viqYioqSQlZYuheHD4eabYcIE6Nw57ohEpJJIaVIws65m9q2ZzTCz64tYf7WZTTWzr8zsfTPTSfCp8sMPYdIbd2jVCubMgdtvD2MJIiKRlCUFM6sG9AeOB9oCPc2sbaHNJgA57t4BGAbcm6p4spZ7uBK5TZvQMpgxIyyvWzfeuESkUkplS2F/YIa7z3T334ChQPfEDdx9tLuviR6OBRqnMJ7sM2sWdOkSLkTbe2+YNEkF7ESkRKk8JbURMC/h8XzggBK2Pw/4T1ErzKwv0BegSZMmFRVf1ZabC0cdFcYPHn88TJOpAnYiUopKcZ2CmZ0F5ACHF7Xe3QcCAwFycnJ03mRJpk8PReuqV4enn4bmzWG33eKOSkQyRCp/Oi4AEr+NGkfLNmFmxwA3Aie5+7oUxlO1rV8Pd9wRrjt49NGw7IgjlBBEpExS2VL4HGhpZs0IyaAH8MfEDcxsH+AJoKu7L0phLFXb+PFh3OCrr6BHD+jZM+6IRCRDpayl4O65wKXAKGAa8Iq7TzGz283spGizfoR5oF81s4lm9kaq4qmyHnoIDjggTH4zYkSYN3nnneOOSkQyVErHFNx9JDCy0LJbEu4fk8r9V2nu4QrknJzQSrj3Xthxx7ijEpEMVykGmqUMfv4ZrrsOatWCBx6Agw8ONxGRCqBz
FDPJyJFhJrSBA8PZRSpgJyIVTEkhEyxZAmedBb//PdSpA598Av36qYCdiFQ4JYVMsHw5vPkm/P3vYarMA0q6BlBEpPw0plBZLVgAL74I114bSlPMmaOBZBFJObUUKht3GDQI2raFW2+F778Py5UQRCQNlBQqk++/h6OPDnWKOnUKF6O1aBF3VCKSRdR9VFnk5oaEsGwZPPEEnH++CtiJSNopKcTt229D0brq1eHZZ8P9xqogLiLx0E/RuPz2G9x2G+y1F/TvH5YdfrgSgojESi2FOIwbF0pTTJ4Mf/wj9OoVd0QiIoBaCun34IPQufPGaw9efBHq1487KhERQEkhffJLUuy/P1xwAUyZAiecEG9MIiKFqPso1VauhL/+FbbZJrQSDjoo3EREKiG1FFLpzTfDRWiDB0PNmipgJyKVnpJCKixeHAaQTzoJ6tWDsWPhnntUwE5EKj0lhVRYuTKUub7ttjBV5n77xR2RiEhSNKZQUebNgxdegOuvD6Up5swJZa5FRDKIWgpbasMGGDAgTH5zxx0bC9gpIYhIBlJS2BLTp8NRR8FFF4VTTb/+WgXsRCSjqfuovHJz4dhjYcUKePJJOOccDSSLSMZTUiiradPCpDfVq8Pzz4cCdg0bxh2VSKW0fv165s+fz9q1a+MOJWvUqlWLxo0bs/XWW5fr+UoKyVq3Du68M9z69YMrr4RDD407KpFKbf78+dSuXZumTZtiakmnnLuzdOlS5s+fT7Nmzcr1GhpTSMbYsWHSm9tvh5494eyz445IJCOsXbuWevXqKSGkiZlRr169LWqZKSmU5r77QlmKVavCtQfPPRcuSBORpCghpNeWvt9KCsXZsCH827kzXHhhKHN9/PHxxiQikmJKCoWtWBHmOrjiivD4oIPgscdghx3ijUtEyu3111/HzPjmm28Kln344YecUKhScZ8+fRg2bBgQBsmvv/56WrZsSadOnejcuTP/+c9/tjiWu+66ixYtWrDnnnsyatSoIrf54IMP6NSpE+3bt6d3797k5uYC0K9fPzp27EjHjh1p37491apVY9myZVscUyIlhUSvvx4K2D37LNSurQJ2IlXEkCFDOOSQQxgyZEjSz7n55pv54YcfmDx5Ml9++SWvv/46q1at2qI4pk6dytChQ5kyZQrvvPMOF198MXl5eZtss2HDBnr37s3QoUOZPHkyu+++O88++ywA1157LRMnTmTixIncddddHH744ey0005bFFNhOvsIYNEiuPRSePVV6NgR3norDCyLSIW58kqYOLFiX7Njx1CRviSrV6/mf//7H6NHj+bEE0/ktttuK/V116xZw6BBg5g1axY1a9YEYJddduGMM87YonhHjBhBjx49qFmzJs2aNaNFixaMGzeOzp07F2yzdOlSatSoQatWrQA49thjueuuuzjvvPM2ea0hQ4bQs2fPLYqnKGopAPz8M7z3Hvzzn2GqTCUEkSpjxIgRdO3alVatWlGvXj2++OKLUp8zY8YMmjRpwg5JdBtfddVVBV06ibe77757s20XLFjAbrvtVvC4cePGLFiwYJNt6tevT25uLuPHjwdg2LBhzJs3b5Nt1qxZwzvvvMOpp55aanxllb0thblzw8Vnf/tbKE0xd27oMhKRlCjtF32qDBkyhCuiMcIePXowZMgQ9t1332LP0inr2TsPPPDAFsdYeP9Dhw7lqquuYt26dXTp0oVq1aptss2bb77JwQcfXOFdR5DipGBmXYGHgGrAYHe/u9D6msBzwL7AUuBMd5+dypjYsAEeGwDXXRfun3lmSApKCCJVzrJly/jggw/4+uuvMTPy8vIwM/r160e9evVYvnz5ZtvXr1+fFi1aMHfuXH7++edSWwtXXXUVo0eP3mx5jx49uP766zdZ1qhRo01+9c+fP59GjRpt9tzOnTvz3//+F4B3332X7777bpP1Q4cOTUnXERCugEvFjZAIvgf2AGoAk4C2hba5GBgQ3e8BvFza6+67775eHvfe
696Kbzz3oEPdwf3YY91nzSrXa4lIcqZOnRrr/p944gnv27fvJssOO+ww/+ijj3zt2rXetGnTghhnz57tTZo08RUrVri7+7XXXut9+vTxdevWubv7okWL/JVXXtmieCZPnuwdOnTwtWvX+syZM71Zs2aem5u72XY//fSTu7uvXbvWjzrqKH///fcL1q1YscLr1q3rq1evLnY/Rb3vwHhP4rs7lWMK+wMz3H2mu/8GDAW6F9qmO/BsdH8YcLSl6EoXy8tlFMex1dSv4emnYdQoaNo0FbsSkUpiyJAhnHLKKZssO/XUUxkyZAg1a9bkhRde4JxzzqFjx46cdtppDB48mDpR2fs77riDBg0a0LZtW9q3b88JJ5yQ1BhDSdq1a8cZZ5xB27Zt6dq1K/379y/oGurWrRsLFy4Ewqmnbdq0oUOHDpx44okcddRRBa8xfPhwunTpwnbbbbdFsRTHPEWnXZrZaUBXdz8/enw2cIC7X5qwzeRom/nR4++jbZYUeq2+QF+AJk2a7DtnzpwyxzNiBIx/8H/c+FRzajXbtbyHJSJlMG3aNNq0aRN3GFmnqPfdzL5w95zSnpsRA83uPhAYCJCTk1OuLNa9O3TvfkiFxiUiUtWksvtoAbBbwuPG0bIitzGz6kAdwoCziIjEIJVJ4XOgpZk1M7MahIHkNwpt8wbQO7p/GvCBp6o/S0RioT/p9NrS9ztlScHdc4FLgVHANOAVd59iZreb2UnRZk8C9cxsBnA1cH3RryYimahWrVosXbpUiSFNPJpPoVatWuV+jZQNNKdKTk6O51/pJyKVm2ZeS7/iZl6rUgPNIpKZtt5663LPACbxUO0jEREpoKQgIiIFlBRERKRAxg00m9lioOyXNAf1gSWlblW16Jizg445O2zJMe/u7g1K2yjjksKWMLPxyYy+VyU65uygY84O6ThmdR+JiEgBJQURESmQbUlhYNwBxEDHnB10zNkh5cecVWMKIiJSsmxrKYiISAmUFEREpECVTApm1tXMvjWzGWa2WeVVM6tpZi9H6z8zs6bpj7JiJXHMV5vZVDP7yszeN7Pd44izIpV2zAnbnWpmbmYZf/piMsdsZmdE/9dTzOyldMdY0ZL4bDcxs9FmNiH6fHeLI86KYmZPmdmiaGbKotabmT0cvR9fmVmnCg0gmYmcM+kGVAO+B/YAagCTgLaFtrkYGBDd7wG8HHfcaTjmI4Fto/sXZcMxR9vVBsYAY4GcuONOw/9zS2ACUDd6vHPccafhmAcCF0X32wKz4457C4/5MKATMLmY9d2A/wAGHAh8VpH7r4othf2BGe4+091/A4YC3Qtt0x14Nro/DDjazCyNMVa0Uo/Z3Ue7+5ro4VjCTHiZLJn/Z4B/APcAVaF2czLHfAHQ392XA7j7ojTHWNGSOWYHdoju1wEWpjG+CufuY4BlJWzSHXjOg7HAjmZWYRPPV8Wk0AiYl/B4frSsyG08TAa0EqiXluhSI5ljTnQe4ZdGJiv1mKNm9W7u/nY6A0uhZP6fWwGtzOxjMxtrZl3TFl1qJHPMtwJnmdl8YCRwWXpCi01Z/97LRPMpZBkzOwvIAQ6PO5ZUMrOtgPuBPjGHkm7VCV1IRxBag2PMbC93XxFrVKnVE3jG3e8zs87A82bW3t03xB1YJqqKLYUFwG4JjxtHy4rcxsyqE5qcS9MSXWokc8yY2THAjcBJ7r4uTbGlSmnHXBtoD3xoZrMJfa9vZPhgczL/z/OBN9x9vbvPAr4jJIlMlcwxnwe8AuDunwK1CIXjqqqk/t7Lqyomhc+BlmbWzMxqEAaS3yi0zRtA7+j+acAHHo3gZKhSj9nM9gGeICSETO9nhlKO2d1Xunt9d2/q7k0J4ygnuXsmz+WazGf7dUIrATOrT+hOmpnOICtYMsc8FzgawMzaEJLC4rRGmV5vAH+KzkI6EFjp7j9U1ItXue4jd881s0uBUYQzF55y9ylmdjsw3t3fAJ4kNDFnEAZ0esQX8ZZL8pj7
AdsDr0Zj6nPd/aTYgt5CSR5zlZLkMY8CupjZVCAPuNbdM7YVnOQx/wUYZGZXEQad+2TyjzwzG0JI7PWjcZK/A1sDuPsAwrhJN2AGsAY4p0L3n8HvnYiIVLCq2H0kIiLlpKQgIiIFlBRERKSAkoKIiBRQUhARkQJKClLpmFmemU1MuDUtYdumxVWTLOM+P4wqcU6KSkTsWY7XuNDM/hTd72NmDRPWDTazthUc5+dm1jGJ51xpZttu6b4lOygpSGX0q7t3TLjNTtN+e7n73oRiif3K+mR3H+Duz0UP+wANE9ad7+5TKyTKjXE+RnJxXgkoKUhSlBQkI0Qtgv+a2ZfR7aAitmlnZuOi1sVXZtYyWn5WwvInzKxaKbsbA7SInnt0VKf/66jOfc1o+d22cX6Kf0XLbjWza8zsNEJ9qRejfW4T/cLPiVoTBV/kUYvi0XLG+SkJhdDM7HEzG29hHoXbomWXE5LTaDMbHS3rYmafRu/jq2a2fSn7kSyipCCV0TYJXUfDo2WLgGPdvRNwJvBwEc+7EHjI3TsSvpTnR2UPzgQOjpbnAb1K2f+JwNdmVgt4BjjT3fciVAC4yMzqAacA7dy9A3BH4pPdfRgwnvCLvqO7/5qw+t/Rc/OdCQwtZ5xdCWUt8t3o7jlAB+BwM+vg7g8TSkkf6e5HRqUvbgKOid7L8cDVpexHskiVK3MhVcKv0Rdjoq2BR6M+9DxCTZ/CPgVuNLPGwGvuPt3Mjgb2BT6PyntsQ0gwRXnRzH4FZhPKL+8JzHL376L1zwKXAI8S5md40szeAt5K9sDcfbGZzYxq1kwHWgMfR69bljhrEMqWJL5PZ5hZX8Lf9a6ECWe+KvTcA6PlH0f7qUF430QAJQXJHFcBPwF7E1q4m02a4+4vmdlnwO+BkWb2Z8LsVM+6+w1J7KNXYsE8M9upqI2iejz7E4qwnQZcChxVhmMZCpwBfAMMd3e38A2ddJzAF4TxhEeAP5hZM+AaYD93X25mzxAKwxVmwHvu3rMM8UoWUfeRZIo6wA9RjfyzCcXRNmFmewAzoy6TEYRulPeB08xs52ibnSz5+am/BZqaWYvo8dnAR1EffB13H0lIVnsX8dxVhPLdRRlOmD2rJyFBUNY4o4JvNwMHmllrwsxjvwArzWwX4PhiYhkLHJx/TGa2nZkV1eqSLKWkIJniMaC3mU0idLn8UsQ2ZwCTzWwiYS6F56Izfm4C3jWzr4D3CF0rpXL3tYQKlK+a2dfABmAA4Qv2rej1/kfRffLPAAPyB5oLve5yYBqwu7uPi5aVOc5orOI+QiXUSYS5mb8BXiJ0SeUbCLxjZqPdfTHhzKgh0X4+JbyfIoCqpIqISAK1FEREpICSgoiIFFBSEBGRAkoKIiJSQElBREQKKCmIiEgBJQURESnw/2MqVYm13vmZAAAAAElFTkSuQmCC\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Sample predictions: \n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
indexstructureChainIdalphabetacoilfoldTypefeaturesindexedLabelpredictions
0176333SO6.Q0.0000000.3076920.692308beta[0.04679717467381404, 0.3444721248860543, 0.65...1beta
1158775AZW.A0.0000000.6382980.361702beta[-0.21980827627213378, -0.003240449422676312, ...1beta
2109085EWO.A0.0000000.4813080.518692beta[0.004428481034901699, 0.08782819747137079, 0....1beta
3124582V0X.B0.6439020.0146340.341463alpha[0.13609181969768855, -0.03941942743447602, 0....0alpha
4114524J4A.J0.7777780.0000000.222222alpha[0.16031929521033397, -0.2720281567711097, 0.3...0alpha
5132622OLT.A0.8691590.0000000.130841alpha[0.10041343866212837, 0.057986875398463644, 0....0alpha
670734Y9V.A0.0431180.4693200.487562beta[-0.05767635944842671, 0.2050873197059935, 0.4...1beta
790365MJ3.A0.0231790.5331130.443709beta[0.08743077671674432, 0.05430643786294539, 0.4...1beta
8107614AFK.A0.0069770.5651160.427907beta[0.04967376806404385, 0.18319203607183707, 0.4...1beta
971774B1M.B0.0000000.6625770.337423beta[0.010340684371919411, -0.10917012139891638, 0...1beta
\n", "
" ], "text/plain": [ " index structureChainId alpha beta coil foldType \\\n", "0 17633 3SO6.Q 0.000000 0.307692 0.692308 beta \n", "1 15877 5AZW.A 0.000000 0.638298 0.361702 beta \n", "2 10908 5EWO.A 0.000000 0.481308 0.518692 beta \n", "3 12458 2V0X.B 0.643902 0.014634 0.341463 alpha \n", "4 11452 4J4A.J 0.777778 0.000000 0.222222 alpha \n", "5 13262 2OLT.A 0.869159 0.000000 0.130841 alpha \n", "6 7073 4Y9V.A 0.043118 0.469320 0.487562 beta \n", "7 9036 5MJ3.A 0.023179 0.533113 0.443709 beta \n", "8 10761 4AFK.A 0.006977 0.565116 0.427907 beta \n", "9 7177 4B1M.B 0.000000 0.662577 0.337423 beta \n", "\n", " features indexedLabel predictions \n", "0 [0.04679717467381404, 0.3444721248860543, 0.65... 1 beta \n", "1 [-0.21980827627213378, -0.003240449422676312, ... 1 beta \n", "2 [0.004428481034901699, 0.08782819747137079, 0.... 1 beta \n", "3 [0.13609181969768855, -0.03941942743447602, 0.... 0 alpha \n", "4 [0.16031929521033397, -0.2720281567711097, 0.3... 0 alpha \n", "5 [0.10041343866212837, 0.057986875398463644, 0.... 0 alpha \n", "6 [-0.05767635944842671, 0.2050873197059935, 0.4... 1 beta \n", "7 [0.08743077671674432, 0.05430643786294539, 0.4... 1 beta \n", "8 [0.04967376806404385, 0.18319203607183707, 0.4... 1 beta \n", "9 [0.010340684371919411, -0.10917012139891638, 0... 
1 beta " ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from sklearn.ensemble import RandomForestClassifier\n", "\n", "# Take the label column, test fraction and seed from the settings cell instead of repeating literals.\n", "# random_state makes the forest itself repeatable between runs.\n", "# NOTE(review): the train/test split happens inside mltoolkit; whether it can be seeded is not visible here.\n", "clf = RandomForestClassifier(random_state=seed)\n", "mcc = mltoolkit.MultiClassClassifier(clf, label, testFraction=testFraction)\n", "matrics = mcc.fit(df)\n", "for k, v in matrics.items():\n", "    print(f\"{k}\\t{v}\")\n", "\n", "# Plot ROC\n", "mltoolkit.plot_roc(mcc.TPR, mcc.FPR, mcc.AUC)\n", "\n", "print(\"Sample predictions: \")\n", "mcc.prediction.head(10)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Logistic Regression Classifier" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", " Class\tTrain\tTest\n", "\n", "alpha\t1129\t124\n", "\n", "beta\t1126\t127\n", "\n", "Total time taken: 0.10818099975585938\n", "\n", "Methods\tLogisticRegression\n", "AUC\t0.9591694183388366\n", "F Score\t0.8924302788844621\n", "Accuracy\t0.8924302788844621\n", "Precision\t0.9032258064516129\n", "Recall\t0.8818897637795275\n", "False Positive Rate\t0.0967741935483871\n", "True Positive Rate\t0.8818897637795275\n", "\t\n", "Confusion Matrix\n", "['alpha' 'beta']\n", "[[112 12]\n", " [ 15 112]]\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/opt/miniconda3/envs/mmtf-workshop-2018/lib/python3.6/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. 
Specify a solver to silence this warning.\n", " FutureWarning)\n" ] }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAEWCAYAAACJ0YulAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAAIABJREFUeJzt3Xu8VXP+x/HXR6lccpmKoYuiooukjsit3GPK/RIxMtJg3DIMxjD0MwahcYkkdyo0oqERgzQMUpRuSrrn0kVRKJ36/P74rnPanc5ln8va6+xz3s/HYz/O3mutvddn7XPO/uzv97vW52vujoiICMBWSQcgIiKVh5KCiIjkU1IQEZF8SgoiIpJPSUFERPIpKYiISD4lBUmbmfUyszeSjqMyMbM1ZrZnAvttamZuZjUzve84mNl0M+tahufpb7KCKSlkKTObb2Y/Rx9K35jZk2a2fZz7dPfn3P3YOPeRyswONrO3zWy1mX1vZv8ys9aZ2n8h8Ywzsz6py9x9e3efG9P+WprZi2a2PDr+z8zsajOrEcf+yipKTs3L8xru3sbdx5Wwny0SYab/JqsDJYXs1sPdtwfaA/sDNyQcT5kU9m3XzDoDbwCvALsDzYApwPtxfDOvbN+4zWwv4CNgEbCvu+8InAHkAHUreF+JHXtle98FcHfdsvAGzAeOTnl8F/BayuPawN3AQuBbYDCwTcr6k4DJwA/Al0C3aPmOwGPA18AS4DagRrSuN/BedP9h4O4CMb0CXB3d3x34J7AMmAdckbLdLcBI4Nlo/30KOb7/Ag8VsvzfwNPR/a7AYuDPwPLoPemVznuQ8tzrgG+AZ4CdgVejmFdG9xtF2/8N2ACsBdYAD0bLHWge3X8SGAS8BqwmfKjvlRLPscAs4HvgIeDdwo492vbZ1N9nIeubRvs+Pzq+5cCNKes7AR8Aq6Lf5YNArZT1DvwB+AKYFy27j5CEfgAmAYelbF8jep+/jI5tEtAYGB+91o/R+3JWtH13wt/XKuB/QLsCf7vXAZ8B64CapPw9R7FPjOL4Frg3Wr4w2tea6NaZlL/JaJs2wJvAd9Fz/5z0/2q23RIPQLcy/uI2/ydqBEwF7ktZPxAYDfyK8M3yX8Dfo3Wdog+mYwitxYbAPtG6UcAjwHbALsAE4PfRuvx/QODw6APEosc7Az8TksFW0YfGzUAtYE9gLnBctO0twHrg5GjbbQoc27aED+AjCjnuC4Cvo/tdgVzgXkIC6BJ9OO2dxnuQ99w7o+duA9QDTov2Xxd4EXg5Zd/jKPAhzpZJYUX0/tYEngNGROvqRx9yp0brrozeg6KSwjfABcX8/ptG+340in0/wgdsq2h9R+CgaF9NgZnAVQXifjN6b/IS5bnRe1AT+GMUQ51o3bWEv7G9AYv2V6/gexA93h9YChxISCbnE/5ea6f87U4mJJVtUpbl/T1/AJwX3d8eOKjAMddM2VdvNv1N1iUkwD8CdaLHByb9v5ptt8QD0K2Mv7jwT7SG8K3NgbeAnaJ1RvhwTP2W2plN3wgfAQYW8pq7Rh8sqS2Ks4F3ovup/4BG+OZ2ePT4IuDt6P6BwMICr30D8ER0/xZgfDHH1ig6pn0KWdcNWB/d70r4YN8uZf0LwE1pvAddgV/yPvSKiKM9sDLl8ThKTgpDU9adAHwe3f8t8EHKOiMk1aKSwnqi1lsR6/M+IBulLJsA9Cxi+6uAUQXiPrKEv7GVwH7R/VnASUVsVzApPAz8X4FtZgFdUv52f1fI33NeUhgP3ArUL+KYi0oKZwOfxvl/Vx1u6s/Lbie7+3/MrAswjPBtdBXQgPBtd5KZ5W1rhG9tEL6hjSnk9fYAtga+TnneVoQPr824u5vZCMI/4njgHEKXR97r7G5mq1K
eUoPQJZRni9dMsRLYCOwGfF5g3W6ErpL8bd39x5THCwitlZLeA4Bl7r42f6XZtoTWRTdCywegrpnVcPcNxcSb6puU+z8RvukSxZR/zNH7t7iY11lBONYy7c/MWhJaUDmE96EmofWWarPfgZldA1wYxerADoS/KQh/M1+mEQ+E3//5ZnZ5yrJa0esWuu8CLgT6A5+b2TzgVnd/NY39liZGKYIGmqsAd3+X8C317mjRckJXTht33ym67ehhUBrCP+RehbzUIkJLoX7K83Zw9zZF7Ho4cLqZ7UFoHfwz5XXmpbzGTu5e191PSA27mOP5kdCFcEYhq88ktIry7Gxm26U8bgJ8lcZ7UFgMfyR0jxzo7jsQusggJJNiY07D14QWUHjBkKkaFb05/yF0ZZXVw4SE2iI6lj+z6Tjy5B+PmR0G/Inw/u7s7jsRuhjznlPU30xhFgF/K/D739bdhxe274Lc/Qt3P5vQfXknMDL6HZf0/i8idFVKOSgpVB3/AI4xs/3cfSOhr3mgme0CYGYNzey4aNvHgAvM7Cgz2ypat4+7f0044+ceM9shWrdX1BLZgrt/SvjwHQqMdfe8lsEEYLWZXWdm25hZDTNra2YHlOJ4rid827zCzOqa2c5mdhuhC+jWAtveama1og+27sCLabwHhalLSCSrzOxXwF8LrP+Wsn/ovAbsa2YnR2fc/AH4dTHb/xU42MwGmNmvo/ibm9mzZrZTGvurSxjDWGNm+wCXpLF9LmGQvaaZ3UxoKeQZCvyfmbWwoJ2Z1YvWFXxfHgUuNrMDo223M7PfmFlaZ02Z2blm1iD6Heb9TW2MYttI0b+DV4HdzOwqM6sd/d0cmM4+ZRMlhSrC3ZcBTxMGdyGc3TEH+NDMfiB889w72nYCYcB2IOHb4LuEJj+Evu9awAxCN85Iiu/GGAYcHf3Mi2UD4cO5PeHMo7zEsWMpjuc94DjCwOzXhG6h/YFD3f2LlE2/ieL8ijCwe7G753U5FfkeFOEfhEHb5cCHwOsF1t9HaBmtNLP70z2W6HiWE1o+dxG6hloTzrBZV8T2XxISYFNgupl9T2iJTSSMI5XkGkKX3mrCh/TzJWw/lnC8swnv9Vo27+K5lzBe8wYh2TxGeK8gjBE9ZWarzOxMd59IGGN6kPC7mUPo+09XN8IxryG85z3d/Wd3/4lwFtj70b4OSn2Su68mnDzRg/B38QVwRCn2K2w6c0Qk60RXwD7r7sV1w1RKZrYV4ZTYXu7+TtLxiORRS0EkQ8zsODPbycxqs6mP/8OEwxLZjJKCSOZ0Jpwds5zQxXGyu/+cbEgim1P3kYiI5FNLQURE8mXdxWv169f3pk2bJh2GiEhWmTRp0nJ3b1DSdlmXFJo2bcrEiROTDkNEJKuY2YJ0tlP3kYiI5FNSEBGRfEoKIiKST0lBRETyKSmIiEi+2JKCmT1uZkvNbFoR683M7jezOdGE5B3iikVERNITZ0vhSUK1w6IcD7SIbn0J9d9FRCRBsV2n4O7jzaxpMZucRJiA3QmljXcys92imv4isRgyBIYNK3k7kcqkzoYf2Wn9Mn59UFP+8Y9495XkmEJDNq/XvjhatgUz62tmE81s4rJlyzISnFRNw4bB5MlJRyGSvv1Xvs1jE9vRf/qpmG+MfX9ZcUWzuw8BhgDk5OSogp/kK+03/8mToX17GDcutpBEKsaqVXDttTB0KDRvDkMHMrBL/N/jk0wKSwgTbedpFC2TaqY8XTrvvht+dil0wtAttW8P55xTtn2JZMyGDXDwwTBrFvzpT3DLLbDNNiU+rSIkmRRGA5eZ2QjCpO/fazyhesrr0mnfvvTP7dIlfMj37VvxcYlk3IoV8KtfQY0a8Le/QePGkJOT0RBiSwpmNhzoCtQ3s8WEici3BnD3wcAY4ATC/K0/EeYMlkoo7sFZdelItecOzz0HV14Jd9wBF10Ep5ySSChxnn10dgnrHfhDXPuXsiksAZS2i6a01KUj1dqiRXDxxTBmDBx
0EBxySKLhZMVAs2ROYV056qIRicnw4fD734cxhH/8Ay67LHQdJUhJoRoqrjtIXTkiGbTzznDggeGfslmzpKMBVPuoWiruXH115YjEKDcXBgwIg8gA3brBG29UmoQAailkrfIM/qo1IJKAKVPgwgth0iQ488wwuGwWbpWIWgpZqjxX5qo1IJJB69bBTTeFU0sXLYIXX4QRIypdMsijlkIW07d9kSzwxRdw553hm9i990K9eklHVCwlhUqqpO6hsl7sJSIZsGYNvPIK9OoFbdvC55/DnnsmHVValBQSVNwHf0nXBqgLSKSSevPNcP72ggXQoQO0apU1CQGUFBJVXHkHXRsgkmVWroRrroHHH4eWLcM3u1atko6q1JQUEpDXQtBZQCJVxIYN4Urk2bPhhhvg5puhTp2koyoTJYUEpCYEdQGJZLHlyzcVsLv9dmjSJHQZZTGdkpqQvBaCuodEspA7PP106CYaOjQsO/nkrE8IoKQgIlI6CxbA8cfD+eeHMYPDD086ogqlpCAikq5nnw2nmL73HjzwAPz3v7DPPklHVaE0piAikq4GDcKA8iOPwB57JB1NLJQUMiT1mgRdeCaSJdavh3vuCT9vugmOOw6OPbbSlqioCOo+ypDUWkU660gkC3z6aShrfcMNMGNGGFyGKp0QQC2F2OmaBJEss3Yt9O8Pd90F9evDP/8Jp56adFQZo6QQg9SuotRyFWodiGSBOXPg7rvht78NXUc775x0RBmlpBCD1JaBylWIZIE1a2DUKDjvvHB20axZlWrim0xSUoiJuopEssTYseFb26JFYc6DVq2qbUIADTRXqCFDoGvXsk9+IyIZtGJFuACtWzfYdttwzUEWFrCraGopVCDVNBLJEnkF7ObMgRtvhL/8JWsL2FU0JYVyKuz6A3UbiVRSy5aFmc9q1Aizoe2xhy4aKkDdR+Wk6w9EsoA7PPFEKGD36KNh2UknKSEUQi2FCqDWgUglNn9+GEh+80047DA44oikI6rU1FIQkarrmWfCKaYffAAPPRS+vbVsmXRUlZpaCiJSde26ayhtPXhwmABHSqSkICJVx/r1oTzFhg1hSsxjjw03SZu6j0SkavjkEzjggHB66axZmwrYSakoKYhIdvv5Z7j+eujUCb79NpSreO65Kl/NNC6xJgUz62Zms8xsjpldX8j6Jmb2jpl9amafmdkJccYjIlXQ3Llw773Qu3cocX3yyUlHlNViSwpmVgMYBBwPtAbONrPWBTb7C/CCu+8P9AQeiiueiqaSFiIJ+uEHePLJcL9NG/jiCxg6tNpVNI1DnC2FTsAcd5/r7r8AI4CTCmzjwA7R/R2Br2KMp0KppIVIQsaMCaeZXnghzJwZllXRqTGTEOfZRw2BRSmPFwMHFtjmFuANM7sc2A44urAXMrO+QF+AJgmeVqaSFiIJWr4c+vWDZ5+F1q3h/fdVwC4GSQ80nw086e6NgBOAZ8xsi5jcfYi757h7ToMGDTIeZF5X0e9/v2nSHLUQRDIor4DdiBHhVNNPPoGDDko6qiopzpbCEqBxyuNG0bJUFwLdANz9AzOrA9QHlsYYV6nldRVpwhyRDPv2W2jQIBSwu/vu0E3Url3SUVVpcbYUPgZamFkzM6tFGEgeXWCbhcBRAGbWCqgDLIsxplJJHUzO6ypSQhDJAHd47DHYe+/wjwjQo4cSQgbElhTcPRe4DBgLzCScZTTdzPqb2YnRZn8ELjKzKcBwoLd75bniRIPJIgmYOxeOPhr69An/fEcXOtQoMYm1zIW7jwHGFFh2c8r9GcAhccZQXhpMFsmgp56CSy8N3UWDB8NFF8FWSQ99Vi+qfSQilcfuu8ORR8LDD0OjRklHUy0pKRRQ2GmnIhKTX36BO+6AjRvhllvgmGPCTRKjdlkBmklNJEM+/hg6doS//jWMI1Se4cRqTS2FQmgcQSRGP/0UrjUYOBB22w1Gjw5nFkmloJaCiGTWvHnwwANhEHn6dCWESkYtBRGJ3/f
fw0svwQUXhAJ2c+ZA48YlP08yTi0FEYnXa6+FRNCnD3z+eVimhFBpKSmISDyWLYNevaB791DS+oMPYJ99ko5KSqDuIxGpeBs2wKGHhvGDW28NM6PVqpV0VJKGtJJCVLuoibvPiTkeEclm33wDu+wSrki+5x5o2jTMfSBZo8TuIzP7DTAVeDN63N7MRsUdmIhkkY0b4ZFHoGXL8BNCt5ESQtZJZ0yhP2FynFUA7j4ZaB5nUCKSRebMgaOOgosvhgMOgOOOSzoiKYd0ksJ6d19VYJkuPRQReOIJ2HffMOnNo4/Cf/4De+6ZdFRSDumMKcw0szOBrcysGXAF8GG8YYlIVmjSJLQMBg2Chg2TjkYqQDothcuAjsBG4CVgHXBlnEGJSCW1bl0oXHdzVAH/qKPg5ZeVEKqQdJLCce5+nbvvH92uB46POzARqWQ++igUsLv1Vli4UAXsqqh0ksJfCll2Y0UHIiKV1I8/wtVXQ+fOoVzFq6/Ck0+CWdKRSQyKHFMws+OAbkBDM7s3ZdUOhK4kEakOFiyAhx4KZxfdcQfssEPSEUmMihtoXgpMA9YC01OWrwaujzMoEUnYqlUwcmSoV9S6dTjtVDOhVQtFJgV3/xT41Myec/e1GYxJRJL0yitwySWwdGkoVbHPPkoI1Ug6YwoNzWyEmX1mZrPzbrFHJiKZtXQp9OwJJ58MDRrAhx+qgF01lE5SeBJ4AjDCWUcvAM/HGJOIZNqGDXDIITBqFNx2G0ycCDk5SUclCUgnKWzr7mMB3P1Ld/8LOiVVpGr46qtQt6hGDbjvPvj0U7jxRth666Qjk4SkkxTWmdlWwJdmdrGZ9QDqxhyXiMRp40Z4+OHQPTR4cFh2wglhUFmqtXTKXPQDtiOUt/gbsCPwuziDEpEYzZ4d5kcePx6OPhqOV8NfNikxKbj7R9Hd1cB5AGama9pFstFjj8Fll0GdOvD449C7ty5Ck80U231kZgeY2clmVj963MbMngY+Ku552WjIEOjaFSZPTjoSkRg1bRpaBjNmwAUXKCHIFopMCmb2d+A5oBfwupndArwDTAFaZiS6DBo2LCSE9u3hnHOSjkakgqxbB3/5S7hBKGD30kuw227JxiWVVnHdRycB+7n7z2b2K2ARsK+7z81MaJnXvj2MG5d0FCIV5H//gwsvhM8/h9/9LhSwU8tASlBc99Fad/8ZwN2/A2ZX5YQgUmWsWQNXXhmuRv7pJ3j99TCWoIQgaSguKexpZi9Ft1FAs5THL6Xz4mbWzcxmmdkcMyu0XpKZnWlmM8xsupkNK8tBiEiKhQvDPMl/+ANMm6bpMaVUius+Oq3A4wdL88JmVgMYBBwDLAY+NrPR7j4jZZsWwA3AIe6+0sx2Kc0+RCSyciW8+CL07RuuNZg7F3bfPemoJAsVVxDvrXK+didgTl6Xk5mNIIxTzEjZ5iJgkLuvjPa5tJz7FKl+Ro2CSy+FZcugSxfYe28lBCmzdK5oLquGhMHpPIujZalaAi3N7H0z+9DMuhX2QmbW18wmmtnEZcuWxRSuSJb55hs44ww49VT49a9hwoSQEETKIZ0rmuPefwugK9AIGG9m+7r7qtSN3H0IMAQgJydHcwCKbNgAhx0GixbB7bfDNdeoXpFUiLSTgpnVdvd1pXjtJUDjlMeNomWpFgMfuft6YF5UkrsF8HEp9iNSfSxeHLqGatSA+++HZs1U3loqVIndR2bWycymAl9Ej/czswfSeO2PgRZm1szMagE9gdEFtnmZ0Eogumq6JaDTXkUK2rgRHnggJICHHw7Ljj9eCUEqXDpjCvcD3YEVAO4+BTiipCe5ey5wGTAWmAm84O7Tzay/mZ0YbTYWWGFmMwhXS1/r7itKfxgiVdjnn8Phh8MVV4RrD7p3TzoiqcLS6T7ayt0X2OYXvmxI58XdfQwwpsCym1PuO3B1dBORgoYODQXstt0WnnoKzjtPF6FJrNJJCovMrBPg0bUHlwOajlMkE/baC3r0gAcfhF1
3TToaqQbSSQqXELqQmgDfAv+JlolIRVu7Fvr3D/dvvx2OOCLcRDIknaSQ6+49Y49EpLp7//1QwG7WLOjTRwXsJBHpDDR/bGZjzOx8M9M0nCIVbfVquPzycN3BunUwdiw8+qgSgiSixKTg7nsBtwEdgalm9rKZqeUgUlEWLw4DypdfDlOnwrHHJh2RVGNplblw9/+5+xVAB+AHwuQ7IlJWK1Zsut6gVatQwO6++2D77ZONS6q9dC5e297MepnZv4AJwDLg4NgjE6mK3GHkyFDJ9IorwvgBaCY0qTTSGWieBvwLuMvd/xtzPCJV19dfhzkORo2Cjh3hjTdUwE4qnXSSwp7uvjH2SESqsrwCdkuWwF13Qb9+UDPpepQiWyryr9LM7nH3PwL/NLMtKpO6+6mxRpYBQ4bAsGiut8mTwxzNIhVq0SJo2DAUsBs0KBSwa9ky6ahEilTcV5Xno5+lmnEtmwwbtikZtG8P55yTdERSZWzYEJLADTeElsEf/qBpMSUrFDfz2oTobit33ywxmNllQHlnZqsU2reHceOSjkKqlJkzw0VoH3wQKpn26JF0RCJpS+eU1N8VsuzCig5EpEoYMiR805g9G555Bl57DZo0SToqkbQVN6ZwFmEOhGZm9lLKqrrAqsKfJVLNtWgBp5wSJsDZZZekoxEpteLGFCYQ5lBoBAxKWb4a+DTOoESyxs8/wy23hJIUd9yhAnaS9YobU5gHzCNURRWRgsaPD4XrvvgCLr5YBeykSihyTMHM3o1+rjSz71JuK83su8yFKFLJ/PADXHopdOkSzjJ6661QskIJQaqA4rqP8trA9TMRiEjW+OorePJJuPrqMPfBdtslHZFIhSmypZByFXNjoIa7bwA6A78H9F8g1cvy5fDQQ+H+PvvAvHlwzz1KCFLlpHNK6suEqTj3Ap4AWgDDYo1KpLJwh+efDwXsrroqnGoKmhpTqqx0ksJGd18PnAo84O79gIbxhiVSCXz1FZx8MvTsCXvsAZMmqUSFVHlpTcdpZmcA5wEnR8u2ji8kkUpgwwY4/PBQwO7uu+HKK1XATqqFdK9oPoJQOnuumTUDhscbVryGDIGuXUPdI5HNLFgQEkKNGmEMYepU+OMflRCk2khnOs5pwBXARDPbB1jk7n+LPbIYpRbCUxE8AUIiuPfeMAta3oxoxx4LzZsnG5dIhpX49cfMDgOeAZYABvzazM5z9/fjDi5OKoQn+aZNCwXsJkyA7t3DOIJINZVOm3ggcIK7zwAws1aEJJETZ2AiGTF4cJgWc8cdQxOyZ09dhCbVWjpjCrXyEgKAu88EasUXkkgGeDRvVKtWcMYZMGMGnH22EoJUe+m0FD4xs8HAs9HjXqggnmSrn36Cm28OA8l33hlKVXTpknRUIpVGOi2Fi4G5wJ+i21zCVc0i2WXcOGjXLlyJvGbNptaCiOQrtqVgZvsCewGj3P2uzIQkUsG+/x7+9KdwLvJee8Hbb6u8tUgRiquS+mdCiYtewJtmVtgMbCKV39dfw7PPwjXXwGefKSGIFKO47qNeQDt3PwM4ALiktC9uZt3MbJaZzTGz64vZ7jQzczPTGU1SMZYtgwceCPf32Qfmz4cBA2DbbRMNS6SyKy4prHP3HwHcfVkJ227BzGoQZmw7HmgNnG1mrQvZri5wJfBRaV5fpFDu4dTSVq3Clch5BewaNEg2LpEsUdwH/Z5m9lJ0GwXslfL4pWKel6cTMMfd57r7L8AI4KRCtvs/4E5gbamjF0m1aBH06AG9eoUrkT/9VAXsREqpuIHm0wo8frCUr90QWJTyeDFwYOoGZtYBaOzur5nZtUW9kJn1BfoCNGnSpJRhSLWQmxsKWn3zDQwcCJdfHk47FZFSKW6O5rfi3LGZbQXcC/QuaVt3HwIMAcjJydF5hLLJ/PnQuHEoWPfII7DnnuEmImVSqnGCUlpCmLUtT6NoWZ66QFtgnJnNBw4CRmuwWdKSmxtKWrdqtWlGtKOPVkIQKac46wF
/DLSISm0vAXoC+TVJ3f17UuZ/NrNxwDXuPjHGmKQq+OyzUMBu4kQ46SQ4rWBPp4iUVdotBTOrXZoXdvdc4DJgLDATeMHdp5tZfzM7sXRhikQeegg6dgzzHjz/PIwaBbvvnnRUIlVGOqWzOwGPATsCTcxsP6CPu19e0nPdfQwwpsCym4vYtms6AZfVkCHhTEXYNJeCZBH3UKyubdtQyXTgQKhfv+TniUippNNSuB/oDqwAcPcphJnYssqwYfDuu+G+JtfJIj/+CP36hTIVEKbIfOYZJQSRmKQzprCVuy+wzUsKb4gpnlh16aKJdbLKW2/BRRfBvHnhFNO81oKIxCadlsKiqAvJzayGmV0FzI45LqnOVq2CPn3C2UQ1a8L48XD//UoIIhmQTlK4BLgaaAJ8Szh1tNR1kETS9u23MGIEXHcdTJkChx2WdEQi1UaJ3UfuvpRwOqlIfPISwZVXwt57h4vSNG4gknHpnH30KLDFVcTu3jeWiKR6cYfnngvJYM0aOOEEaNFCCUEkIel0H/0HeCu6vQ/sAqyLMyipJhYuhN/8Bs47L7QOJk8OCUFEEpNO99HzqY/N7Bngvdgikuohr4Dd0qVhEPnSS1XATqQSKEuZi2bArhUdiFQTc+fCHnuEs4oefTRMj9m0adJRiUikxO4jM1tpZt9Ft1XAm8AN8YcmVUpuLtx5J7RuDYMGhWVHHaWEIFLJFNtSsHDF2n5sqm660d1VulpKZ/LkUMDuk0/glFPgjDOSjkhEilBsSyFKAGPcfUN0U0KQ0nnwQTjgAFiyBEaOhJdegt12SzoqESlCOmcfTTaz/WOPRKqWvO8P7dqF6TFnzFCJa5EsUGT3kZnVjMpf7w98bGZfAj8CRmhEdMhQjJJN1qyBG2+ErbcOk+Acfni4iUhWKG5MYQLQAdDcB5KeN96Avn3D9QcqYCeSlYpLCgbg7l9mKBbJVitXwtVXw5NPhovQxo+HQw9NOioRKYPikkIDM7u6qJXufm8M8Ug2Wro0DCLfcAPcfDPUqZN0RCJSRsUlhRrA9kQtBpHNfPMNDB8eJsDJK2BXr17SUYlIORWXFL529/4Zi0Sygzs8/XRIBj/9BN27h3pFSggiVUJxp6SqhSCbmz8funWD3r3DlckqYCdS5RTXUjgqY1FI5ZebC0ccAcuXhzIVF18MW6VzmYuIZJMik4K7f5fJQKSSmjMHmjULBewefxzf1PeLAAASoklEQVT23DMUtBORKklf9aRw69fD7bdDmzabCtgdcYQSgkgVV5bS2VLVffJJKGA3eXIoXnfWWUlHJCIZopaCbO7++6FTp3DK6UsvwQsvwK6aPkOkulBSkCCvgN3++8NvfxsK2J1ySrIxiUjGqfuoulu9OlyJXLs23HMPHHZYuIlItaSWQnX2+uvQti089FBoKWi6DJFqT0mhOlqxAs4/H44/HrbbDt5/H+69VxVNRURJoVpasQJGjYKbboJPP4XOnZOOSEQqiViTgpl1M7NZZjbHzK4vZP3VZjbDzD4zs7fMTCfBx+Xrr8OkN+7QsiUsWAD9+4exBBGRSGxJwcxqAIOA44HWwNlm1rrAZp8COe7eDhgJ3BVXPNWWe7gSuVWr0DKYMycs33nnZOMSkUopzpZCJ2COu89191+AEcBJqRu4+zvu/lP08EOgUYzxVD/z5sGxx4YL0fbbD6ZMUQE7ESlWnKekNgQWpTxeDBxYzPYXAv8ubIWZ9QX6AjRp0qSi4qvacnPhyCPD+MHDD4dpMlXATkRKUCmuUzCzc4EcoEth6919CDAEICcnR+dNFueLL0LRupo14YknYK+9oHHjpKMSkSwR51fHJUDqp1GjaNlmzOxo4EbgRHdfF2M8Vdv69XDbbeG6gwcfDMu6dlVCEJFSibOl8DHQwsyaEZJBT+Cc1A3MbH/gEaCbuy+NMZaqbeLEMG7w2WfQsyecfXbSEYlIloqtpeDuucBlwFhgJvCCu083s/5mdmK02QDCPNAvmtlkMxs
dVzxV1n33wYEHhslvXnklzJu8yy5JRyUiWSrWMQV3HwOMKbDs5pT7R8e5/yrNPVyBnJMTWgl33QU77ZR0VCKS5SrFQLOUwg8/wHXXQZ06MHAgHHJIuImIVACdo5hNxowJM6ENGRLOLlIBOxGpYEoK2WD5cjj3XPjNb2DHHeF//4MBA1TATkQqnJJCNli5Ev71L/jrX8NUmQcWdw2giEjZaUyhslqyBJ57Dq69NpSmWLBAA8kiEju1FCobd3j0UWjdGm65Bb78MixXQhCRDFBSqEy+/BKOOirUKerQIVyM1rx50lGJSDWi7qPKIjc3JITvvoNHHoE+fVTATkQyTkkhabNmhaJ1NWvCU0+F+41UQVxEkqGvokn55Re49VbYd18YNCgs69JFCUFEEqWWQhImTAilKaZNg3POgV69ko5IRARQSyHz/vEP6Nx507UHzz0H9esnHZWICKCkkDl5JSk6dYKLLoLp06F792RjEhEpQN1Hcfv+e/jTn2CbbUIr4eCDw01EpBJSSyFO//pXuAht6FCoXVsF7ESk0lNSiMOyZWEA+cQToV49+PBDuPNOFbATkUpPSSEO338fylzfemuYKvOAA5KOSEQkLRpTqCiLFsGzz8L114fSFAsWhDLXIiJZRC2F8tq4EQYPDpPf3HbbpgJ2SggikoWUFMrjiy/gyCPhkkvCqaZTp6qAnYhkNXUflVVuLhxzDKxaBY89BhdcoIFkEcl6SgqlNXNmmPSmZk145plQwG733ZOOSqRSWr9+PYsXL2bt2rVJh1Jt1KlTh0aNGrH11luX6flKCulatw5uvz3cBgyAq66Cww5LOiqRSm3x4sXUrVuXpk2bYmpJx87dWbFiBYsXL6ZZs2Zleg2NKaTjww/DpDf9+8PZZ8N55yUdkUhWWLt2LfXq1VNCyBAzo169euVqmSkplOSee0JZitWrw7UHTz8dLkgTkbQoIWRWed9vJYWibNwYfnbuDBdfHMpcH398sjGJiMRMSaGgVavCXAdXXhkeH3wwPPQQ7LBDsnGJSJm9/PLLmBmff/55/rJx48bRvUCl4t69ezNy5EggDJJff/31tGjRgg4dOtC5c2f+/e9/lzuWv//97zRv3py9996bsWPHFrrN22+/TYcOHWjbti3nn38+ubm5m8Xdvn172rRpQ5cuXcodT0FKCqlefjkUsHvqKahbVwXsRKqI4cOHc+ihhzJ8+PC0n3PTTTfx9ddfM23aND755BNefvllVq9eXa44ZsyYwYgRI5g+fTqvv/46l156KRs2bNhsm40bN3L++eczYsQIpk2bxh577MFTTz0FwKpVq7j00ksZPXo006dP58UXXyxXPIXR2UcAS5fCZZfBiy9C+/bw6qthYFlEKsxVV8HkyRX7mu3bh4r0xVmzZg3vvfce77zzDj169ODWW28t8XV/+uknHn30UebNm0ft2rUB2HXXXTnzzDPLFe8rr7xCz549qV27Ns2aNaN58+ZMmDCBzp0752+zYsUKatWqRcuWLQE45phj+Pvf/86FF17IsGHDOPXUU2nSpAkAu+yyS7niKYxaCgA//ABvvgl/+1uYKlMJQaTKeOWVV+jWrRstW7akXr16TJo0qcTnzJkzhyZNmrBDGt3G/fr1o3379lvc7rjjji22XbJkCY0bN85/3KhRI5YsWbLZNvXr1yc3N5eJEycCMHLkSBYtWgTA7NmzWblyJV27dqVjx448/fTTJcZXWtW3pbBwYbj47M9/DqUpFi4MXUYiEouSvtHHZfjw4VwZjRH27NmT4cOH07FjxyLP0int2TsDBw4sd4wF9z9ixAj69evHunXrOPbYY6lRowYAubm5TJo0ibfeeouff/6Zzp07c9BBB+W3KipCrEnBzLoB9wE1gKHufkeB9bWBp4GOwArgLHefH2dM+QXsrrsu3D/rrJAUlBBEqpzvvvuOt99+m6lTp2JmbNiwATNjwIAB1KtXj5UrV26xff369WnevDkLFy7khx9+KLG
10K9fP955550tlvfs2ZPrr79+s2UNGzbM/9YP4eK+hg0bbvHczp0789///heAN954g9mzZwOhZVGvXj222247tttuOw4//HCmTJlSoUkBd4/lRkgEXwJ7ArWAKUDrAttcCgyO7vcEni/pdTt27Ohl0aWL+7kHfO5+2GHu4H7MMe7z5pXptUQkPTNmzEh0/4888oj37dt3s2WHH364v/vuu7527Vpv2rRpfozz58/3Jk2a+KpVq9zd/dprr/XevXv7unXr3N196dKl/sILL5QrnmnTpnm7du187dq1PnfuXG/WrJnn5uZusd23337r7u5r1671I4880t966y13D+/nkUce6evXr/cff/zR27Rp41OnTt3i+YW978BET+OzO84xhU7AHHef6+6/ACOAkwpscxLwVHR/JHCUxXSlSw3PZcBnx4VKpk88AWPHQtOmcexKRCqJ4cOHc8opp2y27LTTTmP48OHUrl2bZ599lgsuuID27dtz+umnM3ToUHaMyt7fdtttNGjQgNatW9O2bVu6d++e1hhDcdq0acOZZ55J69at6datG4MGDcrvGjrhhBP46quvABgwYACtWrWiXbt29OjRgyOPPBKAVq1a0a1bN9q1a0enTp3o06cPbdu2LVdMBZnHdNqlmZ0OdHP3PtHj84AD3f2ylG2mRdssjh5/GW2zvMBr9QX6AjRp0qTjggULSh3PVVdBsyXvceX9e8Fuu5X1sESkFGbOnEmrVq2SDqPaKex9N7NJ7p5T0nOzYqDZ3YcAQwBycnLKlMXCINehFRiViEjVE2f30RKgccrjRtGyQrcxs5rAjoQBZxERSUCcSeFjoIWZNTOzWoSB5NEFthkNnB/dPx142+PqzxKRROhfOrPK+37HlhTcPRe4DBgLzARecPfpZtbfzE6MNnsMqGdmc4CrgesLfzURyUZ16tRhxYoVSgwZ4tF8CnXq1Cnza8Q20ByXnJwcz7vST0QqN828lnlFzbxWpQaaRSQ7bb311mWeAUySodpHIiKST0lBRETyKSmIiEi+rBtoNrNlQOkvaQ7qA8tL3Kpq0TFXDzrm6qE8x7yHuzcoaaOsSwrlYWYT0xl9r0p0zNWDjrl6yMQxq/tIRETyKSmIiEi+6pYUhiQdQAJ0zNWDjrl6iP2Yq9WYgoiIFK+6tRRERKQYSgoiIpKvSiYFM+tmZrPMbI6ZbVF51cxqm9nz0fqPzKxp5qOsWGkc89VmNsPMPjOzt8xsjyTirEglHXPKdqeZmZtZ1p++mM4xm9mZ0e96upkNy3SMFS2Nv+0mZvaOmX0a/X2fkEScFcXMHjezpdHMlIWtNzO7P3o/PjOzDhUaQDoTOWfTDagBfAnsCdQCpgCtC2xzKTA4ut8TeD7puDNwzEcA20b3L6kOxxxtVxcYD3wI5CQddwZ+zy2AT4Gdo8e7JB13Bo55CHBJdL81MD/puMt5zIcDHYBpRaw/Afg3YMBBwEcVuf+q2FLoBMxx97nu/gswAjipwDYnAU9F90cCR5mZZTDGilbiMbv7O+7+U/TwQ8JMeNksnd8zwP8BdwJVoXZzOsd8ETDI3VcCuPvSDMdY0dI5Zgd2iO7vCHyVwfgqnLuPB74rZpOTgKc9+BDYycwqbOL5qpgUGgKLUh4vjpYVuo2HyYC+B+plJLp4pHPMqS4kfNPIZiUec9Ssbuzur2UysBil83tuCbQ0s/fN7EMz65ax6OKRzjHfApxrZouBMcDlmQktMaX9fy8VzadQzZjZuUAO0CXpWOJkZlsB9wK9Ew4l02oSupC6ElqD481sX3dflWhU8TobeNLd7zGzzsAzZtbW3TcmHVg2qoothSVA45THjaJlhW5jZjUJTc4VGYkuHukcM2Z2NHAjcKK7r8tQbHEp6ZjrAm2BcWY2n9D3OjrLB5vT+T0vBka7+3p3nwfMJiSJbJXOMV8IvADg7h8AdQiF46qqtP7fy6oqJoWPgRZm1szMahEGkkcX2GY0cH50/3TgbY9GcLJ
UicdsZvsDjxASQrb3M0MJx+zu37t7fXdv6u5NCeMoJ7p7Ns/lms7f9suEVgJmVp/QnTQ3k0FWsHSOeSFwFICZtSIkhWUZjTKzRgO/jc5COgj43t2/rqgXr3LdR+6ea2aXAWMJZy487u7Tzaw/MNHdRwOPEZqYcwgDOj2Ti7j80jzmAcD2wIvRmPpCdz8xsaDLKc1jrlLSPOaxwLFmNgPYAFzr7lnbCk7zmP8IPGpm/QiDzr2z+UuemQ0nJPb60TjJX4GtAdx9MGHc5ARgDvATcEGF7j+L3zsREalgVbH7SEREykhJQURE8ikpiIhIPiUFERHJp6QgIiL5lBSk0jGzDWY2OeXWtJhtmxZVTbKU+xwXVeKcEpWI2LsMr3Gxmf02ut/bzHZPWTfUzFpXcJwfm1n7NJ5zlZltW959S/WgpCCV0c/u3j7lNj9D++3l7vsRiiUOKO2T3X2wuz8dPewN7J6yro+7z6iQKDfF+RDpxXkVoKQgaVFSkKwQtQj+a2afRLeDC9mmjZlNiFoXn5lZi2j5uSnLHzGzGiXsbjzQPHruUVGd/qlRnfva0fI7bNP8FHdHy24xs2vM7HRCfannon1uE33Dz4laE/kf5FGL4sEyxvkBKYXQzOxhM5toYR6FW6NlVxCS0ztm9k607Fgz+yB6H180s+1L2I9UI0oKUhltk9J1NCpathQ4xt07AGcB9xfyvIuB+9y9PeFDeXFU9uAs4JBo+QagVwn77wFMNbM6wJPAWe6+L6ECwCVmVg84BWjj7u2A21Kf7O4jgYmEb/Tt3f3nlNX/jJ6b5yxgRBnj7EYoa5HnRnfPAdoBXcysnbvfTyglfYS7HxGVvvgLcHT0Xk4Eri5hP1KNVLkyF1Il/Bx9MKbaGngw6kPfQKjpU9AHwI1m1gh4yd2/MLOjgI7Ax1F5j20ICaYwz5nZz8B8QvnlvYF57j47Wv8U8AfgQcL8DI+Z2avAq+kemLsvM7O5Uc2aL4B9gPej1y1NnLUIZUtS36czzawv4f96N8KEM58VeO5B0fL3o/3UIrxvIoCSgmSPfsC3wH6EFu4Wk+a4+zAz+wj4DTDGzH5PmJ3qKXe/IY199EotmGdmvypso6geTydCEbbTgcuAI0txLCOAM4HPgVHu7hY+odOOE5hEGE94ADjVzJoB1wAHuPtKM3uSUBiuIAPedPezSxGvVCPqPpJssSPwdVQj/zxCcbTNmNmewNyoy+QVQjfKW8DpZrZLtM2vLP35qWcBTc2sefT4PODdqA9+R3cfQ0hW+xXy3NWE8t2FGUWYPetsQoKgtHFGBd9uAg4ys30IM4/9CHxvZrsCxxcRy4fAIXnHZGbbmVlhrS6pppQUJFs8BJxvZlMIXS4/FrLNmcA0M5tMmEvh6eiMn78Ab5jZZ8CbhK6VErn7WkIFyhfNbCqwERhM+IB9NXq99yi8T/5JYHDeQHOB110JzAT2cPcJ0bJSxxmNVdxDqIQ6hTA38+fAMEKXVJ4hwOtm9o67LyOcGTU82s8HhPdTBFCVVBERSaGWgoiI5FNSEBGRfEoKIiKST0lBRETyKSmIiEg+JQUREcmnpCAiIvn+HwTHIEI2TjLcAAAAAElFTkSuQmCC\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Sample predictions: \n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
indexstructureChainIdalphabetacoilfoldTypefeaturesindexedLabelpredictions
040402F49.C0.0000000.3684210.631579beta[0.37820821383903763, -0.18090368550399255, 1....1alpha
181813R4Z.A0.0474860.4301680.522346beta[0.0735305376855242, 0.036073933585219826, 0.3...1beta
2105651QJP.A0.0000000.7810220.218978beta[-0.026677575515692723, 0.19532147567061817, 0...1beta
3113103ODT.B0.0101350.5777030.412162beta[-0.031560742926115215, 0.23036281214668772, 0...1beta
426862NW8.B0.6704550.0000000.329545alpha[0.12461797985568887, -0.04350474204440586, 0....0alpha
5120973Q7M.A0.0000000.4854650.514535beta[-0.000829671137034893, 0.272390945494175, 0.4...1beta
650484D0Q.A0.0186340.6211180.360248beta[0.16090462882337825, -0.06614755367904546, 0....1beta
713544AXO.A0.0476190.5476190.404762beta[0.062057863970597586, -0.06096343169609706, 0...1beta
820932OJ5.C0.0493830.5925930.358025beta[-0.004402128235641413, 0.24601144136906397, 0...1beta
95213ZN3.A0.7357720.0000000.264228alpha[0.005519157673778205, -0.09471539311624806, 0...0alpha
\n", "
" ], "text/plain": [ " index structureChainId alpha beta coil foldType \\\n", "0 4040 2F49.C 0.000000 0.368421 0.631579 beta \n", "1 8181 3R4Z.A 0.047486 0.430168 0.522346 beta \n", "2 10565 1QJP.A 0.000000 0.781022 0.218978 beta \n", "3 11310 3ODT.B 0.010135 0.577703 0.412162 beta \n", "4 2686 2NW8.B 0.670455 0.000000 0.329545 alpha \n", "5 12097 3Q7M.A 0.000000 0.485465 0.514535 beta \n", "6 5048 4D0Q.A 0.018634 0.621118 0.360248 beta \n", "7 1354 4AXO.A 0.047619 0.547619 0.404762 beta \n", "8 2093 2OJ5.C 0.049383 0.592593 0.358025 beta \n", "9 521 3ZN3.A 0.735772 0.000000 0.264228 alpha \n", "\n", " features indexedLabel predictions \n", "0 [0.37820821383903763, -0.18090368550399255, 1.... 1 alpha \n", "1 [0.0735305376855242, 0.036073933585219826, 0.3... 1 beta \n", "2 [-0.026677575515692723, 0.19532147567061817, 0... 1 beta \n", "3 [-0.031560742926115215, 0.23036281214668772, 0... 1 beta \n", "4 [0.12461797985568887, -0.04350474204440586, 0.... 0 alpha \n", "5 [-0.000829671137034893, 0.272390945494175, 0.4... 1 beta \n", "6 [0.16090462882337825, -0.06614755367904546, 0.... 1 beta \n", "7 [0.062057863970597586, -0.06096343169609706, 0... 1 beta \n", "8 [-0.004402128235641413, 0.24601144136906397, 0... 1 beta \n", "9 [0.005519157673778205, -0.09471539311624806, 0... 
0 alpha " ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from sklearn.linear_model import LogisticRegression\n", "\n", "clf = LogisticRegression()\n", "mcc = mltoolkit.MultiClassClassifier(clf, 'foldType', testFraction=0.1)\n", "matrics = mcc.fit(df)\n", "for k,v in matrics.items(): print(f\"{k}\\t{v}\")\n", " \n", "# Plot ROC \n", "mltoolkit.plot_roc(mcc.TPR, mcc.FPR, mcc.AUC)\n", " \n", "print(\"Sample predictions: \")\n", "mcc.prediction.head(10)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Simple Multilayer Perceptron Classifier" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", " Class\tTrain\tTest\n", "\n", "alpha\t1121\t132\n", "\n", "beta\t1134\t119\n", "\n", "Total time taken: 1.9564759731292725\n", "\n", "Methods\tMLPClassifier\n", "AUC\t0.9354469060351412\n", "F Score\t0.8559670781893004\n", "Accuracy\t0.8605577689243028\n", "Precision\t0.8387096774193549\n", "Recall\t0.8739495798319328\n", "False Positive Rate\t0.16129032258064516\n", "True Positive Rate\t0.8818897637795275\n", "\t\n", "Confusion Matrix\n", "['alpha' 'beta']\n", "[[112 20]\n", " [ 15 104]]\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/opt/miniconda3/envs/mmtf-workshop-2018/lib/python3.6/site-packages/sklearn/neural_network/multilayer_perceptron.py:562: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (200) reached and the optimization hasn't converged yet.\n", " % self.max_iter, ConvergenceWarning)\n" ] }, { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAYUAAAEWCAYAAACJ0YulAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAAIABJREFUeJzt3XnclXP+x/HXR6ks2SqGFkVFSVK3JVuIhLKMrcZSBlkmlGEwlsEYg4ZsWSqUrdCIhkb8bM0YVEgqW9oTLYpC6b77/P74Xvfd6e5ezr2cc93nnPfz8TiPzrmu65zrc537dD7n+/1e1+dr7o6IiAjAZnEHICIiNYeSgoiIFFFSEBGRIkoKIiJSRElBRESKKCmIiEgRJQVJmpmdaWavxR1HTWJmq81stxj229zM3Mxqp3vfqWBmM8zs8Eo8T5/JaqakkKHMbK6Z/RJ9KX1rZiPMbOtU7tPdn3b3bqncRyIzO8jM3jSzVWb2g5n9y8zapmv/JcTztpmdn7jM3bd299kp2l9rM3vezJZFxz/NzK4ws1qp2F9lRcmpZVVew933cve3y9nPJokw3Z/JXKCkkNl6uvvWQAdgX+DamOOplJJ+7ZpZZ+A14CVgF6AF8Anwbip+mde0X9xmtjvwAbAA2NvdtwVOA/KA+tW8r9iOvaa97wK4u24ZeAPmAkclPL4TeCXhcV3gH8B84DvgYWCLhPUnAlOBH4Gvge7R8m2BR4HFwCLgVqBWtK4v8N/o/kPAP4rF9BJwRXR/F+CfwFJgDnBZwnY3AWOAp6L9n1/C8f0HeLCE5f8GnojuHw4sBP4MLIvekzOTeQ8Snns18C3wJLA98HIU84rofpNo+78BBcAaYDXwQLTcgZbR/RHAEOAVYBXhS333hHi6AV8APwAPAu+UdOzRtk8l/j1LWN882nef6PiWAdclrN8feA9YGf0tHwDqJKx34A/AV8CcaNm9hCT0I/AhcGjC9rWi9/nr6Ng+BJoCE6PX+il6X86Itu9B+HytBP4HtC/22b0amAasBWqT8HmOYp8SxfEdcHe0fH60r9XRrTMJn8lom72A14Hvo+f+Oe7/q5l2iz0A3Sr5h9v4P1ET4FPg3oT1g4FxwA6EX5b/Av4erds/+mI6mtBabAzsGa0bCzwCbAXsCEwCLozWFf0HBA6LvkAserw98AshGWwWfWncCNQBdgNmA8dE294ErANOirbdotixbUn4Aj6ihOM+F1gc3T8cyAfuJiSALtGX0x5JvAeFz70jeu4WQAPglGj/9YHngRcT9v02xb7E2TQpLI/e39rA08DoaF3D6Evut9G6y6P3oLSk8C1wbhl//+bRvodFse9D+IJtE63vBBwY7as58BkwoFjcr0fvTWGiPCt6D2oDf4xiqBetu4rwGdsDsGh/DYq/B9HjfYElwAGEZNKH8Hmtm/DZnUpIKlskLCv8PL8HnB3d3xo4sNgx107YV182fCbrExLgH4F60eMD4v6/mmm32APQrZJ/uPCfaDXhV5sDbwDbReuM8OWY+Cu1Mxt+ET4CDC7hNXeKvlgSWxS9gbei+4n/AY3wy+2w6PEFwJvR/QOA+cVe+1rg8ej+TcDEMo6tSXRMe5awrjuwLrp/OOGLfauE9c8BNyTxHhwO/Fr4pVdKHB2AFQmP36b8pDA8Yd1xwOfR/XOA9xLWGSGplpYU1hG13kpZX/gF2SRh2SSgVynbDwDGFov7yHI+YyuAfaL7XwAnlrJd8aTwEPDXYtt8AXRJ+Oz+voTPc2FSmAjcDDQs5ZhLSwq9gY9T+f8uF27qz8tsJ7n7/5lZF+AZwq/RlUAjwq/dD82scFsj/GqD8AttfAmvtyuwObA44XmbEb68NuLubmajCf8RJwK/I3R5FL7OLma2MuEptQhdQoU2ec0EK4D1wM7A58XW7UzoKina1t1/Sng8j9BaKe89AFjq7muKVpptSWhddCe0fADqm1ktdy8oI95E3ybc/5n
wS5copqJjjt6/hWW8znLCsVZqf2bWmtCCyiO8D7UJrbdEG/0NzOxK4LwoVge2IXymIHxmvk4iHgh//z5mdmnCsjrR65a472LOA24BPjezOcDN7v5yEvutSIxSCg00ZwF3f4fwK/Uf0aJlhK6cvdx9u+i2rYdBaQj/IXcv4aUWEFoKDROet42771XKrkcBp5rZroTWwT8TXmdOwmts5+713f24xLDLOJ6fCF0Ip5Ww+nRCq6jQ9ma2VcLjZsA3SbwHJcXwR0L3yAHuvg2hiwxCMikz5iQsJrSAwguGTNWk9M35P0JXVmU9REioraJj+TMbjqNQ0fGY2aHAnwjv7/buvh2hi7HwOaV9ZkqyAPhbsb//lu4+qqR9F+fuX7l7b0L35R3AmOhvXN77v4DQVSlVoKSQPe4Bjjazfdx9PaGvebCZ7QhgZo3N7Jho20eBc82sq5ltFq3b090XE874ucvMtonW7R61RDbh7h8TvnyHAxPcvbBlMAlYZWZXm9kWZlbLzNqZ2X4VOJ5rCL82LzOz+ma2vZndSugCurnYtjebWZ3oi60H8HwS70FJ6hMSyUoz2wH4S7H131H5L51XgL3N7KTojJs/AL8pY/u/AAeZ2SAz+00Uf0sze8rMtktif/UJYxirzWxP4OIkts8nDLLXNrMbCS2FQsOBv5pZKwvam1mDaF3x92UYcJGZHRBtu5WZHW9mSZ01ZWZnmVmj6G9Y+JlaH8W2ntL/Bi8DO5vZADOrG31uDkhmn7KBkkKWcPelwBOEwV0IZ3fMAt43sx8Jvzz3iLadRBiwHUz4NfgOockPoe+7DjCT0I0zhrK7MZ4Bjor+LYylgPDl3IFw5lFh4ti2AsfzX+AYwsDsYkK30L7AIe7+VcKm30ZxfkMY2L3I3Qu7nEp9D0pxD2HQdhnwPvBqsfX3ElpGK8zsvmSPJTqeZYSWz52ErqG2hDNs1pay/deEBNgcmGFmPxBaYlMI40jluZLQpbeK8CX9bDnbTyAc75eE93oNG3fx3E0Yr3mNkGweJbxXEMaIRprZSjM73d2nEMaYHiD8bWYR+v6T1Z1wzKsJ73kvd//F3X8mnAX2brSvAxOf5O6rCCdP9CR8Lr4CjqjAfoUNZ46IZJzoCtin3L2sbpgaycw2I5wSe6a7vxV3PCKF1FIQSRMzO8bMtjOzumzo438/5rBENqKkIJI+nQlnxywjdHGc5O6/xBuSyMbUfSQiIkXUUhARkSIZd/Faw4YNvXnz5nGHISKSUT788MNl7t6ovO0yLik0b96cKVOmxB2GiEhGMbN5yWyn7iMRESmipCAiIkWUFEREpIiSgoiIFFFSEBGRIilLCmb2mJktMbPppaw3M7vPzGZFE5J3TFUsIiKSnFS2FEYQqh2W5ligVXTrR6j/LiIiMUrZdQruPtHMmpexyYmECdidUNp4OzPbOarpLyKSsYYOhWeeKX+7ZNUr+Int1i3lNwc25557qu91SxLnmEJjNq7XvjBatgkz62dmU8xsytKlS9MSnIhIZT3zDEydWj2vte+KN3l0SntumfFbzNdXz4uWISOuaHb3ocBQgLy8PFXwk6xU3b8uJT5Tp0KHDvD221V4kZUr4aqrYPhwaNkShg9mcJfU/46Ps6WwiDDRdqEm0TKRnFSdvy4lXh06wO9+V4UXKCiAgw6Cxx6DP/0Jpk2DLiXOilvt4mwpjAP6m9lowqTvP2g8QdKlJv4qr5Zfl5LZli+HHXaAWrXgb3+Dpk0hLy+tIaQsKZjZKOBwoKGZLSRMRL45gLs/DIwHjiPM3/ozYc5gkUqp6Jf8O++Ef9P04yspVf51KZnLHZ5+Gi6/HG6/HS64AE4+OZZQUnn2Ue9y1jvwh1TtX3JLYddLhw7Jbd+lS/gC7tcvtXGJlGvBArjoIhg/Hg48EA4+ONZwMmKgWXJTRX79q+tFMtKoUXDhhWEM4Z57oH//0HUUI5W5kBqrIgOv6nqRjLT99nDAATB9eug6ijkhgFoKkoS
4BmX161+yTn4+DB4Mv/4K110H3bvDMceAWdyRFVFSyGHJftnHNSirX/+SVT75BM47Dz78EE4/PQwum9WohABKCjkt2cFZDcqKVMHatXDrreGsoh12gOefh1NOqXHJoJCSQg4orUWg7hmRNPjqK7jjjvDL6u67oUGDuCMqkwaac0BpA7bqnhFJkdWrw3UHAO3aweefw8iRNT4hgFoKOUMtApE0ef310Nc6bx507Aht2sBuu8UdVdLUUshSQ4fC4YeHm+rpiKTBihVhILlbN6hTJ5yh0aZN3FFVmJJClkrsMlI3kUiKFRSEK5FHjoRrrw1nGh16aNxRVYq6j7KYuoxEUmzZsg0F7G67DZo1C11GGUwtBRGRinKHJ56A1q3DfAcAJ52U8QkBlBRERCpm3jw49ljo0yeMGRx2WNwRVSt1H2WgZK5ErkjFUBFJ0lNPwcUXh5bC/ffDJZfAZtn12zq7jiZHJFMoToPLIinQqFEYUJ4xI1Q0zbKEAGopZCwNIoukwbp1cNdd4d8bbgjF67p1q7ElKqpD9qW5LFZ47YGuOxBJg48/DmWtr70WZs4MXUaQ1QkBlBQySmIBO3UNiaTImjXw5z/DfvvBN9/AP/8ZJsPJ8mRQSN1HNVRJg8kqYCeSBrNmwT/+AeecE7qOtt8+7ojSSi2FGqqkwWS1EERSZPVqePLJcL9dO/jiC3jssZxLCKCWQo1T2EJQq0AkTSZMCAXsFiyAvLxw7UGLFnFHFRu1FGoYjRuIpMny5eECtO7dYcst4T//ycgCdtVNLYUaSC0EkRQrLGA3a1aYK/n666FevbijqhGUFEQkdyxdGia6qVUrzIa266669L8YdR+JSPZzh8cfDwXshg0Ly048UQmhBEoKIpLd5s4NVyL//vew995wxBFxR1SjqfsoBmUVtFMhO5Fq9OSToYCdGTz4IFx4YVbWK6pOendiUFZBO511JFKNdtoplLaeMSMkByWEcqmlkEa6BkEkxdatgzvvDGcX3XhjKF7XrVvcUWUUpc000jUIIin00UehXtH114crkgsL2EmFqKWQZmohiFSzX36Bm28O9YoaNYKxY8PUmFIpKW0pmFl3M/vCzGaZ2TUlrG9mZm+Z2cdmNs3MjktlPHEoLHetktciKTJ7Ntx9N/TtG0pcKyFUScqSgpnVAoYAxwJtgd5m1rbYZtcDz7n7vkAv4MFUxROXxEFldRuJVJMff4QRI8L9vfaCr76C4cNzsoBddUtl99H+wCx3nw1gZqOBE4GZCds4sE10f1vgmxTGExt1GYlUo/Hj4aKLYNGiMAlOmzbhymSpFqnsPmoMLEh4vDBalugm4CwzWwiMBy4t6YXMrJ+ZTTGzKUuXLk1FrCJS0y1bBmefDccfD/Xrw7vvqoBdCsR99lFvYIS7NwGOA540s01icveh7p7n7nmNGjVKe5AVkTiGoHEEkWpSWMBu9OhwqulHH8GBB8YdVVZKZffRIqBpwuMm0bJE5wHdAdz9PTOrBzQElqQwrmqXeIXyO++Ef7t0Cf9qHEGkCr77LpxRVKtWOLto112hffu4o8pqqWwpTAZamVkLM6tDGEgeV2yb+UBXADNrA9QDMq5/KHEwuUsXeOSRMIZQeOvXL8bgRDKROzz6KOyxR/jVBdCzpxJCGqSspeDu+WbWH5gA1AIec/cZZnYLMMXdxwF/BIaZ2UDCoHNf98y54kRXKIukwOzZcMEF8Oab4VfWUUfFHVFOSenFa+4+njCAnLjsxoT7M4GDUxlDKukKZZFqNnIkXHJJ6C56+OGQHFSvKK10RXMlqIUgkiK77AJHHgkPPQRNmsQdTU5SUkhSaYPJaiGIVMGvv8Ltt8P69XDTTXD00eEmsVFSSFJiy6AwGWgAWaQKJk8OE99Mnx6uP3AP8x5IrJQUKkBdRSLV4Oefw7UGgwfDzjvDuHHhzCKpETSCIyLpNWcO3H9/GESeMUMJoYZRS0FEUu+HH+CFF+Dcc0MBu1mzoGnT8p8naae
Wgoik1iuvhERw/vnw+edhmRJCjaWkICKpsXQpnHkm9OgRSlq/9x7suWfcUUk51H0kItWvoAAOOSSMH9x8M1xzDdSpE3dUkoSkkkJUu6iZu89KcTwiksm+/RZ23DFckXzXXdC8ObRrF3dUUgHldh+Z2fHAp8Dr0eMOZjY21YHVFIWlsFUCW6QM69eHSpCtW4d/IXQbKSFknGTGFG4BDgBWArj7VKBlKoOqSVTfSKQcs2ZB165hNrT99oNjjok7IqmCZLqP1rn7Stv4SsOMqWRaHXTRmkgpHn88FLCrUweGDYPzztNVyRkumaTwmZmdDmxmZi2Ay4D3UxuWiGSEZs1Cy2DIEGhcfLZdyUTJdB/1BzoB64EXgLXA5akMSkRqqLVrQ+G6G6MK+F27wosvKiFkkWSSwjHufrW77xvdrgGOTXVgIlLDfPABdOoUTjGdPz8UsJOsk0xSuL6EZddVdyAiUkP99BNccQV07hzKVbz8MowYobGDLFXqmIKZHQN0Bxqb2d0Jq7YhdCWJSC6YNw8efDCcXXT77bDNNnFHJClU1kDzEmA6sAaYkbB8FXBNKoMSkZitXAljxoR6RW3bhtNONRNaTig1Kbj7x8DHZva0u69JY0wiEqeXXoKLL4YlS0Kpij33VELIIcmMKTQ2s9FmNs3Mviy8pTwyEUmvJUugVy846SRo1Ajef18F7HJQMklhBPA4YISzjp4Dnk1hTCKSbgUFcPDBMHYs3HorTJkCeXlxRyUxSCYpbOnuEwDc/Wt3vx6dkiqSHb75JtQtqlUL7r0XPv4YrrsONt887sgkJskkhbVmthnwtZldZGY9gfopjit2KoQnWW39enjoodA99PDDYdlxx4VBZclpyZS5GAhsRShv8TdgW+D3qQyqJlAhPMlaX34Z5keeOBGOOgqOVcNfNig3Kbj7B9HdVcDZAGaWtde0Dx26cUJQITzJKo8+Cv37Q7168Nhj0LevLkKTjZTZfWRm+5nZSWbWMHq8l5k9AXxQ1vMymVoIktWaNw8tg5kz4dxzlRBkE2Vd0fx34BTgE+B6M3sZuAS4A7goPeHFQy0EyRpr18Jf/xru33prKGDXtWu8MUmNVlb30YnAPu7+i5ntACwA9nb32ekJTUSq5H//C/MbfP45/P73oYCdWgZSjrK6j9a4+y8A7v498KUSgkgGWL0aLr88XI3888/w6qthLEEJQZJQVlLYzcxeiG5jgRYJj19I5sXNrLuZfWFms8ysxHpJZna6mc00sxlm9kxlDkJEEsyfH+ZJ/sMfYPp0TY8pFVJW99EpxR4/UJEXNrNawBDgaGAhMNnMxrn7zIRtWgHXAge7+woz27Ei+xCRyIoV8Pzz0K9fuNZg9mzYZZe4o5IMVFZBvDeq+Nr7A7MKu5zMbDRhnGJmwjYXAEPcfUW0zyVV3KdI7hk7NsyTvHQpdOkCe+yhhCCVlswVzZXVmDA4XWhhtCxRa6C1mb1rZu+bWfeSXsjM+pnZFDObsnTp0hSFK5Jhvv0WTjsNfvtb+M1vYNKkkBBEqiCZK5pTvf9WwOFAE2Cime3t7isTN3L3ocBQgLy8PM0BKFJQAIceCgsWwG23wZVXql6RVIukk4KZ1XX3tRV47UVA04THTaJliRYCH7j7OmBOVJK7FTC5AvsRyR0LF4auoVq14L77oEULlbeWalVu95GZ7W9mnwJfRY/3MbP7k3jtyUArM2thZnWAXsC4Ytu8SGglEF013RrQaa8ixa1fD/ffHxLAQw+FZcceq4Qg1S6ZMYX7gB7AcgB3/wQ4orwnuXs+0B+YAHwGPOfuM8zsFjM7IdpsArDczGYCbwFXufvyih+GSBb7/HM47DC47LJw7UGPHnFHJFksme6jzdx9nm184UtBMi/u7uOB8cWW3Zhw34EroltsCovgwYa6RyI1wvDhoYDdllvCyJFw9tm6CE1SKpmWwgIz2x9wM6tlZgOArJqOs7AIHqgQntQwu+8OPXvCZ5/BOecoIUjKJdN
SuJjQhdQM+A74v2hZVlERPKkR1qyBW24J92+7DY44ItxE0iSZpJDv7r1SHolIrnv33VDA7osv4PzzVcBOYpFM99FkMxtvZn3MLOun4RRJu1Wr4NJLw3UHa9fChAkwbJgSgsSi3KTg7rsDtwKdgE/N7EUzU8tBpLosXBgGlC+9FD79FLp1izsiyWFJlblw9/+5+2VAR+BH4OmURiWS7ZYv33C9QZs2oYDdvffC1lvHG5fkvGQuXtvazM40s38Bk4ClwEEpj0wkG7nDmDGhkulll4XxA4Cdd443LpFIMgPN04F/AXe6+39SHI9I9lq8OMxxMHYsdOoEr72mAnZS4ySTFHZz9/Upj0QkmxUWsFu0CO68EwYOhNpx16MU2VSpn0ozu8vd/wj808w2qUzq7r9NaWQi2WDBAmjcOBSwGzIkFLBr3TruqERKVdZPlWejfys045qIEFoGQ4bAtdeGlsEf/qBpMSUjlDXz2qTobht33ygxmFl/oKozs4lkp88+CxehvfdeqGTas2fcEYkkLZlTUn9fwrLzqjuQOAwdCocfvqHukUiVDR0aaqZ8+SU8+SS88go0axZ3VCJJK2tM4QzCHAgtzOyFhFX1gZUlPyuzFBbCUxE8qTatWsHJJ4cJcHbcMe5oRCqsrDGFSYQ5FJoAQxKWrwI+TmVQ6aRCeFIlv/wCN90USlLcfrsK2EnGK2tMYQ4wh1AVVUSKmzgxFK776iu46CIVsJOsUOqYgpm9E/27wsy+T7itMLPv0xeiSA3z449wySXQpUs4y+iNN0LJCiUEyQJldR8VtoEbpiMQkYzxzTcwYgRccUWY+2CrreKOSKTalNpSSLiKuSlQy90LgM7AhYD+F0huWbYMHnww3N9zT5gzB+66SwlBsk4yp6S+SJiKc3fgcaAV8ExKoxKpKdzh2WdDAbsBA8KppgA77RRvXCIpkkxSWO/u64DfAve7+0CgcWrDEqkBvvkGTjoJevWCXXeFDz9UiQrJeklNx2lmpwFnAydFyzZPXUgiNUBBARx2WChg949/wOWXq4Cd5IRkPuW/By4hlM6ebWYtgFGpDUskJvPmQZMmoYDdgw/CbrtBy5ZxRyWSNslMxzkduAyYYmZ7Agvc/W8pj0wknQoK4O67wyxohTOideumhCA5p9yWgpkdCjwJLAIM+I2Zne3u76Y6OJG0mD49FLCbNAl69AjjCCI5Kpnuo8HAce4+E8DM2hCSRF4qAxNJi4cfDtNibrttKIbVq5cuQpOclszZR3UKEwKAu38G1EldSCJp4NG8UW3awGmnwcyZ0Lu3EoLkvGRaCh+Z2cPAU9HjM8migniSY37+GW68MQwk33FHKFXRpUvcUYnUGMm0FC4CZgN/im6zCVc1i2SWt9+G9u3DlcirV29oLYhIkTJbCma2N7A7MNbd70xPSCLV7Icf4E9/ChPg7L47vPmmyluLlKKsKql/JpS4OBN43cxKmoFNpOZbvBieegquvBKmTVNCEClDWd1HZwLt3f00YD/g4oq+uJl1N7MvzGyWmV1TxnanmJmbmc5okuqxdCncf3+4v+eeMHcuDBoEW24Za1giNV1ZSWGtu/8E4O5Ly9l2E2ZWizBj27FAW6C3mbUtYbv6wOXABxV5fZESuYdTS9u0gT/+cUMBu0aN4o1LJEOU9UW/m5m9EN3GArsnPH6hjOcV2h+Y5e6z3f1XYDRwYgnb/RW4A1hT4ehFEi1YAD17wplnhiuRP/5YBexEKqisgeZTij1+oIKv3RhYkPB4IXBA4gZm1hFo6u6vmNlVpb2QmfUD+gE0a9asgmFITsjPh8MPh2+/hcGD4dJLw2mnIlIhZc3R/EYqd2xmmwF3A33L29bdhwJDAfLy8nQeoWwwdy40bRoqmD7ySChgt9tucUclkrEqNE5QQYsIs7YVahItK1QfaAe8bWZzgQOBcRpslqTk54eS1m3abJgR7aijlBBEqiiVBeInA62iUtuLgF7A7wpXuvsPJMz/bGZvA1e
6+5QUxiTZYNq0UMBuyhQ48UQ4pXhPp4hUVtItBTOrW5EXdvd8oD8wAfgMeM7dZ5jZLWZ2QsXCFIk8+CB06hTmPXj2WRg7FnbZJe6oRLJGMqWz9wceBbYFmpnZPsD57n5pec919/HA+GLLbixl28OTCVhylHsoVteuXahkOngwNGxY/vNEpEKS6T66D+hBuLoZd//EzHRJqKTHTz/B9deHgeRBg8IUmYcdFndUIlkrme6jzdx9XrFlBakIRmQjb7wBe+8N99wDa9eqgJ1IGiSTFBZEXUhuZrXMbADwZYrjkly2ciWcf344m6h2bZg4Ee67T3MdiKRBMknhYuAKoBnwHeHU0QrXQRJJ2nffwejRcPXV8MkncOihcUckkjPKHVNw9yWE00lFUqcwEVx+OeyxR7goTQPJImmXzNlHw4BNOnPdvV9KIpLc4g5PPx2SwerVcNxx0KqVEoJITJLpPvo/4I3o9i6wI7A2lUFJjpg/H44/Hs4+O7QOpk4NCUFEYpNM99GziY/N7EngvymLSHJDYQG7JUvCIPIll6iAnUgNUJkyFy2Anao7EMkRs2fDrruGs4qGDQvTYzZvHndUIhIpt/vIzFaY2ffRbSXwOnBt6kOTrJKfD3fcAW3bwpAhYVnXrkoIIjVMmS0FMzNgHzZUN13vriuIpIKmTg0F7D76CE4+GU47Le6IRKQUZbYUogQw3t0LopsSglTMAw/AfvvBokUwZgy88ALsvHPcUYlIKZI5+2iqme2b8kgkuxT+fmjfPkyPOXOmSlyLZIBSu4/MrHZU/npfYLKZfQ38BBihEdExTTFKJlm9Gq67DjbfPEyCowJ2IhmlrDGFSUBHQHMfSHJeew369QvXH1x66YZy1yKSMcpKCgbg7l+nKRbJVCtWwBVXwIgR4SK0iRPhkEPijkpEKqGspNDIzK4obaW7352CeCQTLVkSBpGvvRZuvBHq1Ys7IhGppLIGmmsBWwP1S7llrKFDw8W0U6fGHUkG+/bbMPsZbChgd9ttSggiGa6slsJid78lbZGk0TPPhITQoQP87ndxR5Nh3OGJJ2DgQPj5Z+jRI9QratAg7shEpBqUO6aQrTp0gLffjjuKDDN3Llx4YRhQPvjPfBVbAAASrUlEQVRgGD5cBexEskxZSaFr2qKQmi8/H444ApYtC2UqLroINkvmMhcRySSlJgV3/z6dgUgNNWsWtGgRCtg99hjstlsoaCciWUk/9aRk69aFgeO99tpQwO6II5QQRLJcZUpnS7b76KNQwG7q1FC87owz4o5IRNJELQXZ2H33wf77h1NOX3gBnnsOdtL0GSK5ImeSQuG1Cbo+oRSFBez23RfOOScUsDv55HhjEpG0y5mkUHhtAuj6hI2sWgX9+8OVV4bHhx4aBpS33z7euEQkFjk1pqBrE4p59dVw3cGCBTBggArYiUjutBQkwfLl0KcPHHssbLUVvPsu3H23EoKIKCnkpOXLYexYuOEG+Phj6Nw57ohEpIZIaVIws+5m9oWZzTKza0pYf4WZzTSzaWb2hpnpJPhUWbw4THrjDq1bw7x5cMstULdu3JGJSA2SsqRgZrWAIcCxQFugt5m1LbbZx0Ceu7cHxgB3piqenOUeBo7btAktg1mzwnINJItICVLZUtgfmOXus939V2A0cGLiBu7+lrv/HD18H2iSwnhyz5w50K1buBBtn33gk09UwE5EypTKs48aAwsSHi8EDihj+/OAf5e0wsz6Af0AmjVrVl3xZbf8fDjyyDB+8NBDYZpMFbATkXLUiFNSzewsIA/oUtJ6dx8KDAXIy8vzNIaWeb76KhStq10bHn8cdt8dmjaNOyoRyRCp/Om4CEj8NmoSLduImR0FXAec4O5rUxhPdlu3Dm69Fdq1gwceCMsOP1wJQUQqJJUthclAKzNrQUgGvYCNriM2s32BR4Du7r4khbFktylTwrjBtGnQqxf07h13RCKSoVLWUnD3fKA/MAH4DHjO3WeY2S1mdkK02SD
CPNDPm9lUMxuXqniy1r33wgEHhMlvXnoJRo2CHXeMOyoRyVApHVNw9/HA+GLLbky4f1Qq95/VCktS5OWFVsKdd8J228UdlYhkuBox0CwV8OOPcPXVUK8eDB4c5ko++OC4oxKRLKFzFDPJ+PFhJrShQ8PZRa4TsUSkeikpZIJly+Css+D442HbbeF//4NBg1TATkSqnZJCJlixAv71L/jLX8JUmQeUdQ2giEjlaUyhplq0CJ5+Gq66KpSmmDdPA8kiknJqKdQ07jBsGLRtCzfdBF9/HZYrIYhIGigp1CRffw1du4Y6RR07hovRWraMOyoRySHqPqop8vNDQvj+e3jkETj/fBWwE5G0U1KI2xdfhKJ1tWvDyJHhfhNVEBeReOinaFx+/RVuvhn23huGDAnLunRRQhCRWKmlEIdJk0JpiunT4Xe/gzPPjDsiERFALYX0u+ce6Nx5w7UHTz8NDRvGHZWICKCkkD6FJSn23x8uuABmzIAePeKNSUSkGHUfpdoPP8Cf/gRbbBFaCQcdFG4iIjWQWgqp9K9/hYvQhg+HunVVwE5EajwlhVRYujQMIJ9wAjRoAO+/D3fcoQJ2IlLjKSmkwg8/hDLXN98cpsrcb7+4IxIRSYrGFKrLggXw1FNwzTWhNMW8eaHMtYhIBlFLoarWr4eHHw6T39x664YCdkoIIpKBlBSq4quv4Mgj4eKLw6mmn36qAnYiktHUfVRZ+flw9NGwciU8+iice64GkkUk4ykpVNRnn4VJb2rXhiefDAXsdtkl7qhEaqR169axcOFC1qxZE3coOaNevXo0adKEzTffvFLPV1JI1tq1cNtt4TZoEAwYAIceGndUIjXawoULqV+/Ps2bN8fUkk45d2f58uUsXLiQFi1aVOo1NKaQjPffD5Pe3HIL9O4NZ58dd0QiGWHNmjU0aNBACSFNzIwGDRpUqWWmpFCeu+4KZSlWrQrXHjzxRLggTUSSooSQXlV9v5UUSrN+ffi3c2e46KJQ5vrYY+ONSUQkxZQUilu5Msx1cPnl4fFBB8GDD8I228Qbl4hU2osvvoiZ8fnnnxcte/vtt+lRrFJx3759GTNmDBAGya+55hpatWpFx44d6dy5M//+97+rHMvf//53WrZsyR577MGECRNK3ObNN9+kY8eOtGvXjj59+pCfn7/R+smTJ1O7du2iWKuTkkKiF18MBexGjoT69VXATiRLjBo1ikMOOYRRo0Yl/ZwbbriBxYsXM336dD766CNefPFFVq1aVaU4Zs6cyejRo5kxYwavvvoql1xyCQUFBRtts379evr06cPo0aOZPn06u+66KyNHjixaX1BQwNVXX023bt2qFEtpdPYRwJIl0L8/PP88dOgAL78cBpZFpNoMGABTp1bva3boECrSl2X16tX897//5a233qJnz57cfPPN5b7uzz//zLBhw5gzZw5169YFYKedduL000+vUrwvvfQSvXr1om7durRo0YKWLVsyadIkOnfuXLTN8uXLqVOnDq1btwbg6KOP5u9//zvnnXceAPfffz+nnHIKkydPrlIspVFLAeDHH+H11+FvfwtTZSohiGSNl156ie7du9O6dWsaNGjAhx9+WO5zZs2aRbNmzdgmiW7jgQMH0qFDh01ut99++ybbLlq0iKZNmxY9btKkCYsWLdpom4YNG5Kfn8+UKVMAGDNmDAsWLCh6/tixY7n44ovLjauycrelMH9+uPjsz38OpSnmzw9dRiKSEuX9ok+VUaNGcXk0RtirVy9GjRpFp06dSj1Lp6Jn7wwePLjKMRbf/+jRoxk4cCBr166lW7du1KpVC4ABAwZwxx13sNlmqfs9n9KkYGbdgXuBWsBwd7+92Pq6wBNAJ2A5cIa7z01lTEUF7K6+Otw/44yQFJQQRLLO999/z5tvvsmnn36KmVFQUICZMWjQIBo0aMCKFSs22b5hw4a0bNmS+fPn8+OPP5bbWhg4cCBvvfXWJst79erFNddcs9Gyxo0bF/3
qh3BxX+PGjTd5bufOnfnPf/4DwGuvvcaXX34JwJQpU+jVqxcAy5YtY/z48dSuXZuTTjopiXcjSe6ekhshEXwN7AbUAT4B2hbb5hLg4eh+L+DZ8l63U6dOXhlduriftd/n7oce6g7uRx/tPmdOpV5LRJIzc+bMWPf/yCOPeL9+/TZadthhh/k777zja9as8ebNmxfFOHfuXG/WrJmvXLnS3d2vuuoq79u3r69du9bd3ZcsWeLPPfdcleKZPn26t2/f3tesWeOzZ8/2Fi1aeH5+/ibbfffdd+7uvmbNGj/yyCP9jTfe2GSbPn36+PPPP1/ifkp634EpnsR3dyrHFPYHZrn7bHf/FRgNnFhsmxOBwmH1MUBXS9GVLrU8n0HTjgmVTB9/HCZMgObNU7ErEakhRo0axcknn7zRslNOOYVRo0ZRt25dnnrqKc4991w6dOjAqaeeyvDhw9k2Knt/66230qhRI9q2bUu7du3o0aNHUmMMZdlrr704/fTTadu2Ld27d2fIkCFFXUPHHXcc33zzDQCDBg2iTZs2tG/fnp49e3LkkUdWab8VYZ6i0y7N7FSgu7ufHz0+GzjA3fsnbDM92mZh9PjraJtlxV6rH9APoFmzZp3mzZtX4XgGDIAWi/7L5fftDjvvXNnDEpEK+Oyzz2jTpk3cYeSckt53M/vQ3fPKe25GDDS7+1BgKEBeXl6lslgY5DqkGqMSEck+qew+WgQ0TXjcJFpW4jZmVhvYljDgLCIiMUhlUpgMtDKzFmZWhzCQPK7YNuOAPtH9U4E3PVX9WSISC/2XTq+qvt8pSwrung/0ByYAnwHPufsMM7vFzE6INnsUaGBms4ArgGtKfjURyUT16tVj+fLlSgxp4tF8CvXq1av0a6RsoDlV8vLyvPBKPxGp2TTzWvqVNvNaVg00i0hm2nzzzSs9A5jEQ7WPRESkiJKCiIgUUVIQEZEiGTfQbGZLgYpf0hw0BJaVu1V20THnBh1zbqjKMe/q7o3K2yjjkkJVmNmUZEbfs4mOOTfomHNDOo5Z3UciIlJESUFERIrkWlIYGncAMdAx5wYdc25I+THn1JiCiIiULddaCiIiUgYlBRERKZKVScHMupvZF2Y2y8w2qbxqZnXN7Nlo/Qdm1jz9UVavJI75CjObaWbTzOwNM9s1jjirU3nHnLDdKWbmZpbxpy8mc8xmdnr0t55hZs+kO8bqlsRnu5mZvWVmH0ef7+PiiLO6mNljZrYkmpmypPVmZvdF78c0M+tYrQEkM5FzJt2AWsDXwG5AHeAToG2xbS4BHo7u9wKejTvuNBzzEcCW0f2Lc+GYo+3qAxOB94G8uONOw9+5FfAxsH30eMe4407DMQ8FLo7utwXmxh13FY/5MKAjML2U9ccB/wYMOBD4oDr3n40thf2BWe4+291/BUYDJxbb5kRgZHR/DNDVzCyNMVa3co/Z3d9y95+jh+8TZsLLZMn8nQH+CtwBZEPt5mSO+QJgiLuvAHD3JWmOsbolc8wObBPd3xb4Jo3xVTt3nwh8X8YmJwJPePA+sJ2ZVdvE89mYFBoDCxIeL4yWlbiNh8mAfgAapCW61EjmmBOdR/ilkcnKPeaoWd3U3V9JZ2AplMzfuTXQ2szeNbP3zax72qJLjWSO+SbgLDNbCIwHLk1PaLGp6P/3CtF8CjnGzM4C8oAucceSSma2GXA30DfmUNKtNqEL6XBCa3Cime3t7itjjSq1egMj3P0uM+sMPGlm7dx9fdyBZaJsbCksApomPG4SLStxGzOrTWhyLk9LdKmRzDFjZkcB1wEnuPvaNMWWKuUdc32gHfC2mc0l9L2Oy/DB5mT+zguBce6+zt3nAF8SkkSmSuaYzwOeA3D394B6hMJx2Sqp/++VlY1JYTLQysxamFkdwkDyuGLbjAP6RPdPBd70aAQnQ5V7zGa2L/AIISFkej8zlHPM7v6Duzd09+bu3pwwjnKCu2fyXK7JfLZfJLQSMLOGhO6k2ekMspolc8z
zga4AZtaGkBSWpjXK9BoHnBOdhXQg8IO7L66uF8+67iN3zzez/sAEwpkLj7n7DDO7BZji7uOARwlNzFmEAZ1e8UVcdUke8yBga+D5aEx9vrufEFvQVZTkMWeVJI95AtDNzGYCBcBV7p6xreAkj/mPwDAzG0gYdO6byT/yzGwUIbE3jMZJ/gJsDuDuDxPGTY4DZgE/A+dW6/4z+L0TEZFqlo3dRyIiUklKCiIiUkRJQUREiigpiIhIESUFEREpoqQgNY6ZFZjZ1IRb8zK2bV5aNckK7vPtqBLnJ1GJiD0q8RoXmdk50f2+ZrZLwrrhZta2muOcbGYdknjOADPbsqr7ltygpCA10S/u3iHhNjdN+z3T3fchFEscVNEnu/vD7v5E9LAvsEvCuvPdfWa1RLkhzgdJLs4BgJKCJEVJQTJC1CL4j5l9FN0OKmGbvcxsUtS6mGZmraLlZyUsf8TMapWzu4lAy+i5XaM6/Z9Gde7rRstvtw3zU/wjWnaTmV1pZqcS6ks9He1zi+gXfl7Umij6Io9aFA9UMs73SCiEZmYPmdkUC/Mo3Bwtu4yQnN4ys7eiZd3M7L3ofXzezLYuZz+SQ5QUpCbaIqHraGy0bAlwtLt3BM4A7ivheRcB97p7B8KX8sKo7MEZwMHR8gLgzHL23xP41MzqASOAM9x9b0IFgIvNrAFwMrCXu7cHbk18sruPAaYQftF3cPdfElb/M3puoTOA0ZWMszuhrEWh69w9D2gPdDGz9u5+H6GU9BHufkRU+uJ64KjovZwCXFHOfiSHZF2ZC8kKv0RfjIk2Bx6I+tALCDV9insPuM7MmgAvuPtXZtYV6ARMjsp7bEFIMCV52sx+AeYSyi/vAcxx9y+j9SOBPwAPEOZneNTMXgZeTvbA3H2pmc2OatZ8BewJvBu9bkXirEMoW5L4Pp1uZv0I/693Jkw4M63Ycw+Mlr8b7acO4X0TAZQUJHMMBL4D9iG0cDeZNMfdnzGzD4DjgfFmdiFhdqqR7n5tEvs4M7FgnpntUNJGUT2e/QlF2E4F+gNHVuBYRgOnA58DY93dLXxDJx0n8CFhPOF+4Ldm1gK4EtjP3VeY2QhCYbjiDHjd3XtXIF7JIeo+kkyxLbA4qpF/NqE42kbMbDdgdtRl8hKhG+UN4FQz2zHaZgdLfn7qL4DmZtYyenw28E7UB7+tu48nJKt9SnjuKkL57pKMJcye1ZuQIKhonFHBtxuAA81sT8LMYz8BP5jZTsCxpcTyPnBw4TGZ2VZmVlKrS3KUkoJkigeBPmb2CaHL5acStjkdmG5mUwlzKTwRnfFzPfCamU0DXid0rZTL3dcQKlA+b2afAuuBhwlfsC9Hr/dfSu6THwE8XDjQXOx1VwCfAbu6+6RoWYXjjMYq7iJUQv2EMDfz58AzhC6pQkOBV83sLXdfSjgzalS0n/cI76cIoCqpIiKSQC0FEREpoqQgIiJFlBRERKSIkoKIiBRRUhARkSJKCiIiUkRJQUREivw/GelB5aoqlVkAAAAASUVORK5CYII=\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Sample predictions: \n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
indexstructureChainIdalphabetacoilfoldTypefeaturesindexedLabelpredictions
0178743ABH.B0.7951390.0069440.197917alpha[0.10236471290385704, -0.1388118245258592, 0.4...0alpha
170093IXS.J0.0000000.7666670.233333beta[0.11298573969138992, 0.3066096729081538, 0.62...1beta
298913WG3.A0.0245400.5276070.447853beta[-0.048722811627312235, 0.1715540477195702, 0....1beta
353383FT1.C0.0000000.5252530.474747beta[0.06991478345460363, 0.1251197224576967, 0.54...1beta
4120364E5X.E0.0000000.4900000.510000beta[0.03959714150940529, 0.03838062713468316, 0.4...1beta
598453VN0.A0.8750000.0000000.125000alpha[0.14008687153458596, -0.1254805113375187, 0.8...0alpha
6134482RBD.A0.7295600.0125790.257862alpha[0.02859262450981666, 0.011427379076314323, 0....0beta
77663MMG.D0.0000000.5000000.500000beta[0.20015033653804232, 0.42783163700784954, 0.6...1beta
838794Q94.A0.8139530.0077520.178295alpha[0.07949779724785641, -0.042684748657603765, 0...0alpha
96933M7O.A0.0000000.5912410.408759beta[0.042166234335358835, 0.04255943773140818, 0....1beta
\n", "
" ], "text/plain": [ " index structureChainId alpha beta coil foldType \\\n", "0 17874 3ABH.B 0.795139 0.006944 0.197917 alpha \n", "1 7009 3IXS.J 0.000000 0.766667 0.233333 beta \n", "2 9891 3WG3.A 0.024540 0.527607 0.447853 beta \n", "3 5338 3FT1.C 0.000000 0.525253 0.474747 beta \n", "4 12036 4E5X.E 0.000000 0.490000 0.510000 beta \n", "5 9845 3VN0.A 0.875000 0.000000 0.125000 alpha \n", "6 13448 2RBD.A 0.729560 0.012579 0.257862 alpha \n", "7 766 3MMG.D 0.000000 0.500000 0.500000 beta \n", "8 3879 4Q94.A 0.813953 0.007752 0.178295 alpha \n", "9 693 3M7O.A 0.000000 0.591241 0.408759 beta \n", "\n", " features indexedLabel predictions \n", "0 [0.10236471290385704, -0.1388118245258592, 0.4... 0 alpha \n", "1 [0.11298573969138992, 0.3066096729081538, 0.62... 1 beta \n", "2 [-0.048722811627312235, 0.1715540477195702, 0.... 1 beta \n", "3 [0.06991478345460363, 0.1251197224576967, 0.54... 1 beta \n", "4 [0.03959714150940529, 0.03838062713468316, 0.4... 1 beta \n", "5 [0.14008687153458596, -0.1254805113375187, 0.8... 0 alpha \n", "6 [0.02859262450981666, 0.011427379076314323, 0.... 0 beta \n", "7 [0.20015033653804232, 0.42783163700784954, 0.6... 1 beta \n", "8 [0.07949779724785641, -0.042684748657603765, 0... 0 alpha \n", "9 [0.042166234335358835, 0.04255943773140818, 0.... 
1 beta " ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "from sklearn.neural_network import MLPClassifier\n",
 "\n",
 "# `layers` records the intended network shape. MLPClassifier infers the input and\n",
 "# output widths from the data, so only the interior (hidden) sizes are passed on.\n",
 "# Deriving them from `layers` keeps a single source of truth for the architecture.\n",
 "layers = [featureCount, 32, 32, classCount]\n",
 "clf = MLPClassifier(\n",
 "    solver='sgd',\n",
 "    alpha=1e-5,\n",
 "    hidden_layer_sizes=tuple(layers[1:-1]),\n",
 ")\n",
 "mcc = mltoolkit.MultiClassClassifier(clf, 'foldType', testFraction=0.1)\n",
 "\n",
 "# The object returned by fit() is iterated with .items(); print one entry per line.\n",
 "metrics = mcc.fit(df)\n",
 "for k, v in metrics.items():\n",
 "    print(f\"{k}\\t{v}\")\n",
 "\n",
 "# Plot ROC\n",
 "mltoolkit.plot_roc(mcc.TPR, mcc.FPR, mcc.AUC)\n",
 "\n",
 "print(\"Sample predictions: \")\n",
 "mcc.prediction.head(10)"
 ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Terminate Spark" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "spark.stop()" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.13" } }, "nbformat": 4, "nbformat_minor": 4 }