{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "Let's imagine we have two distributions." ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "plt.style.use('ggplot')\n", "%matplotlib inline" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "import scipy.stats" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "distrib_positive = scipy.stats.norm(loc=1, scale=2)\n", "distrib_negative = scipy.stats.norm(loc=-1, scale=2)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Let's draw two sets of samples from these distributions and plot them." ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "positives = distrib_positive.rvs(size=100) \n", "negatives = distrib_negative.rvs(size=100)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Let's build a dataframe and plot them." ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "df = pd.DataFrame({'values': np.concatenate((positives, negatives)), \n", " 'true_label': [True] * positives.size + [False] * positives.size})" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Let's add a column that makes use of the fact that we can cast boolean values to 0 or 1." ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
valuestrue_labelnumerical_label
00.863974True1.0
12.066209True1.0
21.477000True1.0
3-2.070876True1.0
45.881839True1.0
\n", "
" ], "text/plain": [ " values true_label numerical_label\n", "0 0.863974 True 1.0\n", "1 2.066209 True 1.0\n", "2 1.477000 True 1.0\n", "3 -2.070876 True 1.0\n", "4 5.881839 True 1.0" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df['numerical_label'] = df['true_label'].astype(float)\n", "\n", "df.head()" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAADxCAYAAAAk56srAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAIABJREFUeJzt3XmcZFV99/HPXWrpqu6erUCcYRWJARE3Aip5HhdEUSOoMUfwZRQ1TlzAPU8292hEfYxBJegECYIKHvXRjEoEo3GBl8LgQhRwmbAMA8rQy0zvtdx7nz9udU11TS91u6p7bvV8369Xvbrq1r3n/Kq66vzqnHvqlBNFESIiIgDuwQ5ARETSQ0lBREQalBRERKRBSUFERBqUFEREpEFJQUREGvyDHYCIyKHIGHMF8CfAHmvtyfPc7wCXAM8FpoALrLU/Xem41FMQETk4rgTOXuT+5wAn1C9bgctWISYlBRGRg8Fa+wNgZJFdzgWustZG1tofA+uNMQ9f6bh6ffhIX8cWkSSczg6/J4Jj29pzampq+IILLrinadM2a+22BJVtAe5rur27vu13CcpIrNeTAg888AClUomhoaGDHcqCFF/n0h5j2uOD9Me40vFt3ry5C6UcS7t5pVCINllrSx1UNl9FK/5BuOeTgojIqora7Gx02Cch7hkc1XT7SOCBjktdgpKCiEgSYZunYr2Oa9oOXGiMuRY4HdhnrV3RoSNQUhARSabdnsISjDHXAE8DSsaY3cC7gQyAtfZTwHXE01F3Ek9JfWVXKl6CkoKISBJdSgrW2vOXuD8C3tCVyhJQUhARSaJLSSGtlBRERJJQUhARkQYlhc6ldY2PdIvw/d/gOGWq1ROpn38CwPN24boPxntFBWq1R5DJ/KZ+/YR5S3OcSXz/14ThJnz/t7juMNXqcUA/8Md43r1kMj8jCB5OGK4nm91BtfoEwMdxpgmCEr5/F44zguftoVb7AyqVp8yJC8B1HyKT+TlR5AFVPG+EavVoMpl7qFROJwgeCcyQzd5EJvMLKpWnUKs9inz+63je3URRniDYQrn8PKKoj2z2Flx3CHgurjuG592N647gunspl/834OB5u6nVjsb37yKT+TWO8yBhuIla7ThyuRtx3SnK5TNw3Qfx/fuZnj6XWu1xTc/nbnK57fj+LiqVx1Gtnobn7QYCguAPCMNBfP/XOM4EUZTFdSeAGVx3lDB8OOXyWcCDFIufIAw3Mj19HpDB93+F49Tq/z8fqJHJ3EkUedRqJzI7ZzGTuYm+vi8ShkdQLj8d399FGBaAPI6zD8cJqdWOpVp9LJ63B9fdQxAcQSbzS3z/53jevURRkXL5WVQqZ83z3w/x/TtxnPV43gyuu48wPAzXfRDHmcDzfl//3xxXf63srb/2pgjDw+qxHjjj
xnHG8P3fEgRHEIZbFnktz89xxvH93xCGRxAE7R8fx7eTINhCGK74F3wP1O7sox61Wj2FK4FPAlctcH/zGh+nE6/xcfqqRJZKEevXv5Z8/vtAlWr10YyMXEMUFRkYuJhC4fO47mi8Z5QjijI4TgXIMT39LPbt+2eaJ0l73r1s3PhKPO8uoIbj7P/+SxT1AUdw2GH318toiSTa3+g7TnXOfbXacQwNXU8UFQHI57/KunXvbMR2II+JiVeSz9+A7+9qqsOZE1O87Z31RLS7viXLYYf14br75pQXhkVcd4ooAsepLVAvFItXN673929jcvJljI19iGLxEwwOfhgID9gPIAyLQK7+mOb/3lAYbsJ1R8hk4vvXrXsv5fIZ5HI3AwHV6ikMD1/Bpk2vJpO5DXApl5/C6OgVbNz44vp+sYGBSxd8DEFwGI4T1p+DsBHz/sf1BWq1R/LQQ99v2lpj48aXk83uwHEqHH44RFEIuC3Pl8/4+Ospl5/B+vVvxvfvBSCKfCqVMxgZ+SzNzYXv/4ING16P591HFG1gYmIrk5OvWzD2Vr5/Jxs2/CWet6t+/KuA9y55XCazox7f/QTBJiYmLmJq6oK26+2KNd5TWJWUl9Y1PtIql/tP8vlv47rjuO4MudxPGBj4EK77ewqFL+J5IzhOhONEuO4MnjeO65Zx3TH6+r5JJnPLnPIGB99JJvNrXLeK60Y4Do2L607jOHfjupU52/ffX21cWu/LZO5mYOBDjXoGBj6O543OW058CSgWryCT2dVSRzRPvTNkMrubtlXwvH0HlOd5YzhODdetLVLvgZdi8fM4zgT9/Z/GccIF9/O8yabne/59XHe45f4Z8vnv4LoTuO402ezNbNr0CrLZH+O607juJPn89+jvv4Rc7ua2Y/b9h/C8YRyntmDMvr+TXO5bjf9JofDZRm8pPq6G64bzPF81isXLWbfu3WQy9zS9vqrkcj+gUJibLNetey+ZzF24bhXP20OxeCWOM932a3zduveQyfxP0/FXA2NLHjc4+P56fFV8//f0928DFv4wsCIip71Lj0rLOYW21/gwxmwlXjEQay2lUgnf9ymVOvk2+cpKGl+cDMpztvX1TZDLubjuzBLHTrN+/TRRtL8+3w8XOSJuFJarr2+cbLZEPNxVXXL/1h7BwROxaVPmgOd5OVqfv/luZzKjc7Y7TpVicWW+h7Ru3TBhGP//4yQdtHVcnPgPbGAdJ2RgYC+FwsKvKc+rsmlTFmjvde77c2PyvAquO0WpdMQSx82t13VrlEpFYKCteruihxv8dqQlKbS9xkd9QanZRaWioaGhNbemi+s+hVLp6MYQSxBsYnT0+VQq6ymVjiGb/cWc/ZuHX2q14xgePoUw3F9fsXg6AwM347pT89YXDz9Vm24f2LDNvy3D6Og5VCpxXRs2HE8+f/eiSSYM1+G643MaqoXqiyIX1w0X3K/59nxlLCQuu5+hoQyl0iPIZn+56GNdquyo/kptjiUM1+N5e4F42Gds7JUMDn4cz9sDQK22hZGRP6dUsgcMyy1cz/7/88IxeQwNndn4//v+89i48Vp8f24Cmu/4Wu1IKpXHUSj8as7/p1Z7OMPDzyEI9r+mBgcfS6HwM1w3HnKsVI5ieNgF2nudDww8nmLxJ43jq9UjgcOXfJ+sW3cyhcJ/N5JXtXo0w8NlYOnk3p21j1BSWCUHZY2PtArDIxgZ+QwDAxfjOAGTk+dRqTwdgJGRz7Fu3d/juvfiupMEwTFUKqeQzf43UeQxNvZOwnDjnPImJy8EfHK5HwKTZDJ34TgVwnCAWu1EPO8iguBjZDK/JAwHgSquO0UY9hMEjyAeu47wvPtw3T04TpUw3MS+fe9oxAUwOvpp1q17B7nc94jPXUwSn4iuAllqtc2MjHyVfP5rDA5ejOtOEATrqVSeSD7/XWbPaURRgamp51OtPomBgX+ql3MGMzP9+P4dZDI7iaIMlcpjCMPD8bwhoiiD7+/E93cTRQHg
EUV+o2cVRfFIqePESXbPnh8CLsPDX2T9+r8kn78JiIiiLLXasfXjalQqT6RWO4Z8/kY87/56Ap0BpnGcgCgaYHT0g2zY8AVc90aiyGN8/CLK5afXYw+YnHw55fKzCcOjKRavAFzGxt5MrfYYHnroPymVnovrTtb/9wUcxweqRFG2/pxEBMHRTE6+jFxuB46zD5jG93/bSDzxYywyPHw1Ybj/03atdiJ7936S/v5LyWYDqtVpoiiH48QfEHz/t4BLtXo8e/deSRiuIwzXk8v9ENeNJxRMTLyJIPjDOa+p+HVWJJv9KWG4kX37PkCSxX7Gx/+OKCqQze4gDDewb98H2LRp6dHsffveTxgOksn8giA4nLGx97ddZ9es8RPNThStTnfeGHMs8I0FZh89D7iQePbR6cDHrbWntVFspFVSO5f2+CD9MaY9Pkh/jKu0SmqnH/MjRjcuvRfAhpFu1LfqVmtKairX+BARSUzDR51L6xofIiKJKSmIiEiDkoKIiDQoKYiISMMan32kpCAikoR6CiIi0qCkICIiDUoKIiLSoKQgIiINOtEsIiIN6imIiEiDkoKIiDQoKYiISIOSgoiINCgpiIhIg2YfiYhIg3oKIiLSoKQgIiINSgoiItKgpCAiIg1KCiIi0qDZRyIi0qCegoiINCgpiIhIg5KCiIg0dCkpGGPOBi4BPOBya+3FLfcfDXwWWF/f52+stdd1pfJFrO0zJiIi3Ra67V0WYYzxgEuB5wAnAecbY05q2e0dgLXWPh44D/iXFXg0B1BSEBFJInLauyzuNGCntfYua20FuBY4t7UmYLB+fR3wQFcfxwI0fCQikkSC4SNjzK1NN7dZa7fVr28B7mu6bzdwesvh7wFuMMZcBBSBZyaOdRmUFEREkkiQFKy1py5w13yFRC23zweutNZ+1BjzZOBqY8zJ1tqw7QCWQcNHIiJJdGf4aDdwVNPtIzlweOjVgAWw1v4IyAOlLj2KBamnICKSRHdmH+0ATjDGHAfcT3wi+aUt++wCzgSuNMacSJwUHupG5YtZtaSQ1ulXIiKJdGGZC2ttzRhzIXA9cXt3hbX2dmPM+4BbrbXbgbcB/2qMeQvx0NIF1trWIaauW5Wk0DT96izibtMOY8x2a+0dTbvNTr+6rD416zrg2NWIT0SkbV36nkL9Q+91Ldve1XT9DuCMrlSWwGqdU0jt9CsRkUS6c04htVZr+Khr06+MMVuBrQDWWkqlEr7vUyqt+PmXZVN8nUt7jGmPD9IfY9rja+jhBr8dq5UUujb9qj7Pd3aubzQ0NESpVGJoaKj7UXeJ4utc2mNMe3yQ/hhXOr7Nmzd3p6A1nhRWa/gotdOvREQS0fBRV6R2+pWISCJr/Ed2VuXRWWtrwOz0qzvjTfH0K2PMOfXd3ga8xhhzG3ANqzT9SkQkEfUUuiOt069ERBLp4Qa/HfpGs4hIEkoKIiLSoKQgIiINa/xEs5KCiEgS6imIiEiDkoKIiDQoKYiISIOSgoiINCgpiIhIw6E++8gY84x2CrLWfrfzcEREUi6FPYVuttPt9BQ+08Y+EfCINvYTEeltKUwKdLGdXjIpWGuPayciEZFDQgqTQjfb6cTnFIwxGeBJwGZr7ReNMcV6UJPdCkpEJLVSmBRaddJOJzpjYox5DPAb4F/Z3115KnBFoohFRHpVypfO7rSdTnoa/TLgXdbaPwSq9W3fB/44YTkiIr0pdNu7HDwdtdNJI3808Ln69Qga3ZG+hOWIiPSmlPcU6LCdTpoU7gGe2LzBGHMasDNhOSIivSn9SeEeOmink55ofifwTWPMp4CsMeZvgdcCr0lYjohIb0r/ieaO2ulEPQVr7TeA5wCHEY9RHQO8yFp7Q6KQRUR6Vcp7Cp2204mnpFprfwq8PulxIiJrQg8sc9FJO50oKRhjssA7gPOBzcADwLXAB6y1M8sJQESkp6R8+KjTdjppT+Ey4FHAG4F7ibslfwtsAV6VsCwRkd6T8qRAh+10
0qTwAuB4a+3e+u07jDE3E5/VVlIQkbUv/Umho3Y66eDY74FCy7Y+4HcJyxER6U0pP9FMh+100qWzrwa+ZYz5BLAbOAp4A3BVW6GKiPS6FPYUutlOL3fp7L9ruf2XwIfaqVBEpKelc/ZR19ppLZ0tIpJECnsKB3XpbBGRQ1qXkoIx5mzgEsADLrfWXjzPPgZ4D/EaRrdZa1/alcoXkfR7CoPEAT4VKAGNZ8dae3RXIxMRSaMuJAVjjAdcCpxFPO6/wxiz3Vp7R9M+JxBPJT3DWjtqjDm8zbI7aqeTDo79C/AE4H3ARuAiYBfwsYTliIj0pu7MPjoN2GmtvctaWyH+ctm5Lfu8BrjUWjsKYK3d02aEHbXTSYePngWcaK0dNsYE1tp/N8bcCnx9qQrT2lUSEUkkwYnmevs4a5u1dlv9+hbgvqb7dgOntxz+B/UybiJuN99jrf1WG9Uuu52G5EnBBfbVr08YY9YTz3195GIHrWRXSURkVSUYPrLWnrrAXfMVErXc9oETgKcBRwI/NMac3PSltIUsq51urjSJ24jHqb4D/JC4oZ8g/um3xTS6SgDGmNmu0h1N+yy3qyQisnq6c6J59vsDs44kXqOodZ8fW2urwN3GmF8TJ4kdS5S93HYaSJ4UXsP+DPdG4IPAeuDlSxzXta6SMWYrsBXAWkupVML3fUqlUsKHsnoUX+fSHmPa44P0x5j2+Bq6kxR2ACcYY44D7gfOA1qHy79GvKjdlcaYEnEbeVcbZS+3nQYSJoXZT/r16w8Bf9HmoV3rKtXH5GbH5aKhoSFKpRJDQ0NthrL6FF/n0h5j2uOD9Me40vFt3ry5OwV1ISlYa2vGmAuB64k/BF9hrb3dGPM+4FZr7fb6fc8yxtwBBMBfWWuH2yh7ue000N4yF20tdGetvWKRu1eyqyQisnq69D0Fa+11wHUt297VdD0C3lq/LKpL7TTQXk/hz9vYJwIWq2wlu0oiIqsnnctcdKOdBtpb5uLp7UQ0yxhzhrX2ppYyVqyrJCKyqtK5zEXH7fSslVjm4j+AwdaN3ewqiYgcNClMCsswbzsNK5MU1sQzJiIyr7WRFBZ8ECuRFFpnFYmIrB1rIyks2E5rlVQRkSTWRlJYkJKCiEgS6Zx91DU6pyAiksTa6Cks/5yCMaattGitDet/B9qPS0Skx6QwKXSznW6np1Bj8ZPHTv1+r52gRER6WgqTAl1sp9tJCvqNZhGRWelMCqv3G83W2nu7VZmISM9L4YnmbrbTiU80G2POYf7f/mxrWVYRkZ6Wzp7CHJ2004lSnjHm3cCn68f9GTAMPBtY6peARETWhu78RvOK6bSdTtoPehVwlrX2LUCl/vf5wLEJyxER6U0pTwp02E4nTQrrrbW/rF+vGGMy1tpbiLspIiJrX/qTQkftdNKk8D/GmEfXr/8SeJ0x5s+B0YTliIj0pvQnhY7a6aQnmt8BbKpf/xvgC0A/8IaE5YiI9KYUzj5q0VE7nfQ3mq9run4L8Mgkx4uI9LyUzz7qtJ1OOvvo5caYU1q2PbbeNRERWftSPnzUaTudtB/0D8B9LdvuA96fsBwRkd6U8qRAh+100nMKg8BYy7Z9wPqE5YiI9KaUDx/RYTudtKdwB/CnLdteCNyZsBwRkd6U/p5CR+100p7CXwPXGWNeAvwP8QmMM4HnJixHRKQ3pX/2UUftdKJHZ629ETgZ2AEUgVuAk621NyUpR0SkZ6W8p9BpO514QTxr7S7g4qTHiYisCek/p9BRO93OL69ts9ZurV+/mgV+yEGrpIrIISGFSaGb7XQ7PYW7m67vbCtCEZG1KoVJgS620+38yM4HAYwxHvFc1y9Ya2c6qVREpGel8ERzN9vpth+dtTYA/kkJQUQOaSk+0dyNdjppyvu6Meb5y61MRKTnpTgp1HXUTiedfZQHvmyM+RFxF6VxMkMnmkXkkJDOcwrNOmqnkyaFX9YviRljzgYuATzgcmvtvNOljDEvBr4E/JG19tbl
1CUismLSnxSW3U5D8qWz37ucSuonPy4FzgJ2AzuMMduttXe07DcAvBG4eTn1iIisuJQnheW207MSf3nNGHMWcB5wuLX2+caYU4FBa+13FznsNGCntfauehnXAucSr9HR7B+ADwNvTxqXiMiqSOHso1bLbKeBhEnBGHMR8CbgcuDF9c3TwMeBpyxy6BbmLuW6Gzi9pezHA0dZa79hjFkwKRhjtgJbAay1lEolfN+nVColeSirSvF1Lu0xpj0+SH+MaY+voUs9hZUaUu+gnQaS9xTeDJxprb3HGPPX9W2/Ah61xHHzPYuNkx/GGBf4GHDBUgFYa7cB22bLGBoaolQqMTQ0tNShB43i61zaY0x7fJD+GFc6vs2bN3enoC4khRUeUl9uOw0kn5I6wP5P/LONegaoLHHcbuCopttHAg+0lHsy8D1jzD3Ak4Dt9S6PiEh6dGdKamNI3VpbAWaH1FvNDqkn+d7BcttpIHlP4QfEPwT9gaZtbwT+a4njdgAnGGOOA+4nHut66eyd1tp9QKPfaIz5HvB2zT4SkdRJ0FMwxjS3YdvqIx3QxSH1eSy3nQaSJ4WLiL8Y8RpgwBjza+Jf+Fn0ixLW2pox5kLgeuLxsyustbcbY94H3Gqt3Z4wDhGRgyNBUrDWLjTa0bUh9Xksq52elXRK6u+MMX9E3PU5mjjT3WKtDds49jrgupZt71pg36cliUtEZNV0Z/ZRkiF1gCOIh9TPWWoEpZN2Gpb3ewoR8UkPfZdARA493Zl9tKJD6p2000mnpD6WuEvzOKC/vtkBImttNmnlIiI9pwtJYSWH1Dttp5P2FK4BvkJ80mI64bEiIr2vS99TWMEh9Y7a6aRJ4QjgXfWuiYjIoSfly1zQYTud9IzJZ2ka9xIROeSEbnuXg6ejdjppT+Fi4EfGmL8DHmy+w1r7jOUGISLSM9LfU+ionU6aFL5M/FugX0XnFETkUJT+pNBRO500KTwO2FT/WraIyKEn/Umho3Y66cDXD4GTllORiMiakP6f4+yonU7aU7gbuMEY81UOHKuadyqViMiakv6eQkftdNKkUAC+CWSZ+xVtEZFDQ/p/ZKejdjrp2kevTFqBiMiakvKeQqftdNJlLh6xSCB3dRKIiEhPSHlS6LSdTjp8tJN4edfmZ2X2W3NewrJERHpPypMCHbbTSYeP5gymGWOOAN5NfLZbRGTtS3lS6LSd7uiMibX298S/B/rBTsoREekZ6V/mYo6k7XTi31OYx6OIz3aLiKx9Ke8pLKDtdjrpieYf0vSTcUCR+EsS/5CkHBGRnpXypNBpO520p3B5y+1J4DZr7W8TliMi0ptSnhTosJ1OOvB1LZADTgfOBM4B3mmMuSphOSIivSn9y1x01E4n7SlcCTwW+DotX58WETkkpL+ncCUdtNNJk8LZwHHW2r1JKxIRWRNSNLNoAR2100mTwi7ibomIyKEp/T2FjtrppEnhKuDfjTGXcODqe99dbhAiIj0j/Umho3Y6aVK4sP73H1u2R8CC622IiKwZ6U8KHbXTSZe5OC7J/iIia07Kk0Kn7XQ3vtEsInLoSHlS6JSSgohIEumffdQRJQURkSTUUxARkQYlhe4wxpwNXEL8Iw+XW2svbrn/rcBfADXgIeBV1tp7Vys+EZG2rPGksCqDY8YYD7gUeA7xan3nG2NOatntZ8Cp1tpTgC8DH16N2EREEkn/2kcdWa2ewmnAztnfBzXGXAucC9wxu4O19r+a9v8x8LJVik1EpH060dwVW4D7mm7vJl7BbyGvBv5jvjuMMVuBrQDWWkqlEr7vUyqVuhVr1ym+zqU9xrTHB+mPMe3xNfRwL6Adq5UU5nsWo3m2YYx5GXAq8NT57rfWbgO2zZYxNDREqVRiaGioK4GuBMXXubTHmPb4IP0xrnR8mzdv7k5BSgpdsRs4qun2kcADrTsZY54J/D3wVGtteZViExFpn5JCV+wATjDGHAfcD5wHvLR5B2PM44FPA2dba/esUlwi
IskoKXTOWlszxlwIXE88JfUKa+3txpj3Abdaa7cDHwH6gS8ZYwB2WWvPWY34RETa1qWkkNZp+qv2PQVr7XXAdS3b3tV0/ZmrFYuIyLJ1YfZR0zT9s4iH13cYY7Zba+9o2m12mv6UMeZ1xNP0X9Jx5UvQN5pFRJLoTk8htdP0lRRERJJIkBSMMbc23dxWnz0JXZym321KCiIiSSRICtbaUxe4q2vT9LtNSUFEJInuDB+ldpq+koKISBLdSQqpnaa/thfxEBHpttBt77IIa22N+LeUrwfujDfF0/SNMbNT8Zun6f/cGLN9JR/WLPUURESS6NL3FNI6TV9JQUQkCX2jWUREGpQURESkQUlBREQa9CM7IiLSoJ6CiIg0KCmIiEiDkoKIiDQoKYiISIOSgoiINGj2kYiINKinICIiDUoKIiLSoKQgIiINSgoiItKgpCAiIg2afSQiIg3qKYiISIOSgoiINCgpiIhIg5KCiIg06ESziIg0qKcgIiINSgrdYYw5G7gE8IDLrbUXt9yfA64CnggMAy+x1t7TlcoD6L+0H/83PjPPnGHmBTNkf5yl8IUCwaaAyddPUry0iDviMv2yaSqnVeJj/qUf/9c+5TPLTL9wGoC+L/eR354n898ZokLE2N+OUX5+eU517v0u6962jsyuDLWH1fCO8eh7ch/TL5pm8O2D9H27j6gvYuqFU/h7fIJSwMSbJuj/dD/efR7T50xTPmt/mc64w8BHBnD2OUy+YpLaE2pz6vNv9em/qh+mIHNnBlyoPKGCEznMPHuG7I1ZnGmH4IiA3I05nMAhCiP83T5O2YHjwP9nn8I1BbI3ZXH2OgTHB4xeMYr7O5cNWzfg3e/hzDg4kUMwEBBtinB/58Zl+RHkIewLcUIHd9gFB6qPqVI+o0zxqiJEUD2+SnBiQPl/l8l/M49/t0+wKSDyIsJjQ8bfPk7hcwUKXyjgPujiTsfd9MiJcByHh/kPo3ZiDafqUD2lyvhfj0MNBj42QOaWDO6EG9fxyABnXxxr5EdMnT9F7sYc/i6f6edN44w4DHxigKg/YtJMkrstR5gLccsuwZaA6WdOU/xckWgwYvzt4+DSeP4rp1TI/yCPd5eHN+wR+RFRJsLb5JF7c47yc+P/W/5LeQY+OUCUjRh79xiVP67gTDv0/99+vIc8ps6bwpl26PtaH8GWgPG3jJO7IUff9X3Ujq8xcdHEge/OAIqXFcn8KkP5GWWmX7T/NZn7Xo7qiVUmXzcJbYxu5L6ZY/ADg0S5iNHLRwmOD5K8ow5tazwpOFEUrXglxhgP+A1wFrAb2AGcb629o2mf1wOnWGtfa4w5D3ihtfYlSxQdPfDAA5RKJYaGhhbcacNrNpC/IY9TcwgGA6afPU3f9/rwHvIACAsh7lT8TgoODxj92CiFawr0fasvPqY/YOL1EzhVh/5P9TcaK4DIjRj96CgzZgYA9wGXw559GN5IXHZEhIND2BcSDoR4ezwcnDn3AQQDAe6EGze6GwLG/n6M6fOnYRpKf1oie1sWgNoRNfZeupfKkyoAZG/Ksv6i9fgPzm1BZsuOvAgnOLC+5utQb3hbXuzB+gBnysGtLH8MtbWe2efMCQ98YwUbAtxR94D9F1I9poqDg3/v/sc+X31hNsSpODg4BLkAt+I2HmvjeZr960REfoRbjR9z5dEVcCH7i/jOhUmaAAAIaElEQVT5n+95atTtR+z9wF7cvS6DHx5sPO9hNmT0slH6P9VPbkcufqwDAU7k4E64RE5E7dga3rCHO+YSeREzz5hh9MrROeWvf+16+v6j/pocCOIEEEDx00W8CY8oEzH9nGn2XrZ33vhm3ye57Tk2Xrhxf3y5kD037iHcHLb1vK+Upd7Hndq8eTPQ5otrYREb5n9+DzC6vhv1rbrVOmNyGrDTWnuXtbYCXAuc27LPucBn69e/DJxpjOn4CXUmHTI/y+DU4qK8MY/C9YVGQgBwpvZX4+3xKH6mSPYn2f3HTHj0Xd9H
/tv5OQkBwAkdBj822Lhd/LdiIyEAjQbKnXbnJITm+wDc8f0NlTfqUfhKAYDszVkyv8g09vN/71O8vLi/viuKBySE5rJn3/it9bU2nPM1dO5et6OEMF89wLwJAUiUEAAy92bmJISF6nMr+8v1yt6cx9p4nmb/Rk4jIQBkbs+QuX3/879QQgBwag7Fa4oUvlKY87y7FZf+T/bP+T964x7uhNso09/l447VbwcO2duyOCNNcU47c1+T4x75b+XJ35DHm4hfb07VIXtrFmYWDBGAwX8cnBtf2WXgIwOLHyT7RU57lx61WsNHW4D7mm7vBk5faB9rbc0Ysw/YBMz56GCM2Qpsre9HqVTC931KpdL8NRfB9Vsacnfxf1iuL4eTmbuPn/VhgR62m3Mb9bsbl9mIOkBTpy2TzVAqlXAOd+IBt6YPcdlCtlGf1+fRDfN9wk67VYnZrV/a/BDt5+Z/nWQKmfg1tVCD3fIwnIzDpiM2weznjTJ4mbn/az/rz3nNQLxP6WElyHCA2feJl5tbTkRErpRb+D20ShZ9H6eJZh91xXzv3NZxq3b2wVq7Ddg2e//Q0NCS3c7BZw/Sd00f3qRHcHjA+GvG6b+6H3+XT5SJCNYFeHs9nJpD7Zgaw28bpvjFIn2f78ObiI8Ze8UYVGDwnsE5PYEoEzH8kWGqQ9X4QbzMoXR1icy9mfoDiBuuYFNA8PCAzC8zBwzhRJmI4LAAd8TFnXGpPbzG3tfvpTJUgUfBxqdsbJwLqB1bY/gtwwRDccvjvcVj4882krknM+/wUJSNoEJjaKR12GT2Oh5EwdxttWNqeEMe7uSBb4J2GuSo/u87YPgoE+FUnbkxOBG1o2v49/kL9iRay64+tgrh/qGd1jobQ3f9IVTiT+y19TW8cS8+F0IETvxJffa5CbMhZMCdjIdxyk8ug8f+czELDMdBfE5l5E0jOGMOG960AbdcH5LsDxh+9zAD/zRA/jt5nKpD7bAaDg7eQx5hLqT6+Cre3R7+gz5hIWTqmVOMVcbmfCQaPHuQwucLuBMuwWH112QIg7sG43IGQiafN8n4vvF5n7PZ94n/SZ/S80u4VZeIiGggYs+b97R8/Fp9qzR81Lke7gW0Y7WSwm7gqKbbRwIPLLDPbmOMD6wDRrpR+dh7x5h5xgyZOzKUn1qmdlKNmXNnyH8jT3hEyMxzZ8h/M4+7x2XmT2YIN4eMvWuMmafNkLl9/zEA1ZOq5P8zj/8THwow/lfjBCfs/2gYDUQMfXuI4mVF/F/5VE+sUiwVGXn8CLXH1Oj7Uh+FqwoEGwOmLpjC/61P+PCQmefNkPtODv9un5mzZggeUS/Tg5GrR8h/LY874jJz7gzh4fs/tgbHBwz/v2Hy2/NEbkRuR47Ijyj/cRlvn8fMM2fI7sjiTMUJL/e9HIQQFSIyP83g7nPxnuyx5//sIX99nuzNWbz7PKpPrDLx1gmcCYeB9wzg3+HjDrl44x6VkysExwZkf5KFMkTFiHBDSDQYEXkRmdszOKHD9AunKT+5zOBHBwmjkOppVcJjQspPLpP7dg5vt0dUioiciODIgJkXzJDZkaHwbwW8XR7+bh+mINwY4oc+1f4q5TPLuBMutcfVmHrRFE7k0PeVPjI/z+D+zqXypArBliA+2Z0HZmDmT+L/vb8znmjglB36P9pPeHjI1J9NkbslR3BYgDfsERwdUHlihfz2POH6kJkXzIAD+X/P4w67VE+tkvt+Lj7RfL8XnyTvD8k9LMfwXww3TtgOPXKI/kv7CQoBk2+aJNwSMvqvo/Hr7IH4deZUHPI35KkdU6P87DLebz3y38lT+8Ma5afPnbwALPyaPLFK7gc5qidXqfyvypLvh9pjajz0Xw8x8I8DRIMR+967D4pLHiaz1nhSWK0TzT7xieYzgfuJTzS/1Fp7e9M+bwAe03Si+UXWWrNE0W2daD7YFF/n0h5j2uOD9MfYMyeai1Pt7TlZ6EZ9q25VBsestTXgQuB64M54k73d
GPM+Y8w59d0+A2wyxuwE3gr8zWrEJiKSyBo/0bwqPYUVpJ5CF6Q9Pkh/jGmPD9IfY8/0FPqWmN41azrfjfpWnb7RLCKShGYfiYhIQw8PDbVDSUFEJAklBRERaehSUjio68EtYm0PjomIdFsXZh/V14O7FHgOcBJwvjHmpJbdXg2MWmsfCXwM+NAKPJoDKCmIiCQRuu1dFnfQ1oNbSs8PH81+db1rX2FfIYqvc2mPMe3xQfpjTHt8wL1EHNPOjlNTU8MXXHDBrU2bttWX6YEurgfXbb3eU3AAxxgzNHs9jRfFt/ZjTHt8vRDjKsXXqWPbratQKJSstac2XbY1lTNfLMtaD67bej0pzGpzgfODRvF1Lu0xpj0+SH+MaY+vm5KsB0e314NbTM8PH9XtO9gBLEHxdS7tMaY9Pkh/jGmPr5t2ACcYY44jXg/uPOClLftsB14B/Ah4MfBda616Cm3atvQuB5Xi61zaY0x7fJD+GNMeX9ekeT24Xl/7SEREumit9BRERKQLlBRERKRBSUFERBqUFEREpEFJQUREGpQURESkQUlBREQa/j/rb5XPQl1ZXwAAAABJRU5ErkJggg==\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "df.plot.scatter(x='values', y='numerical_label', c='numerical_label', cmap='spring')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We can see that there is a lot of overlap between the two labels." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "A simple binary classifier would in this case be a threshold based mechanism. What is above the threshold is classified as positive, what is below is classified as negative. \n", "\n", "Using this procedure and setting the threshold at a value of 0, we can predict the following labels:" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "df['predicted'] = (df['values'] > 0.).astype(float)" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
valuestrue_labelnumerical_labelpredicted
00.863974True1.01.0
12.066209True1.01.0
21.477000True1.01.0
3-2.070876True1.00.0
45.881839True1.01.0
\n", "
" ], "text/plain": [ " values true_label numerical_label predicted\n", "0 0.863974 True 1.0 1.0\n", "1 2.066209 True 1.0 1.0\n", "2 1.477000 True 1.0 1.0\n", "3 -2.070876 True 1.0 0.0\n", "4 5.881839 True 1.0 1.0" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.head()" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAADxCAYAAAAk56srAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAIABJREFUeJzt3Xl8JHWd//FXHX0knWSONAgznAK6IKAoCx77Ww9AwQOU1a/gsooHoyIoHvtbdQVvRf0pi8qiIyKHCn7VnzoKAq6uCv4UBlREQGGWYxhAZnLMZHL1UVW/P6rT0+lJJl2TTqaSeT959IPu6m9965NMpz79PepbThRFiIiIALi7OgAREUkPJQUREalTUhARkTolBRERqVNSEBGROiUFERGp83d1ACIiuyNjzOXAy4GN1trDp3jfAS4GXgqMAmdaa38/13GppSAismtcAZy4g/dPAg6pPVYBl85DTEoKIiK7grX218DADoqcAlxlrY2stb8Dlhpj9p7ruBZ695EuxxaRJJzZ7f5QBAe0VHJ0dLT/zDPPfKhh02pr7eoEB1sJPNLwekNt2+MJ6khsoScFHnvsMYrFIn19fbs6lGkpvtlLe4xpjw/SH+Ncx7dixYo21HIAreaVzs6o11pbnMXBpjrQnH8RXvBJQURkXkUtNjZm2SYhbhns2/B6H+CxWdc6AyUFEZEkwhaHYr1ZH2kNcI4x5lrgWGCLtXZOu45ASUFEJJlWWwozMMZcA7wAKBpjNgAfBjIA1tqvANcTT0ddRzwl9Y1tOfAMlBRERJJoU1Kw1p4+w/sR8I62HCwBJQURkSTalBTSSklBRCQJJQUREalTUpi9tK7xkWoR+Pf5OCWHyqGV2vBTzFvv4T4Rz4CIOiOqT66SuS8TPz+kOmV1zoiD/1efsDfEv9/H7XepHFiBLuAfwHvYI/OHDMHeAeHSkOzaLJVnVsAHZ8whKAb4D/g4Aw7eRo/qU6qUn1ueFBeAu8kl88cMkRdBBbwBj8p+FTIPZSgfWyY4OIBxyP4mS+auDOXnlqk+tUr+x3m8Bz2ifESwMqD0shJRR0T2tixunwsvBTffh+c9iOsO4LqbKZX+EXDwvA1Uq/vh+w+QyfwVx3mCMOylWj2QXO4WXHeUUul5uO4T+P6jjI2dQrX6jG2/T28DudwafH895fIzqFSOwfM2AAFB8BTCsAff/yuOM0wUZXHdYWAc1x0kDPemVDoBeIJC4UuE4XLGxk4DMvj+X3CcKpXKocR/alUymXuJIo9q9VAm5ixmMr+ho+M7hOFelEovxPfXE4adQB7H2YLjhFSrB1CpPB3P24jrbiQI9iKT+TO+/0c872GiqECp9GLK5ROm+NcP8f17cZyleN44rruFMNwD130CxxnG8/5GuXwsQXBg/FlxNuP79+E4o4ThHrVYt59x4zhD+P79BMFehOHKmT7RU+
y/Fd+/jzDciyBoff84vnUEwUrCcM4v8N1eq7OPFqj5ailcAXwZuGqa9xvX+DiWeI2PY+clsjSKYOnblpL/VR4qUHlahYFrBogKEd0XdtP5rU7cwVpSyEVEmQin7EAOxl48xpb/2DJpjrT3sMfyNy7He8CDKjgN33Sijgj2gj0e3SOuozmUzLZrZZzK5PerB1bpu7GPqBCXyf8gz5Lzl9Rj244Hw28cJn9THn/9to9e5ESTYgKIzo/iRLShVq6jyh7/cSXuWV+YVGEYFnDdUaIIHGfqhAhQKFxdf97VtZqRkTMYGvoMhcKX6On5LBBuVw4gDAtADtcdZLrrhsKwF9cdIFP7XS1Z8lFKpeeRy90KBFQqR9Lffzm9vW8mk7kTcCmVnsvg4OUsX/7qWrlYd/cl0/4MQbAHjhPiultq8YaT3u/q+jbV6sFs2vSrhq1Vli9/PdnsWhynzJ57QhSFgNv0+/LZuvVsSqUXsXTpefj+wwBEkU+5/DwGBq6k8XTh+3exbNnZeN4jRNEyhodXMTLy9mljb+b797Js2VvxvPW1/d8EfHTG/TKZtbX4HiUIehkePpfR0TNbPm5bLPKWwrykvLSu8ZFWuf/Kkf9ZHnerizvukrsjR/dnunH/5tL5nU68AQ8ncnAiB3fcxdvq4ZZc3CGXjus6yNw2+et7z/k9ZP6awa24uJGL0/CfO+biPOjglidvr79fceuP5vcyD2bo/kx3/TjdX+zGG/SmrMfBwQkcCpcXyKzPTD5GtH3d7rhLZkNDubEM3qfOwQEcZ+IR4HlDOE4V1602bJ/5USh8C8cZpqvrqzhOOG05zxvB8wZwnGjaMq7b3/T+OPn8z3HdYVx3jGz2Vnp730A2+ztcdwzXHSGf/yVdXReTy93acsy+vwnP68dxqtPG7PvryOVuqP+bdHZeWW8txftVcd1wit9XlULhMpYs+TCZzEO1nyfCdSvkcr+ms3Nyslyy5KNkMg/guhU8byOFwhU4zljLn/ElSz5CJvM/DftfDQzNuF9Pzydq8VXw/b/R1bUamP7LwJyInNYeC1RaxhRaXuPDGLOKeMVArLUUi0V836dYnM3V5HMraXzuVhe3NDlfdwx3kHNzuOM7zuPumMvSsaVExW3fav1wx//MziwuvezY2kG2mI27uyozf5yaWwSJlLNQyUC2svN11EX09mZw3dKsa3KcmV9nMoOTtjtOhUJhbq5DWrKknzCMP2+eN4jjBC3t57qVKVtbjhPS3b2Zzs5tn2Hfn9xK8bwKvb1ZoLXPue9PjsnzyrjuKMXiXjPsN/m4rlulWCwA3VPvMBcW8Am/FWlJCi2v8VFbUGpiUamor69v0a3p4j7Xpbhfsd7FEvQGDL5ikPLSMsX9i2Tvyk4q39j9Uj2wSv+R/YR92/54CscW6L61G3d06oQSZaJJXUMR0XaJYsptmYjBkwcp95UBWHbQMvIP5neYZMIlIe5WFyeY+XiRG+E29N9GT7kPpyEhxF1G2z+fSRRBFHXR15ehWHwy2eyft3u/sa6Z6o5qn9TGWMJwKZ63GYi7fYaG3khPzxfxvI0AVKsrGRj4F4pFi+O0luSiyMFxohli8ujrO44wjD9vvv8yli+/Ft+fnICm2r9a3Ydy+Rl0dv5lUiKpVvemv/8kgmDbZ7in5+l0dv4B143/7cvlfenvd4HWPufd3UdRKNxR379S2QfYc8a/kyVLDqez80/15FWp7Ed/fwmYObm3Z+0jlBTmyS5Z4yOtwr1CBr4+QPeF3TiBw8hpI5RfGP/xDHxzgCX/vgT3YRd3xCXYP6B8ZJnsn7JEXsTQ+UOEyyd/mxo5ZwR8yN2cgxHIPJDBKTuE3SHVQ6t453oEFwVk/pwh7AmhAu6oS9gVEjw5iLuuI/Ae8XA3ujgVh7A3ZMuHttTjAhj86iBLPrSE3C9z8djFiBMPRFeALFRXVBn4wQD5H+bpubAHd9glWBpQflaZ/C/y9TGNqDNi9BWjVJ5dofsL3XE9/x
gy/tVv45eeTiazjijKUC4fQRjuief1EUUZfH8dvr+BKAoAjyjycd3xuM4oTi6OA0HQy8aNNwMu/f3fYenSt5LP/waIiKIs1eoBtf2qlMvPolrdn3z+FjzvUaIog+OMA2M4TkAUdTM4+GmWLfs2rnsLUeSxdeu5lEovpLv7CzhOwMjI6ymVXkIY7kehcDngMjR0HtXqEWza9F8Uiy/FdUfif/uwE8fxgQpRlMVxykBEEOzHyMgZ5HJrcZwtwBi+f3898cQ/Y4H+/qsJw23ftqvVQ9m8+ct0dV1CNhtQqYwRRTkcZxQA378fcKlUDmLz5isIwyWE4VJyuZtx3Y1Uq09hePhdBMHfTfpMDQ2dTxgWyGZ/TxguZ8uWT5JksZ+tWz9IFHWSza4lDJexZcsn6e2duTd7y5ZPEIY9ZDJ3EQR7MjT0iZaP2TaLfKDZiaL5WX3aGHMA8JNpZh+9DDiHePbRscAXrbXHtFBtpFVSZy/t8UH6Y0x7fJD+GOdpldTZfs2PGFzeWsllA+043rybrympqVzjQ0QkMXUfzV5a1/gQEUlMSUFEROqUFEREpE5JQURE6hb57CMlBRGRJNRSEBGROiUFERGpU1IQEZE6JQUREanTQLOIiNSppSAiInVKCiIiUqekICIidUoKIiJSp6QgIiJ1mn0kIiJ1aimIiEidkoKIiNQpKYiISJ2SgoiI1CkpiIhInWYfiYhInVoKIiJSp6QgIiJ1SgoiIlLXpqRgjDkRuBjwgMustRc2vb8fcCWwtFbm/dba69ty8B1Y3CMmIiLtFrqtPXbAGOMBlwAnAYcBpxtjDmsq9iHAWmuPAk4D/nMOfprtKCmIiCQROa09duwYYJ219gFrbRm4Fjil+UhAT+35EuCxtv4c01D3kYhIEgm6j4wxtze8XG2tXV17vhJ4pOG9DcCxTbt/BLjJGHMuUACOTxzrTlBSEBFJIkFSsNYePc1bU1USNb0+HbjCWvt5Y8xzgKuNMYdba8OWA9gJ6j4SEUmiPd1HG4B9G17vw/bdQ28GLIC19rdAHii26aeYlloKIiJJtGf20VrgEGPMgcCjxAPJr2sqsx44DrjCGHMocVLY1I6D78i8JYW0Tr8SEUmkDctcWGurxphzgBuJz3eXW2vvNsZ8DLjdWrsGeC/wNWPMu4m7ls601jZ3MbXdvCSFhulXJxA3m9YaY9ZYa+9pKDYx/erS2tSs64ED5iM+EZGWtek6hdqX3uubtl3Q8Pwe4HltOVgC8zWmkNrpVyIiibRnTCG15qv7qG3Tr4wxq4BVANZaisUivu9TLM75+MtOU3yzl/YY0x4fpD/GtMdXt4BP+K2Yr6TQtulXtXm+E3N9o76+PorFIn19fe2Puk0U3+ylPca0xwfpj3Gu41uxYkV7KlrkSWG+uo9SO/1KRCQRdR+1RWqnX4mIJLLIb7IzLz+dtbYKTEy/ujfeFE+/MsacXCv2XuAsY8ydwDXM0/QrEZFE1FJoj7ROvxIRSWQBn/BboSuaRUSSUFIQEZE6JQUREalb5APNSgoiIkmopSAiInVKCiIiUqekICIidUoKIiJSp6QgIiJ1u/vsI2PMi1qpyFr7i9mHIyKScmop8PUWykTAk2cZi4hI+u3uScFae+B8BCIisiDs7kmhmTEmAzwbWGGt/Y4xpgBgrR1pd3AiIqmzyJNCohETY8wRwH3A19jWrfR84PI2xyUikk6LfOnspMPolwIXWGv/DqjUtv0K+Ie2RiUiklah29pjgUoa+dOAb9aeR1DvNupoZ1AiIqmllsIkDwHPatxgjDkGWNeugEREUm2RJ4WkA83nA9cZY74CZI0xHwDeBpzV9shERNJoAZ/wW5GopWCt/QlwErAH8VjC/sCp1tqb5iA2EZH0UUthMmvt74Gz5yAWEZH0W8CDyK1IlBSMMVngQ8DpwArgMeBa4JPW2vH2hycikjILuBXQiqQthUuBpwLvBB4m7j76ALASeFN7QxMRSSElhUleCRxkrd
1ce32PMeZW4tlHSgoisvgpKUzyN6AT2NywrQN4vG0RiYik2e6eFJqWzr4auMEY8yVgA7Av8A7gqrkJT0QkZXb3pMDUS2d/sOn1W4HPzD4cEZGU291nH2npbBGRBmopiIhIXZuSgjHmROBiwAMus9ZeOEUZA3yEeK25O621r2vLwXcg6XUKPcQBPh8oAvXfjrV2v7ZGJiKSRm1ICsYYD7gEOIF4fHatMWaNtfaehjKHEE/5f561dtAYs+cO6mvpzpfW2gdmKpO0pfCfwD7Ax4hXSz0D+Ffg+wnrERFZmNrTUjgGWDdxkjbGXAucAtzTUOYs4BJr7SCAtXbjDupbR9yacGr/n9D82pspsKRJ4cXAodbafmNMYK39kTHmduDHwEU72jGtTSURkUQSDDTXzo8TVltrV9eerwQeaXhvA3Bs0+5PqdXxG+Lz5kestTdMdRxrbT0oY8wbgeOJz6UTFxlfAPy8lZiTJgUX2FJ7PmyMWUp8jcLBO9qp3U0lEZFdJkFLwVp79DRvTVVJ1PTaBw4BXkDcQ3OzMebwhouHp/Nx4BBr7Vjt9f3GmLcS3zXzipliTjq36k7i8QSAm4lP9JfWDrYj9aaStbZMvF7SKU1lkjSVRER2jfaskjpxndeEfYjXkmsu8yNrbcVa+yDwV+IkMRMXOKBp2/600HUEyVsKZ7Etw70T+DSwFHj9DPu1ralkjFkFrAKw1lIsFvF9n2KxmPBHmT+Kb/bSHmPa44P0x5j2+OraM6awFjjEGHMg8ChwGtDcXf5D4sVHrzDGFInPkTMOFBN35f/CGPMN4vPuvsCZzNDFPyFRUmgcubbWbgLe0uKubWsq1frkJvrlor6+PorFIn19fS2GMv8U3+ylPca0xwfpj3Gu41uxYkV7KmpDUrDWVo0x5wA3En8Jvtxae7cx5mPA7dbaNbX3XmyMuQcIgH+11va3UPfnjDF3Aa8BjiLu4n/TdOMRzVpZ5qKlhe6stZfv4O1Wm0q/s9ZWgAeNMRNNpbWtHF9EZF606ToFa+31wPVN2y5oeB4B76k9ktZ9A9BSEmjWSkvhX1ooEwE7Sgpz2VQSEZk/KV/mwhiTI55tdDrQa61dYox5MfAUa+2XZ9q/lWUuXpgwoOdZa3/TVMecNZVEROZV+pe5uIh4HPefgZ/Wtt1d2z77pLATfgr0NG+cy6aSiMi8SX9SeBVwsLV2xBgTAlhrHzXGrGxl57lICqn/jYmI7LT0J4UyTed2Y8weQEs9L3PROdY8q0hEZPFoz3UKc+m7wJW1MVyMMXsTdxtd28rO6R4xERFJm/QnhQ8CDwF3EV9Hdj/xbM+PtbKzls4WEUki5bOPaqtGnAecV+s26quN2bZkLn661He4iYjstJS3FIwxAxPPrbWbJhKCMaalpYNauXitpcRhrZ0Y5e5upbyIyIKU/oHmTPMGY0yGNq59VGXHg8cT63W3dEARkQUtpUnBGHMz8bk4b4z5ddPb+wD/r5V6WkkKukeziMiElCYF4DLiL+l/D3y9YXsEPAH8opVKWrmi+eGdiU5EZFFK6UCztfZKAGPM76y1f9nZehLPPjLGnMzU92ieaflsEZGFL70thQlnG2OutdbWu4uMMc8FjLX2vJl2TpTyjDEfBr5a2+81xFfIvQSY6U5AIiKLQ8pnHxEvhHd707Y72H4R0iklbQe9CTjBWvtuoFz7/yvY/i4/IiKLU/qTQsT253Zvim1TSpoUllpr/1x7XjbGZKy1t7HtFp0iIotb+pPCzcAnJi4nqP3/I7XtM0qaFP7HGPO02vM/A283xvwLMJiwHhGRhSn9SeFdwPHA48aY24iXuDgBOLeVnZMONH8I6K09fz/wbaALeEfCekREFqaUzj6aYK3dYIx5JnAs8fUJjwC3TVxgPJOk92i+vuH5bcDBSfYXEVnw0j/7aGKFid/uzL6JkoIx5vXAH621f2rY9nTgSGvt1TsTgIjIgpLCpGCMuddae2jt+SNMswqFtXa/mepK2n
30ceAZTdseAdYASgoisvilMCkAZzU8P2M2FSVNCj3AUNO2LcRrdouILH4pTArW2lsanv9qNnUlTQr3AP8E2IZtrwLunU0QIiILRgqTgjGmpRvoWGsvmKlM0qTwb8D1xpjXAv9DPNB8HPDShPWIiCxM6Zx9tG/D8zzxl/e1wMPAfsAxwPdbqSjp7KNbjDGHE18uvS9wG/Aua+0jSeoREVmwUthSsNa+ceK5MeZa4HRr7fcbtp1KvDTRjBIviGetXQ9cmHQ/EZFFIYVJoclJwD83bfsR8I1Wdm7lzmurrbWras+vZvqpTlolVUQWv/QnhXXEFxR/sWHb2cRd/jNqpaXwYNPBRER2X+lPCm8BfmCM+d/Ao8BK4jtontrKzq3cZOfTAMYYj/iahG9ba8d3OlwRkYUsnQPNddbaPxhjDgGeDawAHgd+a62ttLJ/yz+dtTYAvqCEICK7tfQviDeJtfbXQNYYU2ilfNKU92NjzCuShyUiskikPCkYY44A7gO+xrZ7NT8fuLyV/ZPOPsoD3zPG/Ja4K6k+6KyBZhHZLaSoFTCNS4ELrLVXG2MmbmvwK+IkMaOkSeHPtUdixpgTgYuJ7wB0mbV2ymmtxphXA98F/t5a23xLORGRXSv9SeFpwDdrzyMAa+2IMaajlZ2TXrz20WSxxWqD1JcQ3+hhA7DWGLPGWntPU7lu4J3ArTtzHBGROZf+pPAQ8Cwa7tNsjDmGFmePJr54zRhzAnAasKe19hXGmKOBHmvtL3aw2zHAOmvtA7U6rgVOIV5LqdHHgc8C70sal4jIvEj57CPgfOA6Y8xXiAeYPwC8jckrqU4r6f0UziW+1dtlwKtrm8eIL5J47g52XUk8BjFhA/FdgRrrPgrY11r7E2PMtEnBGLMKWAVgraVYLOL7PsViMcmPMq8U3+ylPca0xwfpjzHt8dW1qaUwV13qtXPoScTXK/wK2B841Vp7RytxJW0pnAccZ619yBjzb7VtfwGeOsN+U/0W64PUtRtLXwScOVMA1trVwOqJOvr6+igWi/T19c206y6j+GYv7TGmPT5If4xzHd+KFSvaU1EbksJcdanX6r0cWGWtPXtnYkvaDupm2zf+iZN6BijPsN8GJq/itw/xzaQb6z0c+KUx5iHiiy7W1LqmRETSoz1TUutd6tbaMjDRpd5soku9pevDateTvRho6X7MU0naUvg18H7gkw3b3gn89wz7rQUOMcYcSHzZ9WnEK60CYK3dAtTbjcaYXwLv0+wjEUmdBC0FY0zjOWx1racD2tilPoWLgI8aYz7c6lXMjZImhXOJL2A7C+g2xvyV+E5sO7ygzVpbNcacA9xI3H92ubX27tqNIW631q5JGriIyC6RIClYa6fr7Whbl/oUzgX2At5jjNlUq9cBorbfo9la+7gx5u+Jmz77EWe626y1MzZVrLXXA9c3bZvyLkDW2hckiUtEZN60Z/ZRki51iE/ya4wxJ7fQgzKv92jGWhsRD3roWgIR2f20Z/bRXHap/xb4EHA68YJ4jxGPWXxyRztNSDol9enETZpnAF21zRPNkmySukREFqQ2JIU57lK/lHhG6DuJb8e5P/AB4nGMN820c9KWwjXE9/l8J/H1CSIiu5c2Xacwh13qrwQOstZurr2+xxhzK/EVzW1PCnsRL7Q05d3XREQWvfQvc/E3oBPY3LCtg/i+CjNKmhSuJO73+lbC/UREFof0L3NxNXCDMeZLbBvQfgdwlTHmRROFpluaKGlSuBD4rTHmg8ATjW9Ya1809S4iIotI+lsKb639/4NN299We0A8TfXJU+2cNCl8j/iezT9AYwoisjtKeVKw1h44m/2TJoVnAL21y7JFRHY/KU8Ks5W0c+xm4LC5CEREZEFI+e04ZytpS+FB4CZjzA/YfkxhyqlUIiKLygI+4bciaVLoBK4Dsky+RFtEZPeQ/tlHs5J07aM3zlUgIiILgloK2xhjppzCBDBxq00RkUVNSWGSdWxbhnXCxNXNXl
siEhFJMyWFbay1kzrTjDF7AR8mnpUkIrL4LfKkMKsRE2vt34jv2/zp9oQjIpJyodvaY4FKfD+FKTyVeFaSiMjit8hbCkkHmm+m4ZZxQIH4YraPtzMoEZHUUlKY5LKm1yPAndba+9sUj4hIui3ypJC04+taIAccCxwHnAycb4y5qt2BiYikkpa5mOQK4OnAj2la5kJEZLewgE/4rUiaFE4EDmy4zZuIyO5lAc8sakXSpLCeuPtIRGT3pJbCJFcBPzLGXMz2q6ROeWs3EZFFRUlhknNq//9U0/Zpb+0mIrKoKClsM9vbvImILHhKCiIiUqekICIidZp9JCIidWopiIhInZJCexhjTgQuJr4Zz2XW2gub3n8P8BagCmwC3mStfXi+4hMRackiTwrz0jlmjPGAS4CTiFdVPd0Yc1hTsT8AR1trjwS+B3x2PmITEUlEax+1xTHAuon7OBtjrgVOAe6ZKGCt/e+G8r8Dzpin2EREWqeB5rZYCTzS8HoD8Uqr03kz8NOp3jDGrAJWAVhrKRaL+L5PsVhsV6xtp/hmL+0xpj0+SH+MaY+vbgG3AloxX0lhqt9iNMU2jDFnAEcDz5/qfWvtamD1RB19fX0Ui0X6+vraEuhcUHyzl/YY0x4fpD/GuY5vxYoV7alISaEtNgD7NrzeB3isuZAx5njg34HnW2tL8xSbiEjrlBTaYi1wiDHmQOBR4DTgdY0FjDFHAV8FTrTWbpynuEREklFSmD1rbdUYcw5wI/GU1MuttXcbYz4G3G6tXQN8DugCvmuMAVhvrT15PuITEWlZm5JCWqfpz9t1Ctba64Hrm7Zd0PD8+PmKRURkp7Vh9lHDNP0TiLvX1xpj1lhr72koNjFNf9QY83biafqvnfXBZ6ArmkVEkmhPSyG10/SVFEREkkiQFIwxtze8XF2bPQltnKbfbkoKIiJJJEgK1tqjp3mrbdP0201JQUQkifZ0H6V2mr6SgohIEu1JCqmdpr+4F/EQEWm30G3tsQPW2irxPe9vBO6NN8XT9I0xE1PxG6fp/9EYs2Yuf6wJaimIiCTRpusU0jpNX0lBRCQJXdEsIiJ1SgoiIlKnpCAiInW6yY6IiNSppSAiInVKCiIiUqekICIidUoKIiJSp6QgIiJ1mn0kIiJ1aimIiEidkoKIiNQpKYiISJ2SgoiI1CkpiIhInWYfiYhInVoKIiJSp6QgIiJ1SgoiIlKnpCAiInUaaBYRkTq1FEREpE5JoT2MMScCFwMecJm19sKm93PAVcCzgH7gtdbah9py8AC6LunCv89n/Phxxl85TvZ3WTq/3UnQGzBy9giFSwq4Ay5jZ4xRPqYc7/OfXfh/9SkdV2LsVWMAdHyvg/yaPJk/ZYg6I4Y+METpFaVJh3MfdVny3iVk1meoPqmKt79Hx3M6GDt1jJ739dDxsw6ijojRV43ib/QJigHD7xqm66tdeI94jJ08RumEbXU6Wx26P9eNs8Vh5A0jVJ9ZnXQ8/3afrqu6YBQy92bAhfIzyziRw/hLxsneksUZcwj2CsjdksMJHKIwwt/g45QcOBD8//DpvKaT7G+yOJsdgoMCBi8fxH3cZdmqZXiPejjjDk7kEHQHRL0R7uNuXJcfQR7CjhAndHD7XXCgckSF0vNKFK4qQASVgyrPj11yAAALDklEQVQEhwaU/rFE/ro8/oM+QW9A5EWEB4Rsfd9WOr/ZSee3O3GfcHHHXPArRJedhfOa7/GkJ1WpVg/FcSpUKkeydeu/AVW6uy8ik7kN1x2mUjmIIDgYx9mC40REkc/o6Onkcrfg++sZG3sZjjNAd/eXiKIuRkYMudydhGEO1y0RBCsZGzueQuGbRFEPW7e+D3Dp7v4cjrOFcvlI8vlf43kP4Hn9RJFPFGXwvF5yufMolV4KQD7/Xbq7v0wUZRka+jDl8j/gOGN0df0fPG8To6On4ThjdHT8kCBYydat7yaXu4mOjhupVg9iePhctv/zDCgULiWT+Qul0osYGz
s1/kx2fI9c7pdUKocyMvJ2YObujVzuOnp6PkkU5RgcvIwgOCjJX9TubZEnBSeKojk/iDHGA+4DTgA2AGuB06219zSUORs40lr7NmPMacCrrLWvnaHq6LHHHqNYLNLX1zdtoWVnLSN/Ux6n6hD0BIy9ZIyOX3bgbfIACDtD3NH4DynYM2DwokE6r+mk44aOeJ+ugOGzh3EqDl1f6YpPVhMBuBGDnx9k3IwD4D7mssdL9sAbiOuOiHBwCDtCwu4Qb6OHgzPpPYCgO8AdduOT7rKAoX8fYuz0MRiD4j8Vyd6ZBaC6V5XNl2ym/OwyANnfZFl67lL8JyafQCbqjrwIJ9j+eI3PASInwmn6sAdLA5xRB7e8832ozceZ+J054fZ/WMGyAHfQbSgfwY9fDifeAH64XflKZX8cB3z/4W11R+A0VR2GWRynjONAEORw3TKOE00qv+3/DlHk47oVAMrlpwEu2exdtfJOfd/tftbIZ/PmT+K6m+np+SyOE9SPPzh4KV1dXyGXWxv/rEE3jhPhusNEkUO1egCe14/rDhFFHuPjL2Jw8IpJ9S9d+jY6On6K41QJgu5aAggoFL6K5w0TRRnGxk5i8+ZLp4xv4u8kl1vD8uXnNMSXY+PGWwjDFVPuN19m+juerRUrVgDM9owesWxzayUHl7bjePNuvkZMjgHWWWsfsNaWgWuBU5rKnAJcWXv+PeA4Y8ysf6HOiEPmDxmcalyVN+TReWNnPSEAOKPbDuNt9Ch8vUD2juy2fYY9Om7sIP+z/KSEAOCEDj0X9dRfF75RqCcEoH6Cc8fcSQmh8T0Ad6tbPyl7gx6d3+8EIHtrlsxdmXo5/28+hcsK2453eWG7hNBY90RCaD5e84m6OSEAuJvdWSWEqY4DTJkQgKaEADzpCTj6jikTAkAm8/CkhADbJwSglgTi555XmnRSn9i+7f9RPSHEx7ibTObuhvLTf4lynCqFwjV0dn6/fsKdOH5X15fJZO6qb/O8rbjucL1O31+P6w7VXgdks3fiOAMNdY+Rzd6B41Tr++fzN5DP34TnTdRTIZu9HRifNkaAnp5PNcVXorv7czvcRxpETmuPBWq+uo9WAo80vN4AHDtdGWtt1RizBegFJn11MMasAlbVylEsFvF9n2KxOPWRC+D6TSdyd8f/YLmOHE5mchk/60MwdXk359aP7y7fyZOoAzScbzLZDMViEWdPJ+5wazgvZjuz9eN5HR7tMNU3+l2ukoFg+t/nVK2C9nNrj6kTUzPfzzHVByWT6cRxMkx/wm5K0k6G3t69gIkvHCU8LzOpjO9nmfShATwvQ7H4JGBy2bh8/HfieblJ26MIcrni9H9D82SHf8dpotlHbTHVn27zV65WymCtXQ2snni/r69vxmZnz0t66LimA2/EI9gzYOtZW+m6ugt/vU+UiQiWBHibPZyqQ3X/Kv3v7afwnQId3+rAG473GXrDEJSh56GeSS2BKBPR/7l+Kn3xt0vnDIfi1UUyD2dqP0B8sg16A4K9AzJ/zmzXhRNlIoI9AtwBF3fcpbp3lc1nb6bcV4anwvLnLq+PBVQPqNL/7n6CvvjE473bY/kflpN5KDNl91CUjaAcf2Nv7CJqLosHUTB5W3X/Kl6fhzuy/R9BK0kkqv3zbdd9lIlwKs7kGJyI6n5V/Ef8bS2JgV74ycvhzCshV44/DbW3oggqlacDYb1rZ2I7TO4SCsMuoIzrlqlWl+J5W3GcoFbWqY09xP8PwyyQwXVHiCKPUuk5gEcud0ttH6/+Lbs5KYVhBwMD78Jxhli27F24bjwuFARd9Pd/mO7uL5DP/xzHqVCt7oHjgOdtIgxzVCpH4XkP4vtPEIadjI4ez9BQmcbvRD09J9LZ+S1cd5gg2IOhoTcAIT0962v1dDMy8jK2bt0y5b/HxN+J73+ZYvEVuG6FKIIo6mbjxvNo+v417+ap+2j2FnAroBXzlRQ2APs2vN4HeGyaMhuMMT6wBBigDYY+OsT4i8bJ3J
Oh9PwS1cOqjJ8yTv4necK9QsZfOk7+ujzuRpfxl48TrggZumCI8ReMk7l72z4AlcMq5P8rj3+HD52w9V+3Ehyy7Zth1B3R97M+CpcW8P/iUzm0QqFYYOCoAapHVOn4bgedV3USLA8YPXMU/36fcO+Q8ZeNk/t5Dv9Bn/ETxgmeXKvTg4GrB8j/MI874DJ+yjjhntu+tQYHBfT/337ya/JEbkRubY7Ijyj9Qwlvi8f48eNk12ZxRuOEl/tlDkKIOiMyv8/gbnHxnuOx8X9vJH9jnuytWbxHPCrPqjD8nmGcYYfuj3Tj3+Pj9rl4Wz3Kh5cJDgjI3pGFEkSFiHBZSNQTEXkRmbszOKHD2KvGKD2nRM/newijkMoxFcL9Q0rPKZH7WQ5vg0dUjIiciGCfgPFXjpNZm6HzG5146z38DT6cdwnhI0fhf/AzVDyfUuk4XHeYavUZjI6eiuNEdHR8n0zmj7ju45TLzyYIVuK6/UAeGGd8/OVkMvfg++sYHz8exynR1fV5wnBPRkdfQy53G0GwB57XTxDsR7n8LPL5NYThUsbHXwk45PM/wnX7qVSOJpf7VW2g+VGiyCMMu8jlnkR//1vqA7Z9fQfT1XUJQdDJyMi7CMOVDA5+jXz+Olz3McbHX47jlMnnb6Ja3Z9S6SV43v3k8z+nWv07SqUXbv85HrqA8fEXkMncTan0fKrVw+LPZOVQcrlfU6kcTrn8v2b8e6hWj2DTpv+mu/tTRFEPW7Z8FCjMuJ/ULPKkMF8DzT7xQPNxwKPEA82vs9be3VDmHcARDQPNp1przQxVtzTQvKspvtlLe4xpjw/SH+OCGWgujLZWcqSzHcebd/PSOWatrQLnADcC98ab7N3GmI8ZY06uFfs60GuMWQe8B3j/fMQmIpLIIh9onpeWwhxSS6EN0h4fpD/GtMcH6Y9xwbQUOnY8u6tuLN+O4807XdEsIpKEZh+JiEjdAu4aaoWSgohIEkoKIiJS16aksEvXg9uBxd05JiLSbm2YfVRbD+4S4CTgMOB0Y8xhTcXeDAxaaw8GLgI+Mwc/zXaUFEREkgjd1h47tsvWg5vJgu8+mrh0vW2XsM8RxTd7aY8x7fFB+mNMe3zAw0Ts30rB0dHR/jPPPPP2hk2ra8v0QBvXg2u3hd5ScADHGNM38TyND8W3+GNMe3wLIcZ5im+2Dmj1WJ2dnUVr7dENj9UN9UwVy06tB9duCz0pTGhxgfNdRvHNXtpjTHt8kP4Y0x5fOyVZD452rwe3Iwu++6hm6mUh00PxzV7aY0x7fJD+GNMeXzutBQ4xxhxIvB7cacDrmsqsAd4A/BZ4NfALa61aCi1aPXORXUrxzV7aY0x7fJD+GNMeX9ukeT24hb72kYiItNFiaSmIiEgbKCmIiEidkoKIiNQpKYiISJ2SgoiI1CkpiIhInZKCiIjU/X9fk0coSE40NgAAAABJRU5ErkJggg==\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "df.plot.scatter(x='values', y='numerical_label', c='predicted', cmap='spring')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Now, how do we say that the classifier works well or not? \n", "\n", "Standard metrics can be grouped into the following two \"explanations\":\n", "\n", "\n", "- The classifier is right:\n", " - True Positive \n", " - False Negatives\n", "- The classifier is wrong:\n", " - True Negatives (top left blue dots)\n", " - False Positives (bottom right yellow dots)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Let's compute all of these:" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "def make_report(df):\n", " \"\"\"Computes TP, FN, TN and FP from the dataframe.\"\"\"\n", " TP = (df['true_label'] == df['predicted']) & (df['true_label'])\n", " FN = (df['true_label'] == df['predicted']) & (~df['true_label'])\n", " TN = (df['true_label'] != df['predicted']) & (df['true_label'])\n", " FP = (df['true_label'] != df['predicted']) & (~df['true_label'])\n", " return \"TP: {}, FN: {}, TN: {}, FP: {}\".format(TP.sum(), FN.sum(), TN.sum(), FP.sum())" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'TP: 66, FN: 67, TN: 34, FP: 33'" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "make_report(df)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Now, the question is: what happens if we move the threshold up or down?\n", "\n", "First, let's move the threshold up. We only classify as positive the really high values." 
] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'TP: 35, FN: 90, TN: 65, FP: 10'" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "# Only values strictly above 2 are now predicted positive.\n",
 "df['predicted'] = df['values'].gt(2.).astype(float)\n",
 "make_report(df)" ] },
{ "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "df.plot.scatter(x='values', y='numerical_label', c='predicted', cmap='spring')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We now have less false positives at the expense of less true positives." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "What if we move the threshold down?" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'TP: 92, FN: 27, TN: 8, FP: 73'" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df['predicted'] = (df['values'] > -2.).astype(float)\n", "make_report(df)" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "df.plot.scatter(x='values', y='numerical_label', c='predicted', cmap='spring')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Compared to our 0 threshold, we now have more false positives but also more true positives." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "It turns out that one way of vizualizing this changing behaviour is to plot the famous ROC curve.\n", "\n", "To do that, we will introduce normalized \"rates\", so that we become able to compare different classifiers on the same curve.\n", "\n", "The abscissa will be the *false positive rate*, the number of false positive samples divided by the negative condition samples.\n", "\n", "The y axis will be the *true positive rate* the number of true positive samples divided by the number of true condition samples.\n", "\n", "Intuitively, why are those statistics chosen? Difficult to say, but we would like the true positive rate as high as possible, while the false positive rate should be as low as possible." ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [], "source": [ "def compute_roc_point(df):\n", " \"\"\"Returns (FPR, TPR) from the dataframe.\"\"\"\n", " TP = (df['true_label'] == df['predicted']) & (df['true_label'])\n", " T = df['true_label'].sum()\n", " FP = (df['true_label'] != df['predicted']) & (~df['true_label'])\n", " F = (~df['true_label']).sum()\n", " return FP.sum()/F, TP.sum()/T" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(0.73, 0.92)" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "compute_roc_point(df)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "By varying the threshold, we can now draw a curve." 
] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [], "source": [ "roc_curve = []\n", "for thresh in np.arange(-5, 5, 0.05):\n", " df['predicted'] = (df['values'] > thresh).astype(float)\n", " roc_curve.append(compute_roc_point(df))" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAENCAYAAAD0eSVZAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAHWBJREFUeJzt3XmYVNW97vHvqi7oBmSScgBB4WoTZdCoiBonnCImTlGzBKPRSERjMN5oPCfJSXK86j2PSR4fr0k4GuQoanIkS6NC1IhD4CaOFxwRcEBRaFqFZlbsoqtr3T+qIE1XQ+1uqmpX7Xo/z9OPvWuvrvr1j+7X3av2Xtt47xERkWiJhV2AiIgUnsJdRCSCFO4iIhGkcBcRiSCFu4hIBCncRUQiSOEuIhJBCncRkQhSuIuIRFA8xNfWpbEiIl1j8g0IM9xpbGwM8+XLTiKRoKmpKewyyop6kks9yVVNPRk0aFCgcZqWERGJIIW7iEgEKdxFRCIo1Dn39rz3NDc3k06nMSbv+wUVwXtPLBajrq4uMt+TiJS/sgr35uZmunXrRjxeVmXtslQqRXNzMz169Ai7FBGpEnlT1Fp7N3AGsMo5N6qD/Qa4HfgasBm41Dn3aleKSafTkQt2gHg8TjKZDLsMEakiQebcZwDjd7L/dKA++zEZuKOrxUR52iLK35uIlJ+8h8nOub9ba4fuZMjZwH3OOQ+8ZK3tZ60d6Jz7uFBFiogUk0824//2OGxpDruU/L53faBhhZgD2QdY0Wa7IftYTrhbayeTObrHOUcikdhu/6effhrqtMzatWs5//zzAVi1ahU1NTUMGDAAgEWLFjFy5EhSqRTDhw/nN7/5DT179mTgwIEcdNBBpFIp9t13X6ZOnUrfvn1znru2tjbn+20vHo/nHVNt1JNc6kmuXe1J8tWXWP/wvZmNcv8ru4Th3lEnOlxawDk3DZi2dUz7K8qSySQ1NTUFKKlr+vTpw1NPPQXArbfeSq9evbjyyisBqK+v37ZvypQp3HPPPVxxxRXU1dVte/yaa65h+vTpXHPNNTnPnUwm815BV01X2QWlnuRST3J1pSf+w/fwS97IbHzcAEDsp7dihtUXurxQFCLcG4AhbbYHA5FeV2Ds2LEsWbIk5/HDDz+8w8dFpPykH74PtoY7QPda6NsvvIIKrBDhPhuYYq2dCRwJbCjEfHt65l34Fct2ubi2zJBhxCZcvkvPkUqlmDt3LuPGjdvu8dbWVp577jkmTpy4S88vIiWSTsMBI4hde2NmO1aDCXHmoNCCnAr5ADAOSFhrG4B/B7oBOOfuBJ4gcxrkUjKnQn6nWMWGqbm5mVNPPRWAI488cluIb328oaGB0aNHc/zxx4dZpoh0RsxgunUPu4qiCHK2zE4PRbNnyXy/YBVl7eoRdqHV1dXx9NNP7/DxjRs3cskllzBjxgwmTZoUQoUi0ik+HXYFRaW1ZQqkT58+3HTTTdx55520tLSEXY6I7IDfsI703bfBu4
swu+8RdjlFE73LQUM0atQoRowYwaxZs7adUikiheeXLsE3fbpt+4vevUlv2pT/C5s+xc95GFItmNPPw3zNFrHKcBnvQ7shkm9/s47NmzfTs2fPkMopriDfm05xy6We5Kr2nnjvSV91HqRSXXuC0WOIXfBdzF7BbnpRbrI36yjvOzGJiAThtyRhyZvQmgI8pFKYk87AnHQGAP3792fdunX5nyjeDTMgulMxbSncRaSs+VSK9O9u3v6cdIA9B247+o4nEpjuWnW1rbIK9xCniIouyt+bSLF47/EPTIMlb2AmXI4Znl2YNhaDgUN2/sVVrqzCPRaLkUqlIrfsbyqVIhbTiUkineWfmY3/+5OY088jdvKZYZdTUcoqRevq6mhubiaZTEZmidy2d2ISkR3zra3w4XvZeXXwn6zEP3g3HHY05pyLQ66u8pRVuBtjdLcikSrk31lI+oFpsPKj7XfsdwCxy67F6C/fTiurcBeR6PHJZvh4Rcc7Uyn8s3/BL3gOBuyJueyHmH67Z/YZA8O+hKmtLV2xEaJwF5Gi8vf+Fj//Hzse0K075syJmPHnYroryAtF4S4iReW/2Ax7DiRmv9vxgCHDMLvr5iOFpnAXkaLw6TT+hWfhg7dhyP/AHHJE2CVVFYW7iHSJ3/wZfP5ZxzvXNpH+8wxY9i7sfyCxC68oaW2icBeRLvAtW0j/yyRIfrHjQX37Z94gPWpcZE5triQKdxHpvC1bIPkFZuwJMPLLuftr4piDj8D0iOZCgJVA4S4iXTesnthXTg67CumArgwQEYkghbuISARpWkZEdsh7D+kO7jWabi19MdIpCncRybH1HHX/yP2wcf2OB2rNl7KlcBeR7fhl72YW8cqeo25O/HrHA2vimCOOK21xEpjCXSTifDIJDcvwy9+Hj97P/PeTlR1Pt0Bmyd2+/TGTfog5UueoVyqFu0iE+ObNsLxdkH/cAD4b5Lv1gf32xxx4MMS7dfwku/XGHHeazlGvcAp3kYhIPzYTP/sB2HpLx779Yd/9MYcdjdlvf9h3f+if0JF4lVC4i0SEX/4B9O1P7OLvZ0J967roUpUU7iJR0qs35mCtvii6iElEJJJ05C5SYL55Myx+Hf/mAvz7S6C1sBf8NMVitHZ0psuGdbDH3gV9LalcCneRAvCrGjNh/uZ8eHdR5nTCHr3gS6MxdXUFfa1utbWkk8mOd444tKCvJZVL4S7SBT6VgqWL8Quzgf7JysyOgUMwp5yJGX1E5gKgeOF/xfomEjQ1NRX8eSVaAv3kWWvHA7cDNcB059wt7fbvC9wL9MuO+bFz7okC1yoSKr9pA37hK7BwAX7Ra/DF5xCPw/DRmHFfxxw8BqNpESkTecPdWlsDTAVOBRqA+dba2c65xW2G/Qxwzrk7rLUjgCeAoUWoV6Qk/Fuvkv5Lm3PGW7bAyo8y2337Yw7/SuaslIMOwdT1CLdYkQ4EOXIfCyx1zn0AYK2dCZwNtA13D/TJft4XaCxkkSKl5he/Bsve++ddhkwMc9hXMAePydzsWQtmSZkLEu77ACvabDcAR7YbcwPwlLX2aqAXcEpBqhMJU/daaq65IewqRLokSLh3dK2yb7c9EZjhnLvVWns0cL+1dpRzbrvztay1k4HJAM45EolEV2qOrHg8rp60E1ZPNvXowRfGlOW/h35OcqknuYKEewMwpM32YHKnXSYB4wGccy9aa+uABLCq7SDn3DRgWnbT6x3/7SV0FkSOUvXEr/4E/7jDb1iXeeDjFeB9Wf576OckVzX1ZNCgQYHGBQn3+UC9tXYYsBKYAFzYbsxy4GRghrX2IKAOWB24WpGQ+GQS/+RD+CcfhpoaGJg9jundFzP68HCLE9kFecPdOZey1k4B5pA5zfFu59wia+2NwALn3GzgOuAua+0PyUzZXOqcaz91I1I2vPfw6guk3d2wdjVm7AmY8y/F9B8QdmkiBWG8Dy2DfWOjTqppq5r+tAyqGD3xK5eTnjkN3n4TBg8lNn
EyZviogr5GMennJFc19SQ7LZN33WZdoSqR5ld9jH/yz5nz1AG/JQmvvwx1PTAXXoE5fjympibkKkUKT+EukeU/30T69v8F69dkblyRZY79Kuacb2F69w2xOpHiUrhLJPlUC+k7boG1q4hddzPmgBFhlyRSUrrMTiLHe4//wx3wzkLMJVcr2KUq6chdIsW3tuIfm4l//hnM1y2xo04MuySRUCjcJTL8u2+R/u/fw8qPMEeNw5zV/nIMkeqhcJeK59c24R+6Bz//HzBgT2Lf+zEcejTG5D1bTCSyFO5SsXxLC/6ZWfjHHbS2Ys6YgBl/Hqa2NuzSREKncJeK5Je9R3r6rbCqEb58FDF7mW6UIdKGwl0qUvrxP8Hnm4hdcwNm1GFhlyNSdnQqpFSm1lbYY28Fu8gOKNxFRCJI4S4iEkEKdxGRCFK4i4hEkMJdKo5f/Ql8+C7s1jvsUkTKlsJdKorf/Bnp394EaU9swuSwyxEpWwp3qRi+tZX0738Fqz4mdtVPMHsFu1GwSDXSRUxSFP7jFbD6k11+nmSfPviNGzPP+coLsPh1zKU/wHxp9C4/t0iUKdyloPymDfiH78M//wwU4P6869ttm9POJXbMKbv8vCJRp3CXgvCtrfh5f8XP/iMkmzGnnIU54jgC3Md3p/r168v69RsyG91rYdCQXS9WpAoo3GWX+XfeIv1AZh11DjqE2MTJmIGFCeFuiQSmSu5qL1JICnfBJ5uh4cPOf2FrK37eE1pHXaQMKdyrmPce//I8/EP3woa1XXuSbt0xZ07AnKZ11EXKicK9SvnlH2SmUpYugaH1xCZOhq6E86D9MLsnCl+giOwShXsV8u8uIn3bz6FHL8y3p2COOQUT0yUPIlGicK8yftXHpO/4D0jsRexffonp3SfskkSkCHS4VkW2XbrvIXb1zxXsIhGmcK8SPpUifecvYfUnmUv399Sl+yJRpnCvEv75Z2DJG5hvfx8zfFTY5YhIkWnOPcJ8SwtsSWY21q4GwBw5LryCRKRkFO4R45s+xS9cgH9zPry9EFIt/9wZi+3qagAiUiEChbu1djxwO1ADTHfO3dLBGAvcAHjgDefchQWsU3bAt7bC+2/j35yPX7gAGpdnduw5CDPudBiw57axZo+BmFhNSJWKSCnlDXdrbQ0wFTgVaADmW2tnO+cWtxlTD/wEOMY5t85au2fHzyaFlv7dzfDWK1BTA/UjMceeihk9BrP3PmGXJiIhCnLkPhZY6pz7AMBaOxM4G1jcZszlwFTn3DoA59yqQhcqO7BmFXxpNLGrforp2SvsakSkTAQJ932AFW22G4Aj240ZDmCtfZ7M1M0NzrknC1Kh5GV266NgF5HtBAn3jt6Ca38XhjhQD4wDBgP/sNaOcs5td68Fa+1kYDKAc45EQmuStBWPxzvdk6aaGuK1tfSLaC+70pOoU09yqSe5goR7A9B2ce7BQGMHY15yzrUAy6y175AJ+/ltBznnpgHTspu+Set0byeRSNCZnvhkkvSmDaRbWjr1dZWksz2pBupJrmrqyaBBwS5ADBLu84F6a+0wYCUwAWh/JsyjwERghrU2QWaa5oPA1Uqn+XSa9D23wcb1mKNPDLscESkzea9Qdc6lgCnAHGBJ5iG3yFp7o7X2rOywOcAaa+1iYC5wvXNuTbGKFvCz/givvIA571LM6DFhlyMiZcb4AtzEuIt8Y2P72Z3qFvRPy/QLz+LvuR1z3FcxF38/0nc+qqY/t4NST3JVU0+y0zJ5f+m1tkyF8cvfx983FQ48GHPhlZEOdhHpOoV7BfHek555F/TsRezKf8XEtXqEiHRM4V5JXnsR3luMOedbmF69w65GRMqYwr1C+JYW0g/NgH32wxxzatjliEiZU7hXCP+3xzI32rCXYWq0+JeI7JzCvQL4TRvwj/8JRo/BjDg07HJEpAIo3CuAf+JBSDYT++Z3wi5FRCqEwr0C+BXLYNhwzMAh+QeLiKBwrxwx/VOJSHBKDBGRCF
K4lzmfbIbPNoZdhohUGIV7mfLe4xc8R/oXV8HKjzCjjwi7JBGpILp+vQz5VIr0b2+Cxa/B4GHEJl2HGT4y7LJEpIIo3MtR40ew+DXMaedizr0YE9NFSyLSOZqWKWPmgAMV7CLSJQp3EZEIUriLiESQwl1EJIL0hmoJ+U9W4p9/Bny6w/2bevQk/cVm2Li+xJWJSNQo3EvIP/cUfs4j0L17h/s3Y4DsPW136w17DCxdcSISKQr3UvIeauuo+Z3rcHc13eRXRIpLc+4l4tNp/DtvQZ9+YZciIlVA4V4i/qV58NFSzFkXhl2KiFQBhXsJ+GQz/pH7YWg9ZuzxYZcjIlVA4V4Cfs4jsH4NsQsmYbQuu4iUgJKmyPy6Nfg5D2MOPwZzwIiwyxGRKqFwLzL/yP2QbsWcd0nYpYhIFVG4F5l/eR7mmFMwe+wddikiUkUU7kXkvYd0Wqc/ikjJ6SKmIvGNy0nPvCuz0W/3cIsRkaqjcC8wvyWJf+QP+LmPQW0d5sIrMMecGnZZIlJlFO4F5l+ai39mFubYUzHnfhvTu2/YJYlIFQoU7tba8cDtQA0w3Tl3yw7GnQ88CBzhnFtQsCorSfMXAJgLJmHqeoZcjIhUq7xvqFpra4CpwOnACGCitTbnhG1rbW/gB8DLhS5SREQ6J8jZMmOBpc65D5xzW4CZwNkdjLsJ+BXQXMD6RESkC4KE+z7AijbbDdnHtrHWHgoMcc49VsDaKo5/Yz5+7hPQvRZq9HaGiIQnSAKZDh7zWz+x1saA24BL8z2RtXYyMBnAOUcikQhWZZlLNa5g093/hy2vvEjN4KH0+cHP6D5wUKefJx6PR6YnhaKe5FJPcqknuYKEewMwpM32YKCxzXZvYBQwz1oLsDcw21p7Vvs3VZ1z04Bp2U1f6Tem8M1f4J9w+KdnQbwb5puX4U86g43xOHThe9PNOnKpJ7nUk1zV1JNBg4IdOAYJ9/lAvbV2GLASmABsW5TcObcB2Pa/TGvtPOBHUT5bxnuPn/8P/IP3wPo1mKNPwpx3CaZv/7BLExEBAoS7cy5lrZ0CzCFzKuTdzrlF1tobgQXOudnFLrKc+IZlpB+4C959C/bdn9iV/4rZ/8CwyxIR2Y7x3ucfVRy+sbEx/6gy4T//DD/rj/h5f4VevTDfuDhzoVKspmCvUU1/WgalnuRST3JVU0+y0zIdvRe6HZ3SEYDftJH0LdfD6k8x48Zjzv4WplfvsMsSEdkhhXsevqWF9B3/AWubiP3oZszwUWGXJCKSl5b83QnvPf7+38F7izHfuUbBLiIVoyqP3H2yGQK81+Cf/Qv+xbmYMycS042tRaSCVF24px93+Ef/EHi8GXsC5swJRaxIRKTwqi7cafoUevTEfP2C/GN79MQcfSLG5H1jWkSkrFRfuAPU9iB22jfCrkJEpGj0hqqISAQp3EVEIkjhLiISQVUx5+69hxXL8AsX4Be/DnqDVEQiLrLh7pPN8Pab+Dfn499cAOvXZHYMrceM+1q4xYmIFFnkwt1/vgn/6B/xzz0NqRao7QEjv4w5+AjMqMO1LK+IVIXIhLtPt+L//lTmAqXNn2OOPQUz5lioH4np1i3s8kRESqpiw92//zbpe38LranMA8lm2LAOho8iNnEyZvDQUOsTEQlT5Yb78vfh4xVw2NGYePfM6saHHIkZc4yuKBWRqlex4b5V7KKrML37hl2GiEhZ0XnuIiIRpHAXEYkghbuISAQp3EVEIkjhLiISQQp3EZEIUriLiERQ5Yb7+rVhVyAiUrYq7iIm/2kj6T9Nh4ULYL8DoOduYZckIlJ2Kirc03Mfx7v/gng3zDcvw5x0BqamJuyyRETKTkWFu396FuwzlNjVP9fSvSIiO1FZc+7eYwYOUbCLiORRWeEuIiKBKNxFRCJI4S4iEkGB3lC11o4HbgdqgOnOuVva7b8W+C6QAlYDlznnPipwrSIiElDeI3drbQ0wFTgdGAFMtNaOaDfsNWCMc+5g4CHgV4
UuVEREggty5D4WWOqc+wDAWjsTOBtYvHWAc25um/EvARcVskgREemcIOG+D7CizXYDcOROxk8C/trRDmvtZGAygHOORCIRsMyMppoautXV0reTX1cp4vF4p3sSdepJLvUkl3qSK0i4d3S3ad/RQGvtRcAY4ISO9jvnpgHTtj5HU1NTkBozg70n3dJCujlJZ76ukiQSich+b12lnuRST3JVU08GDRoUaFyQcG8AhrTZHgw0th9krT0F+DfgBOdcMtCrB+QbPiQ98y5Yuxr22LuQTy0iEklBwn0+UG+tHQasBCYAF7YdYK09FPg9MN45t6pQxfnWVvyDd+PnPg49emG+9T3M8V8t1NOLiERW3rNlnHMpYAowB1iSecgtstbeaK09Kzvs18BuwIPW2tettbMLUt3i1/DP/gVz9InEbr6D2LjTMTEtFCYiko/xvsPp81LwjY05szvbD3j1BdJ33ELs32/HDB5WorLCU03zhkGpJ7nUk1zV1JPsnHtH74VuR1eoiohEkMJdRCSCFO4iIhGkcBcRiSCFu4hIBCncRUQiSOEuIhJBCncRkQhSuIuIRJDCXUQkgso63P2y9zKfxLuHW4iISIUp23BP/7+/45/8M+boE2GvYOsXi4hIRlmGu3//bfw9t0P9CMzFUzAm7xo5IiLSRtmFu2/6lPTU/w39BxD73k8x3bqFXZKISMUpu3BPz7wLUiliV/8C07tP2OWIiFSksgt3PtsIw+oxAweHXYmISMUqv3AXEZFdpnAXEYkghbuISAQp3EVEIige5ou3XnFO7oPpNIw8tPTFiIhESKjhbsaf3/HjB48pcSUiItESarjHvnFRmC8vIhJZmnMXEYkghbuISAQp3EVEIkjhLiISQQp3EZEIUriLiESQwl1EJIIU7iIiERToIiZr7XjgdqAGmO6cu6Xd/lrgPuBwYA1wgXPuw8KWKiIiQeU9crfW1gBTgdOBEcBEa+2IdsMmAeuccwcAtwG/LHShIiISXJBpmbHAUufcB865LcBM4Ox2Y84G7s1+/hBwsrVWd7UWEQlJkHDfB1jRZrsh+1iHY5xzKWADMKAQBYqISOcFmXPv6Ajcd2EM1trJwGQA5xyJRCLAy1ePeDyunrSjnuRST3KpJ7mChHsDMKTN9mCgcQdjGqy1caAvsLb9EznnpgHTspu+qamp0wVHWSKRQD3ZnnqSSz3JVU09GTRoUKBxQcJ9PlBvrR0GrAQmABe2GzMbuAR4ETgf+JtzLufIXURESiPvnHt2Dn0KMAdYknnILbLW3mitPSs77L+AAdbapcC1wI+LVbCIiORnvA/tAFtH9iIiXZP3bMTQrlC11r5CpkB9ZD/UE/VEPVFPAn7kpeUHREQiSOEuIhJBYYb7tPxDqo56kks9yaWe5FJP2gnzDVURESkSTcuIiERQoCV/d4WWC84VoCfXAt8FUsBq4DLn3EclL7SE8vWkzbjzgQeBI5xzC0pYYskF6Ym11gI3kDm1+A3nXPsLDCMlwO/OvmQWMeyXHfNj59wTJS+0DBT1yF3LBecK2JPXgDHOuYPJrLL5q9JWWVoBe4K1tjfwA+Dl0lZYekF6Yq2tB34CHOOcGwn8z5IXWkIBf05+RuZCy0PJXE3/n6WtsnwUe1pGywXnytsT59xc59zm7OZLZNbzibIgPycAN5H5H11zKYsLSZCeXA5Mdc6tA3DOrSpxjaUWpCce6JP9vC+562BVjWKHu5YLzhWkJ21NAv5a1IrCl7cn1tpDgSHOucdKWViIgvycDAeGW2uft9a+lJ2yiLIgPbkBuMha2wA8AVxdmtLKT7HDvaMj8C4tFxwhgb9fa+1FwBjg10WtKHw77Ym1NkZmyu66klUUviA/J3GgHhgHTASmW2v7FbmuMAXpyURghnNuMPA14P7sz0/VKfY33ZnlgtnZcsEREqQnWGtPAf4NOMs5lyxRbWHJ15PewChgnrX2Q+AoYLa1dkzJKiy9oL87s5xzLc65ZcA7ZMI+qo
L0ZBLgAJxzLwJ1QFUu9F7ss2W0XHCuvD3JTkH8HhhfBfOokKcnzrkNtPkFtdbOA34U8bNlgvzuPEr2SNVamyAzTfNBSassrSA9WQ6cTKYnB5EJ99UlrbJMFPXIXcsF5wrYk18DuwEPWmtft9bODqnckgjYk6oSsCdzgDXW2sXAXOB659yacCouvoA9uQ643Fr7BvAAcGnEDxZ3SFeoiohEUFW+0SAiEnUKdxGRCFK4i4hEkMJdRCSCFO4iIhGkcBcRiaCiL/krUk6yV7juBbS2efirwPPA59ntJuDOrcvJWms9sJnMpe4bgD+ROae87XOIlBUduUs1OtM5t9vWD/55CXu/7PZE4BftFuI6JLvvBOAC4LLSlizSOQp3kXaya5IsIrOeTft9S8kc5X+51HWJdIamZUTayN5L4CvASDI3TWm//0DgOCJ+AxWpfAp3qUaPWmtT2c/n8c87GDWRmVf/hMzt2Z5t8zWvZu8E1JPMTSKq9g4/UhkU7lKNznHOPbN1w1o7NPtpIrs4VUcOA94HvgncAvQCor4Us1QwzbmLBOSc8845R2Z56l+EXY/IzijcRTrvFmCytXbvsAsR2RGFu0gnOecWAv8XuD7sWkR2ROu5i4hEkI7cRUQiSOEuIhJBCncRkQhSuIuIRJDCXUQkghTuIiIRpHAXEYkghbuISAQp3EVEIuj/A2nJcuaejBmzAAAAAElFTkSuQmCC\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "roc_df = pd.DataFrame(data=roc_curve, columns=['FPR', 'TPR'])\n", "roc_df.plot.line(x='FPR', y='TPR')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "One of the interesting things here is that the lowest threshold yields the top right point while the highest threshold yields the bottom left point." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Let's put all of this in an animation." ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "\n", "
\n", " \n", "
\n", " \n", "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", " Once \n", " Loop \n", " Reflect \n", "
\n", "
\n", "\n", "\n", "\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [
 "import matplotlib.animation as animation\n",
 "from IPython.display import display, HTML\n",
 "\n",
 "# Left panel: the full ROC curve. Right panel: the samples coloured by prediction.\n",
 "fig, axes = plt.subplots(ncols=2)\n",
 "axes[0].plot(roc_df['FPR'], roc_df['TPR'])\n",
 "axes[0].set_xlabel('FPR')\n",
 "axes[0].set_ylabel('TPR')\n",
 "\n",
 "def data_gen(t=0):\n",
 "    \"\"\"Yield `df` once per threshold, with `predicted` recomputed in place.\n",
 "\n",
 "    The thresholds sweep from -5 up to (but excluding) 5 in steps of 0.25.\n",
 "    The module-level `df` is mutated and the same object is yielded each\n",
 "    time. `t` is unused and kept only for the existing signature.\n",
 "    \"\"\"\n",
 "    for thresh in np.arange(-5, 5, .25):\n",
 "        df['predicted'] = (df['values'] > thresh).astype(float)\n",
 "        yield df\n",
 "\n",
 "def update(df):\n",
 "    \"\"\"Draw one animation frame from a dataframe yielded by `data_gen`.\n",
 "\n",
 "    Marks the frame's (FPR, TPR) point on the ROC panel and redraws the\n",
 "    samples panel. Returns the list of artists added to the ROC panel.\n",
 "    \"\"\"\n",
 "    x, y = compute_roc_point(df)\n",
 "    m = axes[0].plot(x, y, 'ko')\n",
 "    # Wipe the previous frame first; otherwise every frame stacks another\n",
 "    # full scatter collection on top of the old ones.\n",
 "    axes[1].clear()\n",
 "    # Pin the colour limits so 0 and 1 keep the same colours even in a frame\n",
 "    # where every sample receives the same prediction.\n",
 "    axes[1].scatter(x=df['values'], y=df['numerical_label'], c=df['predicted'], cmap='spring', vmin=0, vmax=1)\n",
 "    axes[1].set_xlabel('values')\n",
 "    axes[1].set_ylabel('numerical_label')\n",
 "    return m\n",
 "\n",
 "\n",
 "ani = animation.FuncAnimation(fig, update, data_gen, interval=100)\n",
 "display(HTML(ani.to_jshtml()))\n",
 "plt.close(fig)" ] } ],
"metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.1" } }, "nbformat": 4, "nbformat_minor": 2 }