{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "

Please cite us if you use the software

" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Example-6 (Unbalanced data)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Environment check" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Check whether the notebook is running on Google Colab and, if so, install `pycm`." ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import sys\n", "try:\n", " import google.colab\n", " !{sys.executable} -m pip -q -q install pycm\n", "except ImportError:\n", " # google.colab is not importable, so this is not Colab: skip the install.\n", " pass" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Binary classification for unbalanced data" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "from pycm import ConfusionMatrix" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Case1 (Both classes have a good result)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "$$Case_1=\\begin{bmatrix}26900 & 40 \\\\25 & 500 \\end{bmatrix}$$" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Predict Class1 Class2 \n", "Actual\n", "Class1 0.99852 0.00148 \n", "\n", "Class2 0.04762 0.95238 \n", "\n", "\n", "ACC: {'Class2': 0.9976333515383216, 'Class1': 0.9976333515383216}\n", "MCC: {'Class2': 0.9378574017402594, 'Class1': 0.9378574017402594}\n", "CEN: {'Class2': 0.30489006849060607, 'Class1': 0.012858728415908176}\n", "MCEN: {'Class2': 0.46949279678726225, 'Class1': 0.023280122318969122}\n", "DP: {'Class2': 2.276283896527635, 'Class1': 2.276283896527635}\n", "Kappa: 0.9377606597584491\n", "RCI: 0.8682877002417864\n", "SOA1: Almost Perfect\n" ] } ], "source": [ "case1 = ConfusionMatrix(matrix={\"Class1\": {\"Class1\": 26900, \"Class2\": 40}, \"Class2\": {\"Class1\": 25, \"Class2\": 500}})\n", "case1.print_normalized_matrix()\n", "print('ACC:', case1.ACC)\n", "print('MCC:', case1.MCC)\n", "print('CEN:', case1.CEN)\n", "print('MCEN:', case1.MCEN)\n", 
"print('DP:', case1.DP)\n", "print('Kappa:', case1.Kappa)\n", "print('RCI:', case1.RCI)\n", "print('SOA1:', case1.SOA1)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Case2 (The first class has a good result)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "$$Case_2=\\begin{bmatrix}29600 & 40 \\\\500 & 25 \\end{bmatrix}$$" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Predict Class1 Class2 \n", "Actual\n", "Class1 0.99865 0.00135 \n", "\n", "Class2 0.95238 0.04762 \n", "\n", "\n", "ACC: {'Class2': 0.982098458478369, 'Class1': 0.982098458478369}\n", "MCC: {'Class2': 0.13048897476798949, 'Class1': 0.13048897476798949}\n", "CEN: {'Class2': 0.4655917826576813, 'Class1': 0.06481573363174531}\n", "MCEN: {'Class2': 0.4264929996758212, 'Class1': 0.11078640690031397}\n", "DP: {'Class2': 0.864594924328404, 'Class1': 0.864594924328404}\n", "Kappa: 0.08122239707598865\n", "RCI: 0.022375346499017443\n", "SOA1: Slight\n" ] } ], "source": [ "case2 = ConfusionMatrix(matrix={\"Class1\": {\"Class1\": 29600, \"Class2\": 40}, \"Class2\": {\"Class1\": 500, \"Class2\": 25}})\n", "case2.print_normalized_matrix()\n", "print('ACC:', case2.ACC)\n", "print('MCC:', case2.MCC)\n", "print('CEN:', case2.CEN)\n", "print('MCEN:', case2.MCEN)\n", "print('DP:', case2.DP)\n", "print('Kappa:', case2.Kappa)\n", "print('RCI:', case2.RCI)\n", "print('SOA1:', case2.SOA1)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Case3 (The second class has a good result)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "$$Case_3=\\begin{bmatrix}40 & 26900 \\\\25 & 500 \\end{bmatrix}$$" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Predict Class1 Class2 \n", "Actual\n", "Class1 0.00148 0.99852 \n", "\n", "Class2 0.04762 0.95238 \n", "\n", "\n", "ACC: {'Class2': 0.019661387220098307, 
'Class1': 0.019661387220098307}\n", "MCC: {'Class2': -0.13000800945464058, 'Class1': -0.13000800945464058}\n", "CEN: {'Class2': 0.06103563616795208, 'Class1': 0.014927427128936136}\n", "MCEN: {'Class2': 0.03655796690365652, 'Class1': 0.01281422838054554}\n", "DP: {'Class2': -0.8416930356875597, 'Class1': -0.8416930356875597}\n", "Kappa: -0.0017678372492452412\n", "RCI: 0.02192606003351106\n", "SOA1: Poor\n" ] } ], "source": [ "case3 = ConfusionMatrix(matrix={\"Class1\": {\"Class1\": 40, \"Class2\": 26900}, \"Class2\": {\"Class1\": 25, \"Class2\": 500}})\n", "case3.print_normalized_matrix()\n", "print('ACC:', case3.ACC)\n", "print('MCC:', case3.MCC)\n", "print('CEN:', case3.CEN)\n", "print('MCEN:', case3.MCEN)\n", "print('DP:', case3.DP)\n", "print('Kappa:', case3.Kappa)\n", "print('RCI:', case3.RCI)\n", "print('SOA1:', case3.SOA1)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Multi-class classification for unbalanced data" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Case1 (All classes have a good result and are unbalanced)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "$$Case_1=\\begin{bmatrix}4 & 0 &0&1 \\\\0 & 4&1&0\\\\0&1&4&0\\\\0&0&1&40000 \\end{bmatrix}$$" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Predict Class1 Class2 Class3 Class4 \n", "Actual\n", "Class1 0.8 0.0 0.0 0.2 \n", "\n", "Class2 0.0 0.8 0.2 0.0 \n", "\n", "Class3 0.0 0.2 0.8 0.0 \n", "\n", "Class4 0.0 0.0 2e-05 0.99998 \n", "\n", "\n", "ACC: {'Class3': 0.9999250299880048, 'Class4': 0.9999500199920032, 'Class2': 0.9999500199920032, 'Class1': 0.9999750099960016}\n", "MCC: {'Class3': 0.7302602381427055, 'Class4': 0.9333083339583177, 'Class2': 0.7999750068731099, 'Class1': 0.8944160139432883}\n", "CEN: {'Class3': 0.3649884090288471, 'Class4': 0.0001575200922489127, 'Class2': 0.25701944178769376, 'Class1': 0.13625493172565745}\n", "MCEN: {'Class3': 
0.4654427710721536, 'Class4': 0.00029569133318617423, 'Class2': 0.3333333333333333, 'Class1': 0.17964888034078544}\n", "DP: {'Class3': 2.7032690544190636, 'Class4': 3.1691421556058055, 'Class2': 2.869241573973406, 'Class1': 'None'}\n", "Kappa: 0.8666333383326446\n", "RCI: 0.8711441699127427\n", "SOA1: Almost Perfect\n" ] } ], "source": [ "case1 = ConfusionMatrix(\n", " matrix={\n", " \"Class1\": {\"Class1\": 4, \"Class2\": 0, \"Class3\": 0, \"Class4\": 1},\n", " \"Class2\": {\"Class1\": 0, \"Class2\": 4, \"Class3\": 1, \"Class4\": 0},\n", " \"Class3\": {\"Class1\": 0, \"Class2\": 1, \"Class3\": 4, \"Class4\": 0},\n", " \"Class4\": {\"Class1\": 0, \"Class2\": 0, \"Class3\": 1, \"Class4\": 40000}})\n", "case1.print_normalized_matrix()\n", "print('ACC:', case1.ACC)\n", "print('MCC:', case1.MCC)\n", "print('CEN:', case1.CEN)\n", "print('MCEN:', case1.MCEN)\n", "print('DP:', case1.DP)\n", "print('Kappa:', case1.Kappa)\n", "print('RCI:', case1.RCI)\n", "print('SOA1:', case1.SOA1)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Case2 (All classes have the same result and are balanced)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "$$Case_2=\\begin{bmatrix}1 & 1 &1&1 \\\\1 & 1&1&1\\\\1&1&1&1\\\\1&1&1&1 \\end{bmatrix}$$" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Predict Class1 Class2 Class3 Class4 \n", "Actual\n", "Class1 0.25 0.25 0.25 0.25 \n", "\n", "Class2 0.25 0.25 0.25 0.25 \n", "\n", "Class3 0.25 0.25 0.25 0.25 \n", "\n", "Class4 0.25 0.25 0.25 0.25 \n", "\n", "\n", "ACC: {'Class3': 0.625, 'Class4': 0.625, 'Class2': 0.625, 'Class1': 0.625}\n", "MCC: {'Class3': 0.0, 'Class4': 0.0, 'Class2': 0.0, 'Class1': 0.0}\n", "CEN: {'Class3': 0.8704188162777186, 'Class4': 0.8704188162777186, 'Class2': 0.8704188162777186, 'Class1': 0.8704188162777186}\n", "MCEN: {'Class3': 0.9308855421443073, 'Class4': 0.9308855421443073, 'Class2': 0.9308855421443073, 'Class1': 
0.9308855421443073}\n", "DP: {'Class3': 0.0, 'Class4': 0.0, 'Class2': 0.0, 'Class1': 0.0}\n", "Kappa: 0.0\n", "RCI: 0.0\n", "SOA1: Slight\n" ] } ], "source": [ "case2 = ConfusionMatrix(\n", " matrix={\n", " \"Class1\": {\"Class1\": 1, \"Class2\": 1, \"Class3\": 1, \"Class4\": 1},\n", " \"Class2\": {\"Class1\": 1, \"Class2\": 1, \"Class3\": 1, \"Class4\": 1},\n", " \"Class3\": {\"Class1\": 1, \"Class2\": 1, \"Class3\": 1, \"Class4\": 1},\n", " \"Class4\": {\"Class1\": 1, \"Class2\": 1, \"Class3\": 1, \"Class4\": 1}})\n", "case2.print_normalized_matrix()\n", "# Report each statistic of this case as `NAME: value`.\n", "for stat in ('ACC', 'MCC', 'CEN', 'MCEN', 'DP', 'Kappa', 'RCI', 'SOA1'):\n", "    print(stat + ':', getattr(case2, stat))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Case3 (A class has a bad result and is a bit unbalanced)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "$$Case_3=\\begin{bmatrix}1 & 1 &1&1 \\\\1 & 1&1&1\\\\1&1&1&1\\\\10&1&1&1 \\end{bmatrix}$$" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Predict Class1 Class2 Class3 Class4 \n", "Actual\n", "Class1 0.25 0.25 0.25 0.25 \n", "\n", "Class2 0.25 0.25 0.25 0.25 \n", "\n", "Class3 0.25 0.25 0.25 0.25 \n", "\n", "Class4 0.76923 0.07692 0.07692 0.07692 \n", "\n", "\n", "ACC: {'Class3': 0.76, 'Class4': 0.4, 'Class2': 0.76, 'Class1': 0.4}\n", "MCC: {'Class3': 0.10714285714285714, 'Class4': -0.2358640882624316, 'Class2': 0.10714285714285714, 'Class1': -0.2358640882624316}\n", "CEN: {'Class3': 0.8704188162777186, 'Class4': 0.6392779429225796, 'Class2': 0.8704188162777186, 'Class1': 0.6392779429225794}\n", "MCEN: {'Class3': 0.9308855421443073, 'Class4': 0.647512271542988, 'Class2': 0.9308855421443073, 'Class1': 0.647512271542988}\n", "DP: {'Class3': 0.16596653499824943, 'Class4': -0.3319330699964992, 'Class2': 
0.16596653499824943, 'Class1': -0.33193306999649924}\n", "Kappa: -0.07361963190184047\n", "RCI: 0.11603030564493627\n", "SOA1: Poor\n" ] } ], "source": [ "case3 = ConfusionMatrix(\n", " matrix={\n", " \"Class1\": {\"Class1\": 1, \"Class2\": 1, \"Class3\": 1, \"Class4\": 1},\n", " \"Class2\": {\"Class1\": 1, \"Class2\": 1, \"Class3\": 1, \"Class4\": 1},\n", " \"Class3\": {\"Class1\": 1, \"Class2\": 1, \"Class3\": 1, \"Class4\": 1},\n", " \"Class4\": {\"Class1\": 10, \"Class2\": 1, \"Class3\": 1, \"Class4\": 1}})\n", "case3.print_normalized_matrix()\n", "print('ACC:', case3.ACC)\n", "print('MCC:', case3.MCC)\n", "print('CEN:', case3.CEN)\n", "print('MCEN:', case3.MCEN)\n", "print('DP:', case3.DP)\n", "print('Kappa:', case3.Kappa)\n", "print('RCI:', case3.RCI)\n", "print('SOA1:', case3.SOA1)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Case4 (A class is very unbalanced and gets a bad result)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "$$Case_4=\\begin{bmatrix}1 & 1 &1&1 \\\\1 & 1&1&1\\\\1&1&1&1\\\\10000&1&1&1 \\end{bmatrix}$$" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Predict Class1 Class2 Class3 Class4 \n", "Actual\n", "Class1 0.25 0.25 0.25 0.25 \n", "\n", "Class2 0.25 0.25 0.25 0.25 \n", "\n", "Class3 0.25 0.25 0.25 0.25 \n", "\n", "Class4 0.9997 0.0001 0.0001 0.0001 \n", "\n", "\n", "ACC: {'Class3': 0.999400898652022, 'Class4': 0.000998502246630055, 'Class2': 0.999400898652022, 'Class1': 0.000998502246630055}\n", "MCC: {'Class3': 0.24970032963739885, 'Class4': -0.43266656861311537, 'Class2': 0.24970032963739885, 'Class1': -0.43266656861311537}\n", "CEN: {'Class3': 0.8704188162777186, 'Class4': 0.0029588592520426657, 'Class2': 0.8704188162777186, 'Class1': 0.0029588592520426657}\n", "MCEN: {'Class3': 0.9308855421443073, 'Class4': 0.002903385725603509, 'Class2': 0.9308855421443073, 'Class1': 0.002903385725603509}\n", "DP: {'Class3': 
1.6794055876913858, 'Class4': -1.9423127303715728, 'Class2': 1.6794055876913858, 'Class1': -1.9423127303715728}\n", "Kappa: -0.0003990813465900262\n", "RCI: 0.5536610475678804\n", "SOA1: Poor\n" ] } ], "source": [ "case4 = ConfusionMatrix(\n", " matrix={\n", " \"Class1\": {\"Class1\": 1, \"Class2\": 1, \"Class3\": 1, \"Class4\": 1},\n", " \"Class2\": {\"Class1\": 1, \"Class2\": 1, \"Class3\": 1, \"Class4\": 1},\n", " \"Class3\": {\"Class1\": 1, \"Class2\": 1, \"Class3\": 1, \"Class4\": 1},\n", " \"Class4\": {\"Class1\": 10000, \"Class2\": 1, \"Class3\": 1, \"Class4\": 1}})\n", "# Print the normalized confusion matrix of this case, then its statistics.\n", "case4.print_normalized_matrix()\n", "print('ACC:', case4.ACC)\n", "print('MCC:', case4.MCC)\n", "print('CEN:', case4.CEN)\n", "print('MCEN:', case4.MCEN)\n", "print('DP:', case4.DP)\n", "print('Kappa:', case4.Kappa)\n", "print('RCI:', case4.RCI)\n", "print('SOA1:', case4.SOA1)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Case5 (A class is very unbalanced and gets a bad result)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "$$Case_5=\\begin{bmatrix}1 & 1 &1&1 \\\\1 & 1&1&1\\\\1&1&1&1\\\\10&10&10&10 \\end{bmatrix}$$" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Predict Class1 Class2 Class3 Class4 \n", "Actual\n", "Class1 0.25 0.25 0.25 0.25 \n", "\n", "Class2 0.25 0.25 0.25 0.25 \n", "\n", "Class3 0.25 0.25 0.25 0.25 \n", "\n", "Class4 0.25 0.25 0.25 0.25 \n", "\n", "\n", "ACC: {'Class3': 0.7115384615384616, 'Class4': 0.36538461538461536, 'Class2': 0.7115384615384616, 'Class1': 0.7115384615384616}\n", "MCC: {'Class3': 0.0, 'Class4': 0.0, 'Class2': 0.0, 'Class1': 0.0}\n", "CEN: {'Class3': 0.6392779429225794, 'Class4': 0.6522742127953861, 'Class2': 0.6392779429225794, 'Class1': 0.6392779429225794}\n", "MCEN: {'Class3': 0.647512271542988, 'Class4': 0.7144082229288313, 'Class2': 0.647512271542988, 'Class1': 0.647512271542988}\n", "DP: {'Class3': 0.0, 'Class4': 0.0, 'Class2': 
0.0, 'Class1': 0.0}\n", "Kappa: 0.0\n", "RCI: 0.0\n", "SOA1: Slight\n" ] } ], "source": [ "case5 = ConfusionMatrix(\n", " matrix={\n", " \"Class1\": {\"Class1\": 1, \"Class2\": 1, \"Class3\": 1, \"Class4\": 1},\n", " \"Class2\": {\"Class1\": 1, \"Class2\": 1, \"Class3\": 1, \"Class4\": 1},\n", " \"Class3\": {\"Class1\": 1, \"Class2\": 1, \"Class3\": 1, \"Class4\": 1},\n", " \"Class4\": {\"Class1\": 10, \"Class2\": 10, \"Class3\": 10, \"Class4\": 10}})\n", "case5.print_normalized_matrix()\n", "print('ACC:', case5.ACC)\n", "print('MCC:', case5.MCC)\n", "print('CEN:', case5.CEN)\n", "print('MCEN:', case5.MCEN)\n", "print('DP:', case5.DP)\n", "print('Kappa:', case5.Kappa)\n", "print('RCI:', case5.RCI)\n", "print('SOA1:', case5.SOA1)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Case6 (A class is very unbalanced and gets a bad result)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "$$Case_6=\\begin{bmatrix}1 & 1 &1&1 \\\\1 & 1&1&1\\\\1&1&1&1\\\\10000&10000&10000&10000 \\end{bmatrix}$$" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Predict Class1 Class2 Class3 Class4 \n", "Actual\n", "Class1 0.25 0.25 0.25 0.25 \n", "\n", "Class2 0.25 0.25 0.25 0.25 \n", "\n", "Class3 0.25 0.25 0.25 0.25 \n", "\n", "Class4 0.25 0.25 0.25 0.25 \n", "\n", "\n", "ACC: {'Class3': 0.7499500149955014, 'Class4': 0.25014995501349596, 'Class2': 0.7499500149955014, 'Class1': 0.7499500149955014}\n", "MCC: {'Class3': 0.0, 'Class4': 0.0, 'Class2': 0.0, 'Class1': 0.0}\n", "CEN: {'Class3': 0.0029588592520426657, 'Class4': 0.539296694603886, 'Class2': 0.0029588592520426657, 'Class1': 0.0029588592520426657}\n", "MCEN: {'Class3': 0.002903385725603509, 'Class4': 0.580710610324597, 'Class2': 0.002903385725603509, 'Class1': 0.002903385725603509}\n", "DP: {'Class3': 0.0, 'Class4': 0.0, 'Class2': 0.0, 'Class1': 0.0}\n", "Kappa: 0.0\n", "RCI: 0.0\n", "SOA1: Slight\n" ] } ], "source": [ "case6 
= ConfusionMatrix(\n", " matrix={\n", " \"Class1\": {\"Class1\": 1, \"Class2\": 1, \"Class3\": 1, \"Class4\": 1},\n", " \"Class2\": {\"Class1\": 1, \"Class2\": 1, \"Class3\": 1, \"Class4\": 1},\n", " \"Class3\": {\"Class1\": 1, \"Class2\": 1, \"Class3\": 1, \"Class4\": 1},\n", " \"Class4\": {\"Class1\": 10000, \"Class2\": 10000, \"Class3\": 10000, \"Class4\": 10000}})\n", "case6.print_normalized_matrix()\n", "# Report each statistic of this case as `NAME: value`.\n", "for stat in ('ACC', 'MCC', 'CEN', 'MCEN', 'DP', 'Kappa', 'RCI', 'SOA1'):\n", "    print(stat + ':', getattr(case6, stat))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "\n", "\n", "" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.2" } }, "nbformat": 4, "nbformat_minor": 2 }