{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# PYCM" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Version : 0.1\n", "-----" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "from pycm import *" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "y_actu = [2, 0, 2, 2, 0, 1, 1, 2, 2, 0, 1, 2]\n", "y_pred = [0, 0, 2, 1, 0, 2, 1, 0, 2, 0, 2, 2]" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "cm = ConfusionMatrix(y_actu, y_pred)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "pycm.ConfusionMatrix([0, 1, 2])" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cm" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[2, 0, 2, 2, 0, 1, 1, 2, 2, 0, 1, 2]" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cm.actual_vector" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[0, 0, 2, 1, 0, 2, 1, 0, 2, 0, 2, 2]" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cm.predict_vector" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{0: 3, 1: 1, 2: 3}" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cm.TP" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{0: 7, 1: 8, 2: 4}" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cm.TN" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{0: 2, 1: 1, 2: 2}" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cm.FP" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{0: 0, 1: 2, 2: 3}" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cm.FN" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## TPR (sensitivity, recall, hit rate, or true positive rate)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "$$TPR=\\frac{TP}{P}=\\frac{TP}{TP+FN}$$" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{0: 1.0, 1: 0.33333, 2: 0.5}" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cm.TPR" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## TNR (specificity or true negative rate)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "$$TNR=\\frac{TN}{N}=\\frac{TN}{TN+FP}$$" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{0: 0.77778, 1: 0.88889, 2: 0.66667}" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cm.TNR" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## PPV (precision or positive predictive value)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "$$PPV=\\frac{TP}{TP+FP}$$" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{0: 0.6, 1: 0.5, 2: 0.6}" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cm.PPV" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## NPV (negative predictive value)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "$$NPV=\\frac{TN}{TN+FN}$$" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{0: 1.0, 1: 0.8, 2: 0.57143}" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cm.NPV" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## FNR (miss rate or false negative rate)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "$$FNR=\\frac{FN}{P}=\\frac{FN}{FN+TP}=1-TPR$$" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{0: 0.0, 1: 0.66667, 2: 0.5}" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cm.FNR" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## FPR (fall-out or false positive rate)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "$$FPR=\\frac{FP}{N}=\\frac{FP}{FP+TN}=1-TNR$$" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{0: 0.22222, 1: 0.11111, 2: 0.33333}" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cm.FPR" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## FDR (false discovery rate)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "$$FDR=\\frac{FP}{FP+TP}=1-PPV$$" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{0: 0.6, 1: 0.5, 2: 0.6}" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cm.PPV" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## FOR (false omission rate)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "$$FOR=\\frac{FN}{FN+TN}=1-NPV$$" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{0: 0.0, 1: 0.2, 2: 0.42857}" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cm.FOR" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## ACC (accuracy)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "$$ACC=\\frac{TP+TN}{P+N}=\\frac{TP+TN}{TP+TN+FP+FN}$$" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{0: 0.83333, 1: 0.75, 2: 0.58333}" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cm.ACC" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## F1-Score (harmonic mean of precision and sensitivity)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "$$F1=2\\frac{PPV.TPR}{PPV+TPR}=\\frac{2TP}{2TP+FP+FN}$$" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{0: 0.75, 1: 0.4, 2: 0.54545}" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cm.F1" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## MCC (Matthews correlation coefficient)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "$$MCC=\\frac{TP \\times TN-FP \\times FN}{\\sqrt{(TP+FP)(TP+FN)(TN+FP)(TN+FN)}}$$" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{0: 0.68313, 1: 0.2582, 2: 0.16903}" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cm.MCC" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## BM (nformedness or Bookmaker Informedness)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "$$BM=TPR+TNR-1$$" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{0: 0.77778, 1: 0.22222, 2: 0.16667}" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cm.BM" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## MK (Markedness)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "$$MK=PPV+NPV-1$$" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{0: 0.6, 1: 0.3, 2: 0.17143}" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cm.MK" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## LR+ (Positive likelihood ratio)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "$$(LR+)=\\frac{TPR}{FPR}$$" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{0: 4.50005, 1: 3.0, 2: 1.50002}" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cm.PLR" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## LR- (Negative likelihood ratio)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "$$(LR-)=\\frac{FNR}{TNR}$$" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{0: 0.0, 1: 0.75, 2: 0.75}" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cm.NLR" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## DOR (Diagnostic odds ratio)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "$$DOR=\\frac{LR+}{LR-}$$" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{0: 'None', 1: 4.0, 2: 2.00003}" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cm.DOR" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Full Run" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Predict 0 1 2 \n", "Actual\n", "0 3 0 0 \n", "1 0 1 2 \n", "2 2 1 3 \n", "\n", "\n", "\n", "\n", "Classes 0 1 2 \n", "ACC(accuracy) 0.83333 0.75 0.58333 \n", "BM(Informedness or Bookmaker Informedness) 0.77778 0.22222 0.16667 \n", "DOR(Diagnostic odds ratio) None 4.0 2.00003 \n", "F1(F1 Score - harmonic mean of precision and sensitivity) 0.75 0.4 0.54545 \n", "FDR(false discovery rate) 0.4 0.5 0.4 \n", "FN(false negative/miss/Type II error) 0 2 3 \n", "FNR(miss rate or false negative rate) 0.0 0.66667 0.5 \n", "FOR(false omission rate) 0.0 0.2 0.42857 \n", "FP(false positive/Type I error/false alarm) 2 1 2 \n", "FPR(fall-out or false positive rate) 0.22222 0.11111 0.33333 \n", "LR+(Positive likelihood ratio) 4.50005 3.0 1.50002 \n", "LR-(Negative likelihood ratio) 0.0 0.75 0.75 \n", "MCC(Matthews correlation coefficient) 0.68313 0.2582 0.16903 \n", "MK(Markedness) 0.6 0.3 0.17143 \n", "NPV(negative predictive value) 1.0 0.8 0.57143 \n", "PPV(precision or positive predictive value) 0.6 0.5 0.6 \n", "TN(true negative/correct rejection) 7 8 4 \n", "TNR(specificity or true negative rate) 0.77778 0.88889 0.66667 \n", "TP(true positive/hit) 3 1 3 \n", "TPR(sensitivity, recall, hit rate, or true positive rate) 1.0 0.33333 0.5 \n", "\n" ] } ], "source": [ "print(cm)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Input Errors" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "ename": "pycmError", "evalue": "Input Vectors Must Be List", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mpycmError\u001b[0m Traceback (most recent call last)", "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mcm2\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mConfusionMatrix\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0my_actu\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m2\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[1;32m~\\AppData\\Local\\Programs\\Python\\Python35-32\\lib\\site-packages\\pycm\\pycm.py\u001b[0m in \u001b[0;36m__init__\u001b[1;34m(self, actual_vector, predict_vector)\u001b[0m\n\u001b[0;32m 8\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0m__init__\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mactual_vector\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mpredict_vector\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 9\u001b[0m \u001b[1;32mif\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[0misinstance\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mactual_vector\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mlist\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32mor\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[0misinstance\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mpredict_vector\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mlist\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 10\u001b[1;33m \u001b[1;32mraise\u001b[0m \u001b[0mpycmError\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"Input Vectors Must Be List\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 11\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mactual_vector\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m!=\u001b[0m\u001b[0mlen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mpredict_vector\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 12\u001b[0m \u001b[1;32mraise\u001b[0m \u001b[0mpycmError\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"Input Vectors Must Be The Same Length\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;31mpycmError\u001b[0m: Input Vectors Must Be List" ] } ], "source": [ "cm2=ConfusionMatrix(y_actu, 2)" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [ { "ename": "pycmError", "evalue": "Input Vectors Must Be The Same Length", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mpycmError\u001b[0m Traceback (most recent call last)", "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mcm3\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mConfusionMatrix\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0my_actu\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m[\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;36m2\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;36m3\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[1;32m~\\AppData\\Local\\Programs\\Python\\Python35-32\\lib\\site-packages\\pycm\\pycm.py\u001b[0m in \u001b[0;36m__init__\u001b[1;34m(self, actual_vector, predict_vector)\u001b[0m\n\u001b[0;32m 10\u001b[0m \u001b[1;32mraise\u001b[0m \u001b[0mpycmError\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"Input Vectors Must Be List\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 11\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mactual_vector\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m!=\u001b[0m\u001b[0mlen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mpredict_vector\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 12\u001b[1;33m \u001b[1;32mraise\u001b[0m \u001b[0mpycmError\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"Input Vectors Must Be The Same Length\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 13\u001b[0m \u001b[0mmatrix_param\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mmatrix_params_calc\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mactual_vector\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mpredict_vector\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 14\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mactual_vector\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mactual_vector\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;31mpycmError\u001b[0m: Input Vectors Must Be The Same Length" ] } ], "source": [ "cm3=ConfusionMatrix(y_actu, [1,2,3])" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.2" } }, "nbformat": 4, "nbformat_minor": 2 }