{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<p  style=\"z-index: 101;background: #fde073;text-align: center;line-height: 2.5;overflow: hidden;font-size:22px;\">Please <a href=\"https://www.pycm.io/doc/#Cite\" target=\"_blank\">cite us</a> if you use the software</p>"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Example-5 (Sample weights)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Environment check"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Check whether the notebook is running on Google Colab; if it is, install `pycm`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys\n",
    "\n",
    "# Importing google.colab serves as the Colab check: when the import succeeds,\n",
    "# pycm is installed with the interpreter that runs this kernel.\n",
    "try:\n",
    "  import google.colab\n",
    "  !{sys.executable} -m pip -q -q install pycm\n",
    "except ImportError:\n",
    "  # Not on Colab: skip the installation. Only a failed import is ignored here,\n",
    "  # so interrupts and unrelated errors are no longer silently swallowed.\n",
    "  pass"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Without weights"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pycm import ConfusionMatrix\n",
    "\n",
    "# Actual labels (y_test) and predicted labels (y_pred) for 12 samples over classes 0, 1 and 2.\n",
    "y_test = [2, 0, 2, 2, 0, 1, 1, 2, 2, 0, 1, 2]\n",
    "y_pred = [0, 0, 2, 1, 0, 2, 1, 0, 2, 0, 2, 2]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "pycm.ConfusionMatrix(classes: [0, 1, 2])"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Unweighted confusion matrix: every sample counts once.\n",
    "cm1 = ConfusionMatrix(actual_vector=y_test, predict_vector=y_pred)\n",
    "cm1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Predict 0       1       2       \n",
      "Actual\n",
      "0       3       0       0       \n",
      "\n",
      "1       0       1       2       \n",
      "\n",
      "2       2       1       3       \n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "Overall Statistics : \n",
      "\n",
      "95% CI                                                            (0.30439,0.86228)\n",
      "ACC Macro                                                         0.72222\n",
      "ARI                                                               0.09206\n",
      "AUNP                                                              0.66667\n",
      "AUNU                                                              0.69444\n",
      "Bangdiwala B                                                      0.37255\n",
      "Bennett S                                                         0.375\n",
      "CBA                                                               0.47778\n",
      "CSI                                                               0.17778\n",
      "Chi-Squared                                                       6.6\n",
      "Chi-Squared DF                                                    4\n",
      "Conditional Entropy                                               0.95915\n",
      "Cramer V                                                          0.5244\n",
      "Cross Entropy                                                     1.59352\n",
      "F1 Macro                                                          0.56515\n",
      "F1 Micro                                                          0.58333\n",
      "FNR Macro                                                         0.38889\n",
      "FNR Micro                                                         0.41667\n",
      "FPR Macro                                                         0.22222\n",
      "FPR Micro                                                         0.20833\n",
      "Gwet AC1                                                          0.38931\n",
      "Hamming Loss                                                      0.41667\n",
      "Joint Entropy                                                     2.45915\n",
      "KL Divergence                                                     0.09352\n",
      "Kappa                                                             0.35484\n",
      "Kappa 95% CI                                                      (-0.07708,0.78675)\n",
      "Kappa No Prevalence                                               0.16667\n",
      "Kappa Standard Error                                              0.22036\n",
      "Kappa Unbiased                                                    0.34426\n",
      "Krippendorff Alpha                                                0.37158\n",
      "Lambda A                                                          0.16667\n",
      "Lambda B                                                          0.42857\n",
      "Mutual Information                                                0.52421\n",
      "NIR                                                               0.5\n",
      "NPV Macro                                                         0.79048\n",
      "NPV Micro                                                         0.79167\n",
      "Overall ACC                                                       0.58333\n",
      "Overall CEN                                                       0.46381\n",
      "Overall J                                                         (1.225,0.40833)\n",
      "Overall MCC                                                       0.36667\n",
      "Overall MCEN                                                      0.51894\n",
      "Overall RACC                                                      0.35417\n",
      "Overall RACCU                                                     0.36458\n",
      "P-Value                                                           0.38721\n",
      "PPV Macro                                                         0.56667\n",
      "PPV Micro                                                         0.58333\n",
      "Pearson C                                                         0.59568\n",
      "Phi-Squared                                                       0.55\n",
      "RCI                                                               0.34947\n",
      "RR                                                                4.0\n",
      "Reference Entropy                                                 1.5\n",
      "Response Entropy                                                  1.48336\n",
      "SOA1(Landis & Koch)                                               Fair\n",
      "SOA2(Fleiss)                                                      Poor\n",
      "SOA3(Altman)                                                      Fair\n",
      "SOA4(Cicchetti)                                                   Poor\n",
      "SOA5(Cramer)                                                      Relatively Strong\n",
      "SOA6(Matthews)                                                    Weak\n",
      "SOA7(Lambda A)                                                    Very Weak\n",
      "SOA8(Lambda B)                                                    Moderate\n",
      "SOA9(Krippendorff Alpha)                                          Low\n",
      "SOA10(Pearson C)                                                  Strong\n",
      "Scott PI                                                          0.34426\n",
      "Standard Error                                                    0.14232\n",
      "TNR Macro                                                         0.77778\n",
      "TNR Micro                                                         0.79167\n",
      "TPR Macro                                                         0.61111\n",
      "TPR Micro                                                         0.58333\n",
      "Zero-one Loss                                                     5\n",
      "\n",
      "Class Statistics :\n",
      "\n",
      "Classes                                                           0             1             2             \n",
      "ACC(Accuracy)                                                     0.83333       0.75          0.58333       \n",
      "AGF(Adjusted F-score)                                             0.9136        0.53995       0.5516        \n",
      "AGM(Adjusted geometric mean)                                      0.83729       0.692         0.60712       \n",
      "AM(Difference between automatic and manual classification)        2             -1            -1            \n",
      "AUC(Area under the ROC curve)                                     0.88889       0.61111       0.58333       \n",
      "AUCI(AUC value interpretation)                                    Very Good     Fair          Poor          \n",
      "AUPR(Area under the PR curve)                                     0.8           0.41667       0.55          \n",
      "BB(Braun-Blanquet similarity)                                     0.6           0.33333       0.5           \n",
      "BCD(Bray-Curtis dissimilarity)                                    0.08333       0.04167       0.04167       \n",
      "BM(Informedness or bookmaker informedness)                        0.77778       0.22222       0.16667       \n",
      "CEN(Confusion entropy)                                            0.25          0.49658       0.60442       \n",
      "DOR(Diagnostic odds ratio)                                        None          4.0           2.0           \n",
      "DP(Discriminant power)                                            None          0.33193       0.16597       \n",
      "DPI(Discriminant power interpretation)                            None          Poor          Poor          \n",
      "ERR(Error rate)                                                   0.16667       0.25          0.41667       \n",
      "F0.5(F0.5 score)                                                  0.65217       0.45455       0.57692       \n",
      "F1(F1 score - harmonic mean of precision and sensitivity)         0.75          0.4           0.54545       \n",
      "F2(F2 score)                                                      0.88235       0.35714       0.51724       \n",
      "FDR(False discovery rate)                                         0.4           0.5           0.4           \n",
      "FN(False negative/miss/type 2 error)                              0             2             3             \n",
      "FNR(Miss rate or false negative rate)                             0.0           0.66667       0.5           \n",
      "FOR(False omission rate)                                          0.0           0.2           0.42857       \n",
      "FP(False positive/type 1 error/false alarm)                       2             1             2             \n",
      "FPR(Fall-out or false positive rate)                              0.22222       0.11111       0.33333       \n",
      "G(G-measure geometric mean of precision and sensitivity)          0.7746        0.40825       0.54772       \n",
      "GI(Gini index)                                                    0.77778       0.22222       0.16667       \n",
      "GM(G-mean geometric mean of specificity and sensitivity)          0.88192       0.54433       0.57735       \n",
      "HD(Hamming distance)                                              2             3             5             \n",
      "IBA(Index of balanced accuracy)                                   0.95062       0.13169       0.27778       \n",
      "ICSI(Individual classification success index)                     0.6           -0.16667      0.1           \n",
      "IS(Information score)                                             1.26303       1.0           0.26303       \n",
      "J(Jaccard index)                                                  0.6           0.25          0.375         \n",
      "LS(Lift score)                                                    2.4           2.0           1.2           \n",
      "MCC(Matthews correlation coefficient)                             0.68313       0.2582        0.16903       \n",
      "MCCI(Matthews correlation coefficient interpretation)             Moderate      Negligible    Negligible    \n",
      "MCEN(Modified confusion entropy)                                  0.26439       0.5           0.6875        \n",
      "MK(Markedness)                                                    0.6           0.3           0.17143       \n",
      "N(Condition negative)                                             9             9             6             \n",
      "NLR(Negative likelihood ratio)                                    0.0           0.75          0.75          \n",
      "NLRI(Negative likelihood ratio interpretation)                    Good          Negligible    Negligible    \n",
      "NPV(Negative predictive value)                                    1.0           0.8           0.57143       \n",
      "OC(Overlap coefficient)                                           1.0           0.5           0.6           \n",
      "OOC(Otsuka-Ochiai coefficient)                                    0.7746        0.40825       0.54772       \n",
      "OP(Optimized precision)                                           0.70833       0.29545       0.44048       \n",
      "P(Condition positive or support)                                  3             3             6             \n",
      "PLR(Positive likelihood ratio)                                    4.5           3.0           1.5           \n",
      "PLRI(Positive likelihood ratio interpretation)                    Poor          Poor          Poor          \n",
      "POP(Population)                                                   12            12            12            \n",
      "PPV(Precision or positive predictive value)                       0.6           0.5           0.6           \n",
      "PRE(Prevalence)                                                   0.25          0.25          0.5           \n",
      "Q(Yule Q - coefficient of colligation)                            None          0.6           0.33333       \n",
      "QI(Yule Q interpretation)                                         None          Moderate      Weak          \n",
      "RACC(Random accuracy)                                             0.10417       0.04167       0.20833       \n",
      "RACCU(Random accuracy unbiased)                                   0.11111       0.0434        0.21007       \n",
      "TN(True negative/correct rejection)                               7             8             4             \n",
      "TNR(Specificity or true negative rate)                            0.77778       0.88889       0.66667       \n",
      "TON(Test outcome negative)                                        7             10            7             \n",
      "TOP(Test outcome positive)                                        5             2             5             \n",
      "TP(True positive/hit)                                             3             1             3             \n",
      "TPR(Sensitivity, recall, hit rate, or true positive rate)         1.0           0.33333       0.5           \n",
      "Y(Youden index)                                                   0.77778       0.22222       0.16667       \n",
      "dInd(Distance index)                                              0.22222       0.67586       0.60093       \n",
      "sInd(Similarity index)                                            0.84287       0.52209       0.57508       \n",
      "\n"
     ]
    }
   ],
   "source": [
    "# Printing the object shows the matrix, then the overall and per-class statistics.\n",
    "print(cm1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## With random weights"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "from random import randint, seed\n",
    "\n",
    "# Fix the seed so the same weights are generated on every run.\n",
    "seed(100)\n",
    "\n",
    "# One integer weight between 1 and 10 (inclusive) per sample.\n",
    "weights = [randint(1, 10) for _ in y_test]\n",
    "\n",
    "# Multiply the weight of the sample at index 2 by 9.\n",
    "weights[2] *= 9"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "pycm.ConfusionMatrix(classes: [0, 1, 2])"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Same vectors as before, but each sample now contributes its weight to the matrix.\n",
    "cm2 = ConfusionMatrix(\n",
    "    actual_vector=y_test,\n",
    "    predict_vector=y_pred,\n",
    "    sample_weight=weights,\n",
    ")\n",
    "cm2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Predict  0        1        2        \n",
      "Actual\n",
      "0        24       0        0        \n",
      "\n",
      "1        0        7        8        \n",
      "\n",
      "2        12       3        76       \n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "Overall Statistics : \n",
      "\n",
      "95% CI                                                            (0.75748,0.88868)\n",
      "ACC Macro                                                         0.88205\n",
      "ARI                                                               0.48768\n",
      "AUNP                                                              0.82779\n",
      "AUNU                                                              0.82623\n",
      "Bangdiwala B                                                      0.73932\n",
      "Bennett S                                                         0.73462\n",
      "CBA                                                               0.65617\n",
      "CSI                                                               0.52442\n",
      "Chi-Squared                                                       110.9678\n",
      "Chi-Squared DF                                                    4\n",
      "Conditional Entropy                                               0.65034\n",
      "Cramer V                                                          0.6533\n",
      "Cross Entropy                                                     1.21\n",
      "F1 Macro                                                          0.74286\n",
      "F1 Micro                                                          0.82308\n",
      "FNR Macro                                                         0.23272\n",
      "FNR Micro                                                         0.17692\n",
      "FPR Macro                                                         0.11481\n",
      "FPR Micro                                                         0.08846\n",
      "Gwet AC1                                                          0.76652\n",
      "Hamming Loss                                                      0.17692\n",
      "Joint Entropy                                                     1.82\n",
      "KL Divergence                                                     0.04034\n",
      "Kappa                                                             0.63722\n",
      "Kappa 95% CI                                                      (0.50272,0.77173)\n",
      "Kappa No Prevalence                                               0.64615\n",
      "Kappa Standard Error                                              0.06863\n",
      "Kappa Unbiased                                                    0.63481\n",
      "Krippendorff Alpha                                                0.63621\n",
      "Lambda A                                                          0.41026\n",
      "Lambda B                                                          0.52174\n",
      "Mutual Information                                                0.5544\n",
      "NIR                                                               0.7\n",
      "NPV Macro                                                         0.86908\n",
      "NPV Micro                                                         0.91154\n",
      "Overall ACC                                                       0.82308\n",
      "Overall CEN                                                       0.28807\n",
      "Overall J                                                         (1.82323,0.60774)\n",
      "Overall MCC                                                       0.64625\n",
      "Overall MCEN                                                      0.38181\n",
      "Overall RACC                                                      0.51231\n",
      "Overall RACCU                                                     0.51553\n",
      "P-Value                                                           0.00097\n",
      "PPV Macro                                                         0.75714\n",
      "PPV Micro                                                         0.82308\n",
      "Pearson C                                                         0.67861\n",
      "Phi-Squared                                                       0.8536\n",
      "RCI                                                               0.47399\n",
      "RR                                                                43.33333\n",
      "Reference Entropy                                                 1.16966\n",
      "Response Entropy                                                  1.20474\n",
      "SOA1(Landis & Koch)                                               Substantial\n",
      "SOA2(Fleiss)                                                      Intermediate to Good\n",
      "SOA3(Altman)                                                      Good\n",
      "SOA4(Cicchetti)                                                   Good\n",
      "SOA5(Cramer)                                                      Strong\n",
      "SOA6(Matthews)                                                    Moderate\n",
      "SOA7(Lambda A)                                                    Moderate\n",
      "SOA8(Lambda B)                                                    Moderate\n",
      "SOA9(Krippendorff Alpha)                                          Low\n",
      "SOA10(Pearson C)                                                  Strong\n",
      "Scott PI                                                          0.63481\n",
      "Standard Error                                                    0.03347\n",
      "TNR Macro                                                         0.88519\n",
      "TNR Micro                                                         0.91154\n",
      "TPR Macro                                                         0.76728\n",
      "TPR Micro                                                         0.82308\n",
      "Zero-one Loss                                                     23\n",
      "\n",
      "Class Statistics :\n",
      "\n",
      "Classes                                                           0             1             2             \n",
      "ACC(Accuracy)                                                     0.90769       0.91538       0.82308       \n",
      "AGF(Adjusted F-score)                                             0.94152       0.68599       0.76783       \n",
      "AGM(Adjusted geometric mean)                                      0.91704       0.81486       0.81018       \n",
      "AM(Difference between automatic and manual classification)        12            -5            -7            \n",
      "AUC(Area under the ROC curve)                                     0.9434        0.72029       0.81502       \n",
      "AUCI(AUC value interpretation)                                    Excellent     Good          Very Good     \n",
      "AUPR(Area under the PR curve)                                     0.83333       0.58333       0.86996       \n",
      "BB(Braun-Blanquet similarity)                                     0.66667       0.46667       0.83516       \n",
      "BCD(Bray-Curtis dissimilarity)                                    0.04615       0.01923       0.02692       \n",
      "BM(Informedness or bookmaker informedness)                        0.88679       0.44058       0.63004       \n",
      "CEN(Confusion entropy)                                            0.23219       0.44655       0.28458       \n",
      "DOR(Diagnostic odds ratio)                                        None          32.66667      19.63333      \n",
      "DP(Discriminant power)                                            None          0.83477       0.71286       \n",
      "DPI(Discriminant power interpretation)                            None          Poor          Poor          \n",
      "ERR(Error rate)                                                   0.09231       0.08462       0.17692       \n",
      "F0.5(F0.5 score)                                                  0.71429       0.63636       0.88993       \n",
      "F1(F1 score - harmonic mean of precision and sensitivity)         0.8           0.56          0.86857       \n",
      "F2(F2 score)                                                      0.90909       0.5           0.84821       \n",
      "FDR(False discovery rate)                                         0.33333       0.3           0.09524       \n",
      "FN(False negative/miss/type 2 error)                              0             8             15            \n",
      "FNR(Miss rate or false negative rate)                             0.0           0.53333       0.16484       \n",
      "FOR(False omission rate)                                          0.0           0.06667       0.32609       \n",
      "FP(False positive/type 1 error/false alarm)                       12            3             8             \n",
      "FPR(Fall-out or false positive rate)                              0.11321       0.02609       0.20513       \n",
      "G(G-measure geometric mean of precision and sensitivity)          0.8165        0.57155       0.86927       \n",
      "GI(Gini index)                                                    0.88679       0.44058       0.63004       \n",
      "GM(G-mean geometric mean of specificity and sensitivity)          0.9417        0.67416       0.81477       \n",
      "HD(Hamming distance)                                              12            11            23            \n",
      "IBA(Index of balanced accuracy)                                   0.98718       0.22395       0.6906        \n",
      "ICSI(Individual classification success index)                     0.66667       0.16667       0.73993       \n",
      "IS(Information score)                                             1.85244       2.6009        0.37018       \n",
      "J(Jaccard index)                                                  0.66667       0.38889       0.76768       \n",
      "LS(Lift score)                                                    3.61111       6.06667       1.29252       \n",
      "MCC(Matthews correlation coefficient)                             0.76889       0.52824       0.60381       \n",
      "MCCI(Matthews correlation coefficient interpretation)             Strong        Moderate      Moderate      \n",
      "MCEN(Modified confusion entropy)                                  0.26416       0.4754        0.40758       \n",
      "MK(Markedness)                                                    0.66667       0.63333       0.57867       \n",
      "N(Condition negative)                                             106           115           39            \n",
      "NLR(Negative likelihood ratio)                                    0.0           0.54762       0.20737       \n",
      "NLRI(Negative likelihood ratio interpretation)                    Good          Negligible    Poor          \n",
      "NPV(Negative predictive value)                                    1.0           0.93333       0.67391       \n",
      "OC(Overlap coefficient)                                           1.0           0.7           0.90476       \n",
      "OOC(Otsuka-Ochiai coefficient)                                    0.8165        0.57155       0.86927       \n",
      "OP(Optimized precision)                                           0.84769       0.56327       0.79836       \n",
      "P(Condition positive or support)                                  24            15            91            \n",
      "PLR(Positive likelihood ratio)                                    8.83333       17.88889      4.07143       \n",
      "PLRI(Positive likelihood ratio interpretation)                    Fair          Good          Poor          \n",
      "POP(Population)                                                   130           130           130           \n",
      "PPV(Precision or positive predictive value)                       0.66667       0.7           0.90476       \n",
      "PRE(Prevalence)                                                   0.18462       0.11538       0.7           \n",
      "Q(Yule Q - coefficient of colligation)                            None          0.94059       0.90307       \n",
      "QI(Yule Q interpretation)                                         None          Strong        Strong        \n",
      "RACC(Random accuracy)                                             0.05112       0.00888       0.45231       \n",
      "RACCU(Random accuracy unbiased)                                   0.05325       0.00925       0.45303       \n",
      "TN(True negative/correct rejection)                               94            112           31            \n",
      "TNR(Specificity or true negative rate)                            0.88679       0.97391       0.79487       \n",
      "TON(Test outcome negative)                                        94            120           46            \n",
      "TOP(Test outcome positive)                                        36            10            84            \n",
      "TP(True positive/hit)                                             24            7             76            \n",
      "TPR(Sensitivity, recall, hit rate, or true positive rate)         1.0           0.46667       0.83516       \n",
      "Y(Youden index)                                                   0.88679       0.44058       0.63004       \n",
      "dInd(Distance index)                                              0.11321       0.53397       0.26315       \n",
      "sInd(Similarity index)                                            0.91995       0.62243       0.81392       \n",
      "\n"
     ]
    }
   ],
   "source": [
    "# Printing the weighted matrix gives the same report layout; compare it with the unweighted one above.\n",
    "print(cm2)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<script type=\"text/javascript\">\n",
    "var sc_project=12845182; \n",
    "var sc_invisible=1; \n",
    "var sc_security=\"001dde8f\"; \n",
    "</script>\n",
    "<script type=\"text/javascript\"\n",
    "src=\"https://www.statcounter.com/counter/counter.js\" async></script>\n",
    "<noscript><div class=\"statcounter\"><a title=\"Web Analytics\"\n",
    "href=\"https://statcounter.com/\" target=\"_blank\"><img class=\"statcounter\"\n",
    "src=\"https://c.statcounter.com/12845182/0/001dde8f/1/\" alt=\"Web Analytics\"\n",
    "referrerPolicy=\"no-referrer-when-downgrade\"></a></div></noscript>"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}