{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<p  style=\"z-index: 101;background: #fde073;text-align: center;line-height: 2.5;overflow: hidden;font-size:22px;\">Please <a href=\"https://www.pycm.io/doc/#Cite\" target=\"_blank\">cite us</a> if you use the software</p>"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Example-1 (Comparison of three different classifiers)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "A comparison of a 3 classifiers in `scikit-learn` on iris dataset.\n",
    "The iris dataset is a classic and very easy multi-class classification dataset."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Environment check"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Checking that the notebook is running on Google Colab or not."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys\n",
    "try:\n",
    "  import google.colab\n",
    "  !{sys.executable} -m pip -q -q install pycm\n",
    "except:\n",
    "  pass"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Install scikit-learn"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Keyring is skipped due to an exception: invalid syntax (core.py, line 48)\n"
     ]
    }
   ],
   "source": [
    "import os\n",
    "!{sys.executable} -m pip -q -q install scikit-learn\n",
    "if \"Example1_files\" not in os.listdir():\n",
    "    os.mkdir(\"Example1_files\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn import datasets\n",
    "from sklearn.model_selection import train_test_split\n",
    "from pycm import ConfusionMatrix\n",
    "iris = datasets.load_iris()\n",
    "X = iris.data\n",
    "y = iris.target\n",
    "X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Classifier 1 (C-Support vector)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn import svm\n",
    "classifier_1 = svm.SVC(kernel='linear', C=0.01)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "y_pred_1 = classifier_1.fit(X_train, y_train).predict(X_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Predict  0        1        2        \n",
      "Actual\n",
      "0        13       0        0        \n",
      "\n",
      "1        0        10       6        \n",
      "\n",
      "2        0        0        9        \n",
      "\n",
      "\n"
     ]
    }
   ],
   "source": [
    "cm1=ConfusionMatrix(y_test, y_pred_1)\n",
    "cm1.print_matrix()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Predict     0           1           2           \n",
      "Actual\n",
      "0           1.0         0.0         0.0         \n",
      "\n",
      "1           0.0         0.625       0.375       \n",
      "\n",
      "2           0.0         0.0         1.0         \n",
      "\n",
      "\n"
     ]
    }
   ],
   "source": [
    "cm1.print_normalized_matrix()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.7673469387755101"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cm1.Kappa "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.8421052631578947"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cm1.Overall_ACC"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'Substantial'"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cm1.SOA1  # Landis and Koch benchmark"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'Excellent'"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cm1.SOA2  # Fleiss’ benchmark"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'Good'"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cm1.SOA3  # Altman’s benchmark"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'Excellent'"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cm1.SOA4  # Cicchetti’s benchmark"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'Message': 'D:\\\\For Asus Laptop\\\\projects\\\\pycm\\\\Document\\\\Example1_files\\\\cm1.html',\n",
       " 'Status': True}"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cm1.save_html(os.path.join(\"Example1_files\", \"cm1\"))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<a href=\"Example1_files\\cm1.html\">Open File</a>"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Classifier 2 (Decision tree)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.tree import DecisionTreeClassifier\n",
    "classifier_2 = DecisionTreeClassifier(max_depth=5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "y_pred_2 = classifier_2.fit(X_train, y_train).predict(X_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Predict  0        1        2        \n",
      "Actual\n",
      "0        13       0        0        \n",
      "\n",
      "1        0        15       1        \n",
      "\n",
      "2        0        0        9        \n",
      "\n",
      "\n"
     ]
    }
   ],
   "source": [
    "cm2=ConfusionMatrix(y_test, y_pred_2)\n",
    "cm2.print_matrix()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Predict      0            1            2            \n",
      "Actual\n",
      "0            1.0          0.0          0.0          \n",
      "\n",
      "1            0.0          0.9375       0.0625       \n",
      "\n",
      "2            0.0          0.0          1.0          \n",
      "\n",
      "\n"
     ]
    }
   ],
   "source": [
    "cm2.print_normalized_matrix()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.95978835978836"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cm2.Kappa "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.9736842105263158"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cm2.Overall_ACC"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'Almost Perfect'"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cm2.SOA1  # Landis and Koch benchmark"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'Excellent'"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cm2.SOA2  # Fleiss’ benchmark"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'Very Good'"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cm2.SOA3  # Altman’s benchmark"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'Excellent'"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cm2.SOA4  # Cicchetti’s benchmark"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'Message': 'D:\\\\For Asus Laptop\\\\projects\\\\pycm\\\\Document\\\\Example1_files\\\\cm2.html',\n",
       " 'Status': True}"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cm2.save_html(os.path.join(\"Example1_files\",\"cm2\"))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<a href=\"Example1_files\\cm2.html\">Open File</a>"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Classifier 3 (AdaBoost)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.ensemble import AdaBoostClassifier\n",
    "classifier_3 = AdaBoostClassifier()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [],
   "source": [
    "y_pred_3 = classifier_3.fit(X_train, y_train).predict(X_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Predict  0        1        2        \n",
      "Actual\n",
      "0        13       0        0        \n",
      "\n",
      "1        0        15       1        \n",
      "\n",
      "2        0        3        6        \n",
      "\n",
      "\n"
     ]
    }
   ],
   "source": [
    "cm3=ConfusionMatrix(y_test, y_pred_3)\n",
    "cm3.print_matrix()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Predict       0             1             2             \n",
      "Actual\n",
      "0             1.0           0.0           0.0           \n",
      "\n",
      "1             0.0           0.9375        0.0625        \n",
      "\n",
      "2             0.0           0.33333       0.66667       \n",
      "\n",
      "\n"
     ]
    }
   ],
   "source": [
    "cm3.print_normalized_matrix()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.8354978354978355"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cm3.Kappa "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.8947368421052632"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cm3.Overall_ACC"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'Almost Perfect'"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cm3.SOA1  # Landis and Koch benchmark"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'Excellent'"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cm3.SOA2  # Fleiss’ benchmark"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'Very Good'"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cm3.SOA3  # Altman’s benchmark"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'Excellent'"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cm3.SOA4  # Cicchetti’s benchmark"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'Message': 'D:\\\\For Asus Laptop\\\\projects\\\\pycm\\\\Document\\\\Example1_files\\\\cm3.html',\n",
       " 'Status': True}"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cm3.save_html(os.path.join(\"Example1_files\", \"cm3\"))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<a href=\"Example1_files\\cm3.html\">Open File</a>"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## How to compare classifiers?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Best : Decision tree\n",
      "\n",
      "Rank   Name                Class-Score       Overall-Score\n",
      "1      Decision tree       0.55556           0.95238\n",
      "2      AdaBoost            0.48333           0.92381\n",
      "3      C-Support vector    0.44444           0.80476\n",
      "\n"
     ]
    }
   ],
   "source": [
    "from pycm import Compare\n",
    "\n",
    "cp = Compare({\"C-Support vector\": cm1, \"Decision tree\": cm2, \"AdaBoost\": cm3})\n",
    "print(cp)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'Message': 'D:\\\\For Asus Laptop\\\\projects\\\\pycm\\\\Document\\\\Example1_files\\\\cp.comp',\n",
       " 'Status': True}"
      ]
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cp.save_report(os.path.join(\"Example1_files\", \"cp\"))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<a href=\"Example1_files\\cp.comp\">Open File</a>"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<script type=\"text/javascript\">\n",
    "var sc_project=12845182; \n",
    "var sc_invisible=1; \n",
    "var sc_security=\"001dde8f\"; \n",
    "</script>\n",
    "<script type=\"text/javascript\"\n",
    "src=\"https://www.statcounter.com/counter/counter.js\" async></script>\n",
    "<noscript><div class=\"statcounter\"><a title=\"Web Analytics\"\n",
    "href=\"https://statcounter.com/\" target=\"_blank\"><img class=\"statcounter\"\n",
    "src=\"https://c.statcounter.com/12845182/0/001dde8f/1/\" alt=\"Web Analytics\"\n",
    "referrerPolicy=\"no-referrer-when-downgrade\"></a></div></noscript>"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}