{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "

Please cite us if you use the software

" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Example-1 (Comparison of three different classifiers)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "A comparison of three classifiers from `scikit-learn` on the iris dataset.\n", "The iris dataset is a classic and very easy multi-class classification dataset." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Environment check" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Check whether the notebook is running on Google Colab; if it is, install `pycm`." ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import sys\n", "try:\n", " import google.colab\n", " !{sys.executable} -m pip -q -q install pycm\n", "except:\n", " pass" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Install scikit-learn" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "import os\n", "!{sys.executable} -m pip -q -q install scikit-learn\n", "if \"Example1_files\" not in os.listdir():\n", " os.mkdir(\"Example1_files\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Load dataset" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "from sklearn import datasets\n", "from sklearn.model_selection import train_test_split\n", "from pycm import ConfusionMatrix\n", "iris = datasets.load_iris()\n", "X = iris.data\n", "y = iris.target\n", "X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Classifier 1 (C-Support vector)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "from sklearn import svm\n", "classifier_1 = svm.SVC(kernel='linear', C=0.01)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "y_pred_1 = classifier_1.fit(X_train, y_train).predict(X_test)" ] }, { "cell_type": "code", 
"execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Predict 0 1 2 \n", "Actual\n", "0 13 0 0 \n", "\n", "1 0 10 6 \n", "\n", "2 0 0 9 \n", "\n", "\n" ] } ], "source": [ "cm1=ConfusionMatrix(y_test, y_pred_1)\n", "cm1.print_matrix()" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Predict 0 1 2 \n", "Actual\n", "0 1.0 0.0 0.0 \n", "\n", "1 0.0 0.625 0.375 \n", "\n", "2 0.0 0.0 1.0 \n", "\n", "\n" ] } ], "source": [ "cm1.print_normalized_matrix()" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.7673469387755101" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cm1.Kappa " ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.8421052631578947" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cm1.Overall_ACC" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'Substantial'" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cm1.SOA1 # Landis and Koch benchmark" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'Excellent'" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cm1.SOA2 # Fleiss’ benchmark" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'Good'" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cm1.SOA3 # Altman’s benchmark" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'Excellent'" ] }, "execution_count": 13, "metadata": {}, "output_type": 
"execute_result" } ], "source": [ "cm1.SOA4 # Cicchetti’s benchmark" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'Message': 'D:\\\\For Asus Laptop\\\\projects\\\\pycm\\\\Document\\\\Example1_files\\\\cm1.html',\n", " 'Status': True}" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cm1.save_html(os.path.join(\"Example1_files\", \"cm1\"))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Open File" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Classifier 2 (Decision tree)" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [], "source": [ "from sklearn.tree import DecisionTreeClassifier\n", "classifier_2 = DecisionTreeClassifier(max_depth=5)" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "y_pred_2 = classifier_2.fit(X_train, y_train).predict(X_test)" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Predict 0 1 2 \n", "Actual\n", "0 13 0 0 \n", "\n", "1 0 15 1 \n", "\n", "2 0 0 9 \n", "\n", "\n" ] } ], "source": [ "cm2=ConfusionMatrix(y_test, y_pred_2)\n", "cm2.print_matrix()" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Predict 0 1 2 \n", "Actual\n", "0 1.0 0.0 0.0 \n", "\n", "1 0.0 0.9375 0.0625 \n", "\n", "2 0.0 0.0 1.0 \n", "\n", "\n" ] } ], "source": [ "cm2.print_normalized_matrix()" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.95978835978836" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cm2.Kappa " ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.9736842105263158" ] }, "execution_count": 20, 
"metadata": {}, "output_type": "execute_result" } ], "source": [ "cm2.Overall_ACC" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'Almost Perfect'" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cm2.SOA1 # Landis and Koch benchmark" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'Excellent'" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cm2.SOA2 # Fleiss’ benchmark" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'Very Good'" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cm2.SOA3 # Altman’s benchmark" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'Excellent'" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cm2.SOA4 # Cicchetti’s benchmark" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'Message': 'D:\\\\For Asus Laptop\\\\projects\\\\pycm\\\\Document\\\\Example1_files\\\\cm2.html',\n", " 'Status': True}" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cm2.save_html(os.path.join(\"Example1_files\",\"cm2\"))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Open File" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Classifier 3 (AdaBoost)" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [], "source": [ "from sklearn.ensemble import AdaBoostClassifier\n", "classifier_3 = AdaBoostClassifier()" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [], "source": [ "y_pred_3 = classifier_3.fit(X_train, y_train).predict(X_test)" ] }, { "cell_type": "code", 
"execution_count": 28, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Predict 0 1 2 \n", "Actual\n", "0 13 0 0 \n", "\n", "1 0 15 1 \n", "\n", "2 0 3 6 \n", "\n", "\n" ] } ], "source": [ "cm3=ConfusionMatrix(y_test, y_pred_3)\n", "cm3.print_matrix()" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Predict 0 1 2 \n", "Actual\n", "0 1.0 0.0 0.0 \n", "\n", "1 0.0 0.9375 0.0625 \n", "\n", "2 0.0 0.33333 0.66667 \n", "\n", "\n" ] } ], "source": [ "cm3.print_normalized_matrix()" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.8354978354978355" ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cm3.Kappa " ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.8947368421052632" ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cm3.Overall_ACC" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'Almost Perfect'" ] }, "execution_count": 32, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cm3.SOA1 # Landis and Koch benchmark" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'Excellent'" ] }, "execution_count": 33, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cm3.SOA2 # Fleiss’ benchmark" ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'Very Good'" ] }, "execution_count": 34, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cm3.SOA3 # Altman’s benchmark" ] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'Excellent'" ] }, "execution_count": 35, "metadata": {}, 
"output_type": "execute_result" } ], "source": [ "cm3.SOA4 # Cicchetti’s benchmark" ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'Message': 'D:\\\\For Asus Laptop\\\\projects\\\\pycm\\\\Document\\\\Example1_files\\\\cm3.html',\n", " 'Status': True}" ] }, "execution_count": 36, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cm3.save_html(os.path.join(\"Example1_files\", \"cm3\"))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Open File" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## How to compare classifiers?" ] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Best : Decision tree\n", "\n", "Rank Name Class-Score Overall-Score\n", "1 Decision tree 0.55556 0.95238\n", "2 AdaBoost 0.48333 0.92381\n", "3 C-Support vector 0.44444 0.80476\n", "\n" ] } ], "source": [ "from pycm import Compare\n", "\n", "cp = Compare({\"C-Support vector\": cm1, \"Decision tree\": cm2, \"AdaBoost\": cm3})\n", "print(cp)" ] }, { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'Message': 'D:\\\\For Asus Laptop\\\\projects\\\\pycm\\\\Document\\\\Example1_files\\\\cp.comp',\n", " 'Status': True}" ] }, "execution_count": 38, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cp.save_report(os.path.join(\"Example1_files\", \"cp\"))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Open File" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "\n", "\n", "" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.2" } }, "nbformat": 4, 
"nbformat_minor": 2 }