{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "\n",
       "\n"
      ],
      "text/plain": [
       ""
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "%run ../../../common_functions/import_all.py\n",
    "\n",
    "from common_functions.setup_notebook import set_css_style, setup_matplotlib, config_ipython\n",
    "from common_functions.class_helpers import do_plot_conf_mat\n",
    "\n",
    "from sklearn.datasets import load_iris\n",
    "from sklearn.ensemble import RandomForestClassifier\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.metrics import accuracy_score, precision_score, recall_score, classification_report\n",
    "\n",
    "config_ipython()\n",
    "setup_matplotlib()\n",
    "set_css_style()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Classification - performance metrics\n",
    "\n",
    "We will use the Iris Dataset to do a little classification with a Random Forest and look at the performance metrics."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Confusion matrix\n",
    "\n",
    "We use `do_plot_conf_mat`, a helper routine from this repository (`common_functions.class_helpers`), which computes the confusion matrix and plots it."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,\n",
       " criterion='gini', max_depth=None, max_features='auto',\n",
       " max_leaf_nodes=None, max_samples=None,\n",
       " min_impurity_decrease=0.0, min_impurity_split=None,\n",
       " min_samples_leaf=1, min_samples_split=2,\n",
       " min_weight_fraction_leaf=0.0, n_estimators=100,\n",
       " n_jobs=None, oob_score=False, random_state=None,\n",
       " verbose=0, warm_start=False)"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "\n",
      "text/plain": [
       "\n"
      ]
     },
     "metadata": {
      "image/png": {
       "height": 432,
       "width": 477
      }
     },
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "array([[18, 0, 0],\n",
       " [ 0, 14, 0],\n",
       " [ 0, 1, 12]])"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Load the Iris dataset from sklearn, separating the data matrix and the array of classes\n",
    "iris = load_iris()\n",
    "X = iris.data\n",
    "y = iris.target\n",
    "\n",
    "# Fixed seed shared by the split and the forest, so that Restart and Run All\n",
    "# reproduces the same split, model and metrics\n",
    "SEED = 42\n",
    "\n",
    "# Initiate the classifier (default parameters, apart from the fixed seed)\n",
    "rf = RandomForestClassifier(random_state=SEED)\n",
    "\n",
    "# Splitting the dataset into train and test (70%/30%)\n",
    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=SEED)\n",
    "\n",
    "# Fitting model on training set and predict on test set\n",
    "rf.fit(X_train, y_train)\n",
    "y_pred = rf.predict(X_test)\n",
    "\n",
    "# Compute and plot the confusion matrix with the helper imported from common_functions.class_helpers\n",
    "do_plot_conf_mat(y_test, y_pred)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Precision, recall and F1-score"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      " precision recall f1-score support\n",
      "\n",
      " 0 1.00 1.00 1.00 18\n",
      " 1 0.93 1.00 0.97 14\n",
      " 2 1.00 0.92 0.96 13\n",
      "\n",
      " accuracy 0.98 45\n",
      " macro avg 0.98 0.97 0.98 45\n",
      "weighted avg 0.98 0.98 0.98 45\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# sklearn furnishes a report of these metrics (precision, recall, F1-score) for all classes in one go!\n",
    "print(classification_report(y_test, y_pred))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Accuracy"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.9777777777777777"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Overall accuracy (fraction of correctly classified test samples), from sklearn\n",
    "accuracy_score(y_test, y_pred)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}