{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## Classification using Random Forest" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Loading required package: daltoolbox\n", "\n", "Registered S3 method overwritten by 'quantmod':\n", " method from\n", " as.zoo.data.frame zoo \n", "\n", "\n", "Attaching package: ‘daltoolbox’\n", "\n", "\n", "The following object is masked from ‘package:base’:\n", "\n", " transform\n", "\n", "\n" ] } ], "source": [ "# DAL ToolBox\n", "# version 1.01.727\n", "\n", "# Run the Jupyter helper script published in the daltoolbox repository (main branch).\n", "# NOTE(review): this executes remote code from a moving branch; consider pinning a tag or commit.\n", "source(file = \"https://raw.githubusercontent.com/cefet-rj-dal/daltoolbox/main/jupyter.R\")\n", "\n", "# Attach the daltoolbox package.\n", "# load_library() is not base R; presumably it is defined by the script sourced above (confirm).\n", "load_library(\"daltoolbox\")" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "\t\n", "\t\n", "\n", "\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\n", "
A data.frame: 6 × 5
Sepal.LengthSepal.WidthPetal.LengthPetal.WidthSpecies
<dbl><dbl><dbl><dbl><fct>
15.13.51.40.2setosa
24.93.01.40.2setosa
34.73.21.30.2setosa
44.63.11.50.2setosa
55.03.61.40.2setosa
65.43.91.70.4setosa
\n" ], "text/latex": [ "A data.frame: 6 × 5\n", "\\begin{tabular}{r|lllll}\n", " & Sepal.Length & Sepal.Width & Petal.Length & Petal.Width & Species\\\\\n", " & & & & & \\\\\n", "\\hline\n", "\t1 & 5.1 & 3.5 & 1.4 & 0.2 & setosa\\\\\n", "\t2 & 4.9 & 3.0 & 1.4 & 0.2 & setosa\\\\\n", "\t3 & 4.7 & 3.2 & 1.3 & 0.2 & setosa\\\\\n", "\t4 & 4.6 & 3.1 & 1.5 & 0.2 & setosa\\\\\n", "\t5 & 5.0 & 3.6 & 1.4 & 0.2 & setosa\\\\\n", "\t6 & 5.4 & 3.9 & 1.7 & 0.4 & setosa\\\\\n", "\\end{tabular}\n" ], "text/markdown": [ "\n", "A data.frame: 6 × 5\n", "\n", "| | Sepal.Length <dbl> | Sepal.Width <dbl> | Petal.Length <dbl> | Petal.Width <dbl> | Species <fct> |\n", "|---|---|---|---|---|---|\n", "| 1 | 5.1 | 3.5 | 1.4 | 0.2 | setosa |\n", "| 2 | 4.9 | 3.0 | 1.4 | 0.2 | setosa |\n", "| 3 | 4.7 | 3.2 | 1.3 | 0.2 | setosa |\n", "| 4 | 4.6 | 3.1 | 1.5 | 0.2 | setosa |\n", "| 5 | 5.0 | 3.6 | 1.4 | 0.2 | setosa |\n", "| 6 | 5.4 | 3.9 | 1.7 | 0.4 | setosa |\n", "\n" ], "text/plain": [ " Sepal.Length Sepal.Width Petal.Length Petal.Width Species\n", "1 5.1 3.5 1.4 0.2 setosa \n", "2 4.9 3.0 1.4 0.2 setosa \n", "3 4.7 3.2 1.3 0.2 setosa \n", "4 4.6 3.1 1.5 0.2 setosa \n", "5 5.0 3.6 1.4 0.2 setosa \n", "6 5.4 3.9 1.7 0.4 setosa " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Bind the built-in iris data set to a local name and preview its first six rows.\n", "iris <- datasets::iris\n", "head(iris, n = 6)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
  1. 'setosa'
  2. 'versicolor'
  3. 'virginica'
\n" ], "text/latex": [ "\\begin{enumerate*}\n", "\\item 'setosa'\n", "\\item 'versicolor'\n", "\\item 'virginica'\n", "\\end{enumerate*}\n" ], "text/markdown": [ "1. 'setosa'\n", "2. 'versicolor'\n", "3. 'virginica'\n", "\n", "\n" ], "text/plain": [ "[1] \"setosa\" \"versicolor\" \"virginica\" " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Class labels of the Species column, in factor-level order.\n", "slevels <- levels(iris[[\"Species\"]])\n", "slevels" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Building samples (training and testing)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "# Fix the RNG seed before sampling so the train/test split can be reproduced.\n", "set.seed(1)\n", "sampler <- sample_random()\n", "iris_split <- train_test(sampler, iris)\n", "iris_train <- iris_split$train\n", "iris_test <- iris_split$test" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "\t\n", "\n", "\n", "\t\n", "\t\n", "\t\n", "\n", "
A matrix: 3 × 3 of type int
setosaversicolorvirginica
dataset505050
training393843
test1112 7
\n" ], "text/latex": [ "A matrix: 3 × 3 of type int\n", "\\begin{tabular}{r|lll}\n", " & setosa & versicolor & virginica\\\\\n", "\\hline\n", "\tdataset & 50 & 50 & 50\\\\\n", "\ttraining & 39 & 38 & 43\\\\\n", "\ttest & 11 & 12 & 7\\\\\n", "\\end{tabular}\n" ], "text/markdown": [ "\n", "A matrix: 3 × 3 of type int\n", "\n", "| | setosa | versicolor | virginica |\n", "|---|---|---|---|\n", "| dataset | 50 | 50 | 50 |\n", "| training | 39 | 38 | 43 |\n", "| test | 11 | 12 | 7 |\n", "\n" ], "text/plain": [ " setosa versicolor virginica\n", "dataset 50 50 50 \n", "training 39 38 43 \n", "test 11 12 7 " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Count the species in the full data and in each partition to check the class balance of the split.\n", "species_counts <- lapply(list(iris, iris_train, iris_test), function(d) table(d[,\"Species\"]))\n", "class_counts <- do.call(rbind, species_counts)\n", "rownames(class_counts) <- c(\"dataset\", \"training\", \"test\")\n", "head(class_counts)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Model training" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "# Build the cla_rf classifier for the Species column (labels from slevels) and fit it on the training partition.\n", "model <- fit(cla_rf(\"Species\", slevels, mtry=3, ntree=5), iris_train)\n", "# Predict the training rows themselves; these predictions are scored in the next section.\n", "train_prediction <- predict(model, iris_train)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Model adjustment (evaluation on the training set)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " accuracy TP TN FP FN precision recall sensitivity specificity f1\n", "1 0.975 39 81 0 0 1 1 1 1 1\n" ] } ], "source": [ "# Score the training-set predictions against the true labels.\n", "# No ref argument is passed, so evaluate() uses its default reference class; the recorded TP of 39\n", "# matches the 39 setosa rows in the training partition, so the default appears to be the first level.\n", "train_labels <- adjust_class_label(iris_train[,\"Species\"])\n", "train_metrics <- evaluate(model, train_labels, train_prediction)$metrics\n", "print(train_metrics)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Model testing" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " accuracy TP TN FP FN precision recall sensitivity specificity f1\n", "1 
0.9666667 11 19 0 0 1 1 1 1 1\n", " accuracy TP TN FP FN precision recall sensitivity specificity f1\n", "1 0.9666667 12 17 1 0 0.9230769 1 1 0.9444444 0.96\n", " accuracy TP TN FP FN precision recall sensitivity specificity f1\n", "1 0.9666667 6 23 0 1 1 0.8571429 0.8571429 1 0.9230769\n" ] } ], "source": [ "# Predict the held-out test partition and score it once per class.\n", "# In the recorded output the accuracy is identical for every ref, while TP/TN/FP/FN and the\n", "# metrics derived from them change with the reference class.\n", "test_prediction <- predict(model, iris_test)\n", "\n", "iris_test_predictand <- adjust_class_label(iris_test[,\"Species\"])\n", "\n", "# Evaluation with setosa as the reference class (no ref given, default is used)\n", "test_eval <- evaluate(model, iris_test_predictand, test_prediction)\n", "print(test_eval$metrics)\n", "\n", "# Evaluation with versicolor as the reference class (ref=2)\n", "test_eval <- evaluate(model, iris_test_predictand, test_prediction, ref=2)\n", "print(test_eval$metrics)\n", "\n", "# Evaluation with virginica as the reference class (ref=3)\n", "test_eval <- evaluate(model, iris_test_predictand, test_prediction, ref=3)\n", "print(test_eval$metrics)" ] } ], "metadata": { "kernelspec": { "display_name": "R", "language": "R", "name": "ir" }, "language_info": { "codemirror_mode": "r", "file_extension": ".r", "mimetype": "text/x-r-source", "name": "R", "pygments_lexer": "r", "version": "4.4.1" } }, "nbformat": 4, "nbformat_minor": 4 }