{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Classification using naive bayes\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Loading required package: daltoolbox\n",
      "\n",
      "Registered S3 method overwritten by 'quantmod':\n",
      "  method            from\n",
      "  as.zoo.data.frame zoo \n",
      "\n",
      "\n",
      "Attaching package: ‘daltoolbox’\n",
      "\n",
      "\n",
      "The following object is masked from ‘package:base’:\n",
      "\n",
      "    transform\n",
      "\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# DAL ToolBox\n",
    "# version 1.01.727\n",
    "\n",
    "source(\"https://raw.githubusercontent.com/cefet-rj-dal/daltoolbox/main/jupyter.R\")\n",
    "\n",
    "#loading DAL\n",
    "load_library(\"daltoolbox\") "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table class=\"dataframe\">\n",
       "<caption>A data.frame: 6 × 5</caption>\n",
       "<thead>\n",
       "\t<tr><th></th><th scope=col>Sepal.Length</th><th scope=col>Sepal.Width</th><th scope=col>Petal.Length</th><th scope=col>Petal.Width</th><th scope=col>Species</th></tr>\n",
       "\t<tr><th></th><th scope=col>&lt;dbl&gt;</th><th scope=col>&lt;dbl&gt;</th><th scope=col>&lt;dbl&gt;</th><th scope=col>&lt;dbl&gt;</th><th scope=col>&lt;fct&gt;</th></tr>\n",
       "</thead>\n",
       "<tbody>\n",
       "\t<tr><th scope=row>1</th><td>5.1</td><td>3.5</td><td>1.4</td><td>0.2</td><td>setosa</td></tr>\n",
       "\t<tr><th scope=row>2</th><td>4.9</td><td>3.0</td><td>1.4</td><td>0.2</td><td>setosa</td></tr>\n",
       "\t<tr><th scope=row>3</th><td>4.7</td><td>3.2</td><td>1.3</td><td>0.2</td><td>setosa</td></tr>\n",
       "\t<tr><th scope=row>4</th><td>4.6</td><td>3.1</td><td>1.5</td><td>0.2</td><td>setosa</td></tr>\n",
       "\t<tr><th scope=row>5</th><td>5.0</td><td>3.6</td><td>1.4</td><td>0.2</td><td>setosa</td></tr>\n",
       "\t<tr><th scope=row>6</th><td>5.4</td><td>3.9</td><td>1.7</td><td>0.4</td><td>setosa</td></tr>\n",
       "</tbody>\n",
       "</table>\n"
      ],
      "text/latex": [
       "A data.frame: 6 × 5\n",
       "\\begin{tabular}{r|lllll}\n",
       "  & Sepal.Length & Sepal.Width & Petal.Length & Petal.Width & Species\\\\\n",
       "  & <dbl> & <dbl> & <dbl> & <dbl> & <fct>\\\\\n",
       "\\hline\n",
       "\t1 & 5.1 & 3.5 & 1.4 & 0.2 & setosa\\\\\n",
       "\t2 & 4.9 & 3.0 & 1.4 & 0.2 & setosa\\\\\n",
       "\t3 & 4.7 & 3.2 & 1.3 & 0.2 & setosa\\\\\n",
       "\t4 & 4.6 & 3.1 & 1.5 & 0.2 & setosa\\\\\n",
       "\t5 & 5.0 & 3.6 & 1.4 & 0.2 & setosa\\\\\n",
       "\t6 & 5.4 & 3.9 & 1.7 & 0.4 & setosa\\\\\n",
       "\\end{tabular}\n"
      ],
      "text/markdown": [
       "\n",
       "A data.frame: 6 × 5\n",
       "\n",
       "| <!--/--> | Sepal.Length &lt;dbl&gt; | Sepal.Width &lt;dbl&gt; | Petal.Length &lt;dbl&gt; | Petal.Width &lt;dbl&gt; | Species &lt;fct&gt; |\n",
       "|---|---|---|---|---|---|\n",
       "| 1 | 5.1 | 3.5 | 1.4 | 0.2 | setosa |\n",
       "| 2 | 4.9 | 3.0 | 1.4 | 0.2 | setosa |\n",
       "| 3 | 4.7 | 3.2 | 1.3 | 0.2 | setosa |\n",
       "| 4 | 4.6 | 3.1 | 1.5 | 0.2 | setosa |\n",
       "| 5 | 5.0 | 3.6 | 1.4 | 0.2 | setosa |\n",
       "| 6 | 5.4 | 3.9 | 1.7 | 0.4 | setosa |\n",
       "\n"
      ],
      "text/plain": [
       "  Sepal.Length Sepal.Width Petal.Length Petal.Width Species\n",
       "1 5.1          3.5         1.4          0.2         setosa \n",
       "2 4.9          3.0         1.4          0.2         setosa \n",
       "3 4.7          3.2         1.3          0.2         setosa \n",
       "4 4.6          3.1         1.5          0.2         setosa \n",
       "5 5.0          3.6         1.4          0.2         setosa \n",
       "6 5.4          3.9         1.7          0.4         setosa "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "iris <- datasets::iris\n",
    "head(iris)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<style>\n",
       ".list-inline {list-style: none; margin:0; padding: 0}\n",
       ".list-inline>li {display: inline-block}\n",
       ".list-inline>li:not(:last-child)::after {content: \"\\00b7\"; padding: 0 .5ex}\n",
       "</style>\n",
       "<ol class=list-inline><li>'setosa'</li><li>'versicolor'</li><li>'virginica'</li></ol>\n"
      ],
      "text/latex": [
       "\\begin{enumerate*}\n",
       "\\item 'setosa'\n",
       "\\item 'versicolor'\n",
       "\\item 'virginica'\n",
       "\\end{enumerate*}\n"
      ],
      "text/markdown": [
       "1. 'setosa'\n",
       "2. 'versicolor'\n",
       "3. 'virginica'\n",
       "\n",
       "\n"
      ],
      "text/plain": [
       "[1] \"setosa\"     \"versicolor\" \"virginica\" "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "#extracting the levels for the dataset\n",
    "slevels <- levels(iris$Species)\n",
    "slevels"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Building samples (training and testing)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# preparing dataset for random sampling\n",
    "set.seed(1)\n",
    "sr <- sample_random()\n",
    "sr <- train_test(sr, iris)\n",
    "iris_train <- sr$train\n",
    "iris_test <- sr$test"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table class=\"dataframe\">\n",
       "<caption>A matrix: 3 × 3 of type int</caption>\n",
       "<thead>\n",
       "\t<tr><th></th><th scope=col>setosa</th><th scope=col>versicolor</th><th scope=col>virginica</th></tr>\n",
       "</thead>\n",
       "<tbody>\n",
       "\t<tr><th scope=row>dataset</th><td>50</td><td>50</td><td>50</td></tr>\n",
       "\t<tr><th scope=row>training</th><td>39</td><td>38</td><td>43</td></tr>\n",
       "\t<tr><th scope=row>test</th><td>11</td><td>12</td><td> 7</td></tr>\n",
       "</tbody>\n",
       "</table>\n"
      ],
      "text/latex": [
       "A matrix: 3 × 3 of type int\n",
       "\\begin{tabular}{r|lll}\n",
       "  & setosa & versicolor & virginica\\\\\n",
       "\\hline\n",
       "\tdataset & 50 & 50 & 50\\\\\n",
       "\ttraining & 39 & 38 & 43\\\\\n",
       "\ttest & 11 & 12 &  7\\\\\n",
       "\\end{tabular}\n"
      ],
      "text/markdown": [
       "\n",
       "A matrix: 3 × 3 of type int\n",
       "\n",
       "| <!--/--> | setosa | versicolor | virginica |\n",
       "|---|---|---|---|\n",
       "| dataset | 50 | 50 | 50 |\n",
       "| training | 39 | 38 | 43 |\n",
       "| test | 11 | 12 |  7 |\n",
       "\n"
      ],
      "text/plain": [
       "         setosa versicolor virginica\n",
       "dataset  50     50         50       \n",
       "training 39     38         43       \n",
       "test     11     12          7       "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "tbl <- rbind(table(iris[,\"Species\"]), \n",
    "             table(iris_train[,\"Species\"]), \n",
    "             table(iris_test[,\"Species\"]))\n",
    "rownames(tbl) <- c(\"dataset\", \"training\", \"test\")\n",
    "head(tbl)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Model training"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "model <- cla_nb(\"Species\", slevels)\n",
    "model <- fit(model, iris_train)\n",
    "train_prediction <- predict(model, iris_train)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Model adjustment"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "   accuracy TP TN FP FN precision recall sensitivity specificity f1\n",
      "1 0.9583333 39 81  0  0         1      1           1           1  1\n"
     ]
    }
   ],
   "source": [
    "iris_train_predictand <- adjust_class_label(iris_train[,\"Species\"])\n",
    "train_eval <- evaluate(model, iris_train_predictand, train_prediction)\n",
    "print(train_eval$metrics)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Model testing"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "   accuracy TP TN FP FN precision recall sensitivity specificity f1\n",
      "1 0.9666667 11 19  0  0         1      1           1           1  1\n",
      "   accuracy TP TN FP FN precision recall sensitivity specificity   f1\n",
      "1 0.9666667 12 17  1  0 0.9230769      1           1   0.9444444 0.96\n",
      "   accuracy TP TN FP FN precision    recall sensitivity specificity        f1\n",
      "1 0.9666667  6 23  0  1         1 0.8571429   0.8571429           1 0.9230769\n"
     ]
    }
   ],
   "source": [
    "# Test\n",
    "test_prediction <- predict(model, iris_test)\n",
    "\n",
    "iris_test_predictand <- adjust_class_label(iris_test[,\"Species\"])\n",
    "\n",
    "#Avaliação #setosa\n",
    "test_eval <- evaluate(model, iris_test_predictand, test_prediction)\n",
    "print(test_eval$metrics)\n",
    "\n",
    "#Avaliação #versicolor\n",
    "test_eval <- evaluate(model, iris_test_predictand, test_prediction, ref=2)\n",
    "print(test_eval$metrics)\n",
    "\n",
    "#Avaliação #virginica\n",
    "test_eval <- evaluate(model, iris_test_predictand, test_prediction, ref=3)\n",
    "print(test_eval$metrics)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "R",
   "language": "R",
   "name": "ir"
  },
  "language_info": {
   "codemirror_mode": "r",
   "file_extension": ".r",
   "mimetype": "text/x-r-source",
   "name": "R",
   "pygments_lexer": "r",
   "version": "4.4.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}