{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Classification using Naive Bayes\n"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Loading required package: daltoolbox\n",
"\n",
"Registered S3 method overwritten by 'quantmod':\n",
" method from\n",
" as.zoo.data.frame zoo \n",
"\n",
"\n",
"Attaching package: ‘daltoolbox’\n",
"\n",
"\n",
"The following object is masked from ‘package:base’:\n",
"\n",
" transform\n",
"\n",
"\n"
]
}
],
"source": [
"# DAL ToolBox\n",
"# version 1.01.727\n",
"\n",
"# Run the notebook bootstrap script published in the daltoolbox repository.\n",
"# NOTE(review): this executes remote code from the `main` branch at run time;\n",
"# load_library() used below is presumably defined by this script -- confirm.\n",
"source(\"https://raw.githubusercontent.com/cefet-rj-dal/daltoolbox/main/jupyter.R\")\n",
"\n",
"# Load the daltoolbox package\n",
"load_library(\"daltoolbox\") "
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"A data.frame: 6 × 5\n",
"\n",
"\t | Sepal.Length | Sepal.Width | Petal.Length | Petal.Width | Species |
\n",
"\t | <dbl> | <dbl> | <dbl> | <dbl> | <fct> |
\n",
"\n",
"\n",
"\t1 | 5.1 | 3.5 | 1.4 | 0.2 | setosa |
\n",
"\t2 | 4.9 | 3.0 | 1.4 | 0.2 | setosa |
\n",
"\t3 | 4.7 | 3.2 | 1.3 | 0.2 | setosa |
\n",
"\t4 | 4.6 | 3.1 | 1.5 | 0.2 | setosa |
\n",
"\t5 | 5.0 | 3.6 | 1.4 | 0.2 | setosa |
\n",
"\t6 | 5.4 | 3.9 | 1.7 | 0.4 | setosa |
\n",
"\n",
"
\n"
],
"text/latex": [
"A data.frame: 6 × 5\n",
"\\begin{tabular}{r|lllll}\n",
" & Sepal.Length & Sepal.Width & Petal.Length & Petal.Width & Species\\\\\n",
" & & & & & \\\\\n",
"\\hline\n",
"\t1 & 5.1 & 3.5 & 1.4 & 0.2 & setosa\\\\\n",
"\t2 & 4.9 & 3.0 & 1.4 & 0.2 & setosa\\\\\n",
"\t3 & 4.7 & 3.2 & 1.3 & 0.2 & setosa\\\\\n",
"\t4 & 4.6 & 3.1 & 1.5 & 0.2 & setosa\\\\\n",
"\t5 & 5.0 & 3.6 & 1.4 & 0.2 & setosa\\\\\n",
"\t6 & 5.4 & 3.9 & 1.7 & 0.4 & setosa\\\\\n",
"\\end{tabular}\n"
],
"text/markdown": [
"\n",
"A data.frame: 6 × 5\n",
"\n",
"| | Sepal.Length <dbl> | Sepal.Width <dbl> | Petal.Length <dbl> | Petal.Width <dbl> | Species <fct> |\n",
"|---|---|---|---|---|---|\n",
"| 1 | 5.1 | 3.5 | 1.4 | 0.2 | setosa |\n",
"| 2 | 4.9 | 3.0 | 1.4 | 0.2 | setosa |\n",
"| 3 | 4.7 | 3.2 | 1.3 | 0.2 | setosa |\n",
"| 4 | 4.6 | 3.1 | 1.5 | 0.2 | setosa |\n",
"| 5 | 5.0 | 3.6 | 1.4 | 0.2 | setosa |\n",
"| 6 | 5.4 | 3.9 | 1.7 | 0.4 | setosa |\n",
"\n"
],
"text/plain": [
" Sepal.Length Sepal.Width Petal.Length Petal.Width Species\n",
"1 5.1 3.5 1.4 0.2 setosa \n",
"2 4.9 3.0 1.4 0.2 setosa \n",
"3 4.7 3.2 1.3 0.2 setosa \n",
"4 4.6 3.1 1.5 0.2 setosa \n",
"5 5.0 3.6 1.4 0.2 setosa \n",
"6 5.4 3.9 1.7 0.4 setosa "
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Take a local copy of the built-in iris data set and preview its first rows\n",
"iris <- datasets::iris\n",
"head(iris)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"- 'setosa'
- 'versicolor'
- 'virginica'
\n"
],
"text/latex": [
"\\begin{enumerate*}\n",
"\\item 'setosa'\n",
"\\item 'versicolor'\n",
"\\item 'virginica'\n",
"\\end{enumerate*}\n"
],
"text/markdown": [
"1. 'setosa'\n",
"2. 'versicolor'\n",
"3. 'virginica'\n",
"\n",
"\n"
],
"text/plain": [
"[1] \"setosa\" \"versicolor\" \"virginica\" "
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Class labels of the target column (Species), in factor-level order;\n",
"# they are handed to cla_nb() when the model is created\n",
"slevels <- levels(iris$Species)\n",
"slevels"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Building samples (training and testing)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# Split iris into a training and a test partition by random sampling.\n",
"# The RNG seed is set first so the split can be reproduced.\n",
"set.seed(1)\n",
"sr <- sample_random()\n",
"# After train_test(), sr holds the two partitions as $train and $test\n",
"sr <- train_test(sr, iris)\n",
"iris_train <- sr$train\n",
"iris_test <- sr$test"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"A matrix: 3 × 3 of type int\n",
"\n",
"\t | setosa | versicolor | virginica |
\n",
"\n",
"\n",
"\tdataset | 50 | 50 | 50 |
\n",
"\ttraining | 39 | 38 | 43 |
\n",
"\ttest | 11 | 12 | 7 |
\n",
"\n",
"
\n"
],
"text/latex": [
"A matrix: 3 × 3 of type int\n",
"\\begin{tabular}{r|lll}\n",
" & setosa & versicolor & virginica\\\\\n",
"\\hline\n",
"\tdataset & 50 & 50 & 50\\\\\n",
"\ttraining & 39 & 38 & 43\\\\\n",
"\ttest & 11 & 12 & 7\\\\\n",
"\\end{tabular}\n"
],
"text/markdown": [
"\n",
"A matrix: 3 × 3 of type int\n",
"\n",
"| | setosa | versicolor | virginica |\n",
"|---|---|---|---|\n",
"| dataset | 50 | 50 | 50 |\n",
"| training | 39 | 38 | 43 |\n",
"| test | 11 | 12 | 7 |\n",
"\n"
],
"text/plain": [
" setosa versicolor virginica\n",
"dataset 50 50 50 \n",
"training 39 38 43 \n",
"test 11 12 7 "
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Class counts for the full data set, the training split and the test split (one row each);\n",
"# the row labels come from the names of the list below\n",
"tbl <- do.call(\n",
"  rbind,\n",
"  lapply(list(dataset = iris, training = iris_train, test = iris_test),\n",
"         function(part) table(part[, \"Species\"]))\n",
")\n",
"head(tbl)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Model training"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# Create the naive Bayes classifier for the Species attribute with its class labels\n",
"model <- cla_nb(\"Species\", slevels)\n",
"# Fit using the training partition only\n",
"model <- fit(model, iris_train)\n",
"# Predictions for the training rows themselves; they are evaluated in the next section\n",
"train_prediction <- predict(model, iris_train)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Model adjustment (evaluation on the training set)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" accuracy TP TN FP FN precision recall sensitivity specificity f1\n",
"1 0.9583333 39 81 0 0 1 1 1 1 1\n"
]
}
],
"source": [
"# Convert the true training labels with adjust_class_label() before comparing them with the predictions\n",
"iris_train_predictand <- adjust_class_label(iris_train[,\"Species\"])\n",
"train_eval <- evaluate(model, iris_train_predictand, train_prediction)\n",
"# No `ref` is passed here, so the metrics are for evaluate()'s default reference class\n",
"print(train_eval$metrics)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Model testing"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" accuracy TP TN FP FN precision recall sensitivity specificity f1\n",
"1 0.9666667 11 19 0 0 1 1 1 1 1\n",
" accuracy TP TN FP FN precision recall sensitivity specificity f1\n",
"1 0.9666667 12 17 1 0 0.9230769 1 1 0.9444444 0.96\n",
" accuracy TP TN FP FN precision recall sensitivity specificity f1\n",
"1 0.9666667 6 23 0 1 1 0.8571429 0.8571429 1 0.9230769\n"
]
}
],
"source": [
"# Predict the held-out test partition with the model fitted on the training data\n",
"test_prediction <- predict(model, iris_test)\n",
"\n",
"# Convert the true test labels with adjust_class_label() before comparing them with the predictions\n",
"iris_test_predictand <- adjust_class_label(iris_test[,\"Species\"])\n",
"\n",
"# Evaluate once per class, in slevels order (setosa, versicolor, virginica),\n",
"# using each class in turn as the reference for the per-class metrics.\n",
"# NOTE(review): assumes ref = 1 (the first class) is evaluate()'s default -- confirm.\n",
"for (ref_class in seq_along(slevels)) {\n",
"  test_eval <- evaluate(model, iris_test_predictand, test_prediction, ref=ref_class)\n",
"  print(test_eval$metrics)\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "R",
"language": "R",
"name": "ir"
},
"language_info": {
"codemirror_mode": "r",
"file_extension": ".r",
"mimetype": "text/x-r-source",
"name": "R",
"pygments_lexer": "r",
"version": "4.4.1"
}
},
"nbformat": 4,
"nbformat_minor": 4
}