{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Loading required package: daltoolbox\n",
"\n",
"Registered S3 method overwritten by 'quantmod':\n",
" method from\n",
" as.zoo.data.frame zoo \n",
"\n",
"\n",
"Attaching package: ‘daltoolbox’\n",
"\n",
"\n",
"The following object is masked from ‘package:base’:\n",
"\n",
" transform\n",
"\n",
"\n"
]
}
],
"source": [
"# DAL ToolBox\n",
"# version 1.1.727\n",
"\n",
"source(\"https://raw.githubusercontent.com/cefet-rj-dal/daltoolbox/main/jupyter.R\")\n",
"\n",
"#loading DAL\n",
"load_library(\"daltoolbox\") "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Classification\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"A data.frame: 6 × 5\n",
"\n",
"\t | Sepal.Length | Sepal.Width | Petal.Length | Petal.Width | Species |
\n",
"\t | <dbl> | <dbl> | <dbl> | <dbl> | <fct> |
\n",
"\n",
"\n",
"\t1 | 5.1 | 3.5 | 1.4 | 0.2 | setosa |
\n",
"\t2 | 4.9 | 3.0 | 1.4 | 0.2 | setosa |
\n",
"\t3 | 4.7 | 3.2 | 1.3 | 0.2 | setosa |
\n",
"\t4 | 4.6 | 3.1 | 1.5 | 0.2 | setosa |
\n",
"\t5 | 5.0 | 3.6 | 1.4 | 0.2 | setosa |
\n",
"\t6 | 5.4 | 3.9 | 1.7 | 0.4 | setosa |
\n",
"\n",
"
\n"
],
"text/latex": [
"A data.frame: 6 × 5\n",
"\\begin{tabular}{r|lllll}\n",
" & Sepal.Length & Sepal.Width & Petal.Length & Petal.Width & Species\\\\\n",
" & & & & & \\\\\n",
"\\hline\n",
"\t1 & 5.1 & 3.5 & 1.4 & 0.2 & setosa\\\\\n",
"\t2 & 4.9 & 3.0 & 1.4 & 0.2 & setosa\\\\\n",
"\t3 & 4.7 & 3.2 & 1.3 & 0.2 & setosa\\\\\n",
"\t4 & 4.6 & 3.1 & 1.5 & 0.2 & setosa\\\\\n",
"\t5 & 5.0 & 3.6 & 1.4 & 0.2 & setosa\\\\\n",
"\t6 & 5.4 & 3.9 & 1.7 & 0.4 & setosa\\\\\n",
"\\end{tabular}\n"
],
"text/markdown": [
"\n",
"A data.frame: 6 × 5\n",
"\n",
"| | Sepal.Length <dbl> | Sepal.Width <dbl> | Petal.Length <dbl> | Petal.Width <dbl> | Species <fct> |\n",
"|---|---|---|---|---|---|\n",
"| 1 | 5.1 | 3.5 | 1.4 | 0.2 | setosa |\n",
"| 2 | 4.9 | 3.0 | 1.4 | 0.2 | setosa |\n",
"| 3 | 4.7 | 3.2 | 1.3 | 0.2 | setosa |\n",
"| 4 | 4.6 | 3.1 | 1.5 | 0.2 | setosa |\n",
"| 5 | 5.0 | 3.6 | 1.4 | 0.2 | setosa |\n",
"| 6 | 5.4 | 3.9 | 1.7 | 0.4 | setosa |\n",
"\n"
],
"text/plain": [
" Sepal.Length Sepal.Width Petal.Length Petal.Width Species\n",
"1 5.1 3.5 1.4 0.2 setosa \n",
"2 4.9 3.0 1.4 0.2 setosa \n",
"3 4.7 3.2 1.3 0.2 setosa \n",
"4 4.6 3.1 1.5 0.2 setosa \n",
"5 5.0 3.6 1.4 0.2 setosa \n",
"6 5.4 3.9 1.7 0.4 setosa "
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"iris <- datasets::iris\n",
"head(iris)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"- 'setosa'
- 'versicolor'
- 'virginica'
\n"
],
"text/latex": [
"\\begin{enumerate*}\n",
"\\item 'setosa'\n",
"\\item 'versicolor'\n",
"\\item 'virginica'\n",
"\\end{enumerate*}\n"
],
"text/markdown": [
"1. 'setosa'\n",
"2. 'versicolor'\n",
"3. 'virginica'\n",
"\n",
"\n"
],
"text/plain": [
"[1] \"setosa\" \"versicolor\" \"virginica\" "
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"#extracting the levels for the dataset\n",
"slevels <- levels(iris$Species)\n",
"slevels"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Building samples (training and testing)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# preparing dataset for random sampling\n",
"set.seed(1)\n",
"sr <- sample_random()\n",
"sr <- train_test(sr, iris)\n",
"iris_train <- sr$train\n",
"iris_test <- sr$test"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"A matrix: 3 × 3 of type int\n",
"\n",
"\t | setosa | versicolor | virginica |
\n",
"\n",
"\n",
"\tdataset | 50 | 50 | 50 |
\n",
"\ttraining | 39 | 38 | 43 |
\n",
"\ttest | 11 | 12 | 7 |
\n",
"\n",
"
\n"
],
"text/latex": [
"A matrix: 3 × 3 of type int\n",
"\\begin{tabular}{r|lll}\n",
" & setosa & versicolor & virginica\\\\\n",
"\\hline\n",
"\tdataset & 50 & 50 & 50\\\\\n",
"\ttraining & 39 & 38 & 43\\\\\n",
"\ttest & 11 & 12 & 7\\\\\n",
"\\end{tabular}\n"
],
"text/markdown": [
"\n",
"A matrix: 3 × 3 of type int\n",
"\n",
"| | setosa | versicolor | virginica |\n",
"|---|---|---|---|\n",
"| dataset | 50 | 50 | 50 |\n",
"| training | 39 | 38 | 43 |\n",
"| test | 11 | 12 | 7 |\n",
"\n"
],
"text/plain": [
" setosa versicolor virginica\n",
"dataset 50 50 50 \n",
"training 39 38 43 \n",
"test 11 12 7 "
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"tbl <- rbind(table(iris[,\"Species\"]), \n",
" table(iris_train[,\"Species\"]), \n",
" table(iris_test[,\"Species\"]))\n",
"rownames(tbl) <- c(\"dataset\", \"training\", \"test\")\n",
"head(tbl)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Model training"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"model <- cla_majority(\"Species\", slevels)\n",
"model <- fit(model, iris_train)\n",
"train_prediction <- predict(model, iris_train)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Model adjustement"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" accuracy TP TN FP FN precision recall sensitivity specificity f1\n",
"1 0.3583333 0 81 0 39 NaN 0 0 1 NaN\n"
]
}
],
"source": [
"iris_train_predictand <- adjust_class_label(iris_train[,\"Species\"])\n",
"train_eval <- evaluate(model, iris_train_predictand, train_prediction)\n",
"print(train_eval$metrics)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Model testing"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" accuracy TP TN FP FN precision recall sensitivity specificity f1\n",
"1 0.2333333 0 19 0 11 NaN 0 0 1 NaN\n"
]
}
],
"source": [
"# Test \n",
"test_prediction <- predict(model, iris_test)\n",
" \n",
"iris_test_predictand <- adjust_class_label(iris_test[,\"Species\"])\n",
"test_eval <- evaluate(model, iris_test_predictand, test_prediction)\n",
"print(test_eval$metrics)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "R",
"language": "R",
"name": "ir"
},
"language_info": {
"codemirror_mode": "r",
"file_extension": ".r",
"mimetype": "text/x-r-source",
"name": "R",
"pygments_lexer": "r",
"version": "4.4.1"
}
},
"nbformat": 4,
"nbformat_minor": 4
}