{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Classification using KNN"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Loading required package: daltoolbox\n",
"\n",
"Registered S3 method overwritten by 'quantmod':\n",
" method from\n",
" as.zoo.data.frame zoo \n",
"\n",
"\n",
"Attaching package: ‘daltoolbox’\n",
"\n",
"\n",
"The following object is masked from ‘package:base’:\n",
"\n",
" transform\n",
"\n",
"\n"
]
}
],
"source": [
"# DAL ToolBox\n",
"# version 1.01.727\n",
"\n",
"source(\"https://raw.githubusercontent.com/cefet-rj-dal/daltoolbox/main/jupyter.R\")\n",
"\n",
"#loading DAL\n",
"load_library(\"daltoolbox\") "
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"A data.frame: 6 × 5\n",
"\n",
"\t | Sepal.Length | Sepal.Width | Petal.Length | Petal.Width | Species |
\n",
"\t | <dbl> | <dbl> | <dbl> | <dbl> | <fct> |
\n",
"\n",
"\n",
"\t1 | 5.1 | 3.5 | 1.4 | 0.2 | setosa |
\n",
"\t2 | 4.9 | 3.0 | 1.4 | 0.2 | setosa |
\n",
"\t3 | 4.7 | 3.2 | 1.3 | 0.2 | setosa |
\n",
"\t4 | 4.6 | 3.1 | 1.5 | 0.2 | setosa |
\n",
"\t5 | 5.0 | 3.6 | 1.4 | 0.2 | setosa |
\n",
"\t6 | 5.4 | 3.9 | 1.7 | 0.4 | setosa |
\n",
"\n",
"
\n"
],
"text/latex": [
"A data.frame: 6 × 5\n",
"\\begin{tabular}{r|lllll}\n",
" & Sepal.Length & Sepal.Width & Petal.Length & Petal.Width & Species\\\\\n",
" & & & & & \\\\\n",
"\\hline\n",
"\t1 & 5.1 & 3.5 & 1.4 & 0.2 & setosa\\\\\n",
"\t2 & 4.9 & 3.0 & 1.4 & 0.2 & setosa\\\\\n",
"\t3 & 4.7 & 3.2 & 1.3 & 0.2 & setosa\\\\\n",
"\t4 & 4.6 & 3.1 & 1.5 & 0.2 & setosa\\\\\n",
"\t5 & 5.0 & 3.6 & 1.4 & 0.2 & setosa\\\\\n",
"\t6 & 5.4 & 3.9 & 1.7 & 0.4 & setosa\\\\\n",
"\\end{tabular}\n"
],
"text/markdown": [
"\n",
"A data.frame: 6 × 5\n",
"\n",
"| | Sepal.Length <dbl> | Sepal.Width <dbl> | Petal.Length <dbl> | Petal.Width <dbl> | Species <fct> |\n",
"|---|---|---|---|---|---|\n",
"| 1 | 5.1 | 3.5 | 1.4 | 0.2 | setosa |\n",
"| 2 | 4.9 | 3.0 | 1.4 | 0.2 | setosa |\n",
"| 3 | 4.7 | 3.2 | 1.3 | 0.2 | setosa |\n",
"| 4 | 4.6 | 3.1 | 1.5 | 0.2 | setosa |\n",
"| 5 | 5.0 | 3.6 | 1.4 | 0.2 | setosa |\n",
"| 6 | 5.4 | 3.9 | 1.7 | 0.4 | setosa |\n",
"\n"
],
"text/plain": [
" Sepal.Length Sepal.Width Petal.Length Petal.Width Species\n",
"1 5.1 3.5 1.4 0.2 setosa \n",
"2 4.9 3.0 1.4 0.2 setosa \n",
"3 4.7 3.2 1.3 0.2 setosa \n",
"4 4.6 3.1 1.5 0.2 setosa \n",
"5 5.0 3.6 1.4 0.2 setosa \n",
"6 5.4 3.9 1.7 0.4 setosa "
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"iris <- datasets::iris\n",
"head(iris)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"- 'setosa'
- 'versicolor'
- 'virginica'
\n"
],
"text/latex": [
"\\begin{enumerate*}\n",
"\\item 'setosa'\n",
"\\item 'versicolor'\n",
"\\item 'virginica'\n",
"\\end{enumerate*}\n"
],
"text/markdown": [
"1. 'setosa'\n",
"2. 'versicolor'\n",
"3. 'virginica'\n",
"\n",
"\n"
],
"text/plain": [
"[1] \"setosa\" \"versicolor\" \"virginica\" "
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"#extracting the levels for the dataset\n",
"slevels <- levels(iris$Species)\n",
"slevels"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Building samples (training and testing)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# preparing dataset for random sampling\n",
"set.seed(1)\n",
"sr <- sample_random()\n",
"sr <- train_test(sr, iris)\n",
"iris_train <- sr$train\n",
"iris_test <- sr$test"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"A matrix: 3 × 3 of type int\n",
"\n",
"\t | setosa | versicolor | virginica |
\n",
"\n",
"\n",
"\tdataset | 50 | 50 | 50 |
\n",
"\ttraining | 39 | 38 | 43 |
\n",
"\ttest | 11 | 12 | 7 |
\n",
"\n",
"
\n"
],
"text/latex": [
"A matrix: 3 × 3 of type int\n",
"\\begin{tabular}{r|lll}\n",
" & setosa & versicolor & virginica\\\\\n",
"\\hline\n",
"\tdataset & 50 & 50 & 50\\\\\n",
"\ttraining & 39 & 38 & 43\\\\\n",
"\ttest & 11 & 12 & 7\\\\\n",
"\\end{tabular}\n"
],
"text/markdown": [
"\n",
"A matrix: 3 × 3 of type int\n",
"\n",
"| | setosa | versicolor | virginica |\n",
"|---|---|---|---|\n",
"| dataset | 50 | 50 | 50 |\n",
"| training | 39 | 38 | 43 |\n",
"| test | 11 | 12 | 7 |\n",
"\n"
],
"text/plain": [
" setosa versicolor virginica\n",
"dataset 50 50 50 \n",
"training 39 38 43 \n",
"test 11 12 7 "
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"tbl <- rbind(table(iris[,\"Species\"]), \n",
" table(iris_train[,\"Species\"]), \n",
" table(iris_test[,\"Species\"]))\n",
"rownames(tbl) <- c(\"dataset\", \"training\", \"test\")\n",
"head(tbl)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Model training"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"model <- cla_knn(\"Species\", slevels, k=1)\n",
"model <- fit(model, iris_train)\n",
"train_prediction <- predict(model, iris_train)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Model adjustment"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" accuracy TP TN FP FN precision recall sensitivity specificity f1\n",
"1 1 39 81 0 0 1 1 1 1 1\n"
]
}
],
"source": [
"iris_train_predictand <- adjust_class_label(iris_train[,\"Species\"])\n",
"train_eval <- evaluate(model, iris_train_predictand, train_prediction)\n",
"print(train_eval$metrics)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Model testing"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" accuracy TP TN FP FN precision recall sensitivity specificity f1\n",
"1 0.9333333 11 19 0 0 1 1 1 1 1\n",
" accuracy TP TN FP FN precision recall sensitivity specificity f1\n",
"1 0.9333333 12 16 2 0 0.8571429 1 1 0.8888889 0.9230769\n",
" accuracy TP TN FP FN precision recall sensitivity specificity f1\n",
"1 0.9333333 5 23 0 2 1 0.7142857 0.7142857 1 0.8333333\n"
]
}
],
"source": [
"# Test\n",
"test_prediction <- predict(model, iris_test)\n",
"\n",
"iris_test_predictand <- adjust_class_label(iris_test[,\"Species\"])\n",
"\n",
"#Avaliação #setosa\n",
"test_eval <- evaluate(model, iris_test_predictand, test_prediction)\n",
"print(test_eval$metrics)\n",
"\n",
"#Avaliação #versicolor\n",
"test_eval <- evaluate(model, iris_test_predictand, test_prediction, ref=2)\n",
"print(test_eval$metrics)\n",
"\n",
"#Avaliação #virginica\n",
"test_eval <- evaluate(model, iris_test_predictand, test_prediction, ref=3)\n",
"print(test_eval$metrics)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "R",
"language": "R",
"name": "ir"
},
"language_info": {
"codemirror_mode": "r",
"file_extension": ".r",
"mimetype": "text/x-r-source",
"name": "R",
"pygments_lexer": "r",
"version": "4.4.1"
}
},
"nbformat": 4,
"nbformat_minor": 4
}