{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Clustering - DBSCAN\n", "## Libraries and Datasets\n" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Loading required package: daltoolbox\n", "\n", "Registered S3 method overwritten by 'quantmod':\n", " method from\n", " as.zoo.data.frame zoo \n", "\n", "\n", "Attaching package: ‘daltoolbox’\n", "\n", "\n", "The following object is masked from ‘package:base’:\n", "\n", " transform\n", "\n", "\n" ] } ], "source": [ "# DAL ToolBox\n", "# version 1.01.727\n", "\n", "source(\"https://raw.githubusercontent.com/cefet-rj-dal/daltoolbox/main/jupyter.R\")\n", "\n", "#loading DAL\n", "load_library(\"daltoolbox\") " ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "#load dataset\n", "data(iris)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "General entropy of the dataset: reported as `data_entropy` in the `evaluate()` output at the end of this notebook." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Setting up, fitting, and evaluating the DBSCAN clustering model" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "# setup clustering\n", "model <- cluster_dbscan(minPts = 3)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "clu\n", " 0 1 2 3 4 \n", "26 47 38 4 35 " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# build model\n", "model <- fit(model, iris[,1:4])\n", "clu <- cluster(model, iris[,1:4])\n", "table(clu)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\t
$clusters_entropy
\n", "\t\t
\n", "\n", "\n", "\t\n", "\t\n", "\n", "\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\n", "
A tibble: 5 × 4
xceqtdceg
<fct><dbl><int><dbl>
01.1841636260.20525503
10.0000000470.00000000
20.0000000380.00000000
30.0000000 40.00000000
40.4220005350.09846679
\n", "
\n", "\t
$clustering_entropy
\n", "\t\t
0.303721818400112
\n", "\t
$data_entropy
\n", "\t\t
1.58496250072116
\n", "
\n" ], "text/latex": [ "\\begin{description}\n", "\\item[\\$clusters\\_entropy] A tibble: 5 × 4\n", "\\begin{tabular}{llll}\n", " x & ce & qtd & ceg\\\\\n", " & & & \\\\\n", "\\hline\n", "\t 0 & 1.1841636 & 26 & 0.20525503\\\\\n", "\t 1 & 0.0000000 & 47 & 0.00000000\\\\\n", "\t 2 & 0.0000000 & 38 & 0.00000000\\\\\n", "\t 3 & 0.0000000 & 4 & 0.00000000\\\\\n", "\t 4 & 0.4220005 & 35 & 0.09846679\\\\\n", "\\end{tabular}\n", "\n", "\\item[\\$clustering\\_entropy] 0.303721818400112\n", "\\item[\\$data\\_entropy] 1.58496250072116\n", "\\end{description}\n" ], "text/markdown": [ "$clusters_entropy\n", ": \n", "A tibble: 5 × 4\n", "\n", "| x <fct> | ce <dbl> | qtd <int> | ceg <dbl> |\n", "|---|---|---|---|\n", "| 0 | 1.1841636 | 26 | 0.20525503 |\n", "| 1 | 0.0000000 | 47 | 0.00000000 |\n", "| 2 | 0.0000000 | 38 | 0.00000000 |\n", "| 3 | 0.0000000 | 4 | 0.00000000 |\n", "| 4 | 0.4220005 | 35 | 0.09846679 |\n", "\n", "\n", "$clustering_entropy\n", ": 0.303721818400112\n", "$data_entropy\n", ": 1.58496250072116\n", "\n", "\n" ], "text/plain": [ "$clusters_entropy\n", "\u001b[90m# A tibble: 5 × 4\u001b[39m\n", " x ce qtd ceg\n", " \u001b[3m\u001b[90m\u001b[39m\u001b[23m \u001b[3m\u001b[90m\u001b[39m\u001b[23m \u001b[3m\u001b[90m\u001b[39m\u001b[23m \u001b[3m\u001b[90m\u001b[39m\u001b[23m\n", "\u001b[90m1\u001b[39m 0 1.18 26 0.205 \n", "\u001b[90m2\u001b[39m 1 0 47 0 \n", "\u001b[90m3\u001b[39m 2 0 38 0 \n", "\u001b[90m4\u001b[39m 3 0 4 0 \n", "\u001b[90m5\u001b[39m 4 0.422 35 0.098\u001b[4m5\u001b[24m\n", "\n", "$clustering_entropy\n", "[1] 0.3037218\n", "\n", "$data_entropy\n", "[1] 1.584963\n" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# evaluate model using external metric\n", "eval <- evaluate(model, clu, iris$Species)\n", "eval" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "R", "language": "R", "name": "ir" }, "language_info": { 
"codemirror_mode": "r", "file_extension": ".r", "mimetype": "text/x-r-source", "name": "R", "pygments_lexer": "r", "version": "4.3.3" } }, "nbformat": 4, "nbformat_minor": 4 }