{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Clustering - DBSCAN\n", "## Libraries and Datasets\n" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Loading required package: daltoolbox\n", "\n", "Registered S3 method overwritten by 'quantmod':\n", " method from\n", " as.zoo.data.frame zoo \n", "\n", "\n", "Attaching package: ‘daltoolbox’\n", "\n", "\n", "The following object is masked from ‘package:base’:\n", "\n", " transform\n", "\n", "\n" ] } ], "source": [ "# DAL ToolBox\n", "# version 1.01.727\n", "\n", "source(\"https://raw.githubusercontent.com/cefet-rj-dal/daltoolbox/main/jupyter.R\")\n", "\n", "#loading DAL\n", "load_library(\"daltoolbox\") " ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "#load dataset\n", "data(iris)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "General entropy of the dataset" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## General workflow to test clustering methods" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "# setup clustering\n", "model <- cluster_dbscan(minPts = 3)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "clu\n", " 0 1 2 3 4 \n", "26 47 38 4 35 " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# build model\n", "model <- fit(model, iris[,1:4])\n", "clu <- cluster(model, iris[,1:4])\n", "table(clu)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
x | ce | qtd | ceg |
---|---|---|---|
<fct> | <dbl> | <int> | <dbl> |
0 | 1.1841636 | 26 | 0.20525503 |
1 | 0.0000000 | 47 | 0.00000000 |
2 | 0.0000000 | 38 | 0.00000000 |
3 | 0.0000000 | 4 | 0.00000000 |
4 | 0.4220005 | 35 | 0.09846679 |