{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## NA and Outlier analysis" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Loading required package: daltoolbox\n", "\n", "Registered S3 method overwritten by 'quantmod':\n", " method from\n", " as.zoo.data.frame zoo \n", "\n", "\n", "Attaching package: ‘daltoolbox’\n", "\n", "\n", "The following object is masked from ‘package:base’:\n", "\n", " transform\n", "\n", "\n" ] } ], "source": [ "# DAL ToolBox\n", "# version 1.01.727\n", "\n", "source(\"https://raw.githubusercontent.com/cefet-rj-dal/daltoolbox/main/jupyter.R\")\n", "\n", "#loading DAL\n", "load_library(\"daltoolbox\") " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### NA removal" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "\t\n", "\t\n", "\n", "\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\n", "
A data.frame: 6 × 5
Sepal.LengthSepal.WidthPetal.LengthPetal.WidthSpecies
<dbl><dbl><dbl><dbl><fct>
15.13.51.40.2setosa
24.93.01.40.2setosa
34.73.21.30.2setosa
44.63.11.50.2setosa
55.03.61.40.2setosa
65.43.91.70.4setosa
\n" ], "text/latex": [ "A data.frame: 6 × 5\n", "\\begin{tabular}{r|lllll}\n", " & Sepal.Length & Sepal.Width & Petal.Length & Petal.Width & Species\\\\\n", " & & & & & \\\\\n", "\\hline\n", "\t1 & 5.1 & 3.5 & 1.4 & 0.2 & setosa\\\\\n", "\t2 & 4.9 & 3.0 & 1.4 & 0.2 & setosa\\\\\n", "\t3 & 4.7 & 3.2 & 1.3 & 0.2 & setosa\\\\\n", "\t4 & 4.6 & 3.1 & 1.5 & 0.2 & setosa\\\\\n", "\t5 & 5.0 & 3.6 & 1.4 & 0.2 & setosa\\\\\n", "\t6 & 5.4 & 3.9 & 1.7 & 0.4 & setosa\\\\\n", "\\end{tabular}\n" ], "text/markdown": [ "\n", "A data.frame: 6 × 5\n", "\n", "| | Sepal.Length <dbl> | Sepal.Width <dbl> | Petal.Length <dbl> | Petal.Width <dbl> | Species <fct> |\n", "|---|---|---|---|---|---|\n", "| 1 | 5.1 | 3.5 | 1.4 | 0.2 | setosa |\n", "| 2 | 4.9 | 3.0 | 1.4 | 0.2 | setosa |\n", "| 3 | 4.7 | 3.2 | 1.3 | 0.2 | setosa |\n", "| 4 | 4.6 | 3.1 | 1.5 | 0.2 | setosa |\n", "| 5 | 5.0 | 3.6 | 1.4 | 0.2 | setosa |\n", "| 6 | 5.4 | 3.9 | 1.7 | 0.4 | setosa |\n", "\n" ], "text/plain": [ " Sepal.Length Sepal.Width Petal.Length Petal.Width Species\n", "1 5.1 3.5 1.4 0.2 setosa \n", "2 4.9 3.0 1.4 0.2 setosa \n", "3 4.7 3.2 1.3 0.2 setosa \n", "4 4.6 3.1 1.5 0.2 setosa \n", "5 5.0 3.6 1.4 0.2 setosa \n", "6 5.4 3.9 1.7 0.4 setosa " ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "150" ], "text/latex": [ "150" ], "text/markdown": [ "150" ], "text/plain": [ "[1] 150" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "iris <- datasets::iris\n", "head(iris)\n", "nrow(iris)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "\t\n", "\t\n", "\n", "\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\n", "
A data.frame: 6 × 5
Sepal.LengthSepal.WidthPetal.LengthPetal.WidthSpecies
<dbl><dbl><dbl><dbl><fct>
15.13.51.40.2setosa
2 NA3.01.40.2setosa
34.73.21.30.2setosa
44.63.11.50.2setosa
55.03.61.40.2setosa
65.43.91.70.4setosa
\n" ], "text/latex": [ "A data.frame: 6 × 5\n", "\\begin{tabular}{r|lllll}\n", " & Sepal.Length & Sepal.Width & Petal.Length & Petal.Width & Species\\\\\n", " & & & & & \\\\\n", "\\hline\n", "\t1 & 5.1 & 3.5 & 1.4 & 0.2 & setosa\\\\\n", "\t2 & NA & 3.0 & 1.4 & 0.2 & setosa\\\\\n", "\t3 & 4.7 & 3.2 & 1.3 & 0.2 & setosa\\\\\n", "\t4 & 4.6 & 3.1 & 1.5 & 0.2 & setosa\\\\\n", "\t5 & 5.0 & 3.6 & 1.4 & 0.2 & setosa\\\\\n", "\t6 & 5.4 & 3.9 & 1.7 & 0.4 & setosa\\\\\n", "\\end{tabular}\n" ], "text/markdown": [ "\n", "A data.frame: 6 × 5\n", "\n", "| | Sepal.Length <dbl> | Sepal.Width <dbl> | Petal.Length <dbl> | Petal.Width <dbl> | Species <fct> |\n", "|---|---|---|---|---|---|\n", "| 1 | 5.1 | 3.5 | 1.4 | 0.2 | setosa |\n", "| 2 | NA | 3.0 | 1.4 | 0.2 | setosa |\n", "| 3 | 4.7 | 3.2 | 1.3 | 0.2 | setosa |\n", "| 4 | 4.6 | 3.1 | 1.5 | 0.2 | setosa |\n", "| 5 | 5.0 | 3.6 | 1.4 | 0.2 | setosa |\n", "| 6 | 5.4 | 3.9 | 1.7 | 0.4 | setosa |\n", "\n" ], "text/plain": [ " Sepal.Length Sepal.Width Petal.Length Petal.Width Species\n", "1 5.1 3.5 1.4 0.2 setosa \n", "2 NA 3.0 1.4 0.2 setosa \n", "3 4.7 3.2 1.3 0.2 setosa \n", "4 4.6 3.1 1.5 0.2 setosa \n", "5 5.0 3.6 1.4 0.2 setosa \n", "6 5.4 3.9 1.7 0.4 setosa " ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "150" ], "text/latex": [ "150" ], "text/markdown": [ "150" ], "text/plain": [ "[1] 150" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# introducing an NA (in a copy of iris) to remove\n", "iris.na <- iris\n", "iris.na$Sepal.Length[2] <- NA\n", "head(iris.na)\n", "nrow(iris.na)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Removing NA tuples\n", "\n", "`na.omit()` drops every row that contains at least one `NA`; here only row 2 is affected, so the row count falls from 150 to 149." ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "\t\n", "\t\n", "\n", "\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\n", "
A data.frame: 6 × 5
Sepal.LengthSepal.WidthPetal.LengthPetal.WidthSpecies
<dbl><dbl><dbl><dbl><fct>
15.13.51.40.2setosa
34.73.21.30.2setosa
44.63.11.50.2setosa
55.03.61.40.2setosa
65.43.91.70.4setosa
74.63.41.40.3setosa
\n" ], "text/latex": [ "A data.frame: 6 × 5\n", "\\begin{tabular}{r|lllll}\n", " & Sepal.Length & Sepal.Width & Petal.Length & Petal.Width & Species\\\\\n", " & & & & & \\\\\n", "\\hline\n", "\t1 & 5.1 & 3.5 & 1.4 & 0.2 & setosa\\\\\n", "\t3 & 4.7 & 3.2 & 1.3 & 0.2 & setosa\\\\\n", "\t4 & 4.6 & 3.1 & 1.5 & 0.2 & setosa\\\\\n", "\t5 & 5.0 & 3.6 & 1.4 & 0.2 & setosa\\\\\n", "\t6 & 5.4 & 3.9 & 1.7 & 0.4 & setosa\\\\\n", "\t7 & 4.6 & 3.4 & 1.4 & 0.3 & setosa\\\\\n", "\\end{tabular}\n" ], "text/markdown": [ "\n", "A data.frame: 6 × 5\n", "\n", "| | Sepal.Length <dbl> | Sepal.Width <dbl> | Petal.Length <dbl> | Petal.Width <dbl> | Species <fct> |\n", "|---|---|---|---|---|---|\n", "| 1 | 5.1 | 3.5 | 1.4 | 0.2 | setosa |\n", "| 3 | 4.7 | 3.2 | 1.3 | 0.2 | setosa |\n", "| 4 | 4.6 | 3.1 | 1.5 | 0.2 | setosa |\n", "| 5 | 5.0 | 3.6 | 1.4 | 0.2 | setosa |\n", "| 6 | 5.4 | 3.9 | 1.7 | 0.4 | setosa |\n", "| 7 | 4.6 | 3.4 | 1.4 | 0.3 | setosa |\n", "\n" ], "text/plain": [ " Sepal.Length Sepal.Width Petal.Length Petal.Width Species\n", "1 5.1 3.5 1.4 0.2 setosa \n", "3 4.7 3.2 1.3 0.2 setosa \n", "4 4.6 3.1 1.5 0.2 setosa \n", "5 5.0 3.6 1.4 0.2 setosa \n", "6 5.4 3.9 1.7 0.4 setosa \n", "7 4.6 3.4 1.4 0.3 setosa " ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "149" ], "text/latex": [ "149" ], "text/markdown": [ "149" ], "text/plain": [ "[1] 149" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "iris.na.omit <- na.omit(iris.na)\n", "head(iris.na.omit)\n", "nrow(iris.na.omit)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "R", "language": "R", "name": "ir" }, "language_info": { "codemirror_mode": "r", "file_extension": ".r", "mimetype": "text/x-r-source", "name": "R", "pygments_lexer": "r", "version": "4.3.3" } }, "nbformat": 4, "nbformat_minor": 4 }