{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## NA and Outlier analysis"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Loading required package: daltoolbox\n",
"\n",
"Registered S3 method overwritten by 'quantmod':\n",
" method from\n",
" as.zoo.data.frame zoo \n",
"\n",
"\n",
"Attaching package: ‘daltoolbox’\n",
"\n",
"\n",
"The following object is masked from ‘package:base’:\n",
"\n",
" transform\n",
"\n",
"\n"
]
}
],
"source": [
"# DAL ToolBox\n",
"# version 1.01.727\n",
"\n",
"# Setup cell: fetch and evaluate the notebook helper script, then load daltoolbox.\n",
"# The script is read over the network from the repository's main branch every\n",
"# time this cell runs, so the cell needs internet access.\n",
"# NOTE(review): the URL is not pinned to a tag or commit, so the helper can change\n",
"# between runs -- confirm whether it should be tied to the version noted above.\n",
"source(\"https://raw.githubusercontent.com/cefet-rj-dal/daltoolbox/main/jupyter.R\")\n",
"\n",
"# Load the daltoolbox package.\n",
"# load_library() is not defined in this notebook; presumably it comes from the\n",
"# jupyter.R script sourced above -- TODO confirm.\n",
"# Attaching daltoolbox masks base::transform (see this cell's stderr output).\n",
"load_library(\"daltoolbox\") "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### NA removal"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"A data.frame: 6 × 5\n",
"\n",
"\t | Sepal.Length | Sepal.Width | Petal.Length | Petal.Width | Species |
\n",
"\t | <dbl> | <dbl> | <dbl> | <dbl> | <fct> |
\n",
"\n",
"\n",
"\t1 | 5.1 | 3.5 | 1.4 | 0.2 | setosa |
\n",
"\t2 | 4.9 | 3.0 | 1.4 | 0.2 | setosa |
\n",
"\t3 | 4.7 | 3.2 | 1.3 | 0.2 | setosa |
\n",
"\t4 | 4.6 | 3.1 | 1.5 | 0.2 | setosa |
\n",
"\t5 | 5.0 | 3.6 | 1.4 | 0.2 | setosa |
\n",
"\t6 | 5.4 | 3.9 | 1.7 | 0.4 | setosa |
\n",
"\n",
"
\n"
],
"text/latex": [
"A data.frame: 6 × 5\n",
"\\begin{tabular}{r|lllll}\n",
" & Sepal.Length & Sepal.Width & Petal.Length & Petal.Width & Species\\\\\n",
" & & & & & \\\\\n",
"\\hline\n",
"\t1 & 5.1 & 3.5 & 1.4 & 0.2 & setosa\\\\\n",
"\t2 & 4.9 & 3.0 & 1.4 & 0.2 & setosa\\\\\n",
"\t3 & 4.7 & 3.2 & 1.3 & 0.2 & setosa\\\\\n",
"\t4 & 4.6 & 3.1 & 1.5 & 0.2 & setosa\\\\\n",
"\t5 & 5.0 & 3.6 & 1.4 & 0.2 & setosa\\\\\n",
"\t6 & 5.4 & 3.9 & 1.7 & 0.4 & setosa\\\\\n",
"\\end{tabular}\n"
],
"text/markdown": [
"\n",
"A data.frame: 6 × 5\n",
"\n",
"| | Sepal.Length <dbl> | Sepal.Width <dbl> | Petal.Length <dbl> | Petal.Width <dbl> | Species <fct> |\n",
"|---|---|---|---|---|---|\n",
"| 1 | 5.1 | 3.5 | 1.4 | 0.2 | setosa |\n",
"| 2 | 4.9 | 3.0 | 1.4 | 0.2 | setosa |\n",
"| 3 | 4.7 | 3.2 | 1.3 | 0.2 | setosa |\n",
"| 4 | 4.6 | 3.1 | 1.5 | 0.2 | setosa |\n",
"| 5 | 5.0 | 3.6 | 1.4 | 0.2 | setosa |\n",
"| 6 | 5.4 | 3.9 | 1.7 | 0.4 | setosa |\n",
"\n"
],
"text/plain": [
" Sepal.Length Sepal.Width Petal.Length Petal.Width Species\n",
"1 5.1 3.5 1.4 0.2 setosa \n",
"2 4.9 3.0 1.4 0.2 setosa \n",
"3 4.7 3.2 1.3 0.2 setosa \n",
"4 4.6 3.1 1.5 0.2 setosa \n",
"5 5.0 3.6 1.4 0.2 setosa \n",
"6 5.4 3.9 1.7 0.4 setosa "
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"150"
],
"text/latex": [
"150"
],
"text/markdown": [
"150"
],
"text/plain": [
"[1] 150"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Take a working copy of the built-in iris data set from the datasets package,\n",
"# then preview its first rows and report the row count as a baseline.\n",
"iris <- datasets::iris\n",
"head(iris)\n",
"nrow(iris)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"A data.frame: 6 × 5\n",
"\n",
"\t | Sepal.Length | Sepal.Width | Petal.Length | Petal.Width | Species |
\n",
"\t | <dbl> | <dbl> | <dbl> | <dbl> | <fct> |
\n",
"\n",
"\n",
"\t1 | 5.1 | 3.5 | 1.4 | 0.2 | setosa |
\n",
"\t2 | NA | 3.0 | 1.4 | 0.2 | setosa |
\n",
"\t3 | 4.7 | 3.2 | 1.3 | 0.2 | setosa |
\n",
"\t4 | 4.6 | 3.1 | 1.5 | 0.2 | setosa |
\n",
"\t5 | 5.0 | 3.6 | 1.4 | 0.2 | setosa |
\n",
"\t6 | 5.4 | 3.9 | 1.7 | 0.4 | setosa |
\n",
"\n",
"
\n"
],
"text/latex": [
"A data.frame: 6 × 5\n",
"\\begin{tabular}{r|lllll}\n",
" & Sepal.Length & Sepal.Width & Petal.Length & Petal.Width & Species\\\\\n",
" & & & & & \\\\\n",
"\\hline\n",
"\t1 & 5.1 & 3.5 & 1.4 & 0.2 & setosa\\\\\n",
"\t2 & NA & 3.0 & 1.4 & 0.2 & setosa\\\\\n",
"\t3 & 4.7 & 3.2 & 1.3 & 0.2 & setosa\\\\\n",
"\t4 & 4.6 & 3.1 & 1.5 & 0.2 & setosa\\\\\n",
"\t5 & 5.0 & 3.6 & 1.4 & 0.2 & setosa\\\\\n",
"\t6 & 5.4 & 3.9 & 1.7 & 0.4 & setosa\\\\\n",
"\\end{tabular}\n"
],
"text/markdown": [
"\n",
"A data.frame: 6 × 5\n",
"\n",
"| | Sepal.Length <dbl> | Sepal.Width <dbl> | Petal.Length <dbl> | Petal.Width <dbl> | Species <fct> |\n",
"|---|---|---|---|---|---|\n",
"| 1 | 5.1 | 3.5 | 1.4 | 0.2 | setosa |\n",
"| 2 | NA | 3.0 | 1.4 | 0.2 | setosa |\n",
"| 3 | 4.7 | 3.2 | 1.3 | 0.2 | setosa |\n",
"| 4 | 4.6 | 3.1 | 1.5 | 0.2 | setosa |\n",
"| 5 | 5.0 | 3.6 | 1.4 | 0.2 | setosa |\n",
"| 6 | 5.4 | 3.9 | 1.7 | 0.4 | setosa |\n",
"\n"
],
"text/plain": [
" Sepal.Length Sepal.Width Petal.Length Petal.Width Species\n",
"1 5.1 3.5 1.4 0.2 setosa \n",
"2 NA 3.0 1.4 0.2 setosa \n",
"3 4.7 3.2 1.3 0.2 setosa \n",
"4 4.6 3.1 1.5 0.2 setosa \n",
"5 5.0 3.6 1.4 0.2 setosa \n",
"6 5.4 3.9 1.7 0.4 setosa "
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"150"
],
"text/latex": [
"150"
],
"text/markdown": [
"150"
],
"text/plain": [
"[1] 150"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Build a copy of iris that contains one missing value, so there is an NA to remove.\n",
"iris.na <- iris\n",
"# Blank out Sepal.Length in row 2 using [row, column] indexing.\n",
"iris.na[2, \"Sepal.Length\"] <- NA\n",
"# Preview the first rows (row 2 now shows NA); the row count is still unchanged.\n",
"head(iris.na)\n",
"nrow(iris.na)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Removing NA tuples"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"A data.frame: 6 × 5\n",
"\n",
"\t | Sepal.Length | Sepal.Width | Petal.Length | Petal.Width | Species |
\n",
"\t | <dbl> | <dbl> | <dbl> | <dbl> | <fct> |
\n",
"\n",
"\n",
"\t1 | 5.1 | 3.5 | 1.4 | 0.2 | setosa |
\n",
"\t3 | 4.7 | 3.2 | 1.3 | 0.2 | setosa |
\n",
"\t4 | 4.6 | 3.1 | 1.5 | 0.2 | setosa |
\n",
"\t5 | 5.0 | 3.6 | 1.4 | 0.2 | setosa |
\n",
"\t6 | 5.4 | 3.9 | 1.7 | 0.4 | setosa |
\n",
"\t7 | 4.6 | 3.4 | 1.4 | 0.3 | setosa |
\n",
"\n",
"
\n"
],
"text/latex": [
"A data.frame: 6 × 5\n",
"\\begin{tabular}{r|lllll}\n",
" & Sepal.Length & Sepal.Width & Petal.Length & Petal.Width & Species\\\\\n",
" & & & & & \\\\\n",
"\\hline\n",
"\t1 & 5.1 & 3.5 & 1.4 & 0.2 & setosa\\\\\n",
"\t3 & 4.7 & 3.2 & 1.3 & 0.2 & setosa\\\\\n",
"\t4 & 4.6 & 3.1 & 1.5 & 0.2 & setosa\\\\\n",
"\t5 & 5.0 & 3.6 & 1.4 & 0.2 & setosa\\\\\n",
"\t6 & 5.4 & 3.9 & 1.7 & 0.4 & setosa\\\\\n",
"\t7 & 4.6 & 3.4 & 1.4 & 0.3 & setosa\\\\\n",
"\\end{tabular}\n"
],
"text/markdown": [
"\n",
"A data.frame: 6 × 5\n",
"\n",
"| | Sepal.Length <dbl> | Sepal.Width <dbl> | Petal.Length <dbl> | Petal.Width <dbl> | Species <fct> |\n",
"|---|---|---|---|---|---|\n",
"| 1 | 5.1 | 3.5 | 1.4 | 0.2 | setosa |\n",
"| 3 | 4.7 | 3.2 | 1.3 | 0.2 | setosa |\n",
"| 4 | 4.6 | 3.1 | 1.5 | 0.2 | setosa |\n",
"| 5 | 5.0 | 3.6 | 1.4 | 0.2 | setosa |\n",
"| 6 | 5.4 | 3.9 | 1.7 | 0.4 | setosa |\n",
"| 7 | 4.6 | 3.4 | 1.4 | 0.3 | setosa |\n",
"\n"
],
"text/plain": [
" Sepal.Length Sepal.Width Petal.Length Petal.Width Species\n",
"1 5.1 3.5 1.4 0.2 setosa \n",
"3 4.7 3.2 1.3 0.2 setosa \n",
"4 4.6 3.1 1.5 0.2 setosa \n",
"5 5.0 3.6 1.4 0.2 setosa \n",
"6 5.4 3.9 1.7 0.4 setosa \n",
"7 4.6 3.4 1.4 0.3 setosa "
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"149"
],
"text/latex": [
"149"
],
"text/markdown": [
"149"
],
"text/plain": [
"[1] 149"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Drop every row that contains at least one NA; here that is only row 2.\n",
"# The preview therefore skips row label 2 and the row count falls from 150 to 149.\n",
"iris.na.omit <- na.omit(iris.na)\n",
"head(iris.na.omit)\n",
"nrow(iris.na.omit)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "R",
"language": "R",
"name": "ir"
},
"language_info": {
"codemirror_mode": "r",
"file_extension": ".r",
"mimetype": "text/x-r-source",
"name": "R",
"pygments_lexer": "r",
"version": "4.3.3"
}
},
"nbformat": 4,
"nbformat_minor": 4
}