{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# `\"boxplotOutlier\"` Statistics\n", "\n", "Computes the outlier values for a boxplot chart; it can also be used in other visualizations:\n", "\n", "```\n", "stat = Stat.boxplotOutlier()\n", "```" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", " " ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ " " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "%useLatestDescriptors\n", "%use lets-plot\n", "%use dataframe" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Lets-Plot Kotlin API v.4.4.2. Frontend: Notebook with dynamically loaded JS. Lets-Plot JS v.4.0.0." ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "LetsPlot.getInfo()" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "application/kotlindataframe+json": "{\"nrow\":3,\"ncol\":12,\"columns\":[\"untitled\",\"manufacturer\",\"model\",\"displ\",\"year\",\"cyl\",\"trans\",\"drv\",\"cty\",\"hwy\",\"fl\",\"class\"],\"kotlin_dataframe\":[{\"untitled\":1,\"manufacturer\":\"audi\",\"model\":\"a4\",\"displ\":1.8,\"year\":1999,\"cyl\":4,\"trans\":\"auto(l5)\",\"drv\":\"f\",\"cty\":18,\"hwy\":29,\"fl\":\"p\",\"class\":\"compact\"},{\"untitled\":2,\"manufacturer\":\"audi\",\"model\":\"a4\",\"displ\":1.8,\"year\":1999,\"cyl\":4,\"trans\":\"manual(m5)\",\"drv\":\"f\",\"cty\":21,\"hwy\":29,\"fl\":\"p\",\"class\":\"compact\"},{\"untitled\":3,\"manufacturer\":\"audi\",\"model\":\"a4\",\"displ\":2.0,\"year\":2008,\"cyl\":4,\"trans\":\"manual(m6)\",\"drv\":\"f\",\"cty\":20,\"hwy\":31,\"fl\":\"p\",\"class\":\"compact\"}]}", "text/html": [ " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "\n", "

DataFrame: rowsCount = 3, columnsCount = 12

\n", " \n", " \n", " " ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "val mpg = DataFrame.readCSV(\"https://raw.githubusercontent.com/JetBrains/lets-plot-docs/master/data/mpg.csv\")\n", "mpg.head(3)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "val p = letsPlot(mpg.toMap()) { y = \"hwy\" } + scaleColorViridis(option = \"magma\", end = 0.8) + ggsize(700, 400)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "// Ordering by variable \"..middle..\" when using stat \"boxplot\" or \"boxplotOutlier\".\n", "val classByMiddle = asDiscrete(\"class\", orderBy = \"..middle..\", order = 1)\n", "\n", "// Equivalent ordering by variable \"..y..\" when using `statSummary()`.\n", "val classByY = asDiscrete(\"class\", orderBy = \"..y..\", order = 1)\n" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "p + geomBoxplot() { x = classByMiddle; color = \"..middle..\" }" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### 1. Show Just Outliers\n", "\n", "Use `stat = Stat.boxplotOutlier()`." ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
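\n", " " ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "val outliers = geomPoint(stat = Stat.boxplotOutlier()) { x = classByMiddle; color = \"..middle..\" }\n", "\n", "p + outliers" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### 2. Add Ribbons and Mid-points\n", "\n", "Use `Stat.boxplot()` with `geomRibbon()` to draw the whisker range (`..ymin..` to `..ymax..`) and the box range (`..lower..` to `..upper..`), and `statSummary(fn = \"mq\")` to mark the middle value of each class." ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/html": [ "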
\n", " " ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "val ribbon1 = geomRibbon(stat = Stat.boxplot()) { x = classByMiddle; ymin = \"..ymin..\"; ymax = \"..ymax..\" }\n", "\n", "val ribbon2 = geomRibbon(stat = Stat.boxplot()) { x = classByMiddle; ymin = \"..lower..\"; ymax = \"..upper..\" }\n", "\n", "val midPoints = statSummary(fn = \"mq\", geom = Geom.point(), shape = 15, size = 6) { x = classByY; color = \"..y..\" }\n", "\n", "p + ribbon1 + ribbon2 + midPoints + outliers + labs(color=\"Middle\")" ] } ], "metadata": { "kernelspec": { "display_name": "Kotlin", "language": "kotlin", "name": "kotlin" }, "language_info": { "codemirror_mode": "text/x-kotlin", "file_extension": ".kt", "mimetype": "text/x-kotlin", "name": "kotlin", "nbconvert_exporter": "", "pygments_lexer": "kotlin", "version": "1.8.20" } }, "nbformat": 4, "nbformat_minor": 4 }