{ "cells": [ { "cell_type": "markdown", "id": "f6b68dfa-a402-4c30-802b-a8cbaec69a7b", "metadata": {}, "source": [ "# Palmer Penguins\n", "\n", "This notebook is inspired by the R Graph Gallery example [Radar chart with ggradar](https://r-graph-gallery.com/web-radar-chart-with-R.html). It averages four body measurements per penguin species, rescales each average to a 0% to 100% range across species, and draws the result as a radar chart with Lets-Plot." ] }, { "cell_type": "code", "execution_count": 1, "id": "aebbd473-7db1-4144-8bae-a61aefc67e9d", "metadata": { "execution": { "iopub.execute_input": "2024-11-01T20:41:59.524668Z", "iopub.status.busy": "2024-11-01T20:41:59.522659Z", "iopub.status.idle": "2024-11-01T20:42:02.350686Z", "shell.execute_reply": "2024-11-01T20:42:02.350441Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ " " ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", " " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "%useLatestDescriptors\n", "%use dataframe\n", "%use lets-plot" ] }, { "cell_type": "code", "execution_count": 2, "id": "4cc8902f-7761-43ef-9fa9-26c72c2b8f00", "metadata": { "execution": { "iopub.execute_input": "2024-11-01T20:42:02.352811Z", "iopub.status.busy": "2024-11-01T20:42:02.352601Z", "iopub.status.idle": "2024-11-01T20:42:02.740735Z", "shell.execute_reply": "2024-11-01T20:42:02.740460Z" } }, "outputs": [], "source": [
    "/**\n",
    " * Min-max rescales the values of [valueCol] to the 0..1 range, separately for each distinct value of [groupCol].\n",
    " *\n",
    " * @param df data frame to process; [valueCol] is read as non-null Double values.\n",
    " * @param valueCol name of the numeric column to rescale.\n",
    " * @param groupCol name of the column whose distinct values define the groups.\n",
    " * @param rescaledCol name of the added column; when null, \"rescaled_\" followed by [valueCol] is used.\n",
    " * @return the per-group frames with the rescaled column added, concatenated into one data frame.\n",
    " */\n",
    "fun rescaleInGroupDataFrame(df: DataFrame<*>, valueCol: String, groupCol: String, rescaledCol: String? = null): DataFrame<*> {\n",
    "    val rescaledColName = rescaledCol ?: \"rescaled_${valueCol}\"\n",
    "    fun rescaleSubDataFrame(subDf: DataFrame<*>): DataFrame<*> {\n",
    "        val minValue = subDf.minByOrNull(valueCol)?.let { it[valueCol] } as Double\n",
    "        val maxValue = subDf.maxByOrNull(valueCol)?.let { it[valueCol] } as Double\n",
    "        val range = maxValue - minValue\n",
    "        // A group whose values are all equal has zero range; map it to 0.0 rather than 0.0 / 0.0 = NaN.\n",
    "        return subDf.add(rescaledColName) { if (range == 0.0) 0.0 else (valueCol<Double>() - minValue) / range }\n",
    "    }\n",
    "    // Compare raw cell values so the group column may hold any type, not only strings.\n",
    "    return df.select(groupCol).distinct().map { v -> rescaleSubDataFrame(df.filter { it[groupCol] == v[groupCol] }) }.concat()\n",
    "}\n",
    "\n",
    "/**\n",
    " * Loads the penguins CSV and returns one row per species and measurement.\n",
    " *\n",
    " * Rows with nulls are dropped, the four measurements are averaged per species and gathered into\n",
    " * \"variable\"/\"value\" columns (body mass divided by 1000, with a matching \"units\" column), and each\n",
    " * variable is min-max rescaled across species into \"rescaled_value\" and its floored percentage\n",
    " * \"rescaled_value_pct\". The result is sorted by species.\n",
    " */\n",
    "fun getData(): DataFrame<*> {\n",
    "    val df = DataFrame.readCSV(\"https://raw.githubusercontent.com/JetBrains/lets-plot-docs/master/data/penguins.csv\")\n",
    "        .dropNulls()\n",
    "        .rename(\"bill_length_mm\", \"bill_depth_mm\", \"flipper_length_mm\", \"body_mass_g\")\n",
    "            .into(\"avg. bill length\", \"avg. bill depth\", \"avg. flipper length\", \"avg. body mass\")\n",
    "        .groupBy(\"species\")\n",
    "        .mean()\n",
    "        .gather(\"avg. bill length\", \"avg. bill depth\", \"avg. flipper length\", \"avg. body mass\")\n",
    "            .into(\"variable\", \"value\")\n",
    "        // Body mass is the only measurement in grams; divide it by 1000 to match the \"kg\" unit label below.\n",
    "        .update { \"value\"<Double>() }\n",
    "            .where { \"variable\"<String>() == \"avg. body mass\" }\n",
    "            .with { it / 1000 }\n",
    "        .add(\"units\") {\n",
    "            when (\"variable\"<String>()) {\n",
    "                \"avg. body mass\" -> \"kg\"\n",
    "                else -> \"mm\"\n",
    "            }\n",
    "        }\n",
    "    return rescaleInGroupDataFrame(df, \"value\", \"variable\")\n",
    "        .add(\"rescaled_value_pct\") { floor(100 * \"rescaled_value\"<Double>()) }\n",
    "        .convert { \"rescaled_value_pct\"<Double>() }.to<Int>()\n",
    "        .sortBy(\"species\")\n",
    "}"
 ] }, { "cell_type": "code", "execution_count": 3, "id": "8f01ce8f-807a-457b-a91f-6a68568f883f", "metadata": { "execution": { "iopub.execute_input": "2024-11-01T20:42:02.742218Z", "iopub.status.busy": "2024-11-01T20:42:02.742001Z", "iopub.status.idle": "2024-11-01T20:42:03.307957Z", "shell.execute_reply": "2024-11-01T20:42:03.308080Z" } }, "outputs": [ { "data": { "application/kotlindataframe+json": "{\"nrow\":4,\"ncol\":6,\"columns\":[\"species\",\"variable\",\"value\",\"units\",\"rescaled_value\",\"rescaled_value_pct\"],\"kotlin_dataframe\":[{\"species\":\"Adelie\",\"variable\":\"avg. bill length\",\"value\":38.82397260273974,\"units\":\"mm\",\"rescaled_value\":0.0,\"rescaled_value_pct\":0},{\"species\":\"Adelie\",\"variable\":\"avg. bill depth\",\"value\":18.347260273972594,\"units\":\"mm\",\"rescaled_value\":0.9785838080813327,\"rescaled_value_pct\":97},{\"species\":\"Adelie\",\"variable\":\"avg. flipper length\",\"value\":190.1027397260274,\"units\":\"mm\",\"rescaled_value\":0.0,\"rescaled_value_pct\":0},{\"species\":\"Adelie\",\"variable\":\"avg. body mass\",\"value\":3.7061643835616436,\"units\":\"kg\",\"rescaled_value\":0.0,\"rescaled_value_pct\":0}]}", "text/html": [ " \n", " \n", " \n", " \n", " \n", "
\n", "\n", "

DataFrame: rowsCount = 4, columnsCount = 6

\n", "
speciesvariablevalueunitsrescaled_valuerescaled_value_pct
Adelieavg. bill length38.823973mm0.0000000
Adelieavg. bill depth18.347260mm0.97858497
Adelieavg. flipper length190.102740mm0.0000000
Adelieavg. body mass3.706164kg0.0000000
\n", " \n", " \n", " " ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "val df = getData()\n", "df.head(4)" ] }, { "cell_type": "code", "execution_count": 4, "id": "cef2ba80-6b5f-4ef2-b40d-8a003d02bec0", "metadata": { "execution": { "iopub.execute_input": "2024-11-01T20:42:03.310133Z", "iopub.status.busy": "2024-11-01T20:42:03.309919Z", "iopub.status.idle": "2024-11-01T20:42:03.410010Z", "shell.execute_reply": "2024-11-01T20:42:03.409804Z" } }, "outputs": [], "source": [
    "// Styling constants shared by the theme and the text layer of the chart.\n",
    "val fontFamily = \"roboto\"\n",
    "val axisColor = \"lightgray\"\n",
    "\n",
    "// Data for the percentage labels: three labels, all positioned at the \"avg. bill length\" category.\n",
    "val axisTextData = mapOf(\n",
    "    \"x\" to listOf(\"avg. bill length\", \"avg. bill length\", \"avg. bill length\"),\n",
    "    \"y\" to listOf(0, 50, 100),\n",
    "    \"text\" to listOf(\"0%\", \"50%\", \"100%\")\n",
    ")\n",
    "\n",
    "// One hex color per species, passed to the manual scale.\n",
    "val penguinsColors = mapOf(\n",
    "    \"Adelie\" to \"#ff5a5f\",\n",
    "    \"Chinstrap\" to \"#ffb400\",\n",
    "    \"Gentoo\" to \"#007a87\"\n",
    ")\n",
    "\n",
    "// Tooltip: species as the title, then the variable with its units and the value formatted with \".2~f\".\n",
    "val penguinsTooltips = layerTooltips()\n",
    "    .title(\"@species\")\n",
    "    .line(\"@variable (@units): @value\")\n",
    "    .format(\"@value\", \".2~f\")\n",
    "\n",
    "// Theme: axis titles, y-axis text, the x-axis line and axis tooltips are blanked; legend anchored at (1, 0).\n",
    "val penguinsTheme = theme(\n",
    "    text = elementText(family = fontFamily, size = 18),\n",
    "    plotTitle = elementText(size = 28, hjust = 0.5, face = \"bold\"),\n",
    "    axisTitle = \"blank\",\n",
    "    axisTextY = \"blank\",\n",
    "    axisLineX = \"blank\",\n",
    "    axisTicks = elementLine(color = axisColor),\n",
    "    panelGrid = elementLine(color = axisColor),\n",
    "    panelInset = 0 to 100,\n",
    "    tooltip = elementRect(),\n",
    "    axisTooltip = \"blank\"\n",
    ")\n",
    "    .legendPosition(1, 0)\n",
    "    .legendJustification(1, 0)"
 ] }, { "cell_type": "code", "execution_count": 5, "id": "07a3e4b7-55ee-47e0-aa88-9d850340c571", "metadata": { "execution": { "iopub.execute_input": "2024-11-01T20:42:03.412136Z", "iopub.status.busy": "2024-11-01T20:42:03.411924Z", "iopub.status.idle": "2024-11-01T20:42:03.621157Z", "shell.execute_reply": "2024-11-01T20:42:03.621281Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [
    "// Radar chart: per-species areas and points over the rescaled percentages, drawn in polar coordinates.\n",
    "letsPlot(df.toMap()) +\n",
    "    geomArea(\n",
    "        position = positionIdentity, flat = true, size = 2.5,\n",
    "        colorBy = \"paint_a\", fillBy = \"paint_a\", alpha = 0.2\n",
    "    ) {\n",
    "        x = \"variable\"\n",
    "        y = \"rescaled_value_pct\"\n",
    "        paint_a = \"species\"\n",
    "    } +\n",
    "    geomPoint(size = 6, colorBy = \"paint_a\", tooltips = penguinsTooltips) {\n",
    "        x = \"variable\"\n",
    "        y = \"rescaled_value_pct\"\n",
    "        paint_a = \"species\"\n",
    "    } +\n",
    "    // Percentage labels come from axisTextData, not from df.\n",
    "    geomText(data = axisTextData, hjust = 1, fontface = \"bold\", family = fontFamily, size = 10) {\n",
    "        x = \"x\"\n",
    "        y = \"y\"\n",
    "        label = \"text\"\n",
    "    } +\n",
    "    scaleXDiscrete() +\n",
    "    scaleManual(\"paint_a\", name = \"\", values = penguinsColors) +\n",
    "    coordPolar(ylim = -15 to 100) +\n",
    "    ggsize(800, 600) +\n",
    "    ggtitle(\"Penguins species\") +\n",
    "    penguinsTheme +\n",
    "    flavorSolarizedLight()"
 ] } ], "metadata": { "kernelspec": { "display_name": "Kotlin", "language": "kotlin", "name": "kotlin" }, "language_info": { "codemirror_mode": "text/x-kotlin", "file_extension": ".kt", "mimetype": "text/x-kotlin", "name": "kotlin", "nbconvert_exporter": "", "pygments_lexer": "kotlin", "version": "1.9.23" } }, "nbformat": 4, "nbformat_minor": 5 }