{ "cells": [ { "cell_type": "markdown", "id": "awful-desktop", "metadata": {}, "source": [ "### Marginal Plots\n", "\n", "A marginal plot is a scatterplot (sometimes a density plot or other bivariate plot) that has histograms, boxplots, or other distribution-visualization layers in the margins of the x- and y-axes.\n", "\n", "It lets you study the relationship between two numeric variables alongside the distribution of each variable on its own.\n", "\n", "You can use the `ggmarginal()` function to add marginal layers to a plot." ] }, { "cell_type": "code", "execution_count": 1, "id": "satisfied-grenada", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "%useLatestDescriptors\n", "%use lets-plot\n" ] }, { "cell_type": "code", "execution_count": 2, "id": "tender-tutorial", "metadata": {}, "outputs": [], "source": [ "@file:DependsOn(\"org.apache.commons:commons-math3:3.6.1\")" ] }, { "cell_type": "code", "execution_count": 3, "id": "norman-point", "metadata": {}, "outputs": [], "source": [ "import org.apache.commons.math3.distribution.MultivariateNormalDistribution\n", "import org.apache.commons.math3.random.JDKRandomGenerator" ] }, { "cell_type": "code", "execution_count": 4, "id": "mexican-terror", "metadata": {}, "outputs": [], "source": [ "// Covariance matrices of the two samples.\n", "// cov0: unit variances with a negative covariance (-0.8).\n", "// cov1: large variance in x (10.0), small variance in y (0.1).\n", "val cov0 : Array<DoubleArray> = arrayOf(\n", "    doubleArrayOf(1.0, -.8),\n", "    doubleArrayOf(-.8, 1.0),\n", ")\n", "\n", "val cov1 : Array<DoubleArray> = arrayOf(\n", "    doubleArrayOf(10.0, .1),\n", "    doubleArrayOf(.1, .1),\n", ")\n" ] }, { "cell_type": "code", "execution_count": 5, "id": "revolutionary-munich", "metadata": {}, "outputs": [], "source": [ "// Number of points drawn for each group.\n", "val n = 200\n", "\n", "val means0 : DoubleArray = doubleArrayOf(-2.0, 0.0)\n", "val means1 : DoubleArray = doubleArrayOf(0.0, 1.0)\n", "\n", "// Fixed seed: re-running this cell always yields the same samples.\n", "val rng = JDKRandomGenerator(42)\n", "\n", "val xy0 = MultivariateNormalDistribution(rng, means0, cov0).sample(n)\n", "val xy1 = MultivariateNormalDistribution(rng, means1, cov1).sample(n)\n" ] }, { "cell_type": "code", "execution_count": 6, "id": "sunrise-skating", "metadata": {}, "outputs": [], "source": [ "// Plot data: x/y coordinates of both samples, plus the group label in column c.\n", "// `map` and `+` already produce Lists, so no extra copy is needed.\n", "val dat = mapOf(\n", "    \"x\" to (xy0.map { it[0] } + xy1.map { it[0] }),\n", "    \"y\" to (xy0.map { it[1] } + xy1.map { it[1] }),\n", "    \"c\" to (List(n) { \"A\" } + List(n) { \"B\" }),\n", ")\n" ] }, { "cell_type": "code", "execution_count": 7, "id": "material-village", "metadata": {}, "outputs": [], "source": [ "// The default plot theme and palette.\n", "LetsPlot.theme = themeLight()\n", "val paletteColors = listOf(\"#394449\", \"#F7C443\")\n", "val palette = scaleColorManual(values = paletteColors) + scaleFillManual(values = paletteColors)" ] }, { "cell_type": "code", "execution_count": 8, "id": "passing-suicide", "metadata": {}, "outputs": [ { "data": {
"text/html": [ "
\n", " " ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "// Base scatter plot: both color and fill are mapped to the group column c.\n", "// `dat` is already a Map, so it is passed as-is (no `.toMap()` copy).\n", "val p = letsPlot(dat) { x = \"x\"; y = \"y\"; color = \"c\"; fill = \"c\" } + geomPoint() + palette\n", "p" ] }, { "cell_type": "code", "execution_count": 9, "id": "straight-locator", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "// Marginal layers can be added to any side of the plot.\n", "// Sides are given as letters: [l]eft, [r]ight, [t]op, [b]ottom.\n", "\n", "(p\n", "    + ggmarginal(\"tr\", layer = geomHistogram(color = \"white\"))\n", "    + ggmarginal(\"lb\", layer = geomDensity(size = 0)))\n" ] }, { "cell_type": "code", "execution_count": 10, "id": "smooth-lingerie", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "// Constant colors on the marginal layers disable the grouping.\n", "\n", "val pHist = (p\n", "    + ggmarginal(\"tr\", layer = geomHistogram(fill = \"gray\", color = \"white\"))\n", "    + ggmarginal(\"tr\", layer = geomDensity(fill = \"rgba(0,0,0,0)\", color = \"red\")))\n", "pHist" ] }, { "cell_type": "code", "execution_count": 11, "id": "unexpected-puppy", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "// Marginal boxplots on the left and bottom sides.\n", "// The `size` parameter makes these margins smaller.\n", "\n", "pHist + ggmarginal(\n", "    \"lb\",\n", "    size = 0.03,\n", "    layer = geomBoxplot(fill = \"white\", color = \"black\")\n", ")\n" ] }, { "cell_type": "code", "execution_count": 12, "id": "grave-dragon", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "// Marginal violins with boxplots drawn over them.\n", "// `size = 0.4` makes the margins much larger.\n", "\n", "(p\n", "    + ggmarginal(\"rb\", size = 0.4, layer = geomViolin(trim = false, color = \"black\"))\n", "    + ggmarginal(\n", "        \"rb\",\n", "        layer = geomBoxplot(\n", "            fill = \"white\", color = \"white\", alpha = .25,\n", "            outlierColor = \"red\", width = .2\n", "        ) { group = \"c\" }))\n" ] }, { "cell_type": "code", "execution_count": 13, "id": "north-worthy", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "// Marginal densities (area layers with the density stat) on a 2D-density plot.\n", "\n", "(letsPlot(dat) { x = \"x\"; y = \"y\" }\n", "    + geomDensity2DFilled { fill = \"..level..\" }\n", "    + coordCartesian()\n", "    + ggmarginal(\"tr\", layer = geomArea(stat = Stat.density())))" ] }, { "cell_type": "code", "execution_count": 14, "id": "bizarre-longer", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ " " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "// A marginal plot with a significant number of groups on margins. \n", "\n", "\n", "%use dataframe" ] }, { "cell_type": "code", "execution_count": 15, "id": "stainless-throat", "metadata": {}, "outputs": [ { "data": { "application/kotlindataframe+json": "{\"nrow\":3,\"ncol\":12,\"columns\":[\"untitled\",\"manufacturer\",\"model\",\"displ\",\"year\",\"cyl\",\"trans\",\"drv\",\"cty\",\"hwy\",\"fl\",\"class\"],\"kotlin_dataframe\":[{\"untitled\":1,\"manufacturer\":\"audi\",\"model\":\"a4\",\"displ\":1.8,\"year\":1999,\"cyl\":4,\"trans\":\"auto(l5)\",\"drv\":\"f\",\"cty\":18,\"hwy\":29,\"fl\":\"p\",\"class\":\"compact\"},{\"untitled\":2,\"manufacturer\":\"audi\",\"model\":\"a4\",\"displ\":1.8,\"year\":1999,\"cyl\":4,\"trans\":\"manual(m5)\",\"drv\":\"f\",\"cty\":21,\"hwy\":29,\"fl\":\"p\",\"class\":\"compact\"},{\"untitled\":3,\"manufacturer\":\"audi\",\"model\":\"a4\",\"displ\":2.0,\"year\":2008,\"cyl\":4,\"trans\":\"manual(m6)\",\"drv\":\"f\",\"cty\":20,\"hwy\":31,\"fl\":\"p\",\"class\":\"compact\"}]}", "text/html": [ " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "\n", "

DataFrame: rowsCount = 3, columnsCount = 12

\n", " \n", " \n", " " ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "// Load the mpg dataset; `val` because the frame is never reassigned in this notebook.\n", "val mpg = DataFrame.readCSV(\"https://raw.githubusercontent.com/JetBrains/lets-plot-kotlin/master/docs/examples/data/mpg.csv\")\n", "mpg.head(3)\n" ] }, { "cell_type": "code", "execution_count": 16, "id": "combined-diagram", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "// Figure size plus the same background fill for the plot and the legend.\n", "val plotSettings = (ggsize(900, 700)\n", "    + theme(\n", "        plotBackground = elementRect(fill = \"#eaeaea\"),\n", "        legendBackground = elementRect(fill = \"#eaeaea\")))\n", "\n", "// Jittered points of `cty` vs. `hwy`, filled by manufacturer, with marginal boxplots\n", "// whose manufacturer groups are ordered by `..lower..`.\n", "(letsPlot(mpg.toMap()) { x = \"cty\"; y = \"hwy\"; fill = \"manufacturer\" }\n", "    + geomPoint(size = 7, shape = 21, alpha = .5, color = \"black\", position = positionJitter())\n", "    + ggmarginal(\n", "        \"tr\",\n", "        size = 0.3,\n", "        layer = geomBoxplot(width = 0.5) {\n", "            fill = asDiscrete(\"manufacturer\", orderBy = \"..lower..\")\n", "        })\n", "    + plotSettings)\n" ] } ], "metadata": { "kernelspec": { "display_name": "Kotlin", "language": "kotlin", "name": "kotlin" }, "language_info": { "codemirror_mode": "text/x-kotlin", "file_extension": ".kt", "mimetype": "text/x-kotlin", "name": "kotlin", "nbconvert_exporter": "", "pygments_lexer": "kotlin", "version": "1.8.20" } }, "nbformat": 4, "nbformat_minor": 5 }