{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "### Marginal Plots\n", "\n", "A marginal plot is a scatterplot (sometimes a density plot or other bivariate plot) that has histograms, boxplots, or other distribution visualization layers in the margins of the x- and y-axes. \n", "\n", "It lets you study the relationship between two numeric variables together with the distribution of each one. \n", "\n", "You can use the `ggmarginal()` function to add marginal layers to a plot." ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
\n", " \n", " " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "import numpy as np\n", "import pandas as pd\n", "\n", "from lets_plot import *\n", "from lets_plot.mapping import as_discrete\n", "LetsPlot.setup_html()" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# The default plot theme and palette.\n", "LetsPlot.set_theme(theme_light())\n", "palette = scale_color_manual(values=[\"#394449\", \"#F7C443\"]) + scale_fill_manual(values=[\"#394449\", \"#F7C443\"])" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "np.random.seed(0)\n", "\n", "cov0=[[1, -.8], \n", " [-.8, 1]] \n", "cov1=[[ 10, .1],\n", " [.1, .1]]\n", "\n", "x0, y0 = np.random.multivariate_normal(mean=[-2,0], cov=cov0, size=200).T\n", "x1, y1 = np.random.multivariate_normal(mean=[0,1], cov=cov1, size=200).T\n", "\n", "data = dict(\n", " x = np.concatenate((x0,x1)),\n", " y = np.concatenate((y0,y1)),\n", " c = [\"A\"]*200 + [\"B\"]*200\n", ")" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "p = ggplot(data, aes(\"x\", \"y\", color=\"c\", fill=\"c\")) + geom_point() + palette\n", "p" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Add marginal layers to any side of the plot: \n", "# - [l]eft \n", "# - [r]ight \n", "# - [t]op \n", "# - [b]ottom \n", "\n", "p + ggmarginal(\"tr\", layer=geom_histogram(color=\"white\")) \\\n", " + ggmarginal(\"lb\", layer=geom_density(size=0))\n" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Set constant colors to disable the grouping.\n", "\n", "p_hist = (p \n", " + ggmarginal(\"tr\", layer=geom_histogram(fill=\"gray\", color=\"white\"))\n", " + ggmarginal(\"tr\", layer=geom_density(fill=\"rgba(0,0,0,0)\", color=\"red\")))\n", "p_hist" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Add marginal boxplots.\n", "# Make margins smaller using the parameter `size`.\n", "\n", "p_hist + ggmarginal(\"lb\", size=0.03, layer=geom_boxplot(fill=\"white\", color=\"black\")) \n" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Combine marginal violin and boxplot.\n", "# Make margins much larger.\n", "\n", "p + ggmarginal(\"rb\", size=0.4, layer=geom_violin(trim=False, color=\"black\")) \\\n", " + ggmarginal(\"rb\", layer=geom_boxplot(aes(group=\"c\"), fill=\"white\", color=\"white\", \n", " alpha=.25, outlier_color=\"red\", width=.2))\n" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Show marginal densities on a 2D-density plot.\n", "\n", "ggplot(data, aes(\"x\", \"y\")) + geom_density2df(aes(fill=\"..level..\")) + coord_cartesian() \\\n", " + ggmarginal(\"tr\", layer=geom_area(stat=\"density\"))" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Unnamed: 0manufacturermodeldisplyearcyltransdrvctyhwyflclass
01audia41.819994auto(l5)f1829pcompact
12audia41.819994manual(m5)f2129pcompact
23audia42.020084manual(m6)f2031pcompact
\n", "
" ], "text/plain": [ " Unnamed: 0 manufacturer model displ year cyl trans drv cty hwy \\\n", "0 1 audi a4 1.8 1999 4 auto(l5) f 18 29 \n", "1 2 audi a4 1.8 1999 4 manual(m5) f 21 29 \n", "2 3 audi a4 2.0 2008 4 manual(m6) f 20 31 \n", "\n", " fl class \n", "0 p compact \n", "1 p compact \n", "2 p compact " ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# A marginal plot with a significant number of groups on margins. \n", "\n", "mpg = pd.read_csv(\"https://raw.githubusercontent.com/JetBrains/lets-plot-docs/master/data/mpg.csv\")\n", "mpg.head(3)" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "plot_settings = (ggsize(900, 700) +\n", " theme(plot_background=element_rect(fill=\"#eaeaea\"), \n", " legend_background=element_rect(fill=\"#eaeaea\")))\n", "\n", "(ggplot(mpg, aes(\"cty\", \"hwy\"))\n", " + geom_point(aes(fill=\"manufacturer\"), size=7, shape=21, alpha=.5, color=\"black\", position=position_jitter(seed=42))\n", " + ggmarginal(\"tr\", size=0.3, \n", " layer=geom_boxplot(aes(fill=as_discrete(\"manufacturer\", order_by=\"..lower..\")), width=0.5))\n", " + plot_settings) \n" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.13" } }, "nbformat": 4, "nbformat_minor": 4 }