{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "### Marginal Plots\n", "\n", "A marginal plot is a scatterplot (sometimes a density plot or other bivariate plot) that has histograms, boxplots, or other distribution visualization layers in the margins of the x- and y-axes.\n", "\n", "It lets you study the relationship between two numeric variables alongside the distribution of each variable.\n", "\n", "You can use the `ggmarginal()` function to add marginal layers to a plot." ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "execution": { "iopub.execute_input": "2025-11-05T13:40:05.371207Z", "iopub.status.busy": "2025-11-05T13:40:05.371123Z", "iopub.status.idle": "2025-11-05T13:40:05.374204Z", "shell.execute_reply": "2025-11-05T13:40:05.374014Z" } }, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "\n", "from lets_plot import *" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "execution": { "iopub.execute_input": "2025-11-05T13:40:05.375100Z", "iopub.status.busy": "2025-11-05T13:40:05.375029Z", "iopub.status.idle": "2025-11-05T13:40:05.376901Z", "shell.execute_reply": "2025-11-05T13:40:05.376586Z" } }, "outputs": [ { "data": { "text/html": [ "\n", "
\n", " \n", " " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "LetsPlot.setup_html()" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "execution": { "iopub.execute_input": "2025-11-05T13:40:05.389918Z", "iopub.status.busy": "2025-11-05T13:40:05.389818Z", "iopub.status.idle": "2025-11-05T13:40:05.391544Z", "shell.execute_reply": "2025-11-05T13:40:05.391248Z" } }, "outputs": [], "source": [ "# The default plot theme and palette.\n", "LetsPlot.set_theme(theme_light())\n", "palette = scale_color_manual(values=[\"#394449\", \"#F7C443\"]) + scale_fill_manual(values=[\"#394449\", \"#F7C443\"])" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "execution": { "iopub.execute_input": "2025-11-05T13:40:05.392195Z", "iopub.status.busy": "2025-11-05T13:40:05.392124Z", "iopub.status.idle": "2025-11-05T13:40:05.394365Z", "shell.execute_reply": "2025-11-05T13:40:05.394173Z" } }, "outputs": [], "source": [ "np.random.seed(0)\n", "\n", "cov0 = [[1, -.8],\n", " [-.8, 1]]\n", "cov1 = [[10, .1],\n", " [.1, .1]]\n", "\n", "x0, y0 = np.random.multivariate_normal(mean=[-2, 0], cov=cov0, size=200).T\n", "x1, y1 = np.random.multivariate_normal(mean=[0, 1], cov=cov1, size=200).T\n", "\n", "data = dict(\n", " x=np.concatenate((x0, x1)),\n", " y=np.concatenate((y0, y1)),\n", " c=[\"A\"] * 200 + [\"B\"] * 200\n", ")" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "execution": { "iopub.execute_input": "2025-11-05T13:40:05.394957Z", "iopub.status.busy": "2025-11-05T13:40:05.394885Z", "iopub.status.idle": "2025-11-05T13:40:05.425451Z", "shell.execute_reply": "2025-11-05T13:40:05.425245Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "p = ggplot(data, aes(\"x\", \"y\", color=\"c\", fill=\"c\")) + geom_point() + palette\n", "p" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "execution": { "iopub.execute_input": "2025-11-05T13:40:05.426398Z", "iopub.status.busy": "2025-11-05T13:40:05.426299Z", "iopub.status.idle": "2025-11-05T13:40:05.466685Z", "shell.execute_reply": "2025-11-05T13:40:05.466447Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Add marginal layers to any side of the plot, selected by letter:\n", "# - [l]eft\n", "# - [r]ight\n", "# - [t]op\n", "# - [b]ottom\n", "\n", "p + ggmarginal(\"tr\", layer=geom_histogram(color=\"white\")) + \\\n", " ggmarginal(\"lb\", layer=geom_density(size=0))" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "execution": { "iopub.execute_input": "2025-11-05T13:40:05.467603Z", "iopub.status.busy": "2025-11-05T13:40:05.467523Z", "iopub.status.idle": "2025-11-05T13:40:05.494608Z", "shell.execute_reply": "2025-11-05T13:40:05.494401Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Set constant colors to disable the grouping.\n", "\n", "p_hist = p + \\\n", " ggmarginal(\"tr\", layer=geom_histogram(fill=\"gray\", color=\"white\")) + \\\n", " ggmarginal(\"tr\", layer=geom_density(fill=\"rgba(0,0,0,0)\", color=\"red\"))\n", "p_hist" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "execution": { "iopub.execute_input": "2025-11-05T13:40:05.495499Z", "iopub.status.busy": "2025-11-05T13:40:05.495416Z", "iopub.status.idle": "2025-11-05T13:40:05.522593Z", "shell.execute_reply": "2025-11-05T13:40:05.522388Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Add marginal boxplots.\n", "# Make margins smaller using the parameter `size`.\n", "\n", "p_hist + ggmarginal(\"lb\", size=.03, layer=geom_boxplot(fill=\"white\", color=\"black\"))" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "execution": { "iopub.execute_input": "2025-11-05T13:40:05.523448Z", "iopub.status.busy": "2025-11-05T13:40:05.523373Z", "iopub.status.idle": "2025-11-05T13:40:05.565503Z", "shell.execute_reply": "2025-11-05T13:40:05.565294Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Combine marginal violin and boxplot.\n", "# Make margins much larger.\n", "\n", "p + ggmarginal(\"rb\", size=.4, layer=geom_violin(trim=False, color=\"black\")) + \\\n", " ggmarginal(\"rb\", layer=geom_boxplot(aes(group=\"c\"), fill=\"white\", color=\"white\",\n", " alpha=.25, outlier_color=\"red\", width=.2))" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "execution": { "iopub.execute_input": "2025-11-05T13:40:05.566389Z", "iopub.status.busy": "2025-11-05T13:40:05.566312Z", "iopub.status.idle": "2025-11-05T13:40:05.608966Z", "shell.execute_reply": "2025-11-05T13:40:05.608754Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Layers can be summed with `+` and passed together as a single marginal layer.\n", "\n", "p + ggmarginal(\"rb\", size=.2, layer=geom_violin() + \\\n", " geom_boxplot(aes(group=\"c\"), fill=\"white\", color=\"black\", alpha=.5, width=.2))" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "execution": { "iopub.execute_input": "2025-11-05T13:40:05.609887Z", "iopub.status.busy": "2025-11-05T13:40:05.609810Z", "iopub.status.idle": "2025-11-05T13:40:05.741557Z", "shell.execute_reply": "2025-11-05T13:40:05.741315Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Show marginal densities on a 2D-density plot.\n", "\n", "ggplot(data, aes(\"x\", \"y\")) + geom_density2df(aes(fill=\"..level..\")) + coord_cartesian() + \\\n", " ggmarginal(\"tr\", layer=geom_area(stat=\"density\"))" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "execution": { "iopub.execute_input": "2025-11-05T13:40:05.744017Z", "iopub.status.busy": "2025-11-05T13:40:05.743928Z", "iopub.status.idle": "2025-11-05T13:40:05.966112Z", "shell.execute_reply": "2025-11-05T13:40:05.965904Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Unnamed: 0manufacturermodeldisplyearcyltransdrvctyhwyflclass
01audia41.819994auto(l5)f1829pcompact
12audia41.819994manual(m5)f2129pcompact
23audia42.020084manual(m6)f2031pcompact
34audia42.020084auto(av)f2130pcompact
45audia42.819996auto(l5)f1626pcompact
\n", "
" ], "text/plain": [ " Unnamed: 0 manufacturer model displ year cyl trans drv cty hwy \\\n", "0 1 audi a4 1.8 1999 4 auto(l5) f 18 29 \n", "1 2 audi a4 1.8 1999 4 manual(m5) f 21 29 \n", "2 3 audi a4 2.0 2008 4 manual(m6) f 20 31 \n", "3 4 audi a4 2.0 2008 4 auto(av) f 21 30 \n", "4 5 audi a4 2.8 1999 6 auto(l5) f 16 26 \n", "\n", " fl class \n", "0 p compact \n", "1 p compact \n", "2 p compact \n", "3 p compact \n", "4 p compact " ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# A marginal plot with a significant number of groups on margins.\n", "\n", "df = pd.read_csv(\"https://raw.githubusercontent.com/JetBrains/lets-plot-docs/master/data/mpg.csv\")\n", "df.head()" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "execution": { "iopub.execute_input": "2025-11-05T13:40:05.966951Z", "iopub.status.busy": "2025-11-05T13:40:05.966875Z", "iopub.status.idle": "2025-11-05T13:40:05.979207Z", "shell.execute_reply": "2025-11-05T13:40:05.979011Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "plot_settings = ggsize(900, 700) + \\\n", " theme(plot_background=element_rect(fill=\"#eaeaea\"),\n", " legend_background=element_rect(fill=\"#eaeaea\"))\n", "\n", "ggplot(df, aes(\"cty\", \"hwy\")) + \\\n", " geom_point(aes(fill=\"manufacturer\"), size=7, shape=21, alpha=.5, color=\"black\", position=position_jitter(seed=42)) + \\\n", " ggmarginal(\"tr\", size=.3,\n", " layer=geom_boxplot(aes(fill=as_discrete(\"manufacturer\", order_by=\"..lower..\")), width=0.5)) + \\\n", " plot_settings" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.13" } }, "nbformat": 4, "nbformat_minor": 4 }