{ "cells": [ { "cell_type": "markdown", "id": "5a3851c8", "metadata": {}, "source": [ "# Ridgeline plot" ] }, { "cell_type": "markdown", "id": "86fe154a", "metadata": {}, "source": [ "## Preparation" ] }, { "cell_type": "code", "execution_count": 1, "id": "7dfbf1c6", "metadata": { "execution": { "iopub.execute_input": "2024-04-17T07:30:35.655139Z", "iopub.status.busy": "2024-04-17T07:30:35.655049Z", "iopub.status.idle": "2024-04-17T07:30:35.974958Z", "shell.execute_reply": "2024-04-17T07:30:35.974656Z" } }, "outputs": [ { "data": { "text/html": [ "\n", "
\n", " \n", " " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "import pandas as pd\n", "\n", "from lets_plot import *\n", "from lets_plot.mapping import as_discrete\n", "LetsPlot.setup_html()" ] }, { "cell_type": "code", "execution_count": 2, "id": "bbcfcdc4", "metadata": { "execution": { "iopub.execute_input": "2024-04-17T07:30:35.976159Z", "iopub.status.busy": "2024-04-17T07:30:35.976053Z", "iopub.status.idle": "2024-04-17T07:30:36.108058Z", "shell.execute_reply": "2024-04-17T07:30:36.107865Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(150, 5)\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
sepal_lengthsepal_widthpetal_lengthpetal_widthspecies
05.13.51.40.2setosa
14.93.01.40.2setosa
24.73.21.30.2setosa
34.63.11.50.2setosa
45.03.61.40.2setosa
\n", "
" ], "text/plain": [ " sepal_length sepal_width petal_length petal_width species\n", "0 5.1 3.5 1.4 0.2 setosa\n", "1 4.9 3.0 1.4 0.2 setosa\n", "2 4.7 3.2 1.3 0.2 setosa\n", "3 4.6 3.1 1.5 0.2 setosa\n", "4 5.0 3.6 1.4 0.2 setosa" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Name the dataset location first, then parse it.\n", "iris_url = \"https://raw.githubusercontent.com/JetBrains/lets-plot-docs/master/data/iris.csv\"\n", "iris_df = pd.read_csv(iris_url)\n", "\n", "# Print the shape, then display the first rows as the cell result.\n", "print(iris_df.shape)\n", "iris_df.head()" ] }, { "cell_type": "code", "execution_count": 3, "id": "697027b8", "metadata": { "execution": { "iopub.execute_input": "2024-04-17T07:30:36.109086Z", "iopub.status.busy": "2024-04-17T07:30:36.109014Z", "iopub.status.idle": "2024-04-17T07:30:36.251249Z", "shell.execute_reply": "2024-04-17T07:30:36.250959Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(234, 12)\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Unnamed: 0manufacturermodeldisplyearcyltransdrvctyhwyflclass
01audia41.819994auto(l5)f1829pcompact
12audia41.819994manual(m5)f2129pcompact
23audia42.020084manual(m6)f2031pcompact
34audia42.020084auto(av)f2130pcompact
45audia42.819996auto(l5)f1626pcompact
\n", "
" ], "text/plain": [ " Unnamed: 0 manufacturer model displ year cyl trans drv cty hwy \\\n", "0 1 audi a4 1.8 1999 4 auto(l5) f 18 29 \n", "1 2 audi a4 1.8 1999 4 manual(m5) f 21 29 \n", "2 3 audi a4 2.0 2008 4 manual(m6) f 20 31 \n", "3 4 audi a4 2.0 2008 4 auto(av) f 21 30 \n", "4 5 audi a4 2.8 1999 6 auto(l5) f 16 26 \n", "\n", " fl class \n", "0 p compact \n", "1 p compact \n", "2 p compact \n", "3 p compact \n", "4 p compact " ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Name the dataset location first, then parse it.\n", "mpg_url = \"https://raw.githubusercontent.com/JetBrains/lets-plot-docs/master/data/mpg.csv\"\n", "mpg_df = pd.read_csv(mpg_url)\n", "\n", "# Print the shape, then display the first rows as the cell result.\n", "print(mpg_df.shape)\n", "mpg_df.head()" ] }, { "cell_type": "markdown", "id": "b59a2ac7", "metadata": {}, "source": [ "## Plots" ] }, { "cell_type": "markdown", "id": "8dd5f2e8", "metadata": {}, "source": [ "### Default plot" ] }, { "cell_type": "code", "execution_count": 4, "id": "bbb2109a", "metadata": { "execution": { "iopub.execute_input": "2024-04-17T07:30:36.252193Z", "iopub.status.busy": "2024-04-17T07:30:36.252120Z", "iopub.status.idle": "2024-04-17T07:30:36.299299Z", "shell.execute_reply": "2024-04-17T07:30:36.299037Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Ridgeline layer with every option left at its default.\n", "(\n", "    ggplot(iris_df, aes(\"sepal_length\", \"species\"))\n", "    + geom_area_ridges()\n", ")" ] }, { "cell_type": "markdown", "id": "5d7a7f35", "metadata": {}, "source": [ "### `min_height` parameter" ] }, { "cell_type": "code", "execution_count": 5, "id": "9dffec45", "metadata": { "execution": { "iopub.execute_input": "2024-04-17T07:30:36.300562Z", "iopub.status.busy": "2024-04-17T07:30:36.300410Z", "iopub.status.idle": "2024-04-17T07:30:36.302718Z", "shell.execute_reply": "2024-04-17T07:30:36.302514Z" } }, "outputs": [], "source": [ "# Two rows of seven points each: the first at y = -0.4, the second at y = -0.8.\n", "# Column `h` holds one value per point (first row, then second row).\n", "ridge_x = [1, 2, 3, 4, 5, 6, 7]\n", "df = pd.DataFrame(dict(\n", "    x=ridge_x + ridge_x,\n", "    y=[-.4] * len(ridge_x) + [-.8] * len(ridge_x),\n", "    h=[.4, -.2, .6, -.8, .3, .1, .7,\n", "       .1, .3, .1, -.6, -.1, -.3, -.1],\n", "))" ] }, { "cell_type": "code", "execution_count": 6, "id": "f540a0f4", "metadata": { "execution": { "iopub.execute_input": "2024-04-17T07:30:36.303751Z", "iopub.status.busy": "2024-04-17T07:30:36.303662Z", "iopub.status.idle": "2024-04-17T07:30:36.306604Z", "shell.execute_reply": "2024-04-17T07:30:36.306410Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# The `h` column is mapped to the `height` aesthetic and used with the identity stat.\n", "precomputed_ridges = geom_area_ridges(\n", "    aes(\"x\", \"y\", height=\"h\"),\n", "    stat=\"identity\",\n", "    color=\"black\",\n", "    fill=\"#3182bd\",\n", "    min_height=-.4,\n", ")\n", "ggplot(df) + precomputed_ridges" ] }, { "cell_type": "markdown", "id": "dfff5e9b", "metadata": {}, "source": [ "### `trim` and `tails_cutoff` parameters" ] }, { "cell_type": "code", "execution_count": 7, "id": "ae081703", "metadata": { "execution": { "iopub.execute_input": "2024-04-17T07:30:36.307746Z", "iopub.status.busy": "2024-04-17T07:30:36.307594Z", "iopub.status.idle": "2024-04-17T07:30:36.374821Z", "shell.execute_reply": "2024-04-17T07:30:36.374538Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Panel title -> ridge options. Data and mapping are shared by all panels,\n", "# so only the options that differ are spelled out.\n", "trim_variants = {\n", "    \"Default: trim=False, tails_cutoff=None\": dict(trim=False, tails_cutoff=None),\n", "    \"trim=False, tails_cutoff=0\": dict(trim=False, tails_cutoff=0),\n", "    \"trim=False, tails_cutoff=3\": dict(trim=False, tails_cutoff=3),\n", "    \"trim=True\": dict(trim=True),\n", "}\n", "width, height = 400, 300  # size of a single panel\n", "\n", "# gggrid() replaces the hand-computed GGBunch offsets: four panels, two per row.\n", "gggrid([\n", "    ggplot(iris_df, aes(\"sepal_length\", \"species\"))\n", "    + geom_area_ridges(**ridge_options)\n", "    + ggtitle(title)\n", "    for title, ridge_options in trim_variants.items()\n", "], ncol=2) + ggsize(2 * width, 2 * height)" ] }, { "cell_type": "markdown", "id": "66e7aa25", "metadata": {}, "source": [ "### `scale` parameter" ] }, { "cell_type": "code", "execution_count": 8, "id": "d5958892", "metadata": { "execution": { "iopub.execute_input": "2024-04-17T07:30:36.376041Z", "iopub.status.busy": "2024-04-17T07:30:36.375929Z", "iopub.status.idle": "2024-04-17T07:30:36.394353Z", "shell.execute_reply": "2024-04-17T07:30:36.394153Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Same data and mapping as the default plot; only `scale` is set explicitly.\n", "(\n", "    ggplot(iris_df, aes(\"sepal_length\", \"species\"))\n", "    + geom_area_ridges(scale=1.5)\n", ")" ] }, { "cell_type": "markdown", "id": "e4517238", "metadata": {}, "source": [ "### Quantiles" ] }, { "cell_type": "code", "execution_count": 9, "id": "e7e17610", "metadata": { "execution": { "iopub.execute_input": "2024-04-17T07:30:36.395501Z", "iopub.status.busy": "2024-04-17T07:30:36.395353Z", "iopub.status.idle": "2024-04-17T07:30:36.462225Z", "shell.execute_reply": "2024-04-17T07:30:36.461949Z" }, "scrolled": false }, "outputs": [ { "data": { "text/html": [ "
\n", " " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "quantiles = [.05, .25, .5, .75, .95]\n", "width, height = 400, 300  # size of a single panel\n", "\n", "# All four panels share the data and the x/y mapping.\n", "base_plot = ggplot(iris_df, aes(\"sepal_length\", \"species\"))\n", "# Extra mapping used by the bottom row: fill taken from the computed `..quantile..` variable.\n", "fill_by_quantile = aes(fill=\"..quantile..\")\n", "\n", "# Top row: default fill; bottom row: fill by quantile.\n", "# Left column: no quantile lines; right column: quantile_lines=True.\n", "# gggrid() replaces the hand-computed GGBunch offsets.\n", "gggrid([\n", "    base_plot + geom_area_ridges(quantiles=quantiles, color=\"black\"),\n", "    base_plot + geom_area_ridges(quantiles=quantiles, quantile_lines=True, color=\"black\"),\n", "    base_plot + geom_area_ridges(fill_by_quantile, quantiles=quantiles,\n", "                                 color=\"black\", show_legend=False),\n", "    base_plot + geom_area_ridges(fill_by_quantile, quantiles=quantiles, quantile_lines=True,\n", "                                 color=\"black\", show_legend=False),\n", "], ncol=2) + ggsize(2 * width, 2 * height)" ] }, { "cell_type": "markdown", "id": "07ad0b8d", "metadata": {}, "source": [ "### Other" ] }, { "cell_type": "code", "execution_count": 10, "id": "2c960426", "metadata": { "execution": { "iopub.execute_input": "2024-04-17T07:30:36.463528Z", "iopub.status.busy": "2024-04-17T07:30:36.463335Z", "iopub.status.idle": "2024-04-17T07:30:36.482302Z", "shell.execute_reply": "2024-04-17T07:30:36.482081Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Tooltip content: three labelled lines, with `..density..` formatted as \".2f\".\n", "ridge_tooltips = (\n", "    layer_tooltips()\n", "    .line(\"height|@..height..\")\n", "    .format(\"@..density..\", \".2f\")\n", "    .line(\"density|@..density..\")\n", "    .line(\"quantile|@..quantile..\")\n", ")\n", "\n", "(\n", "    ggplot(iris_df, aes(\"sepal_length\", \"species\"))\n", "    + geom_area_ridges(\n", "        kernel=\"triangular\",\n", "        adjust=.8,\n", "        quantile_lines=True,\n", "        trim=\"all\",\n", "        color=\"#993404\",\n", "        fill=\"#fe9929\",\n", "        tooltips=ridge_tooltips,\n", "    )\n", ")" ] }, { "cell_type": "code", "execution_count": 11, "id": "aeabe120", "metadata": { "execution": { "iopub.execute_input": "2024-04-17T07:30:36.483400Z", "iopub.status.busy": "2024-04-17T07:30:36.483265Z", "iopub.status.idle": "2024-04-17T07:30:36.517143Z", "shell.execute_reply": "2024-04-17T07:30:36.516942Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# `year` is treated as discrete on the y axis; facets and fill both use `drv`.\n", "(\n", "    ggplot(mpg_df, aes(\"hwy\", as_discrete(\"year\"), fill=\"drv\"))\n", "    + geom_area_ridges(color=\"white\", alpha=.5)\n", "    + facet_grid(x=\"drv\")\n", "    + theme_bw()\n", "    + flavor_darcula()\n", ")" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.13" } }, "nbformat": 4, "nbformat_minor": 5 }