{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"execution": {
"iopub.execute_input": "2025-11-05T13:44:08.427565Z",
"iopub.status.busy": "2025-11-05T13:44:08.427462Z",
"iopub.status.idle": "2025-11-05T13:44:08.430675Z",
"shell.execute_reply": "2025-11-05T13:44:08.430483Z"
}
},
"outputs": [],
"source": [
"from lets_plot import *"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"execution": {
"iopub.execute_input": "2025-11-05T13:44:08.431337Z",
"iopub.status.busy": "2025-11-05T13:44:08.431264Z",
"iopub.status.idle": "2025-11-05T13:44:08.432951Z",
"shell.execute_reply": "2025-11-05T13:44:08.432777Z"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" "
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"LetsPlot.setup_html()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"execution": {
"iopub.execute_input": "2025-11-05T13:44:08.446644Z",
"iopub.status.busy": "2025-11-05T13:44:08.446523Z",
"iopub.status.idle": "2025-11-05T13:44:08.451656Z",
"shell.execute_reply": "2025-11-05T13:44:08.451453Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(400, 2)\n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" cond | \n",
" rating | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" A | \n",
" -1.085631 | \n",
"
\n",
" \n",
" | 1 | \n",
" A | \n",
" 0.997345 | \n",
"
\n",
" \n",
" | 2 | \n",
" A | \n",
" 0.282978 | \n",
"
\n",
" \n",
" | 3 | \n",
" A | \n",
" -1.506295 | \n",
"
\n",
" \n",
" | 4 | \n",
" A | \n",
" -0.578600 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" cond rating\n",
"0 A -1.085631\n",
"1 A 0.997345\n",
"2 A 0.282978\n",
"3 A -1.506295\n",
"4 A -0.578600"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# This example was found at: www.cookbook-r.com/Graphs/Plotting_distributions_(ggplot2)\n",
"def get_data():\n",
" import numpy as np\n",
" import pandas as pd\n",
"\n",
" np.random.seed(123)\n",
"\n",
" return pd.DataFrame(dict(\n",
" cond=np.repeat([\"A\", \"B\"], 200),\n",
" rating=np.concatenate((np.random.normal(0, 1, 200), np.random.normal(.8, 1, 200)))\n",
" ))\n",
"\n",
"df = get_data()\n",
"print(df.shape)\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"execution": {
"iopub.execute_input": "2025-11-05T13:44:08.452450Z",
"iopub.status.busy": "2025-11-05T13:44:08.452377Z",
"iopub.status.idle": "2025-11-05T13:44:08.481198Z",
"shell.execute_reply": "2025-11-05T13:44:08.480992Z"
}
},
"outputs": [
{
"data": {
"text/html": [
" \n",
" "
],
"text/plain": [
""
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Basic histogram of \"rating\"\n",
"p = ggplot(df, aes(x=\"rating\")) + ggsize(500, 250)\n",
"p + geom_histogram(binwidth=.5)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"execution": {
"iopub.execute_input": "2025-11-05T13:44:08.482231Z",
"iopub.status.busy": "2025-11-05T13:44:08.482152Z",
"iopub.status.idle": "2025-11-05T13:44:08.496189Z",
"shell.execute_reply": "2025-11-05T13:44:08.496004Z"
}
},
"outputs": [
{
"data": {
"text/html": [
" \n",
" "
],
"text/plain": [
""
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Histogram overlaid with kernel density curve\n",
"# - histogram with density instead of count on y-axis\n",
"# - overlay with transparent density plot\n",
"p + \\\n",
" geom_histogram(aes(y='..density..'), binwidth=.5, colour=\"black\", fill=\"white\") + \\\n",
" geom_density(alpha=.2, color=\"#de2d26\", fill=\"#ff6666\")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"execution": {
"iopub.execute_input": "2025-11-05T13:44:08.497146Z",
"iopub.status.busy": "2025-11-05T13:44:08.497074Z",
"iopub.status.idle": "2025-11-05T13:44:08.500461Z",
"shell.execute_reply": "2025-11-05T13:44:08.500269Z"
}
},
"outputs": [
{
"data": {
"text/html": [
" \n",
" "
],
"text/plain": [
""
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"p + \\\n",
" geom_histogram(binwidth=.5, colour=\"black\", fill=\"white\") + \\\n",
" geom_vline(xintercept=df[\"rating\"].mean(), \\\n",
" color=\"red\", linetype='dashed', size=1)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Histogram and density plots with multiple groups"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"execution": {
"iopub.execute_input": "2025-11-05T13:44:08.501453Z",
"iopub.status.busy": "2025-11-05T13:44:08.501365Z",
"iopub.status.idle": "2025-11-05T13:44:08.504946Z",
"shell.execute_reply": "2025-11-05T13:44:08.504776Z"
}
},
"outputs": [
{
"data": {
"text/html": [
" \n",
" "
],
"text/plain": [
""
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"p1 = ggplot(df, aes(x=\"rating\", fill=\"cond\")) + ggsize(500, 250)\n",
"\n",
"# Default histogram (stacked)\n",
"p1 + geom_histogram(binwidth=.5)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"execution": {
"iopub.execute_input": "2025-11-05T13:44:08.506028Z",
"iopub.status.busy": "2025-11-05T13:44:08.505958Z",
"iopub.status.idle": "2025-11-05T13:44:08.509521Z",
"shell.execute_reply": "2025-11-05T13:44:08.509348Z"
}
},
"outputs": [
{
"data": {
"text/html": [
" \n",
" "
],
"text/plain": [
""
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Overlaid histograms\n",
"p1 + geom_histogram(binwidth=.5, alpha=.7, position=\"identity\")"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"execution": {
"iopub.execute_input": "2025-11-05T13:44:08.510625Z",
"iopub.status.busy": "2025-11-05T13:44:08.510554Z",
"iopub.status.idle": "2025-11-05T13:44:08.513572Z",
"shell.execute_reply": "2025-11-05T13:44:08.513399Z"
}
},
"outputs": [
{
"data": {
"text/html": [
" \n",
" "
],
"text/plain": [
""
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Interleaved histograms\n",
"p1 + geom_histogram(binwidth=.5, position=\"dodge\")"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"execution": {
"iopub.execute_input": "2025-11-05T13:44:08.514432Z",
"iopub.status.busy": "2025-11-05T13:44:08.514364Z",
"iopub.status.idle": "2025-11-05T13:44:08.530945Z",
"shell.execute_reply": "2025-11-05T13:44:08.530744Z"
}
},
"outputs": [
{
"data": {
"text/html": [
" \n",
" "
],
"text/plain": [
""
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Density plot\n",
"p2 = ggplot(df, aes(x=\"rating\", color=\"cond\")) + ggsize(500, 250)\n",
"p2 + geom_density()"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"execution": {
"iopub.execute_input": "2025-11-05T13:44:08.531897Z",
"iopub.status.busy": "2025-11-05T13:44:08.531818Z",
"iopub.status.idle": "2025-11-05T13:44:08.551047Z",
"shell.execute_reply": "2025-11-05T13:44:08.550854Z"
}
},
"outputs": [
{
"data": {
"text/html": [
" \n",
" "
],
"text/plain": [
""
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Density plot with semi-transparent fill\n",
"p2 + geom_density(aes(fill=\"cond\"), alpha=.7)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"execution": {
"iopub.execute_input": "2025-11-05T13:44:08.551990Z",
"iopub.status.busy": "2025-11-05T13:44:08.551914Z",
"iopub.status.idle": "2025-11-05T13:44:08.555466Z",
"shell.execute_reply": "2025-11-05T13:44:08.555273Z"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" cond | \n",
" rating | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" A | \n",
" 0.003787 | \n",
"
\n",
" \n",
" | 1 | \n",
" B | \n",
" 0.685638 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" cond rating\n",
"0 A 0.003787\n",
"1 B 0.685638"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Find the mean of each group\n",
"cdf = df.groupby([\"cond\"], as_index=False).mean()\n",
"cdf.head()"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"execution": {
"iopub.execute_input": "2025-11-05T13:44:08.556389Z",
"iopub.status.busy": "2025-11-05T13:44:08.556316Z",
"iopub.status.idle": "2025-11-05T13:44:08.561532Z",
"shell.execute_reply": "2025-11-05T13:44:08.561357Z"
}
},
"outputs": [
{
"data": {
"text/html": [
" \n",
" "
],
"text/plain": [
""
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Overlaid histograms with means\n",
"p2 + \\\n",
" geom_histogram(aes(fill=\"cond\"), alpha=.5, position=\"identity\", size=0) + \\\n",
" geom_vline(data=cdf, \\\n",
" mapping=aes(xintercept=\"rating\", color=\"cond\"), \\\n",
" linetype='dashed', size=1)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"execution": {
"iopub.execute_input": "2025-11-05T13:44:08.562435Z",
"iopub.status.busy": "2025-11-05T13:44:08.562364Z",
"iopub.status.idle": "2025-11-05T13:44:08.566143Z",
"shell.execute_reply": "2025-11-05T13:44:08.565974Z"
}
},
"outputs": [
{
"data": {
"text/html": [
" \n",
" "
],
"text/plain": [
""
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Use freqpoly instead of histogram\n",
"p2 + \\\n",
" geom_freqpoly(aes(fill=\"cond\")) + \\\n",
" geom_vline(data=cdf, \\\n",
" mapping=aes(xintercept=\"rating\", color=\"cond\"), \\\n",
" linetype='dashed', size=1)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"execution": {
"iopub.execute_input": "2025-11-05T13:44:08.567033Z",
"iopub.status.busy": "2025-11-05T13:44:08.566963Z",
"iopub.status.idle": "2025-11-05T13:44:08.585256Z",
"shell.execute_reply": "2025-11-05T13:44:08.585074Z"
}
},
"outputs": [
{
"data": {
"text/html": [
" \n",
" "
],
"text/plain": [
""
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Density plots with means\n",
"p2 + \\\n",
" geom_density() + \\\n",
" geom_vline(data=cdf, \\\n",
" mapping=aes(xintercept=\"rating\", color=\"cond\"), \\\n",
" linetype='dashed', size=1)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Using facets"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"execution": {
"iopub.execute_input": "2025-11-05T13:44:08.586350Z",
"iopub.status.busy": "2025-11-05T13:44:08.586278Z",
"iopub.status.idle": "2025-11-05T13:44:08.590878Z",
"shell.execute_reply": "2025-11-05T13:44:08.590697Z"
}
},
"outputs": [
{
"data": {
"text/html": [
" \n",
" "
],
"text/plain": [
""
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ggplot(df, aes(x=\"rating\")) + \\\n",
" geom_histogram(binwidth=.5, colour=\"black\", fill=\"white\") + \\\n",
" facet_grid(\"cond\")"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"execution": {
"iopub.execute_input": "2025-11-05T13:44:08.592007Z",
"iopub.status.busy": "2025-11-05T13:44:08.591928Z",
"iopub.status.idle": "2025-11-05T13:44:08.595958Z",
"shell.execute_reply": "2025-11-05T13:44:08.595779Z"
}
},
"outputs": [
{
"data": {
"text/html": [
" \n",
" "
],
"text/plain": [
""
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# With mean lines, using 'cdf' computed earlier.\n",
"ggplot(df, aes(x=\"rating\")) + \\\n",
" geom_histogram(binwidth=.5, colour=\"black\", fill=\"white\") + \\\n",
" geom_vline(data=cdf, \\\n",
" mapping=aes(xintercept=\"rating\"), \\\n",
" linetype='dashed', size=1, colour=\"red\") + \\\n",
" facet_grid(None, \"cond\") + \\\n",
" ggsize(500, 250)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Box plots"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"execution": {
"iopub.execute_input": "2025-11-05T13:44:08.597035Z",
"iopub.status.busy": "2025-11-05T13:44:08.596963Z",
"iopub.status.idle": "2025-11-05T13:44:08.600513Z",
"shell.execute_reply": "2025-11-05T13:44:08.600337Z"
}
},
"outputs": [
{
"data": {
"text/html": [
" \n",
" "
],
"text/plain": [
""
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# A basic box plot\n",
"p3 = ggplot(df, aes(x=\"cond\", y=\"rating\")) + ggsize(400, 300)\n",
"p3 + geom_boxplot()"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {
"execution": {
"iopub.execute_input": "2025-11-05T13:44:08.601541Z",
"iopub.status.busy": "2025-11-05T13:44:08.601466Z",
"iopub.status.idle": "2025-11-05T13:44:08.605530Z",
"shell.execute_reply": "2025-11-05T13:44:08.605354Z"
}
},
"outputs": [
{
"data": {
"text/html": [
" \n",
" "
],
"text/plain": [
""
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# A basic box plot with the conditions colored\n",
"p3 + geom_boxplot(aes(fill=\"cond\"))"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {
"execution": {
"iopub.execute_input": "2025-11-05T13:44:08.606572Z",
"iopub.status.busy": "2025-11-05T13:44:08.606495Z",
"iopub.status.idle": "2025-11-05T13:44:08.609963Z",
"shell.execute_reply": "2025-11-05T13:44:08.609776Z"
}
},
"outputs": [
{
"data": {
"text/html": [
" \n",
" "
],
"text/plain": [
""
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Style outliers\n",
"p3 + geom_boxplot(outlier_color=\"red\", outlier_shape=8, outlier_size=1.5)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.13"
}
},
"nbformat": 4,
"nbformat_minor": 4
}