{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Dotplot Geometry"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Preparation"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" "
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"\n",
"from lets_plot import *\n",
"from lets_plot.mapping import as_discrete\n",
"LetsPlot.setup_html()"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"DEF_BIN_COUNT = 10"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"def plot_matrix(plots=None, width=400, height=300, columns=2):\n",
"    \"\"\"Lay out plots on a grid, filling it row by row, and show the result.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    plots : iterable of plots, optional\n",
"        Plots to arrange, in display order. ``None`` (the default) means no plots.\n",
"    width, height : number\n",
"        Size given to every plot; also the step between neighbouring grid cells.\n",
"    columns : int\n",
"        Number of plots per row.\n",
"\n",
"    Returns\n",
"    -------\n",
"    The value returned by ``GGBunch.show()``.\n",
"    \"\"\"\n",
"    # A ``None`` sentinel replaces the former mutable ``[]`` default argument.\n",
"    if plots is None:\n",
"        plots = []\n",
"    bunch = GGBunch()\n",
"    for i, plot in enumerate(plots):\n",
"        # Integer divmod keeps the row index exact (no float round trip).\n",
"        row, column = divmod(i, columns)\n",
"        bunch.add_plot(plot, column * width, row * height, width, height)\n",
"    return bunch.show()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" model | \n",
" mpg | \n",
" cyl | \n",
" disp | \n",
" hp | \n",
" drat | \n",
" wt | \n",
" qsec | \n",
" vs | \n",
" am | \n",
" gear | \n",
" carb | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" Mazda RX4 | \n",
" 21.0 | \n",
" 6 | \n",
" 160.0 | \n",
" 110 | \n",
" 3.90 | \n",
" 2.620 | \n",
" 16.46 | \n",
" 0 | \n",
" 1 | \n",
" 4 | \n",
" 4 | \n",
"
\n",
" \n",
" 1 | \n",
" Mazda RX4 Wag | \n",
" 21.0 | \n",
" 6 | \n",
" 160.0 | \n",
" 110 | \n",
" 3.90 | \n",
" 2.875 | \n",
" 17.02 | \n",
" 0 | \n",
" 1 | \n",
" 4 | \n",
" 4 | \n",
"
\n",
" \n",
" 2 | \n",
" Datsun 710 | \n",
" 22.8 | \n",
" 4 | \n",
" 108.0 | \n",
" 93 | \n",
" 3.85 | \n",
" 2.320 | \n",
" 18.61 | \n",
" 1 | \n",
" 1 | \n",
" 4 | \n",
" 1 | \n",
"
\n",
" \n",
" 3 | \n",
" Hornet 4 Drive | \n",
" 21.4 | \n",
" 6 | \n",
" 258.0 | \n",
" 110 | \n",
" 3.08 | \n",
" 3.215 | \n",
" 19.44 | \n",
" 1 | \n",
" 0 | \n",
" 3 | \n",
" 1 | \n",
"
\n",
" \n",
" 4 | \n",
" Hornet Sportabout | \n",
" 18.7 | \n",
" 8 | \n",
" 360.0 | \n",
" 175 | \n",
" 3.15 | \n",
" 3.440 | \n",
" 17.02 | \n",
" 0 | \n",
" 0 | \n",
" 3 | \n",
" 2 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" model mpg cyl disp hp drat wt qsec vs am gear \\\n",
"0 Mazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 \n",
"1 Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 \n",
"2 Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 \n",
"3 Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 \n",
"4 Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 \n",
"\n",
" carb \n",
"0 4 \n",
"1 4 \n",
"2 1 \n",
"3 1 \n",
"4 2 "
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Source of the mtcars table, pinned to a specific gist revision.\n",
"MTCARS_CSV_URL = \"https://gist.githubusercontent.com/seankross/a412dfbd88b3db70b74b/raw/5f23f993cd87c283ce766e7ac6b329ee7cc2e1d1/mtcars.csv\"\n",
"\n",
"mtcars_df = pd.read_csv(MTCARS_CSV_URL)\n",
"mtcars_df.head()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"mpg_binwidth = (mtcars_df.mpg.max() - mtcars_df.mpg.min()) / DEF_BIN_COUNT"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" count | \n",
" mpg | \n",
" binwidth | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 2 | \n",
" 10.40 | \n",
" 2.35 | \n",
"
\n",
" \n",
" 1 | \n",
" 4 | \n",
" 12.75 | \n",
" 2.35 | \n",
"
\n",
" \n",
" 2 | \n",
" 6 | \n",
" 15.10 | \n",
" 2.35 | \n",
"
\n",
" \n",
" 3 | \n",
" 6 | \n",
" 17.45 | \n",
" 2.35 | \n",
"
\n",
" \n",
" 4 | \n",
" 5 | \n",
" 19.80 | \n",
" 2.35 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" count mpg binwidth\n",
"0 2 10.40 2.35\n",
"1 4 12.75 2.35\n",
"2 6 15.10 2.35\n",
"3 6 17.45 2.35\n",
"4 5 19.80 2.35"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# np.histogram returns one more edge than counts; dropping the last edge pairs every\n",
"# count with the LEFT edge of its bin, so the `mpg` column holds left edges, not centers.\n",
"bin_counts, bin_edges = np.histogram(mtcars_df.mpg, bins=DEF_BIN_COUNT)\n",
"mpg_df = pd.DataFrame({\n",
"    \"count\": bin_counts,\n",
"    \"mpg\": bin_edges[:-1],\n",
"    \"binwidth\": mpg_binwidth,\n",
"})\n",
"mpg_df.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Minimalistic example"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
" \n",
" "
],
"text/plain": [
""
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Dotplot of mpg with every geom_dotplot parameter left at its default.\n",
"(\n",
"    ggplot(mtcars_df, aes(x=\"mpg\"))\n",
"    + geom_dotplot()\n",
"    + ggtitle(\"Simplest example\")\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Comparison of geoms"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"PACIFIC_BLUE = '#118ed8'"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
" \n",
" "
],
"text/plain": [
""
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Density area drawn first, with the dotplot layered on top of it.\n",
"(\n",
"    ggplot(mtcars_df, aes(x=\"mpg\"))\n",
"    + geom_area(stat='density', fill=PACIFIC_BLUE)\n",
"    + geom_dotplot(binwidth=mpg_binwidth, color=PACIFIC_BLUE, fill='white')\n",
"    + ggtitle(\"density + dotplot\")\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
" \n",
" "
],
"text/plain": [
""
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Histogram and histodot dotplot share the same binwidth; the fixed aspect ratio\n",
"# is set to that binwidth as well.\n",
"(\n",
"    ggplot(mtcars_df, aes(x=\"mpg\"))\n",
"    + geom_histogram(binwidth=mpg_binwidth, color='white')\n",
"    + geom_dotplot(binwidth=mpg_binwidth, method='histodot', color=PACIFIC_BLUE, fill='white')\n",
"    + coord_fixed(ratio=mpg_binwidth)\n",
"    + ggtitle(\"histogram + dotplot\")\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Parameters"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### `stackdir`"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
" \n",
" "
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"p = ggplot(mtcars_df, aes(x=\"mpg\"))\n",
"\n",
"# One (stackdir value, panel title) pair per panel, in display order.\n",
"stackdir_cases = [\n",
"    ('up', \"stackdir='up' (default)\"),\n",
"    ('down', \"stackdir='down'\"),\n",
"    ('center', \"stackdir='center'\"),\n",
"    ('centerwhole', \"stackdir='centerwhole'\"),\n",
"]\n",
"\n",
"plot_matrix([\n",
"    p + geom_dotplot(binwidth=mpg_binwidth, stackdir=direction) + ggtitle(title)\n",
"    for direction, title in stackdir_cases\n",
"])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### `stackratio`"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
" \n",
" "
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"p = ggplot(mtcars_df, aes(x=\"mpg\"))\n",
"\n",
"# One (stackratio value, panel title) pair per panel, in display order.\n",
"stackratio_cases = [\n",
"    (1.0, \"stackratio=1.0 (default)\"),\n",
"    (0.5, \"stackratio=0.5\"),\n",
"    (1.5, \"stackratio=1.5\"),\n",
"]\n",
"\n",
"plot_matrix([\n",
"    p + geom_dotplot(binwidth=mpg_binwidth, stackratio=ratio) + ggtitle(title)\n",
"    for ratio, title in stackratio_cases\n",
"])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### `dotsize`"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
" \n",
" "
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"p = ggplot(mtcars_df, aes(x=\"mpg\"))\n",
"\n",
"# One (dotsize value, panel title) pair per panel, in display order.\n",
"dotsize_cases = [\n",
"    (1.0, \"dotsize=1.0 (default)\"),\n",
"    (0.5, \"dotsize=0.5\"),\n",
"    (1.5, \"dotsize=1.5\"),\n",
"]\n",
"\n",
"plot_matrix([\n",
"    p + geom_dotplot(binwidth=mpg_binwidth, dotsize=size) + ggtitle(title)\n",
"    for size, title in dotsize_cases\n",
"])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### `center`"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
" \n",
" "
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"p = ggplot(mtcars_df, aes(x=\"mpg\"))\n",
"\n",
"# The same histodot layer twice: once without `center`, once with center=11.0.\n",
"plot_matrix([\n",
"    p + geom_dotplot(binwidth=mpg_binwidth, method='histodot') + ggtitle(\"Default\"),\n",
"    p + geom_dotplot(binwidth=mpg_binwidth, method='histodot', center=11.0) + ggtitle(\"center=11.0\"),\n",
"])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### `boundary`"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
" \n",
" "
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"p = ggplot(mtcars_df, aes(x=\"mpg\"))\n",
"\n",
"# The same histodot layer twice: once without `boundary`, once with boundary=10.0.\n",
"plot_matrix([\n",
"    p + geom_dotplot(binwidth=mpg_binwidth, method='histodot') + ggtitle(\"Default\"),\n",
"    p + geom_dotplot(binwidth=mpg_binwidth, method='histodot', boundary=10.0) + ggtitle(\"boundary=10.0\"),\n",
"])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### `bins`"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
" \n",
" "
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"p = ggplot(mtcars_df, aes(x=\"mpg\"))\n",
"\n",
"# No explicit binwidth here: the panels differ only in whether `bins` is passed.\n",
"plot_matrix([\n",
"    p + geom_dotplot(method='histodot') + ggtitle(\"Default\"),\n",
"    p + geom_dotplot(method='histodot', bins=20) + ggtitle(\"bins=20\"),\n",
"])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Grouping"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
" \n",
" "
],
"text/plain": [
""
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Dots are filled by cylinder count, with `cyl` treated as a discrete variable.\n",
"(\n",
"    ggplot(mtcars_df, aes(x=\"mpg\"))\n",
"    + geom_dotplot(\n",
"        aes(fill=as_discrete(\"cyl\")),\n",
"        binwidth=mpg_binwidth,\n",
"        color='black',\n",
"    )\n",
"    + ggtitle(\"method='dotdensity'\")\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
" \n",
" "
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"p = ggplot(mtcars_df, aes(x=\"mpg\"))\n",
"\n",
"# Both panels use the same fill mapping and binning; they differ only in `stackgroups`.\n",
"# The left title marks stackgroups=False as the default. 'histodot' is requested\n",
"# explicitly in both panels and is not geom_dotplot's default method, so labelling\n",
"# the method itself \"(default)\" was misleading.\n",
"p1 = (\n",
"    p\n",
"    + geom_dotplot(aes(fill=as_discrete(\"cyl\")), method='histodot',\n",
"                   binwidth=mpg_binwidth, color='black')\n",
"    + ggtitle(\"method='histodot', stackgroups=False (default)\")\n",
")\n",
"p2 = (\n",
"    p\n",
"    + geom_dotplot(aes(fill=as_discrete(\"cyl\")), method='histodot',\n",
"                   binwidth=mpg_binwidth, stackgroups=True, color='black')\n",
"    + ggtitle(\"method='histodot' and stackgroups=True\")\n",
")\n",
"\n",
"plot_matrix([p1, p2])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Tooltips"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
" \n",
" "
],
"text/plain": [
""
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Tooltip content for the dotplot layer, built separately to keep the plot expression short.\n",
"dot_tooltips = (\n",
"    layer_tooltips()\n",
"    .line(\"Stack center|^x\")\n",
"    .line(\"Number of dots in stack|@..count..\")\n",
"    .line(\"Dot diameter|@..binwidth..\")\n",
")\n",
"\n",
"(\n",
"    ggplot(mtcars_df, aes(x=\"mpg\"))\n",
"    + geom_dotplot(binwidth=mpg_binwidth, tooltips=dot_tooltips)\n",
"    + ggtitle(\"With tooltips\")\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Faceting"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
" \n",
" "
],
"text/plain": [
""
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# `cyl` drives both the fill color and the facet columns.\n",
"(\n",
"    ggplot(mtcars_df, aes(x=\"mpg\"))\n",
"    + geom_dotplot(\n",
"        aes(fill=as_discrete(\"cyl\")),\n",
"        binwidth=mpg_binwidth,\n",
"        color='black',\n",
"    )\n",
"    + facet_grid(x=\"cyl\")\n",
"    + ggtitle(\"facet_grid()\")\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Coordinates"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Fixed"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
" \n",
" "
],
"text/plain": [
""
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# The fixed aspect ratio is set to the same value as the dot binwidth.\n",
"(\n",
"    ggplot(mtcars_df, aes(x=\"mpg\"))\n",
"    + geom_dotplot(binwidth=mpg_binwidth)\n",
"    + coord_fixed(ratio=mpg_binwidth)\n",
"    + ggtitle(\"Fixed coordinates\")\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Flip"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
" \n",
" "
],
"text/plain": [
""
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Same dotplot as in the fixed-coordinates example, with coord_flip() applied instead.\n",
"(\n",
"    ggplot(mtcars_df, aes(x=\"mpg\"))\n",
"    + geom_dotplot(binwidth=mpg_binwidth)\n",
"    + coord_flip()\n",
"    + ggtitle(\"Flip coordinates\")\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## \"identity\" statistic"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
" \n",
" "
],
"text/plain": [
""
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Uses the pre-binned mpg_df: stack sizes and binwidth come from its columns,\n",
"# with stat='identity' passed to the layer.\n",
"(\n",
"    ggplot(mpg_df, aes(x=\"mpg\", stacksize=\"count\", binwidth=\"binwidth\"))\n",
"    + geom_dotplot(stat='identity')\n",
"    + ggtitle(\"stat='identity'\")\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Additional layers"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
" \n",
" "
],
"text/plain": [
""
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Combines several dotplot parameters with a fill scale and a theme in one plot.\n",
"(\n",
"    ggplot(mtcars_df, aes(x=\"mpg\"))\n",
"    + geom_dotplot(\n",
"        aes(fill=as_discrete(\"vs\")),\n",
"        method='histodot',\n",
"        bins=9,\n",
"        stackdir='center',\n",
"        stackratio=0.8,\n",
"        dotsize=0.8,\n",
"        color='black',\n",
"        alpha=.5,\n",
"        size=1,\n",
"    )\n",
"    + scale_fill_brewer(type='qual', palette='Set1')\n",
"    + theme_grey()\n",
"    + ggtitle(\"Some additional aesthetics, parameters and layers\")\n",
")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.10"
}
},
"nbformat": 4,
"nbformat_minor": 4
}