{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Dotplot Geometry" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Preparation" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
\n", " \n", " " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "import numpy as np\n", "import pandas as pd\n", "\n", "from lets_plot import *\n", "from lets_plot.mapping import as_discrete\n", "LetsPlot.setup_html()" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "DEF_BIN_COUNT = 10" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "def plot_matrix(plots=None, width=400, height=300, columns=2):\n", "    \"\"\"Place plots on a grid of equally sized cells and show the bunch.\n", "\n", "    plots   -- sequence of plots, laid out left to right, then top to bottom;\n", "               None is treated as an empty sequence\n", "    width   -- width of one grid cell\n", "    height  -- height of one grid cell\n", "    columns -- number of cells per row\n", "\n", "    Returns the result of GGBunch.show().\n", "    \"\"\"\n", "    # None replaces the former [] default so no mutable object is shared between calls.\n", "    if plots is None:\n", "        plots = []\n", "    bunch = GGBunch()\n", "    for i, plot in enumerate(plots):\n", "        # Integer-only arithmetic: quotient is the grid row, remainder the grid column.\n", "        row, column = divmod(i, columns)\n", "        bunch.add_plot(plot, column * width, row * height, width, height)\n", "    return bunch.show()" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
modelmpgcyldisphpdratwtqsecvsamgearcarb
0Mazda RX421.06160.01103.902.62016.460144
1Mazda RX4 Wag21.06160.01103.902.87517.020144
2Datsun 71022.84108.0933.852.32018.611141
3Hornet 4 Drive21.46258.01103.083.21519.441031
4Hornet Sportabout18.78360.01753.153.44017.020032
\n", "
" ], "text/plain": [ " model mpg cyl disp hp drat wt qsec vs am gear \\\n", "0 Mazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 \n", "1 Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 \n", "2 Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 \n", "3 Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 \n", "4 Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 \n", "\n", " carb \n", "0 4 \n", "1 4 \n", "2 1 \n", "3 1 \n", "4 2 " ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "mtcars_df = pd.read_csv(\"https://gist.githubusercontent.com/seankross/a412dfbd88b3db70b74b/raw/5f23f993cd87c283ce766e7ac6b329ee7cc2e1d1/mtcars.csv\")\n", "mtcars_df.head()" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "mpg_binwidth = (mtcars_df.mpg.max() - mtcars_df.mpg.min()) / DEF_BIN_COUNT" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
countmpgbinwidth
0210.402.35
1412.752.35
2615.102.35
3617.452.35
4519.802.35
\n", "
" ], "text/plain": [ " count mpg binwidth\n", "0 2 10.40 2.35\n", "1 4 12.75 2.35\n", "2 6 15.10 2.35\n", "3 6 17.45 2.35\n", "4 5 19.80 2.35" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "mpg_df = pd.DataFrame(list(zip(*np.histogram(mtcars_df.mpg, bins=DEF_BIN_COUNT))), columns=[\"count\", \"mpg\"])\n", "mpg_df['binwidth'] = [mpg_binwidth] * mpg_df.shape[0]\n", "mpg_df.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Minimalistic example" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ggplot(mtcars_df, aes(x=\"mpg\")) + geom_dotplot() + ggtitle(\"Simplest example\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Comparison of geoms" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "PACIFIC_BLUE = '#118ed8'" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ggplot(mtcars_df, aes(x=\"mpg\")) + \\\n", " geom_area(stat='density', fill=PACIFIC_BLUE) + \\\n", " geom_dotplot(binwidth=mpg_binwidth, color=PACIFIC_BLUE, fill='white') + \\\n", " ggtitle(\"density + dotplot\")" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ggplot(mtcars_df, aes(x=\"mpg\")) + \\\n", " geom_histogram(binwidth=mpg_binwidth, color='white') + \\\n", " geom_dotplot(binwidth=mpg_binwidth, method='histodot', color=PACIFIC_BLUE, fill='white') + \\\n", " coord_fixed(ratio=mpg_binwidth) + \\\n", " ggtitle(\"histogram + dotplot\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Parameters" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### `stackdir`" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "p = ggplot(mtcars_df, aes(x=\"mpg\"))\n", "p1 = p + geom_dotplot(binwidth=mpg_binwidth, stackdir='up') + ggtitle(\"stackdir='up' (default)\")\n", "p2 = p + geom_dotplot(binwidth=mpg_binwidth, stackdir='down') + ggtitle(\"stackdir='down'\")\n", "p3 = p + geom_dotplot(binwidth=mpg_binwidth, stackdir='center') + ggtitle(\"stackdir='center'\")\n", "p4 = p + geom_dotplot(binwidth=mpg_binwidth, stackdir='centerwhole') + ggtitle(\"stackdir='centerwhole'\")\n", "\n", "plot_matrix([p1, p2, p3, p4])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### `stackratio`" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "p = ggplot(mtcars_df, aes(x=\"mpg\"))\n", "p1 = p + geom_dotplot(binwidth=mpg_binwidth, stackratio=1.0) + ggtitle(\"stackratio=1.0 (default)\")\n", "p2 = p + geom_dotplot(binwidth=mpg_binwidth, stackratio=0.5) + ggtitle(\"stackratio=0.5\")\n", "p3 = p + geom_dotplot(binwidth=mpg_binwidth, stackratio=1.5) + ggtitle(\"stackratio=1.5\")\n", "\n", "plot_matrix([p1, p2, p3])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### `dotsize`" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "p = ggplot(mtcars_df, aes(x=\"mpg\"))\n", "p1 = p + geom_dotplot(binwidth=mpg_binwidth, dotsize=1.0) + ggtitle(\"dotsize=1.0 (default)\")\n", "p2 = p + geom_dotplot(binwidth=mpg_binwidth, dotsize=0.5) + ggtitle(\"dotsize=0.5\")\n", "p3 = p + geom_dotplot(binwidth=mpg_binwidth, dotsize=1.5) + ggtitle(\"dotsize=1.5\")\n", "\n", "plot_matrix([p1, p2, p3])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### `center`" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "p = ggplot(mtcars_df, aes(x=\"mpg\"))\n", "p1 = p + geom_dotplot(binwidth=mpg_binwidth, method='histodot') + ggtitle(\"Default\")\n", "p2 = p + geom_dotplot(binwidth=mpg_binwidth, method='histodot', center=11.0) + ggtitle(\"center=11.0\")\n", "\n", "plot_matrix([p1, p2])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### `boundary`" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "p = ggplot(mtcars_df, aes(x=\"mpg\"))\n", "p1 = p + geom_dotplot(binwidth=mpg_binwidth, method='histodot') + ggtitle(\"Default\")\n", "p2 = p + geom_dotplot(binwidth=mpg_binwidth, method='histodot', boundary=10.0) + ggtitle(\"boundary=10.0\")\n", "\n", "plot_matrix([p1, p2])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### `bins`" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "p = ggplot(mtcars_df, aes(x=\"mpg\"))\n", "p1 = p + geom_dotplot(method='histodot') + ggtitle(\"Default\")\n", "p2 = p + geom_dotplot(method='histodot', bins=20) + ggtitle(\"bins=20\")\n", "\n", "plot_matrix([p1, p2])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Grouping" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ggplot(mtcars_df, aes(x=\"mpg\")) + \\\n", " geom_dotplot(aes(fill=as_discrete(\"cyl\")), \\\n", " binwidth=mpg_binwidth, color='black') + \\\n", " ggtitle(\"method='dotdensity'\")" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "p = ggplot(mtcars_df, aes(x=\"mpg\"))\n", "# 'dotdensity' is the default method, so '(default)' in the first title refers to stackgroups only.\n", "p1 = p + geom_dotplot(aes(fill=as_discrete(\"cyl\")), method='histodot', \\\n", "                      binwidth=mpg_binwidth, color='black') + \\\n", "    ggtitle(\"method='histodot', stackgroups=False (default)\")\n", "p2 = p + geom_dotplot(aes(fill=as_discrete(\"cyl\")), method='histodot', \\\n", "                      binwidth=mpg_binwidth, stackgroups=True, color='black') + \\\n", "    ggtitle(\"method='histodot' and stackgroups=True\")\n", "\n", "plot_matrix([p1, p2])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Tooltips" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ggplot(mtcars_df, aes(x=\"mpg\")) + \\\n", " geom_dotplot(binwidth=mpg_binwidth, \\\n", " tooltips=layer_tooltips().line(\"Stack center|^x\")\\\n", " .line(\"Number of dots in stack|@..count..\")\\\n", " .line(\"Dot diameter|@..binwidth..\")) + \\\n", " ggtitle(\"With tooltips\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Facetting" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ggplot(mtcars_df, aes(x=\"mpg\")) + \\\n", " geom_dotplot(aes(fill=as_discrete(\"cyl\")), \\\n", " binwidth=mpg_binwidth, color='black') + \\\n", " facet_grid(x=\"cyl\") + \\\n", " ggtitle(\"facet_grid()\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Coordinates" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Fixed" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ggplot(mtcars_df, aes(x=\"mpg\")) + \\\n", " geom_dotplot(binwidth=mpg_binwidth) + \\\n", " coord_fixed(ratio=mpg_binwidth) + \\\n", " ggtitle(\"Fixed coordinates\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Flip" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ggplot(mtcars_df, aes(x=\"mpg\")) + \\\n", " geom_dotplot(binwidth=mpg_binwidth) + \\\n", " coord_flip() + \\\n", " ggtitle(\"Flip coordinates\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## \"identity\" statistic" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ggplot(mpg_df, aes(x=\"mpg\", stacksize=\"count\", binwidth=\"binwidth\")) + \\\n", " geom_dotplot(stat='identity') + \\\n", " ggtitle(\"stat='identity'\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Additional layers" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ggplot(mtcars_df, aes(x=\"mpg\")) + \\\n", " geom_dotplot(aes(fill=as_discrete(\"vs\")), method='histodot', \\\n", " bins=9, stackdir='center', \\\n", " stackratio=0.8, dotsize=0.8, \\\n", " color='black', alpha=.5, size=1) + \\\n", " scale_fill_brewer(type='qual', palette='Set1') + \\\n", " theme_grey() + \\\n", " ggtitle(\"Some additional aesthetics, parameters and layers\")" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.10" } }, "nbformat": 4, "nbformat_minor": 4 }