{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "execution": { "iopub.execute_input": "2024-04-17T07:34:16.897834Z", "iopub.status.busy": "2024-04-17T07:34:16.897645Z", "iopub.status.idle": "2024-04-17T07:34:17.213649Z", "shell.execute_reply": "2024-04-17T07:34:17.213361Z" } }, "outputs": [ { "data": { "text/html": [ "\n", "
\n", " \n", " " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "import pandas as pd\n", "\n", "from lets_plot import *\n", "from lets_plot.mapping import as_discrete\n", "LetsPlot.setup_html()" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "execution": { "iopub.execute_input": "2024-04-17T07:34:17.227433Z", "iopub.status.busy": "2024-04-17T07:34:17.227233Z", "iopub.status.idle": "2024-04-17T07:34:17.228997Z", "shell.execute_reply": "2024-04-17T07:34:17.228743Z" } }, "outputs": [], "source": [ "# Quantile levels reused by several of the examples below.\n", "QUANTILES = [.25, .5, .75]" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "execution": { "iopub.execute_input": "2024-04-17T07:34:17.230095Z", "iopub.status.busy": "2024-04-17T07:34:17.229939Z", "iopub.status.idle": "2024-04-17T07:34:17.231761Z", "shell.execute_reply": "2024-04-17T07:34:17.231571Z" } }, "outputs": [], "source": [ "def plot_matrix(plots=None, width=400, height=300, columns=2):\n", "    \"\"\"Arrange `plots` on a grid, filling it row by row, and show the result.\n", "\n", "    Each plot is placed in a `width` x `height` cell and `columns` is the\n", "    number of cells per row. `plots=None` (the default) is treated as an\n", "    empty sequence, so no mutable default object is shared between calls.\n", "    Returns the value of `GGBunch.show()`.\n", "    \"\"\"\n", "    bunch = GGBunch()\n", "    for index, plot in enumerate(plots or ()):\n", "        # divmod gives the grid position in exact integer arithmetic.\n", "        row, column = divmod(index, columns)\n", "        bunch.add_plot(plot, column * width, row * height, width, height)\n", "    return bunch.show()" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "execution": { "iopub.execute_input": "2024-04-17T07:34:17.232850Z", "iopub.status.busy": "2024-04-17T07:34:17.232708Z", "iopub.status.idle": "2024-04-17T07:34:17.376172Z", "shell.execute_reply": "2024-04-17T07:34:17.375899Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Unnamed: 0manufacturermodeldisplyearcyltransdrvctyhwyflclass
01audia41.819994auto(l5)f1829pcompact
12audia41.819994manual(m5)f2129pcompact
23audia42.020084manual(m6)f2031pcompact
34audia42.020084auto(av)f2130pcompact
45audia42.819996auto(l5)f1626pcompact
\n", "
" ], "text/plain": [ " Unnamed: 0 manufacturer model displ year cyl trans drv cty hwy \\\n", "0 1 audi a4 1.8 1999 4 auto(l5) f 18 29 \n", "1 2 audi a4 1.8 1999 4 manual(m5) f 21 29 \n", "2 3 audi a4 2.0 2008 4 manual(m6) f 20 31 \n", "3 4 audi a4 2.0 2008 4 auto(av) f 21 30 \n", "4 5 audi a4 2.8 1999 6 auto(l5) f 16 26 \n", "\n", " fl class \n", "0 p compact \n", "1 p compact \n", "2 p compact \n", "3 p compact \n", "4 p compact " ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# The mpg dataset is read straight from the lets-plot-docs repository.\n", "mpg_df = pd.read_csv(\n", "    \"https://raw.githubusercontent.com/JetBrains/lets-plot-docs/master/data/mpg.csv\"\n", ")\n", "\n", "mpg_df.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Minimalistic example" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "execution": { "iopub.execute_input": "2024-04-17T07:34:17.377327Z", "iopub.status.busy": "2024-04-17T07:34:17.377173Z", "iopub.status.idle": "2024-04-17T07:34:17.418243Z", "shell.execute_reply": "2024-04-17T07:34:17.417943Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Only the `y` aesthetic is mapped; geom_violin() is called with no arguments.\n", "(\n", "    ggplot(mpg_df, aes(y='hwy'))\n", "    + geom_violin()\n", "    + ggtitle(\"Simplest example\")\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Comparison of geoms" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "execution": { "iopub.execute_input": "2024-04-17T07:34:17.419446Z", "iopub.status.busy": "2024-04-17T07:34:17.419341Z", "iopub.status.idle": "2024-04-17T07:34:17.457741Z", "shell.execute_reply": "2024-04-17T07:34:17.457542Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# The same `hwy` values drawn twice: flipped density curves faceted by `drv`\n", "# next to one violin per `drv`.\n", "p_d = (\n", "    ggplot(mpg_df)\n", "    + geom_density(aes(x='hwy', fill='drv'), color='black', alpha=.5)\n", "    + facet_grid(x='drv')\n", "    + coord_flip()\n", "    + ggtitle(\"geom_density()\")\n", ")\n", "p_v = (\n", "    ggplot(mpg_df, aes(x=as_discrete('drv', order=1), y='hwy'))\n", "    + geom_violin(aes(fill='drv'), alpha=.5)\n", "    + ggtitle(\"geom_violin()\")\n", ")\n", "\n", "plot_matrix([p_d, p_v])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Original parameters" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### `quantiles`" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "execution": { "iopub.execute_input": "2024-04-17T07:34:17.458871Z", "iopub.status.busy": "2024-04-17T07:34:17.458702Z", "iopub.status.idle": "2024-04-17T07:34:17.546149Z", "shell.execute_reply": "2024-04-17T07:34:17.545937Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# One plot per `quantiles` setting; each dict is handed to geom_violin()\n", "# as keyword arguments.\n", "tests = [\n", "    {'quantiles': None},\n", "    {'quantiles': []},\n", "    {'quantiles': [.05, .5, .95]},\n", "    {'quantiles': [.25]},\n", "    {'quantiles': [0, .5, 1]},\n", "]\n", "\n", "plot_matrix([\n", "    (\n", "        ggplot(mpg_df, aes('drv', 'hwy'))\n", "        + geom_violin(quantile_lines=True, **case)\n", "        + ggtitle(\"quantiles={0}\".format(case['quantiles']))\n", "    )\n", "    for case in tests\n", "])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### `scale`" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "execution": { "iopub.execute_input": "2024-04-17T07:34:17.548056Z", "iopub.status.busy": "2024-04-17T07:34:17.547940Z", "iopub.status.idle": "2024-04-17T07:34:17.621762Z", "shell.execute_reply": "2024-04-17T07:34:17.621544Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# One plot per `scale` setting; the quantile lines use the shared QUANTILES.\n", "tests = [\n", "    {'scale': None},\n", "    {'scale': 'area'},\n", "    {'scale': 'count'},\n", "    {'scale': 'width'},\n", "]\n", "\n", "plot_matrix([\n", "    (\n", "        ggplot(mpg_df, aes('drv', 'hwy'))\n", "        + geom_violin(quantiles=QUANTILES, quantile_lines=True, **case)\n", "        + ggtitle(\"scale={0}\".format(case['scale']))\n", "    )\n", "    for case in tests\n", "])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### `trim` and `tails_cutoff`" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "execution": { "iopub.execute_input": "2024-04-17T07:34:17.623253Z", "iopub.status.busy": "2024-04-17T07:34:17.623092Z", "iopub.status.idle": "2024-04-17T07:34:17.698082Z", "shell.execute_reply": "2024-04-17T07:34:17.697862Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# One plot per (`trim`, `tails_cutoff`) pair; each dict holds exactly the\n", "# keyword arguments given to geom_violin().\n", "tests = [\n", "    {'trim': True, 'tails_cutoff': None},\n", "    {'trim': False, 'tails_cutoff': 1},\n", "    {'trim': False, 'tails_cutoff': 2},\n", "    {'trim': False, 'tails_cutoff': 3},\n", "]\n", "\n", "plot_matrix([\n", "    (\n", "        ggplot(mpg_df, aes('drv', 'hwy'))\n", "        + geom_violin(**case)\n", "        + ggtitle(\"trim={0}, tails_cutoff={1}\".format(case['trim'], case['tails_cutoff']))\n", "    )\n", "    for case in tests\n", "])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### `show_half`" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "execution": { "iopub.execute_input": "2024-04-17T07:34:17.699554Z", "iopub.status.busy": "2024-04-17T07:34:17.699382Z", "iopub.status.idle": "2024-04-17T07:34:17.736295Z", "shell.execute_reply": "2024-04-17T07:34:17.736099Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Two violin layers share the x axis: `hwy` with show_half=-1 and `cty`\n", "# with show_half=1, each in its own fill colour.\n", "(\n", "    ggplot(mpg_df)\n", "    + geom_violin(aes(x='drv', y='hwy'), show_half=-1, fill=\"#66c2a5\")\n", "    + geom_violin(aes(x='drv', y='cty'), show_half=1, fill=\"#fc8d62\")\n", "    + ylab('hwy/cty')\n", "    + ggtitle(\"hwy is green and cty is orange\")\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Custom density parameters" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "execution": { "iopub.execute_input": "2024-04-17T07:34:17.737483Z", "iopub.status.busy": "2024-04-17T07:34:17.737357Z", "iopub.status.idle": "2024-04-17T07:34:17.814232Z", "shell.execute_reply": "2024-04-17T07:34:17.813925Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# A shared base plot; each variant changes one density-related argument.\n", "p = ggplot(mpg_df, aes('drv', 'hwy'))\n", "p_default = (\n", "    p\n", "    + geom_violin(quantiles=QUANTILES, quantile_lines=True)\n", "    + ggtitle(\"Default\")\n", ")\n", "p_kernel = (\n", "    p\n", "    + geom_violin(quantiles=QUANTILES, kernel='epanechikov', quantile_lines=True)\n", "    + ggtitle(\"kernel='epanechikov'\")\n", ")\n", "p_bw = (\n", "    p\n", "    + geom_violin(quantiles=QUANTILES, bw=.1, quantile_lines=True)\n", "    + ggtitle(\"bw=0.1\")\n", ")\n", "p_adjust = (\n", "    p\n", "    + geom_violin(quantiles=QUANTILES, adjust=2, quantile_lines=True)\n", "    + ggtitle(\"adjust=2\")\n", ")\n", "\n", "plot_matrix([p_default, p_kernel, p_bw, p_adjust])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Grouping and tooltips" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "execution": { "iopub.execute_input": "2024-04-17T07:34:17.815569Z", "iopub.status.busy": "2024-04-17T07:34:17.815466Z", "iopub.status.idle": "2024-04-17T07:34:17.862586Z", "shell.execute_reply": "2024-04-17T07:34:17.862383Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# `year` is used both as the group and as a discrete fill; the tooltip is\n", "# built line by line from the referenced variables.\n", "(\n", "    ggplot(mpg_df, aes(x='drv', y='hwy'))\n", "    + geom_violin(\n", "        aes(group='year', fill=as_discrete('year')),\n", "        quantiles=QUANTILES,\n", "        quantile_lines=True,\n", "        tooltips=(\n", "            layer_tooltips()\n", "            .line('^x')\n", "            .line('year|@year')\n", "            .line('hwy|@hwy')\n", "            .line('violinwidth|@..violinwidth..')\n", "            .line('density|@..density..')\n", "            .line('count|@..count..')\n", "            .line('scaled|@..scaled..')\n", "        ),\n", "    )\n", "    + ggtitle(\"Grouping and tooltips\")\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Facets" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "execution": { "iopub.execute_input": "2024-04-17T07:34:17.864462Z", "iopub.status.busy": "2024-04-17T07:34:17.864293Z", "iopub.status.idle": "2024-04-17T07:34:17.900418Z", "shell.execute_reply": "2024-04-17T07:34:17.900212Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# `year` drives both the discrete fill and the facet rows.\n", "(\n", "    ggplot(mpg_df, aes(x='drv', y='hwy'))\n", "    + geom_violin(\n", "        aes(fill=as_discrete('year')),\n", "        quantiles=QUANTILES,\n", "        quantile_lines=True,\n", "    )\n", "    + facet_grid(y='year')\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## `coord_flip()`" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "execution": { "iopub.execute_input": "2024-04-17T07:34:17.901527Z", "iopub.status.busy": "2024-04-17T07:34:17.901448Z", "iopub.status.idle": "2024-04-17T07:34:17.922963Z", "shell.execute_reply": "2024-04-17T07:34:17.922761Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# The same violin layer as in the earlier examples, with coord_flip() added.\n", "(\n", "    ggplot(mpg_df, aes('drv', 'hwy'))\n", "    + geom_violin(quantiles=QUANTILES, quantile_lines=True)\n", "    + coord_flip()\n", "    + ggtitle(\"Use coord_flip()\")\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Additional layers" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "execution": { "iopub.execute_input": "2024-04-17T07:34:17.924062Z", "iopub.status.busy": "2024-04-17T07:34:17.923962Z", "iopub.status.idle": "2024-04-17T07:34:17.929508Z", "shell.execute_reply": "2024-04-17T07:34:17.929333Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# The y breaks run from 12 to 28 in steps of 2, matching the ylim() range.\n", "(\n", "    ggplot(mpg_df, aes(as_discrete('drv', order=-1), 'hwy'))\n", "    + geom_violin(\n", "        aes(color='drv', fill='drv'),\n", "        alpha=.5,\n", "        size=2,\n", "        n=8,\n", "        quantiles=QUANTILES,\n", "        quantile_lines=True,\n", "        sampling=sampling_group_systematic(2),\n", "    )\n", "    + scale_y_continuous(breaks=list(range(12, 29, 2)))\n", "    + ylim(12, 28)\n", "    + coord_fixed(ratio=.2)\n", "    + theme_grey()\n", "    + ggtitle(\"Some additional aesthetics, parameters and layers\")\n", ")" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "execution": { "iopub.execute_input": "2024-04-17T07:34:17.930591Z", "iopub.status.busy": "2024-04-17T07:34:17.930462Z", "iopub.status.idle": "2024-04-17T07:34:17.951629Z", "shell.execute_reply": "2024-04-17T07:34:17.951439Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# A narrow boxplot layer is added on top of the violin layer.\n", "quartiles = [1 / 4, 2 / 4, 3 / 4]\n", "(\n", "    ggplot(mpg_df, aes(x='drv', y='hwy'))\n", "    + geom_violin(quantiles=quartiles, quantile_lines=True)\n", "    + geom_boxplot(width=.1)\n", ")" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.13" } }, "nbformat": 4, "nbformat_minor": 4 }