{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
\n", " \n", " " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "import pandas as pd\n", "\n", "from lets_plot import *\n", "from lets_plot.mapping import as_discrete\n", "LetsPlot.setup_html()" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "DRAW_QUANTILES = [.25, .5, .75]" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "def plot_matrix(plots=[], width=400, height=300, columns=2):\n", " bunch = GGBunch()\n", " for i in range(len(plots)):\n", " row = int(i / columns)\n", " column = i % columns\n", " bunch.add_plot(plots[i], column * width, row * height, width, height)\n", " return bunch.show()" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Unnamed: 0manufacturermodeldisplyearcyltransdrvctyhwyflclass
01audia41.819994auto(l5)f1829pcompact
12audia41.819994manual(m5)f2129pcompact
23audia42.020084manual(m6)f2031pcompact
34audia42.020084auto(av)f2130pcompact
45audia42.819996auto(l5)f1626pcompact
\n", "
" ], "text/plain": [ " Unnamed: 0 manufacturer model displ year cyl trans drv cty hwy \\\n", "0 1 audi a4 1.8 1999 4 auto(l5) f 18 29 \n", "1 2 audi a4 1.8 1999 4 manual(m5) f 21 29 \n", "2 3 audi a4 2.0 2008 4 manual(m6) f 20 31 \n", "3 4 audi a4 2.0 2008 4 auto(av) f 21 30 \n", "4 5 audi a4 2.8 1999 6 auto(l5) f 16 26 \n", "\n", " fl class \n", "0 p compact \n", "1 p compact \n", "2 p compact \n", "3 p compact \n", "4 p compact " ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "mpg_df = pd.read_csv(\"https://raw.githubusercontent.com/JetBrains/lets-plot-docs/master/data/mpg.csv\")\n", "\n", "mpg_df.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Minimalistic example" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ggplot(mpg_df, aes(y='hwy')) + geom_violin() + ggtitle(\"Simplest example\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Comparison of geoms" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "p_d = ggplot(mpg_df) + \\\n", " geom_density(aes(x='hwy', fill='drv'), color='black', alpha=.5) + \\\n", " facet_grid(x='drv') + \\\n", " coord_flip() + \\\n", " ggtitle(\"geom_density()\")\n", "p_v = ggplot(mpg_df, aes(x=as_discrete('drv', order=1), y='hwy')) + \\\n", " geom_violin(aes(fill='drv'), alpha=.5) + \\\n", " ggtitle(\"geom_violin()\")\n", "\n", "plot_matrix([p_d, p_v])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Original parameters" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### `draw_quantiles`" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "tests = [\n", " {'draw_quantiles': None}, # default\n", " {'draw_quantiles': [.05, .5, .95]}, # all correct\n", " {'draw_quantiles': (1/3, .5, 2/3)}, # strange, but correct\n", " {'draw_quantiles': [.25]}, # only one\n", " {'draw_quantiles': []}, # empty\n", " {'draw_quantiles': [0, .5, 1]}, # include borders\n", " {'draw_quantiles': [-1, .5, 2], 'skip': True}, # beyond borders\n", " {'draw_quantiles': ['0.25', '0.5', '0.75'], 'skip': True}, # invalid values\n", " {'draw_quantiles': [True, False], 'skip': True}, # totally invalid values\n", " {'draw_quantiles': 0.5, 'skip': True}, # wrong parameter type\n", " {'draw_quantiles': True, 'skip': True}, # another wrong parameter type\n", " {'draw_quantiles': '0.25', 'skip': True}, # even worse parameter type\n", " {'draw_quantiles': object(), 'skip': True}, # totally wrong parameter type\n", "]\n", "\n", "ggplot(mpg_df, aes('drv', 'hwy')) + \\\n", " geom_violin(draw_quantiles=DRAW_QUANTILES) + \\\n", " ggtitle(\"draw_quantiles={0}\".format(DRAW_QUANTILES))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### `scale`" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "tests = [\n", " {'scale': None},\n", " {'scale': 'area'},\n", " {'scale': 'count'},\n", " {'scale': 'width'},\n", "]\n", "\n", "plot_matrix([\n", " ggplot(mpg_df, aes('drv', 'hwy')) + \\\n", " geom_violin(scale=test['scale'], \\\n", " draw_quantiles=DRAW_QUANTILES) + \\\n", " ggtitle(\"scale={0}\".format(test['scale']))\n", " for test in tests\n", "])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Custom density parameters" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "p = ggplot(mpg_df, aes('drv', 'hwy'))\n", "p_default = p + geom_violin(draw_quantiles=DRAW_QUANTILES) + ggtitle(\"Default\")\n", "p_kernel = p + geom_violin(draw_quantiles=DRAW_QUANTILES, kernel='epanechikov') + ggtitle(\"kernel='epanechikov'\")\n", "p_bw = p + geom_violin(draw_quantiles=DRAW_QUANTILES, bw=.1) + ggtitle(\"bw=0.1\")\n", "p_adjust = p + geom_violin(draw_quantiles=DRAW_QUANTILES, adjust=2) + ggtitle(\"adjust=2\")\n", "\n", "plot_matrix([p_default, p_kernel, p_bw, p_adjust])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Grouping and tooltips" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ggplot(mpg_df, aes(x='drv', y='hwy')) + \\\n", " geom_violin(aes(group='year', fill=as_discrete('year')), \\\n", " draw_quantiles=DRAW_QUANTILES, \\\n", " tooltips=layer_tooltips().line('^x')\n", " .line('year|@year')\n", " .line('hwy|@hwy')\n", " .line('violinwidth|@..violinwidth..')\n", " .line('density|@..density..')\n", " .line('count|@..count..')\n", " .line('scaled|@..scaled..')) + \\\n", " ggtitle(\"Grouping and tooltips\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Facets" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ggplot(mpg_df, aes(x='drv', y='hwy')) + \\\n", " geom_violin(aes(fill=as_discrete('year')), draw_quantiles=DRAW_QUANTILES) + \\\n", " facet_grid(y='year')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## `coord_flip()`" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ggplot(mpg_df, aes('drv', 'hwy')) + \\\n", " geom_violin(draw_quantiles=DRAW_QUANTILES) + \\\n", " coord_flip() + \\\n", " ggtitle(\"Use coord_flip()\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Additional layers" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ggplot(mpg_df, aes(as_discrete('drv', order=-1), 'hwy')) + \\\n", " geom_violin(aes(color='drv', fill='drv'), alpha=.5, size=2, \\\n", " n=8, draw_quantiles=DRAW_QUANTILES,\n", " sampling=sampling_group_systematic(2)) + \\\n", " scale_y_continuous(breaks=list(range(12, 29, 2))) + \\\n", " scale_color_brewer(type='qual', palette='Set1') + \\\n", " scale_fill_brewer(type='qual', palette='Set1') + \\\n", " ylim(12, 28) + \\\n", " coord_fixed(ratio=.2) + \\\n", " theme_grey() + \\\n", " ggtitle(\"Some additional aesthetics, parameters and layers\")" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Note: quartiles for violin need not to be equal to the quartiles for boxplot!\n", "# See the last paragraph here: https://stackoverflow.com/a/36036821/11771414\n", "quartiles = [1/4, 2/4, 3/4]\n", "ggplot(mpg_df, aes(x='drv', y='hwy')) + \\\n", " geom_violin(draw_quantiles=quartiles) + \\\n", " geom_boxplot(width=.1)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.10" } }, "nbformat": 4, "nbformat_minor": 4 }