{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "88b6cfbb", "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "import random\n", "from lets_plot import *\n", "from lets_plot.mapping import as_discrete" ] }, { "cell_type": "code", "execution_count": 2, "id": "a7fd8774", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", " \n", " \n", " " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "def dump_plot(plot, display=None):\n", "    \"\"\"Serialize the plot spec to indented JSON and return `plot` unchanged.\n", "\n", "    The JSON is copied to the clipboard when the optional `clipboard` module\n", "    can be imported. It is printed when `display` is truthy, or when `display`\n", "    is None and no clipboard module is available.\n", "\n", "    plot: object exposing `as_dict()`.\n", "    display: True/False forces printing on/off; None (default) prints only\n", "        when the clipboard is unavailable.\n", "    \"\"\"\n", "    import json\n", "\n", "    try:\n", "        import clipboard\n", "    except ImportError:\n", "        # Optional dependency: without it the JSON is printed instead.\n", "        clipboard = None\n", "\n", "    from lets_plot._type_utils import standardize_dict\n", "\n", "    plot_dict = standardize_dict(plot.as_dict())\n", "    plot_json = json.dumps(plot_dict, indent=2)\n", "\n", "    if clipboard:\n", "        clipboard.copy('')\n", "        clipboard.copy(str(plot_json))\n", "    elif display is None:\n", "        display = True\n", "\n", "    if display:\n", "        print(plot_json)\n", "\n", "    return plot\n", "\n", "LetsPlot.setup_html()" ] }, { "cell_type": "markdown", "id": "f51ee2cf", "metadata": {}, "source": [ "## Regression Testing\n", "Regression testing of geometries that may be affected by current changes." ] }, { "cell_type": "markdown", "id": "eeb35e19", "metadata": {}, "source": [ "### Scatter Plots" ] }, { "cell_type": "code", "execution_count": 3, "id": "2297d2f8", "metadata": {}, "outputs": [], "source": [ "np.random.seed(42)\n", "random.seed(42)\n", "data = dict(\n", " cond=np.repeat(['A','B'], 10),\n", " xvar=[i + random.normalvariate(0, 3) for i in range(0,20)],\n", " yvar=[i + random.normalvariate(0, 3) for i in range(0,20)]\n", ")" ] }, { "cell_type": "code", "execution_count": 4, "id": "bb8a7928", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "gggrid([\n", " ggplot(data, aes(x='xvar', y='yvar')) + geom_point(shape=1),\n", " ggplot(data, aes(x='yvar', y='xvar')) + geom_point(shape=1),\n", " ggplot(data, aes(x='xvar', y='yvar')) + geom_point(shape=1) + geom_smooth(),\n", " ggplot(data, aes(x='yvar', y='xvar')) + geom_point(shape=1) + geom_smooth()\n", "])" ] }, { "cell_type": "code", "execution_count": 5, "id": "1d892d2d", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "gggrid([\n", " ggplot(data, aes(x='xvar', y='yvar', color='cond')) + geom_point(shape=1) + geom_smooth(se=False),\n", " ggplot(data, aes(x='yvar', y='xvar', color='cond')) + geom_point(shape=1) + geom_smooth(se=False)\n", "])" ] }, { "cell_type": "markdown", "id": "7531ddf4", "metadata": {}, "source": [ "### Marginal Plots" ] }, { "cell_type": "code", "execution_count": 6, "id": "3247f51a", "metadata": {}, "outputs": [], "source": [ "np.random.seed(42)\n", "cov0=[[1, -.8], [-.8, 1]] \n", "cov1=[[ 10, .1], [.1, .1]]\n", "\n", "x0, y0 = np.random.multivariate_normal(mean=[-2,0], cov=cov0, size=200).T\n", "x1, y1 = np.random.multivariate_normal(mean=[0,1], cov=cov1, size=200).T\n", "\n", "data = dict(\n", " x = np.concatenate((x0,x1)),\n", " y = np.concatenate((y0,y1)),\n", " c = [\"A\"]*200 + [\"B\"]*200\n", ")" ] }, { "cell_type": "code", "execution_count": 7, "id": "4cbc63a4", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "gggrid([\n", " ggplot(data, aes(\"x\", \"y\")) + geom_density2df(aes(fill=\"..level..\")) + coord_cartesian() \\\n", " + ggmarginal(\"tr\", layer=geom_area(stat=\"density\")),\n", " ggplot(data, aes(\"y\", \"x\")) + geom_density2df(aes(fill=\"..level..\")) + coord_cartesian() \\\n", " + ggmarginal(\"tr\", layer=geom_area(stat=\"density\"))\n", "])" ] }, { "cell_type": "markdown", "id": "15bd6c1d", "metadata": {}, "source": [ "### `coord_flip()`" ] }, { "cell_type": "code", "execution_count": 8, "id": "c52de749", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.random.seed(42)\n", "n = 10\n", "x = np.arange(n)\n", "# y needs one value per x, so both are sized by n.\n", "y = 1 + np.random.randint(5, size=n)\n", "gggrid([\n", " ggplot() + geom_bar(aes(x='x', y='y'), data={'x': x, 'y': y}, stat='identity'),\n", " ggplot() + geom_bar(aes(x='x', y='y'), data={'x': x, 'y': y}, stat='identity') + coord_flip()\n", "])" ] }, { "cell_type": "markdown", "id": "55bb8824", "metadata": {}, "source": [ "### `geom_bar()`" ] }, { "cell_type": "code", "execution_count": 9, "id": "959a5ec0", "metadata": {}, "outputs": [], "source": [ "data = {\n", " 'code': ['a','b','c','d','e'],\n", " 'value': [2, 5, 3, 8, -1],\n", "}" ] }, { "cell_type": "code", "execution_count": 10, "id": "1a4077a0", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "gggrid([\n", " ggplot(data, aes('code', 'value')) + geom_bar(stat='identity'),\n", " ggplot(data, aes('value', 'code')) + geom_bar(orientation='y', stat='identity'),\n", " ggplot(data, aes('value', 'code')) + geom_bar(stat='identity'),\n", " ggplot(data) + geom_bar(aes('value', 'code'), stat='identity'),\n", " ggplot(data, aes('value', as_discrete('code'))) + geom_bar(stat='identity')\n", "])" ] }, { "cell_type": "markdown", "id": "e737e168", "metadata": {}, "source": [ "### `geom_lollipop()`" ] }, { "cell_type": "code", "execution_count": 11, "id": "27967418", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "gggrid([\n", " ggplot(data, aes('code', 'value')) + geom_lollipop(stat='identity'),\n", " ggplot(data, aes('value', 'code')) + geom_lollipop(orientation='y', stat='identity'),\n", " ggplot(data, aes('value', 'code')) + geom_lollipop(stat='identity'),\n", " ggplot(data) + geom_lollipop(aes('value', 'code'), stat='identity'),\n", " ggplot(data, aes('value', as_discrete('code'))) + geom_lollipop(stat='identity') \n", "])" ] }, { "cell_type": "markdown", "id": "e3b7b6b7", "metadata": {}, "source": [ "### `geom_boxplot()`" ] }, { "cell_type": "code", "execution_count": 12, "id": "8e934911", "metadata": {}, "outputs": [], "source": [ "n = 100\n", "np.random.seed(42)\n", "# All three columns share the sample size n.\n", "data = {\n", " 'code': np.random.choice(list('abcde'), size=n),\n", " 'value': np.random.normal(size=n),\n", " 'value_str': [str(i) for i in np.random.normal(size=n)],\n", "}" ] }, { "cell_type": "code", "execution_count": 13, "id": "df453671", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "gggrid([\n", " ggplot(data, aes('code', 'value')) + geom_boxplot(),\n", " ggplot(data, aes('value', 'code')) + geom_boxplot(orientation='y'),\n", " ggplot(data, aes('value', 'code')) + geom_boxplot(),\n", " ggplot(data) + geom_boxplot(aes('value', 'code')),\n", " ggplot(data, aes('value', as_discrete('code'))) + geom_boxplot() \n", "])" ] }, { "cell_type": "markdown", "id": "0dacca7a", "metadata": {}, "source": [ "### `geom_violin()`" ] }, { "cell_type": "code", "execution_count": 14, "id": "7db1d033", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "gggrid([\n", " ggplot(data, aes('code', 'value')) + geom_violin(),\n", " ggplot(data, aes('value', 'code')) + geom_violin(orientation='y'),\n", " ggplot(data, aes('value', 'code')) + geom_violin(),\n", " ggplot(data) + geom_violin(aes('value', 'code')),\n", " ggplot(data, aes('value', as_discrete('code'))) + geom_violin() \n", "])" ] }, { "cell_type": "markdown", "id": "e1d07a13", "metadata": {}, "source": [ "### `geom_ydotplot()`" ] }, { "cell_type": "code", "execution_count": 15, "id": "5a05e47f", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "p = gggrid([\n", " ggplot(data, aes('code', 'value')) + geom_ydotplot(),\n", " ggplot(data, aes('value', 'code')) + geom_ydotplot(orientation='y'),\n", " ggplot(data, aes('value', 'code')) + geom_ydotplot(),\n", " ggplot(data) + geom_ydotplot(aes('value', 'code')),\n", " ggplot(data, aes('value', as_discrete('code'))) + geom_ydotplot() \n", "])\n", "dump_plot(p)" ] }, { "cell_type": "markdown", "id": "632d6de8", "metadata": {}, "source": [ "## Some Special Cases" ] }, { "cell_type": "markdown", "id": "92f95198", "metadata": {}, "source": [ "### `geom_smooth()`\n", "`geom_smooth()` needs `orientation=\"y\"` when we want to flip the axes." ] }, { "cell_type": "code", "execution_count": 16, "id": "bb724b19", "metadata": {}, "outputs": [], "source": [ "np.random.seed(42)\n", "n = 100\n", "x = np.linspace(-2, 2, n)\n", "y = x ** 2 + np.random.normal(size=n)" ] }, { "cell_type": "code", "execution_count": 17, "id": "d5bd40cb", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "gggrid([\n", " ggplot({'x': x, 'y': y}, aes(x='x', y='y')) + geom_point() + geom_smooth(deg=2, se=False),\n", " ggplot({'x': x, 'y': y}, aes(x='y', y='x')) + geom_point() + geom_smooth(deg=2, se=False),\n", " ggplot({'x': x, 'y': y}, aes(x='y', y='x')) + geom_point() + geom_smooth(deg=2, se=False, orientation=\"y\"),\n", "])" ] }, { "cell_type": "markdown", "id": "aa2e0c86", "metadata": {}, "source": [ "### Few Datasets" ] }, { "cell_type": "markdown", "id": "bbc0359b", "metadata": {}, "source": [ "If we have some dataset in the `ggplot()` context, then we can use data from it in `geom_boxplot()`. And, in this case, we can use variable names from the `ggplot()` context even if we set another context in `geom_boxplot()`." ] }, { "cell_type": "code", "execution_count": 18, "id": "baf88e20", "metadata": {}, "outputs": [], "source": [ "np.random.seed(42)\n", "data = {\n", " 'code': np.random.choice(list('abcde'), size=100),\n", " 'value': np.random.normal(size=100),\n", " 'value_str': [str(i) for i in np.random.normal(size=100)],\n", "}\n", "data_num = {\n", " 'number': np.random.choice(list('qwxyz'), size=100),\n", " 'volume': np.random.normal(size=100),\n", "}" ] }, { "cell_type": "code", "execution_count": 19, "id": "956932a3", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "gggrid([\n", " ggplot(data, aes('value', 'code')) + geom_boxplot(aes('number', 'volume'), data = data_num),\n", " ggplot(data, aes('value', 'code')) + geom_boxplot(aes('value', 'code'), data = data_num)\n", "])" ] }, { "cell_type": "markdown", "id": "b9a2bded", "metadata": {}, "source": [ "### `stat_summary(geom='crossbar')`" ] }, { "cell_type": "code", "execution_count": 20, "id": "f0976fe8", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "gggrid([\n", " ggplot(data, aes('code', 'value')) + stat_summary(geom='crossbar'),\n", " ggplot(data, aes('value', 'code')) + stat_summary(geom='crossbar') \n", "])" ] }, { "cell_type": "markdown", "id": "f949d6de", "metadata": {}, "source": [ "### Discrete Both Axes" ] }, { "cell_type": "code", "execution_count": 21, "id": "0aba6fbb", "metadata": {}, "outputs": [], "source": [ "x = ['a', 'a', 'b', 'b']\n", "y = ['x','x', 'x', 'y']" ] }, { "cell_type": "markdown", "id": "fa99080f", "metadata": {}, "source": [ "For `stat_sum()` and `geom_point()`, rotation does not work when both axes are discrete." ] }, { "cell_type": "code", "execution_count": 22, "id": "14e36676", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "gggrid([\n", " ggplot({'x': x, 'y': y}, aes(x='x', y='y')) + stat_sum(),\n", " ggplot({'x': x, 'y': y}, aes(x='x', y='y')) + geom_point(orientation=\"y\")\n", "])" ] }, { "cell_type": "markdown", "id": "5a761ca2", "metadata": {}, "source": [ "A lollipop can be drawn when both axes are discrete, although this is not a typical use case for a lollipop." ] }, { "cell_type": "code", "execution_count": 23, "id": "f5ccf129", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "x = ['a', 'a', 'a', 'b', 'b']\n", "y = ['x','x', 'x', 'y', 'y']\n", "gggrid([\n", " ggplot({'x': x, 'y': y}, aes(x='x', y='y')) + geom_lollipop(stat='count'),\n", " ggplot({'x': x, 'y': y}, aes(x='x')) + geom_lollipop(stat='count'),\n", " ggplot({'x': x, 'y': y}, aes(y='y')) + geom_lollipop(stat='count'), \n", " ggplot({'x': x, 'y': y}, aes(y='y')) + geom_lollipop(stat='count', orientation='x'), \n", " ggplot({'x': x, 'y': y}, aes(x='x', y='y')) + geom_lollipop(stat='count', orientation='y') \n", "])" ] }, { "cell_type": "markdown", "id": "c5b003f3", "metadata": {}, "source": [ "### `stat='boxplot'`" ] }, { "cell_type": "code", "execution_count": 24, "id": "70805e17", "metadata": {}, "outputs": [], "source": [ "n = 50\n", "np.random.seed(42)\n", "x = np.random.uniform(size=n)\n", "c = np.random.choice(['a', 'b', 'c'], size=n)" ] }, { "cell_type": "code", "execution_count": 25, "id": "e27e1ed0", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "p1 = ggplot({'x': x, 'c': c}) + \\\n", " geom_crossbar(aes(x='c', y='x', color='c'), \\\n", " stat='boxplot') + \\\n", " geom_point(aes(x='c', y='x', color='c'), \\\n", " size=4, shape=21, fill='white',\n", " position=position_jitterdodge(seed=42))\n", "p2 = ggplot({'x': x, 'c': c}) + \\\n", " geom_crossbar(aes(x='x', y='c', color='c'), \\\n", " stat='boxplot') + \\\n", " geom_point(aes(x='x', y='c', color='c'), \\\n", " size=4, shape=21, fill='white',\n", " position=position_jitterdodge(seed=42))\n", "gggrid([p1, p2])" ] }, { "cell_type": "code", "execution_count": 26, "id": "0986ea7d", "metadata": {}, "outputs": [], "source": [ "n = 100\n", "np.random.seed(42)\n", "x = np.random.choice(['a', 'b', 'c'], size=n)\n", "y = np.random.normal(size=n)" ] }, { "cell_type": "code", "execution_count": 27, "id": "e687f5c9", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "gggrid([\n", " ggplot({'x': x, 'y': y}, aes(x='x', y='y')) + geom_pointrange(stat='boxplot'),\n", " ggplot({'x': x, 'y': y}, aes(x='y', y='x')) + geom_pointrange(stat='boxplot')\n", "])" ] }, { "cell_type": "markdown", "id": "6604561b", "metadata": {}, "source": [ "### `stat='boxplot_outlier'`" ] }, { "cell_type": "code", "execution_count": 28, "id": "c7b7a899", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "gggrid([\n", " ggplot({'x': x, 'y': y}, aes(x='x', y='y')) + geom_pointrange(stat='boxplot_outlier'),\n", " ggplot({'x': x, 'y': y}, aes(x='y', y='x')) + geom_pointrange(stat='boxplot_outlier')\n", "])" ] }, { "cell_type": "markdown", "id": "57f10f16", "metadata": {}, "source": [ "### `stat_summary()`" ] }, { "cell_type": "code", "execution_count": 29, "id": "6bdc4826", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# p1 maps x='x', y='y'; p2 swaps the mappings. Each overlays a default summary (red)\n", "# and a quantile-based summary (blue).\n", "p1 = ggplot({'x': x, 'y': y}, aes(x='x', y='y')) + \\\n", " stat_summary(position=position_nudge(x=-.1), color=\"red\") + \\\n", " stat_summary(fun='mq', fun_min='lq', fun_max='uq', quantiles=[.1, .5, .9], \\\n", " position=position_nudge(x=.1), color=\"blue\")\n", "p2 = ggplot({'x': x, 'y': y}, aes(x='y', y='x')) + \\\n", " stat_summary(position=position_nudge(x=-.1), color=\"red\") + \\\n", " stat_summary(fun='mq', fun_min='lq', fun_max='uq', quantiles=[.1, .5, .9], \\\n", " position=position_nudge(x=.1), color=\"blue\")\n", "gggrid([p1, p2])" ] }, { "cell_type": "markdown", "id": "a44e0244", "metadata": {}, "source": [ "### `stat='ydensity'`" ] }, { "cell_type": "code", "execution_count": 30, "id": "3248a6b1", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "gggrid([ \n", " ggplot({'x': x, 'y': y}, aes('x', 'y')) + geom_crossbar(aes(ymin='..y..', ymax='..y..', width='..violinwidth..'), stat='ydensity'),\n", " ggplot({'x': x, 'y': y}, aes('y', 'x')) + geom_crossbar(aes(ymin='..y..', ymax='..y..', width='..violinwidth..'), stat='ydensity', orientation='y'),\n", " ggplot({'x': x, 'y': y}, aes('y', 'x')) + geom_crossbar(aes(ymin='..y..', ymax='..y..', width='..violinwidth..'), stat='ydensity')\n", "])" ] }, { "cell_type": "markdown", "id": "5296f46a", "metadata": {}, "source": [ "### `stat='ydotplot'`" ] }, { "cell_type": "code", "execution_count": 31, "id": "9d820d5e", "metadata": {}, "outputs": [], "source": [ "n = 100\n", "np.random.seed(42)\n", "x = np.random.choice(['a'], size=n)\n", "y = np.random.normal(size=n)" ] }, { "cell_type": "code", "execution_count": 32, "id": "08d6ea01", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 32, "metadata": {}, "output_type": "execute_result" } ], "source": [ "gggrid([ \n", " ggplot({'x': x, 'y': y}, aes(x='x', y='y')) + geom_crossbar(aes(ymin='..y..', ymax='..y..', width='..count..'), stat='ydotplot'),\n", " ggplot({'x': x, 'y': y}, aes(x='y', y='x')) + geom_crossbar(aes(ymin='..y..', ymax='..y..', width='..count..'), stat='ydotplot', orientation='y'),\n", " ggplot({'x': x, 'y': y}, aes(x='y', y='x')) + geom_crossbar(aes(ymin='..y..', ymax='..y..', width='..count..'), stat='ydotplot')\n", "])" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.11" } }, "nbformat": 4, "nbformat_minor": 5 }