{ "cells": [ { "cell_type": "markdown", "id": "7acf6e12-4e40-4245-93c0-228bb5e544ef", "metadata": {}, "source": [ "# `geom_sina()`" ] }, { "cell_type": "code", "execution_count": 1, "id": "92a98b49-3573-407c-84ab-bfca2ab66d42", "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "\n", "from lets_plot import *" ] }, { "cell_type": "code", "execution_count": 2, "id": "7a0e2977-ec8b-4823-b94e-f930dd03aba7", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", " \n", " \n", " " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "LetsPlot.setup_html()" ] }, { "cell_type": "code", "execution_count": 3, "id": "1e2264e4-0a1a-486f-84c0-c4e0d01ad50c", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(234, 12)\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Unnamed: 0manufacturermodeldisplyearcyltransdrvctyhwyflclass
01audia41.819994auto(l5)f1829pcompact
12audia41.819994manual(m5)f2129pcompact
23audia42.020084manual(m6)f2031pcompact
34audia42.020084auto(av)f2130pcompact
45audia42.819996auto(l5)f1626pcompact
\n", "
" ], "text/plain": [ " Unnamed: 0 manufacturer model displ year cyl trans drv cty hwy \\\n", "0 1 audi a4 1.8 1999 4 auto(l5) f 18 29 \n", "1 2 audi a4 1.8 1999 4 manual(m5) f 21 29 \n", "2 3 audi a4 2.0 2008 4 manual(m6) f 20 31 \n", "3 4 audi a4 2.0 2008 4 auto(av) f 21 30 \n", "4 5 audi a4 2.8 1999 6 auto(l5) f 16 26 \n", "\n", " fl class \n", "0 p compact \n", "1 p compact \n", "2 p compact \n", "3 p compact \n", "4 p compact " ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = pd.read_csv(\"https://raw.githubusercontent.com/JetBrains/lets-plot-docs/refs/heads/master/data/mpg.csv\")\n", "print(df.shape)\n", "df.head()" ] }, { "cell_type": "code", "execution_count": 4, "id": "d4431292-ac3f-4cc4-a0a3-939b4a2ee25e", "metadata": {}, "outputs": [], "source": [ "xaes, yaes = \"drv\", \"cty\"" ] }, { "cell_type": "markdown", "id": "297cb9f3-29d6-48f4-a090-3da9a6978854", "metadata": {}, "source": [ "## Default plot" ] }, { "cell_type": "code", "execution_count": 5, "id": "4d21ccb5-b119-488d-9b74-8c1323b70de8", "metadata": {}, "outputs": [], "source": [ "vparams = dict(size=0, fill=\"#dddddd\")\n", "sparams = dict(seed=42)\n", "g = ggplot(df, aes(xaes, yaes))\n", "v = g + geom_violin(**vparams)\n", "s = v + geom_sina(**sparams)" ] }, { "cell_type": "code", "execution_count": 6, "id": "348abc30-eda7-4a42-b9e0-909ba3a363da", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "s" ] }, { "cell_type": "markdown", "id": "07079c2d-d0d1-42d3-a200-86e55dd55000", "metadata": {}, "source": [ "## Parameters and aesthetics" ] }, { "cell_type": "markdown", "id": "c3fe88cf-f97d-4d4f-8df1-40d317192d28", "metadata": {}, "source": [ "### \"jitter\" parameters and aesthetics" ] }, { "cell_type": "markdown", "id": "9a429742-e579-4689-942c-0a48ed44bec3", "metadata": {}, "source": [ "#### `shape`/`stroke`" ] }, { "cell_type": "code", "execution_count": 7, "id": "d31a9a98-23f8-4f88-b3ef-d80094292ad2", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "gggrid([\n", " v + geom_sina(aes(shape=xaes), **sparams) + ggtitle(\"shape aesthetic\"),\n", " v + geom_sina(shape=22, **sparams) + ggtitle(\"shape parameter\"),\n", " v + geom_sina(aes(stroke=xaes), shape=21, **sparams) + scale_stroke(range=[.5, 2]) + ggtitle(\"stroke aesthetic\"),\n", " v + geom_sina(shape=21, stroke=2, **sparams) + ggtitle(\"stroke parameter\"),\n", "], ncol=2)" ] }, { "cell_type": "markdown", "id": "54cb408a-3a90-4d6e-8303-13d5df59f811", "metadata": {}, "source": [ "#### `seed`" ] }, { "cell_type": "code", "execution_count": 8, "id": "7349b20e-dddb-46f0-bd7a-154005dc2538", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "gggrid([\n", " v + geom_sina(seed=0) + ggtitle(\"seed=0\"),\n", " v + geom_sina(seed=0) + ggtitle(\"seed=0\"),\n", " v + geom_sina(seed=1) + ggtitle(\"seed=1\"),\n", "])" ] }, { "cell_type": "markdown", "id": "12c85b6d-ad8e-4885-b36f-63c5bd0bfa6d", "metadata": {}, "source": [ "### \"violin\" parameters and aesthetics" ] }, { "cell_type": "code", "execution_count": 9, "id": "7e9018ce-3fc9-4b1a-8efe-db9188584588", "metadata": {}, "outputs": [], "source": [
 "def violin_sina_plot(**params):\n",
 "    \"\"\"Overlay a sina layer on a violin layer built with the same parameters.\n",
 "\n",
 "    Every keyword in `params` is passed to both `geom_violin()` and `geom_sina()`.\n",
 "    The plot title lists the parameters, one `name: value` pair per line.\n",
 "    \"\"\"\n",
 "    # `params` is unpacked last so it overrides the violin defaults in `vparams`.\n",
 "    violin_layer = geom_violin(**{**vparams, **params})\n",
 "    # The seed comes from the shared `sparams` rather than a second hard-coded 42.\n",
 "    sina_layer = geom_sina(**sparams, **params)\n",
 "    # `name`/`value` avoid reusing `v`, which is the notebook-level violin plot.\n",
 "    title = \"\\n\".join(\"{0}: {1}\".format(name, value) for name, value in params.items())\n",
 "    return g + violin_layer + sina_layer + ggtitle(title)"
 ] }, { "cell_type": "markdown", "id": "1d492fce-4d74-427e-91dd-e5bfb4128c0d", "metadata": {}, "source": [ "#### `weight` aesthetic" ] }, { "cell_type": "code", "execution_count": 10, "id": "1d72be17-9634-49a0-b70e-e71220636bcd", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "def weight_plot():\n",
 "    \"\"\"Compare unweighted (red) and `displ`-weighted (blue) violin and sina layers.\"\"\"\n",
 "    point_size = 1.5\n",
 "    return g + \\\n",
 "        geom_violin(size=0, fill=\"red\", alpha=.1) + \\\n",
 "        geom_violin(aes(weight=\"displ\"), size=0, fill=\"blue\", alpha=.1) + \\\n",
 "        geom_sina(color=\"red\", size=point_size, **sparams) + \\\n",
 "        geom_sina(aes(weight=\"displ\"), color=\"blue\", size=point_size, **sparams) + \\\n",
 "        ggsize(1000, 500)\n",
 "\n",
 "weight_plot()"
 ] }, { "cell_type": "markdown", "id": "70b673b1-83df-402e-b021-e469ed839be1", "metadata": {}, "source": [ "#### `orientation`" ] }, { "cell_type": "code", "execution_count": 11, "id": "bdeb7ba1-d7cc-4eb1-ba89-61bf88d59539", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ggplot(df, aes(yaes, xaes, fill=xaes)) + geom_violin(alpha=.25) + geom_sina(shape=21, **sparams)" ] }, { "cell_type": "markdown", "id": "e984935d-9337-4ea3-9e97-d352c7cb93dd", "metadata": {}, "source": [ "#### `quantiles`" ] }, { "cell_type": "code", "execution_count": 12, "id": "0099cb04-1c2b-485b-9988-5eb189516233", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "def quantiles_plot(quantiles):\n",
 "    \"\"\"Draw violin and sina layers colored by the `..quantile..` stat variable.\n",
 "\n",
 "    `quantiles` is passed unchanged to both layers and is shown in the title.\n",
 "    \"\"\"\n",
 "    return g + \\\n",
 "        geom_violin(aes(fill=\"..quantile..\"), alpha=1/3, quantile_lines=True, quantiles=quantiles) + \\\n",
 "        geom_sina(aes(color=\"..quantile..\"), shape=1, size=2, quantiles=quantiles, **sparams) + \\\n",
 "        scale_continuous([\"color\", \"fill\"], low=\"yellow\", high=\"blue\") + \\\n",
 "        ggtitle(\"quantiles={0}\".format(quantiles))\n",
 "\n",
 "gggrid([\n",
 "    quantiles_plot(None),\n",
 "    quantiles_plot([.1, .5, .9]),\n",
 "    quantiles_plot([.5]),\n",
 "], ncol=1) + ggsize(600, 1200)"
 ] }, { "cell_type": "markdown", "id": "584d3beb-c06f-4b38-81ec-220c4c61b7f8", "metadata": {}, "source": [ "#### `show_half`" ] }, { "cell_type": "code", "execution_count": 13, "id": "f44eb828-8a71-4470-9052-e83436ea7939", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "g + \\\n", " geom_violin(show_half=-1, **vparams) + \\\n", " geom_sina(show_half=1, **sparams)" ] }, { "cell_type": "markdown", "id": "5ba0b6c4-6fe2-41e9-9f64-297df98b87d1", "metadata": {}, "source": [ "#### `scale`" ] }, { "cell_type": "code", "execution_count": 14, "id": "95c58bc5-a93f-4de2-8292-ed7ab45ec8a7", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "gggrid([\n", " violin_sina_plot(scale='area'),\n", " violin_sina_plot(scale='width'),\n", " violin_sina_plot(scale='count'),\n", "], ncol=1) + ggsize(600, 900)" ] }, { "cell_type": "markdown", "id": "df9c1484-50ef-422c-90bf-5d761467ae65", "metadata": {}, "source": [ "#### `kernel`" ] }, { "cell_type": "code", "execution_count": 15, "id": "9242c04d-d00d-43ef-854d-5e10d8c1c460", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "gggrid([\n", " violin_sina_plot(kernel=None),\n", " violin_sina_plot(kernel='gaussian'),\n", " violin_sina_plot(kernel='epanechikov'),\n", " violin_sina_plot(kernel='triangular'),\n", "], ncol=2)" ] }, { "cell_type": "markdown", "id": "ea79b201-16de-40cc-bf2c-7482ad4fe3fa", "metadata": {}, "source": [ "#### `bw`" ] }, { "cell_type": "code", "execution_count": 16, "id": "5a8ca7e8-7dfb-415b-a6d2-4a6098277006", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "gggrid([\n", " violin_sina_plot(bw=None),\n", " violin_sina_plot(bw='nrd0'),\n", " violin_sina_plot(bw='nrd'),\n", " violin_sina_plot(bw=2),\n", "], ncol=2)" ] }, { "cell_type": "markdown", "id": "ffb536e6-9c5d-49bf-94dc-dae35043644e", "metadata": {}, "source": [ "#### `adjust`" ] }, { "cell_type": "code", "execution_count": 17, "id": "15505d10-d917-41d8-bdd4-eb177fa4bc98", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "gggrid([\n", " violin_sina_plot(adjust=None),\n", " violin_sina_plot(adjust=2),\n", "], ncol=2)" ] }, { "cell_type": "markdown", "id": "d803bd70-c911-47b2-ac13-efad83021f94", "metadata": {}, "source": [ "### Common parameters and aesthetics" ] }, { "cell_type": "markdown", "id": "1c6345fe-258d-44c4-882e-8a04d26f4ce3", "metadata": {}, "source": [ "#### `alpha`/`color`/`fill`/`size` aesthetics" ] }, { "cell_type": "code", "execution_count": 18, "id": "377697af-b787-4d88-b63d-782863d41d7f", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "gggrid([\n", " v + geom_sina(aes(alpha=xaes), **sparams) + ggtitle(\"alpha aesthetic\"),\n", " v + geom_sina(alpha=.5, **sparams) + ggtitle(\"alpha parameter\"),\n", " v + geom_sina(aes(color=xaes), **sparams) + ggtitle(\"color aesthetic\"),\n", " v + geom_sina(color=\"red\", **sparams) + ggtitle(\"color parameter\"),\n", " v + geom_sina(aes(fill=xaes), shape=21, **sparams) + ggtitle(\"fill aesthetic\"),\n", " v + geom_sina(fill=\"red\", shape=21, **sparams) + ggtitle(\"fill parameter\"),\n", " v + geom_sina(aes(size=xaes), **sparams) + ggtitle(\"size aesthetic\"),\n", " v + geom_sina(size=1.5, **sparams) + ggtitle(\"size parameter\"),\n", "], ncol=2)" ] }, { "cell_type": "markdown", "id": "9e89457e-20dc-4742-bda2-70d749cf02a3", "metadata": {}, "source": [ "#### `width` aesthetic" ] }, { "cell_type": "code", "execution_count": 19, "id": "3153fbf6-8a44-4899-9f38-99e6a4d6270e", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "gggrid([\n", " g + geom_violin() + geom_sina(**sparams) + ggtitle(\"Default plot\"),\n", " g + geom_violin(aes(width=xaes)) + geom_sina(aes(width=xaes), **sparams) + ggtitle(\"width aesthetic\"),\n", " g + geom_violin(width=1/3) + geom_sina(width=1/3, **sparams) + ggtitle(\"width parameter\"),\n", "], ncol=1) + ggsize(600, 900)" ] }, { "cell_type": "markdown", "id": "50e05177-77b1-49f6-af1c-9be7a6f90b7f", "metadata": {}, "source": [ "#### `position`" ] }, { "cell_type": "code", "execution_count": 20, "id": "87d01aa1-3a1d-4ddf-bc38-663aebd867b1", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "def position_plot(position):\n",
 "    \"\"\"Plot a two-group toy dataset with `position` applied to both the violin and sina layers.\"\"\"\n",
 "    toy_data = dict(\n",
 "        x=['a'] * 6,\n",
 "        y=[1, 1, 2, 1, 2, 2],\n",
 "        g=['x'] * 3 + ['y'] * 3,\n",
 "    )\n",
 "    base = ggplot(toy_data, aes('x', 'y', color='g', fill='g'))\n",
 "    violin_layer = geom_violin(alpha=.25, position=position)\n",
 "    sina_layer = geom_sina(shape=21, color=\"black\", position=position, **sparams)\n",
 "    return base + violin_layer + sina_layer + ggtitle(f\"position={position}\")\n",
 "\n",
 "gggrid([\n",
 "    position_plot(position)\n",
 "    for position in (\n",
 "        None,\n",
 "        'dodge',\n",
 "        'identity',\n",
 "        position_jitter(seed=42),\n",
 "        position_jitterdodge(seed=42),\n",
 "        position_nudge(x=.2),\n",
 "    )\n",
 "], ncol=2)"
 ] }, { "cell_type": "markdown", "id": "0c9ddf44-d129-47b0-8513-c449a199b205", "metadata": {}, "source": [ "#### `show_legend`" ] }, { "cell_type": "code", "execution_count": 21, "id": "5ee0c1f6-0a6e-4708-b783-68864ae433b2", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "gggrid([\n", " v + geom_sina(aes(color=xaes), **sparams) + ggtitle(\"Default\"),\n", " v + geom_sina(aes(color=xaes), show_legend=False, **sparams) + ggtitle(\"show_legend=False\"),\n", "])" ] }, { "cell_type": "markdown", "id": "226627fd-8b08-476f-82d2-9efb57ecc3fd", "metadata": {}, "source": [ "#### `inherit_aes`" ] }, { "cell_type": "code", "execution_count": 22, "id": "4c8fb4e8-28a2-42c7-9a76-47401401acc3", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "gggrid([\n", " ggplot(df, aes(color=xaes)) + \\\n", " geom_violin(aes(xaes, yaes)) + \\\n", " geom_sina(aes(xaes, yaes), **sparams) + \\\n", " ggtitle(\"Default\"),\n", " ggplot(df, aes(color=xaes)) + \\\n", " geom_violin(aes(xaes, yaes)) + \\\n", " geom_sina(aes(xaes, yaes), inherit_aes=False, **sparams) + \\\n", " ggtitle(\"inherit_aes=False\"),\n", "])" ] }, { "cell_type": "markdown", "id": "87e49e42-b505-4678-82ff-51e7834a685c", "metadata": {}, "source": [ "#### `manual_key`" ] }, { "cell_type": "code", "execution_count": 23, "id": "fca832d7-6345-484b-835b-f4869b5f18bb", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "v + geom_sina(manual_key=layer_key(\"Sina\"), **sparams)" ] }, { "cell_type": "markdown", "id": "ff65967a-378a-439d-924b-e1665e0a5902", "metadata": {}, "source": [ "#### `sampling`" ] }, { "cell_type": "code", "execution_count": 24, "id": "9d49b26d-2015-449c-a4dd-f34f30ead251", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "gggrid([\n", " v + geom_sina(**sparams) + ggtitle(\"Default\"),\n", " v + geom_sina(sampling=sampling_random(20, seed=42), **sparams) + ggtitle(\"sampling_random\"),\n", "])" ] }, { "cell_type": "markdown", "id": "8718fef6-3167-4e02-b225-fdfd44f8de82", "metadata": {}, "source": [ "#### `tooltips`" ] }, { "cell_type": "code", "execution_count": 25, "id": "3840fbb9-99dd-4920-af61-c98f2fe83c43", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "def get_tooltips(title):\n",
 "    \"\"\"Build titled tooltips: the point coordinates followed by one line per stat variable.\"\"\"\n",
 "    stat_lines = (\n",
 "        \"@|@..violinwidth..\",\n",
 "        \"@|@..density..\",\n",
 "        \"@|@..count..\",\n",
 "        \"@|@..scaled..\",\n",
 "        \"@|@..quantile..\",\n",
 "    )\n",
 "    tooltips = layer_tooltips().title(title).line(\"(^x, ^y)\")\n",
 "    for stat_line in stat_lines:\n",
 "        tooltips = tooltips.line(stat_line)\n",
 "    return tooltips\n",
 "\n",
 "g + geom_violin(tooltips=get_tooltips(\"Violin\"), **vparams) + \\\n",
 "    geom_sina(tooltips=get_tooltips(\"Sina\"), **sparams)"
 ] }, { "cell_type": "markdown", "id": "7f3a5817-d9cd-4201-9f57-18fe532784d0", "metadata": {}, "source": [ "#### `color_by`/`fill_by`" ] }, { "cell_type": "code", "execution_count": 26, "id": "046d9a87-f318-4008-a2ad-09e24842f975", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "v + geom_sina(aes(paint_a=xaes, paint_b=xaes), shape=21, color_by='paint_a', fill_by='paint_b', **sparams) + \\\n", " scale_brewer('paint_a', name=\"Color: paint_a='Dark2'\", palette='Dark2') + \\\n", " scale_brewer('paint_b', name=\"Fill: paint_b='Set2'\", palette='Set2')" ] }, { "cell_type": "markdown", "id": "0e54f82c-d827-4177-a790-e78acaa31946", "metadata": {}, "source": [ "## Stat" ] }, { "cell_type": "markdown", "id": "8aabc235-6476-4dfc-bd9d-0a58d56cc903", "metadata": {}, "source": [ "### `stat='identity'`" ] }, { "cell_type": "code", "execution_count": 27, "id": "9ef949b5-0a83-4659-ac9d-26a1866959ec", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "def identity_data():\n",
 "    \"\"\"Return columns with precomputed `violinwidth` and `quantile` values.\n",
 "\n",
 "    Categories 'a' and 'b' (three rows each) are repeated for groups 's' and 't'.\n",
 "    \"\"\"\n",
 "    categories = ['a'] * 3 + ['b'] * 3\n",
 "    heights = [1, 2, 3, 1.1, 2.1, 3.1]\n",
 "    widths_s = [.2, .4, .3]\n",
 "    widths_t = [.5, .6, .4]\n",
 "    return {\n",
 "        'x': categories * 2,\n",
 "        'y': heights * 2,\n",
 "        'violinwidth': widths_s * 2 + widths_t * 2,\n",
 "        'quantile': [.3, .6, 1] * 4,\n",
 "        'group': ['s'] * 6 + ['t'] * 6,\n",
 "    }\n",
 "\n",
 "def get_identity_tooltips(title):\n",
 "    \"\"\"Build titled tooltips showing the coordinates and the violinwidth and quantile columns.\"\"\"\n",
 "    tooltips = layer_tooltips().title(title)\n",
 "    for line_template in (\"(@x, @y)\", \"@|@violinwidth\", \"@|@quantile\"):\n",
 "        tooltips = tooltips.line(line_template)\n",
 "    return tooltips\n",
 "\n",
 "ggplot(identity_data()) + \\\n",
 "    geom_violin(aes('x', 'y', violinwidth='violinwidth', quantile='quantile', fill='group'),\n",
 "                alpha=.25, stat='identity', tooltips=get_identity_tooltips(\"Violin\")) + \\\n",
 "    geom_sina(aes('x', 'y', violinwidth='violinwidth', quantile='quantile', color='group'),\n",
 "              stat='identity', tooltips=get_identity_tooltips(\"Sina\"), **sparams)"
 ] }, { "cell_type": "markdown", "id": "fe5946f4-48e7-4634-9723-916457371ec4", "metadata": {}, "source": [ "### `'sina'` stat" ] }, { "cell_type": "code", "execution_count": 28, "id": "141482c7-f396-40e6-a3fa-c3bf49eef4e5", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "g + geom_violin(color=\"red\", fill=\"red\", alpha=.1, manual_key=layer_key(\"stat='ydensity'\")) + \\\n", " geom_violin(stat='sina', color=\"blue\", alpha=0, manual_key=layer_key(\"stat='sina'\")) + \\\n", " geom_errorbar(aes(ymin='..y..', ymax='..y..', width='..violinwidth..'), stat='sina', color=\"blue\") + \\\n", " geom_point(color=\"blue\", size=2) + \\\n", " ggsize(1000, 500)" ] }, { "cell_type": "markdown", "id": "b5afeca7-de96-48d1-963e-87e7d45df1ba", "metadata": {}, "source": [ "## Interaction with other layers" ] }, { "cell_type": "markdown", "id": "6c3bed2f-f56d-463a-adca-7cdc7055a11c", "metadata": {}, "source": [ "### `ggtb()`" ] }, { "cell_type": "code", "execution_count": 29, "id": "b1490b53-659c-41e4-9a8e-6d2355cc5904", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "s + ggtb()" ] }, { "cell_type": "markdown", "id": "e65757ba-f5d3-4715-a1e0-dd5118a158bb", "metadata": {}, "source": [ "### Facets" ] }, { "cell_type": "code", "execution_count": 30, "id": "0cd6614f-8426-493e-9297-cb25926bbe5d", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "s + facet_wrap(facets=xaes, ncol=3, scales='free')" ] }, { "cell_type": "markdown", "id": "01f70517-8068-4dc1-85cf-104c02fd1ac6", "metadata": {}, "source": [ "### Scales" ] }, { "cell_type": "code", "execution_count": 31, "id": "065af3ae-1af7-4d0d-b90f-3ed99c5944e6", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "s + scale_y_continuous(name=\"City miles per gallon\", limits=[0, 50], trans='sqrt')" ] }, { "cell_type": "markdown", "id": "2d3eeaea-c530-41e2-8c94-7389285b7e43", "metadata": {}, "source": [ "### Coordinate Systems" ] }, { "cell_type": "code", "execution_count": 32, "id": "2d7e83ed-0294-4bac-8477-e3fa08504136", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 32, "metadata": {}, "output_type": "execute_result" } ], "source": [ "gggrid([\n", " s + ggtitle(\"Default\"),\n", " s + coord_flip() + ggtitle(\"coord_flip()\"),\n", " s + coord_polar() + ggtitle(\"coord_polar()\"),\n", "], ncol=1) + ggsize(800, 1600)" ] }, { "cell_type": "markdown", "id": "9568471c-6d45-4113-871d-67bca599cc33", "metadata": {}, "source": [ "### Themes" ] }, { "cell_type": "code", "execution_count": 33, "id": "cfd39bad-8f88-4b32-9868-cce459c864b0", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 33, "metadata": {}, "output_type": "execute_result" } ], "source": [ "s + theme_bw() + theme(panel_background=element_rect(fill=\"#ffffcc\")) + flavor_solarized_light()" ] }, { "cell_type": "markdown", "id": "7859218c-79fc-4113-9ac9-6cd709ddc1f0", "metadata": {}, "source": [ "## Tests" ] }, { "cell_type": "code", "execution_count": 34, "id": "fc89ebdd-f85c-483c-bad6-9da996814f24", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 34, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "# Edge-case datasets; each one is defined once and reused for both stats below.\n",
 "base_tests = [\n",
 "    {'title': \"Empty data\", 'data': {'x': [], 'y': []}},\n",
 "    {'title': \"One element\", 'data': {'x': ['a'], 'y': [0]}},\n",
 "    {'title': \"Continuous x\", 'data': {'x': [0, 1], 'y': [0, 0]}},\n",
 "    {\n",
 "        'title': \"NaN's in data\",\n",
 "        'data': {\n",
 "            'x': ['a', 'b', np.nan, None, 'b', 'b'],\n",
 "            'y': [0, 1, 1, 1, np.nan, None],\n",
 "        },\n",
 "    },\n",
 "]\n",
 "\n",
 "# Each base case is rendered twice: without a 'stat' entry and with stat='identity'.\n",
 "tests = base_tests + [\n",
 "    {'title': case['title'] + \", identity stat\", 'data': case['data'], 'stat': 'identity'}\n",
 "    for case in base_tests\n",
 "]\n",
 "\n",
 "gggrid([\n",
 "    ggplot(t['data'], aes('x', 'y')) + \\\n",
 "        geom_sina(stat=t.get('stat'), **sparams) + \\\n",
 "        ggtitle(t['title'])\n",
 "    for t in tests\n",
 "], ncol=2)"
 ] }, { "cell_type": "code", "execution_count": 35, "id": "6c10d242-2450-4204-b239-7c91e76fd75b", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 35, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "def get_mapping(var):\n",
 "    \"\"\"Map x and y, and additionally map color and fill to `var` when it is not None.\"\"\"\n",
 "    if var is None:\n",
 "        return aes('x', 'y')\n",
 "    return aes('x', 'y', color=var, fill=var)\n",
 "\n",
 "def p_working(var):\n",
 "    \"\"\"Base plot with a single point at (0, 0).\"\"\"\n",
 "    return ggplot({'x': [0], 'y': [0]}, get_mapping(var))\n",
 "\n",
 "def p_no_data(var):\n",
 "    \"\"\"Base plot with a mapping but no data.\"\"\"\n",
 "    # Reuses get_mapping() instead of repeating its `var is None` branching.\n",
 "    return ggplot(mapping=get_mapping(var))\n",
 "\n",
 "def p_empty(var):\n",
 "    \"\"\"Base plot whose x and y columns are empty lists.\"\"\"\n",
 "    return ggplot({'x': [], 'y': []}, get_mapping(var))\n",
 "\n",
 "def p_facet_nan(var):\n",
 "    \"\"\"Base plot faceted by 'g', where the row for facet \"B\" has NaN in x.\"\"\"\n",
 "    return ggplot({'x': [0, np.nan], 'y': [0, 0], 'g': [\"A\", \"B\"]}, get_mapping(var)) + facet_grid(x='g')\n",
 "\n",
 "def p_facet_cross(var):\n",
 "    \"\"\"Base plot faceted by 'g1' x 'g2' with data for only two of the four combinations.\"\"\"\n",
 "    return ggplot({'x': [0, 0], 'y': [0, 0], 'g1': [\"A\", \"B\"], 'g2': [\"C\", \"D\"]}, get_mapping(var)) + facet_grid(x='g1', y='g2')\n",
 "\n",
 "# Grouping by color/fill, where the row for group \"B\" has NaN in x.\n",
 "p_group = ggplot({'x': [0, np.nan], 'y': [0, 0], 'g': [\"A\", \"B\"]}, aes('x', 'y', color='g', fill='g'))\n",
 "\n",
 "def p_nan(var):\n",
 "    \"\"\"Base plot with a single row whose x and y are both NaN.\"\"\"\n",
 "    return ggplot({'x': [np.nan], 'y': [np.nan]}, get_mapping(var))\n",
 "\n",
 "def p_lim(var):\n",
 "    \"\"\"Base plot with a single point at x=0 and x limits set to (1, 2).\"\"\"\n",
 "    return ggplot({'x': [0], 'y': [0]}, get_mapping(var)) + xlim(1, 2)\n",
 "\n",
 "gggrid([\n",
 "    p_working(\"..violinwidth..\") + geom_sina(**sparams),\n",
 "    p_no_data(\"..violinwidth..\") + geom_sina(**sparams),\n",
 "    p_empty(\"..violinwidth..\") + geom_sina(**sparams),\n",
 "    p_facet_nan(\"..violinwidth..\") + geom_sina(**sparams),\n",
 "    p_facet_cross(\"..violinwidth..\") + geom_sina(**sparams),\n",
 "    p_group + geom_sina(**sparams),\n",
 "    p_nan(\"..violinwidth..\") + geom_sina(**sparams),\n",
 "    p_lim(\"..violinwidth..\") + geom_sina(**sparams),\n",
 "], ncol=2)"
 ] }, { "cell_type": "markdown", "id": "e0f8b1e9-5aec-4b7b-ad49-b1a754fc5b0c", "metadata": {}, "source": [ "### Regressions" ] }, { "cell_type": "code", "execution_count": 36, "id": "37e8eaf9-574e-4e57-b46e-b1dd47fe33bb", "metadata": {}, "outputs": [], "source": [ "def regression_data1():\n", " np.random.seed(42)\n", " return {\n", " \"val\": 
np.random.randint(5, size=18),\n", " }" ] }, { "cell_type": "code", "execution_count": 37, "id": "a5dae600-96d8-4d09-a3bc-edbce7b02fed", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 37, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ggplot(regression_data1(), aes(y=\"val\")) + \\\n", " geom_violin(**vparams) + \\\n", " geom_sina(tooltips=layer_tooltips([\"..y..\", \"..violinwidth..\"]),\n", " **sparams) + \\\n", " ggtitle(\"All points should be inside the violin\")" ] }, { "cell_type": "code", "execution_count": 38, "id": "febf71fd-451b-44ef-9a5e-9cf70d1b1c7a", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 38, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ggplot(regression_data1(), aes(y=\"val\")) + \\\n", " geom_violin(**vparams) + \\\n", " geom_sina(seed=0) + \\\n", " ggtitle(\"All points should be inside the plot panel\")" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.16" } }, "nbformat": 4, "nbformat_minor": 5 }