{ "cells": [ { "cell_type": "markdown", "id": "84c51e6c-88ba-4407-a5d1-115003f13557", "metadata": {}, "source": [ "# `geom_hex()`" ] }, { "cell_type": "code", "execution_count": 1, "id": "8f9af6c1-dc1b-4107-a929-f549e7b93254", "metadata": {}, "outputs": [], "source": [ "from math import sqrt\n", "\n", "import numpy as np\n", "import pandas as pd\n", "\n", "from lets_plot import *" ] }, { "cell_type": "code", "execution_count": 2, "id": "a2d79609-1445-4d99-b477-0924895df143", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", " \n", " \n", " " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "LetsPlot.setup_html()" ] }, { "cell_type": "code", "execution_count": 3, "id": "683129fa-1837-45db-b509-2aa852ff2e53", "metadata": {}, "outputs": [], "source": [ "height_coeff = sqrt(3) / 2" ] }, { "cell_type": "code", "execution_count": 4, "id": "fb8948e6-b688-4186-b006-1cd28d2e6d92", "metadata": {}, "outputs": [], "source": [ "data = {\n", " 'x': [-10, -10, 10, 9, 11],\n", " 'y': [v * height_coeff for v in [-10, 10, -10, 9, 11]],\n", " 'w': [2, 4, 8, 1, 1],\n", "}" ] }, { "cell_type": "code", "execution_count": 5, "id": "c754d42e-a8b2-4295-a538-b64a42c6f588", "metadata": {}, "outputs": [], "source": [ "identity_data = {\n", " 'x': [-.5, .5, 0],\n", " 'y': [0, 0, height_coeff],\n", " 'g': [1, 1, 2],\n", " 'h': [1] * 3,\n", "}" ] }, { "cell_type": "code", "execution_count": 6, "id": "58260cf7-9f94-49c5-8655-b7a86598ad8b", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(234, 12)\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Unnamed: 0manufacturermodeldisplyearcyltransdrvctyhwyflclass
01audia41.819994auto(l5)f1829pcompact
12audia41.819994manual(m5)f2129pcompact
23audia42.020084manual(m6)f2031pcompact
34audia42.020084auto(av)f2130pcompact
45audia42.819996auto(l5)f1626pcompact
\n", "
" ], "text/plain": [ " Unnamed: 0 manufacturer model displ year cyl trans drv cty hwy \\\n", "0 1 audi a4 1.8 1999 4 auto(l5) f 18 29 \n", "1 2 audi a4 1.8 1999 4 manual(m5) f 21 29 \n", "2 3 audi a4 2.0 2008 4 manual(m6) f 20 31 \n", "3 4 audi a4 2.0 2008 4 auto(av) f 21 30 \n", "4 5 audi a4 2.8 1999 6 auto(l5) f 16 26 \n", "\n", " fl class \n", "0 p compact \n", "1 p compact \n", "2 p compact \n", "3 p compact \n", "4 p compact " ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = pd.read_csv(\"https://raw.githubusercontent.com/JetBrains/lets-plot-docs/refs/heads/master/data/mpg.csv\")\n", "print(df.shape)\n", "df.head()" ] }, { "cell_type": "markdown", "id": "26f02601-bb90-49e0-a853-f2bad492156a", "metadata": {}, "source": [ "## Basic example with default stat" ] }, { "cell_type": "code", "execution_count": 7, "id": "6ff99fe3-b134-4de0-aaf8-b69bbaf3be24", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ggplot(df, aes(\"cty\", \"hwy\")) + geom_hex()" ] }, { "cell_type": "markdown", "id": "f7615dd7-b19e-4030-b53e-c291ea124299", "metadata": {}, "source": [ "## Basic example with `'identity'` stat" ] }, { "cell_type": "code", "execution_count": 8, "id": "fcd2cfc7-df74-4a15-affa-160bdd352a28", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ggplot(identity_data, aes('x', 'y', fill='g')) + geom_hex(aes(height='h'), stat='identity')" ] }, { "cell_type": "markdown", "id": "5bbf774f-cf7e-4d1b-a357-bc57c9b1b011", "metadata": {}, "source": [ "## Aesthetics" ] }, { "cell_type": "code", "execution_count": 9, "id": "48a5758d-94ce-4de6-afc0-afdde1f80911", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "# One panel per aesthetic; every title repeats the exact `geom_hex()` arguments of its panel.\n",
 "gggrid([\n",
 "    ggplot(data, aes('x', 'y')) + geom_hex(binwidth=[20, 20]) + geom_point(color='red') + ggtitle(\"Default\"),\n",
 "    ggplot(data, aes('x', 'y')) + geom_hex(width=.5, binwidth=[20, 20]) + geom_point(color='red') + ggtitle(\"width=.5\"),\n",
 "    ggplot(data, aes('x', 'y')) + geom_hex(height=.5, binwidth=[20, 20]) + geom_point(color='red') + ggtitle(\"height=.5\"),\n",
 "    ggplot(data, aes('x', 'y')) + geom_hex(aes(weight='w'), binwidth=[20, 20]) + geom_text(aes(label='w'), color='red') + ggtitle(\"weight='w'\"),\n",
 "    ggplot(data, aes('x', 'y')) + geom_hex(binwidth=[20, 20], alpha=.25) + geom_point(color='red') + ggtitle(\"alpha=.25\"),\n",
 "    # The title previously said size=2 while the layer is drawn with size=1.\n",
 "    ggplot(data, aes('x', 'y')) + geom_hex(binwidth=[20, 20], size=1, color='red', linetype='longdash') + geom_point(color='red') + ggtitle(\"size=1, color='red', linetype='longdash'\"),\n",
 "    ggplot(data, aes('x', 'y')) + geom_hex(aes(fill='..density..'), binwidth=[20, 20]) + geom_point(color='red') + ggtitle(\"fill='..density..'\"),\n",
 "], ncol=2)"
 ] }, { "cell_type": "markdown", "id": "af30479a-8aaa-4ed9-8f33-4db2360763bc", "metadata": {}, "source": [ "## Parameters" ] }, { "cell_type": "markdown", "id": "8ab580b4-0cc0-4c55-8c02-1605965e9ead", "metadata": {}, "source": [ "### `bins`/`binwidth`" ] }, { "cell_type": "code", "execution_count": 10, "id": "b3227e26-7292-494d-a491-0dfa8f2543ec", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "gggrid([\n", " ggplot(df, aes(\"cty\", \"hwy\")) + geom_hex(),\n", " ggplot(df, aes(\"cty\", \"hwy\")) + geom_hex(bins=[6, 7]),\n", " ggplot(df, aes(\"cty\", \"hwy\")) + geom_hex(binwidth=[5, 5]),\n", "])" ] }, { "cell_type": "markdown", "id": "8f00661e-c60e-41c4-a474-898398d7e973", "metadata": {}, "source": [ "### `drop`" ] }, { "cell_type": "code", "execution_count": 11, "id": "15eceaca-ad19-42d7-bed6-c82b6009d0e1", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "gggrid([\n", " ggplot(df, aes(\"cty\", \"hwy\")) + geom_hex(),\n", " ggplot(df, aes(\"cty\", \"hwy\")) + geom_hex(drop=False),\n", "])" ] }, { "cell_type": "markdown", "id": "6b41acf5-80e8-4210-be71-d8fb3a96ac82", "metadata": {}, "source": [ "### `tooltips`" ] }, { "cell_type": "code", "execution_count": 12, "id": "f3f2f003-29a6-4eb4-b498-1086041fe192", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ggplot(df, aes(\"cty\", \"hwy\")) + \\\n", " geom_hex(tooltips=layer_tooltips().title(\"(^x, ^y)\")\\\n", " .line(\"count|@..count..\")\\\n", " .line(\"density|@..density..\").format(\"@..density..\", \".3~f\"))" ] }, { "cell_type": "markdown", "id": "d31c3798-8398-4237-8074-de7e850fd7f3", "metadata": {}, "source": [ "### `position`" ] }, { "cell_type": "code", "execution_count": 13, "id": "67870a38-62de-46fe-b0f0-0ad10b118de1", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "gggrid([\n", " ggplot(df, aes(\"cty\", \"hwy\")) + geom_hex(binwidth=[4, 4]),\n", " ggplot(df, aes(\"cty\", \"hwy\")) + geom_hex(binwidth=[4, 4], position=position_nudge(x=2, y=2)),\n", "])" ] }, { "cell_type": "markdown", "id": "9b172f14-f480-4aae-b795-f23d163147cb", "metadata": {}, "source": [ "### `orientation`" ] }, { "cell_type": "code", "execution_count": 14, "id": "f9472020-d13f-4a69-8aae-5d2a4f61f75c", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "gggrid([\n", " ggplot(data, aes('x', 'y')) + geom_hex(binwidth=[20, 20]) + geom_point(color='red'),\n", " ggplot(data, aes('x', 'y')) + geom_hex(binwidth=[20, 20], orientation='y') + geom_point(color='red'),\n", "])" ] }, { "cell_type": "markdown", "id": "caee424e-2415-4adb-a8dc-b99054a29369", "metadata": {}, "source": [ "### `inherit_aes`" ] }, { "cell_type": "code", "execution_count": 15, "id": "8f71f883-fa7e-4e61-a82e-3221e7dfe678", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "gggrid([\n", " ggplot(identity_data, aes(color='g')) + \\\n", " geom_hex(aes('x', 'y', fill='g', height='h'), stat='identity', size=2, alpha=.3),\n", " ggplot(identity_data, aes(color='g')) + \\\n", " geom_hex(aes('x', 'y', fill='g', height='h'), stat='identity', size=2, alpha=.3, inherit_aes=False),\n", "])" ] }, { "cell_type": "markdown", "id": "df3e8ba4-149c-479b-af69-4ae0aab1e6ce", "metadata": {}, "source": [ "### `manual_key`" ] }, { "cell_type": "code", "execution_count": 16, "id": "162007b6-a2eb-4afd-8802-d8b89439e086", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ggplot(df, aes(\"cty\", \"hwy\")) + geom_hex(manual_key=\"Manual key\")" ] }, { "cell_type": "markdown", "id": "f71e48e4-b196-4736-9832-b995fa923e00", "metadata": {}, "source": [ "### `sampling`" ] }, { "cell_type": "code", "execution_count": 17, "id": "d59e0923-a7fc-4fcb-b078-8ff972242734", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# At least it works as well as it does for `geom_bin2d()`\n", "\n", "def sampling_plot(sampling):\n", " return ggplot(df, aes(\"cty\", \"hwy\")) + \\\n", " geom_hex(sampling=sampling) + \\\n", " ggtitle(str(sampling).replace(\"\\n\", \" \"))\n", "\n", "gggrid([\n", " sampling_plot(sampling_pick(5)),\n", " sampling_plot(sampling_random(5, seed=42)),\n", " sampling_plot(sampling_systematic(5)),\n", "], ncol=2)" ] }, { "cell_type": "markdown", "id": "ea0cb0fa-4fb3-4a44-b115-4c888b5c8967", "metadata": {}, "source": [ "### `color_by`/`fill_by`" ] }, { "cell_type": "code", "execution_count": 18, "id": "b709ddd9-ccf2-4afa-b6de-5939a1943991", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ggplot(df, aes(\"cty\", \"hwy\")) + \\\n", " geom_hex(aes(paint_a=\"..count..\", paint_b=\"..count..\"),\n", " binwidth=[3, 3], size=1, color_by='paint_a', fill_by='paint_b') + \\\n", " scale_gradient('paint_a', low=\"black\", high=\"red\", guide=guide_colorbar(title=\"count (border)\")) + \\\n", " scale_gradient('paint_b', low=\"black\", high=\"yellow\", guide=guide_colorbar(title=\"count (figure)\"))" ] }, { "cell_type": "markdown", "id": "c431f82d-716c-4ebd-9413-f2d615a378e3", "metadata": {}, "source": [ "## Change geometry" ] }, { "cell_type": "code", "execution_count": 19, "id": "48c28f56-b9d2-4637-ae33-393b56eddc44", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ggplot(df, aes(\"cty\", \"hwy\")) + \\\n", " geom_point(aes(color='..count..'), stat='binhex', binwidth=[3, 3],\n", " shape=17, size=3, size_unit='x') + \\\n", " coord_fixed()" ] }, { "cell_type": "markdown", "id": "e1140048-788d-4cb7-a208-132351690877", "metadata": {}, "source": [ "## With other layers" ] }, { "cell_type": "code", "execution_count": 20, "id": "a19e9755-0258-4264-8fc8-49fa7da79713", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ggplot(df, aes(\"cty\", \"hwy\")) + \\\n", " geom_hex(binwidth=[5, 5], color=\"white\", size=.5) + \\\n", " geom_point(color=\"white\", size=1.5) + \\\n", " ggmarginal('tr', layer=geom_histogram(binwidth=5, color=\"lightgray\", fill=\"white\")) + \\\n", " facet_grid(x=\"year\") + \\\n", " scale_x_continuous(breaks=list(range(0, 40, 5))) + \\\n", " scale_y_continuous(breaks=list(range(0, 50, 5))) + \\\n", " xlim(0, 40) + ylim(0, 50) + \\\n", " theme_minimal() + \\\n", " flavor_darcula()" ] }, { "cell_type": "code", "execution_count": 21, "id": "b0dd419a-54d6-45ac-9e5b-71b953e0bb70", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "gggrid([\n", " ggplot(df, aes(\"cty\", \"hwy\")) + \\\n", " geom_hex(binwidth=[5, 5]) + \\\n", " ggtitle(\"Default coord\"),\n", " ggplot(df, aes(\"cty\", \"hwy\")) + \\\n", " geom_hex(binwidth=[5, 5]) + \\\n", " coord_flip() + \\\n", " ggtitle(\"coord_flip()\"),\n", " ggplot(df, aes(\"cty\", \"hwy\")) + \\\n", " geom_hex(binwidth=[5, 5]) + \\\n", " coord_polar() + \\\n", " ggtitle(\"coord_polar()\"),\n", "])" ] }, { "cell_type": "markdown", "id": "ea89af62-bf98-4583-8fd0-ae37b9921d9c", "metadata": {}, "source": [ "## Tests" ] }, { "cell_type": "code", "execution_count": 22, "id": "4b98bdeb-bedb-468f-885c-5d6603ece69d", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "def get_grid_data(n, m):\n", " return {\n", " 'x': [i for i in range(n) for j in range(m)],\n", " 'y': [j * height_coeff for i in range(n) for j in range(m)],\n", " }\n", "\n", "def get_grid_plot(n, m, w, h):\n", " return ggplot(get_grid_data(n, m), aes('x', 'y')) + \\\n", " geom_hex(binwidth=[w, h]) + \\\n", " geom_point(color=\"red\") + \\\n", " ggtitle(\"binwidth=[{0}, {1}]\".format(w, h))\n", "\n", "gggrid([\n", " get_grid_plot(2, 2, 1, 1),\n", " get_grid_plot(2, 2, 2, 2),\n", " get_grid_plot(4, 4, 2, 2),\n", " get_grid_plot(4, 4, 1, 4),\n", " get_grid_plot(4, 4, 4, 1),\n", "], ncol=2)" ] }, { "cell_type": "code", "execution_count": 23, "id": "3e74115b-224d-4c51-966c-dfd473b9f4af", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "# Edge-case inputs. Entries without a 'stat' key are plotted with stat=None;\n",
 "# the remaining entries repeat the same data with stat='identity'.\n",
 "tests = [\n",
 "    {'title': \"Empty data\", 'data': {'x': [], 'y': []}},\n",
 "    {'title': \"One element\", 'data': {'x': [0], 'y': [0]}},\n",
 "    {\n",
 "        'title': \"NaN's in data\",\n",
 "        'data': {\n",
 "            'x': [0, 1, np.nan, None, 1, 1],\n",
 "            'y': [0, 1, 1, 1, np.nan, None],\n",
 "        },\n",
 "    },\n",
 "    {\n",
 "        'title': \"Empty data, identity stat\",\n",
 "        'data': {'x': [], 'y': []},\n",
 "        'stat': 'identity',\n",
 "    },\n",
 "    {\n",
 "        'title': \"One element, identity stat\",\n",
 "        'data': {'x': [0], 'y': [0]},\n",
 "        'stat': 'identity',\n",
 "    },\n",
 "    {\n",
 "        'title': \"NaN's in data, identity stat\",\n",
 "        'data': {\n",
 "            'x': [0, 1, np.nan, None, 1, 1],\n",
 "            'y': [0, 1, 1, 1, np.nan, None],\n",
 "        },\n",
 "        'stat': 'identity',\n",
 "    },\n",
 "]\n",
 "\n",
 "# One panel per case; `.get('stat')` yields None when the key is absent.\n",
 "gggrid([\n",
 "    ggplot(case['data'], aes('x', 'y'))\n",
 "    + geom_hex(stat=case.get('stat'))\n",
 "    + ggtitle(case['title'])\n",
 "    for case in tests\n",
 "], ncol=2)"
 ] }, { "cell_type": "code", "execution_count": 24, "id": "8ecf39d2-1fcc-417b-b3db-761926c3de45", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "def get_test_plot(x, y):\n",
 "    \"\"\"Bin seven fixed points plus one extra point (x, y); draw the extra point as a red-filled marker.\"\"\"\n",
 "    h = 2 * height_coeff\n",
 "    test_df = pd.DataFrame({\n",
 "        'x': [-2, 0, -3, -1, 1, -2, 0] + [x],\n",
 "        'y': [-h, -h, 0, 0, 0, h, h] + [y],\n",
 "        'g': [\"center\"] * 7 + [\"border\"]\n",
 "    })\n",
 "    return ggplot() + \\\n",
 "        geom_hex(aes('x', 'y'), data=test_df, binwidth=[2, 2], size=.5, color=\"black\") + \\\n",
 "        geom_point(aes('x', 'y'), data=test_df[test_df[\"g\"] == \"border\"], size=5, shape=21, fill=\"red\") + \\\n",
 "        scale_fill_gradient(low=\"blue\", high=\"yellow\", guide='none') + \\\n",
 "        coord_fixed(ratio=1) + \\\n",
 "        theme_void()\n",
 "\n",
 "hh = 1 / height_coeff\n",
 "gggrid([\n",
 "    get_test_plot(0, hh),\n",
 "    get_test_plot(0.5, 3 * hh / 4),\n",
 "    get_test_plot(1, hh / 2),\n",
 "    get_test_plot(1, 0),\n",
 "    get_test_plot(1, -hh / 2),\n",
 "    get_test_plot(0.5, -3 * hh / 4),\n",
 "    get_test_plot(0, -hh),\n",
 "    get_test_plot(-0.5, -3 * hh / 4),\n",
 "    get_test_plot(-1, -hh / 2),\n",
 "    get_test_plot(-1, 0),\n",
 "    get_test_plot(-1, hh / 2),\n",
 "    get_test_plot(-0.5, 3 * hh / 4),\n",
 "], ncol=4) + ggsize(600, 450)"
 ] }, { "cell_type": "code", "execution_count": null, "id": "6fde491e-df0c-4a24-85f6-d6b2aa14bf22", "metadata": {}, "outputs": [], "source": [
 "# Smoke test on real datasets: the inputs are limited just enough that no plot fails to build.\n",
 "\n",
 "# url -> DataFrame, so each CSV is downloaded once instead of once per bin test.\n",
 "_dataset_cache = {}\n",
 "\n",
 "def load_dataset(url):\n",
 "    \"\"\"Read the CSV at `url`, reusing the frame cached by an earlier call.\"\"\"\n",
 "    if url not in _dataset_cache:\n",
 "        _dataset_cache[url] = pd.read_csv(url, encoding='utf-8', encoding_errors='ignore', on_bad_lines='skip')\n",
 "    return _dataset_cache[url]\n",
 "\n",
 "# Named differently from `get_test_plot(x, y)` in the previous cell so that running\n",
 "# this cell does not silently replace that helper.\n",
 "def get_dataset_plot(df, xcol, ycol, bin_info, df_name, data_limit=100):\n",
 "    \"\"\"Hex-bin `xcol` vs `ycol` over the first `data_limit` rows of `df` sorted by (ycol, xcol).\n",
 "\n",
 "    `bin_info[\"type\"] == \"bins\"` passes `bin_info[\"value\"]` as `bins`; any other type\n",
 "    treats the value as fractions of the x/y data ranges and passes the scaled\n",
 "    pair as `binwidth`. Returns None when the y range is zero.\n",
 "    \"\"\"\n",
 "    df = df.sort_values(by=[ycol, xcol]).iloc[:data_limit]\n",
 "    xrange = df[xcol].max() - df[xcol].min()\n",
 "    yrange = df[ycol].max() - df[ycol].min()\n",
 "    if yrange == 0:\n",
 "        # A zero y range would make the aspect ratio below undefined.\n",
 "        return None\n",
 "    if bin_info[\"type\"] == \"bins\":\n",
 "        params = {\"bins\": bin_info[\"value\"]}\n",
 "        title = \"{name}\\nbins={bin}\".format(name=df_name, bin=bin_info[\"value\"])\n",
 "    else:\n",
 "        bin_w, bin_h = bin_info[\"value\"]\n",
 "        binwidth = [bin_w * xrange, bin_h * yrange]\n",
 "        params = {\"binwidth\": binwidth}\n",
 "        title = \"{name}\\nbinwidth={bin}\".format(name=df_name, bin=\"[{0:.2g}, {1:.2g}]\".format(binwidth[0], binwidth[1]))\n",
 "    return ggplot(df, aes(xcol, ycol)) + \\\n",
 "        geom_hex(show_legend=False, **params) + \\\n",
 "        geom_point(color=\"red\") + \\\n",
 "        coord_fixed(ratio=xrange/yrange) + \\\n",
 "        ggtitle(title)\n",
 "\n",
 "def get_test_plots(data_test, bin_test):\n",
 "    \"\"\"Build one plot per adjacent column pair in data_test[\"cols\"], dropping pairs that yield None.\"\"\"\n",
 "    url = data_test[\"url\"]\n",
 "    cols = data_test[\"cols\"]\n",
 "    df_name = url.split(\"/\")[-1].replace(\".csv\", \"\")\n",
 "    df = load_dataset(url)\n",
 "    plots = [\n",
 "        get_dataset_plot(df, xcol, ycol, bin_test, df_name)\n",
 "        for xcol, ycol in zip(cols, cols[1:])\n",
 "    ]\n",
 "    return [p for p in plots if p is not None]\n",
 "\n",
 "data_tests = [\n",
 "    {\n",
 "        \"url\": \"https://raw.githubusercontent.com/JetBrains/lets-plot-docs/refs/heads/master/data/ToothGrowth.csv\",\n",
 "        \"cols\": [\"len\", \"dose\"],\n",
 "    },\n",
 "    {\n",
 "        \"url\": \"https://raw.githubusercontent.com/JetBrains/lets-plot-docs/refs/heads/master/data/US_household_income_2017.csv\",\n",
 "        \"cols\": [\"ALand\", \"AWater\", \"Lat\", \"Lon\", \"Mean\", \"Median\", \"Stdev\", \"sum_w\"],\n",
 "    },\n",
 "    {\n",
 "        \"url\": \"https://raw.githubusercontent.com/JetBrains/lets-plot-docs/refs/heads/master/data/airquality.csv\",\n",
 "        \"cols\": [\"Ozone\", \"Solar.R\", \"Wind\", \"Temp\"],\n",
 "    },\n",
 "    {\n",
 "        \"url\": \"https://raw.githubusercontent.com/JetBrains/lets-plot-docs/refs/heads/master/data/chemical_elements.csv\",\n",
 "        \"cols\": [\"Atomic Weight\", \"Ionic Radius\", \"Atomic Radius\", \"Electronegativity\", \"First Ionization Potential\", \"Density\", \"Melting Point (K)\", \"Boiling Point (K)\", \"Specific Heat Capacity\"],\n",
 "    },\n",
 "    {\n",
 "        \"url\": \"https://raw.githubusercontent.com/JetBrains/lets-plot-docs/refs/heads/master/data/diamonds.csv\",\n",
 "        \"cols\": [\"carat\", \"depth\", \"price\"],\n",
 "    },\n",
 "    {\n",
 "        \"url\": 
\"https://raw.githubusercontent.com/JetBrains/lets-plot-docs/refs/heads/master/data/economics.csv\",\n", " \"cols\": [\"pce\", \"pop\", \"psavert\", \"uempmed\", \"unemploy\"],\n", " },\n", " {\n", " \"url\": \"https://raw.githubusercontent.com/JetBrains/lets-plot-docs/refs/heads/master/data/iris.csv\",\n", " \"cols\": [\"sepal_length\", \"sepal_width\", \"petal_length\", \"petal_width\"],\n", " },\n", " {\n", " \"url\": \"https://raw.githubusercontent.com/JetBrains/lets-plot-docs/refs/heads/master/data/midwest.csv\",\n", " \"cols\": [\"area\", \"poptotal\", \"popdensity\", \"inmetro\"],\n", " },\n", " {\n", " \"url\": \"https://raw.githubusercontent.com/JetBrains/lets-plot-docs/refs/heads/master/data/mpg.csv\",\n", " \"cols\": [\"displ\", \"cyl\", \"cty\", \"hwy\"],\n", " },\n", " {\n", " \"url\": \"https://raw.githubusercontent.com/JetBrains/lets-plot-docs/refs/heads/master/data/penguins.csv\",\n", " \"cols\": [\"bill_length_mm\", \"bill_depth_mm\", \"flipper_length_mm\", \"body_mass_g\"],\n", " },\n", "]\n", "\n", "bin_tests = [\n", " {\"type\": \"bins\", \"value\": [1, 1]},\n", " {\"type\": \"bins\", \"value\": [1, 2]},\n", " {\"type\": \"bins\", \"value\": [2, 1]},\n", " {\"type\": \"bins\", \"value\": [2, 2]},\n", " {\"type\": \"bins\", \"value\": [5, 5]},\n", " {\"type\": \"binwidth\", \"value\": [1, 1]},\n", " {\"type\": \"binwidth\", \"value\": [.99, .99]},\n", " {\"type\": \"binwidth\", \"value\": [1.01, 1.01]},\n", " {\"type\": \"binwidth\", \"value\": [.2, .2]},\n", "]\n", "\n", "gggrid([\n", " p\n", " for data_test in data_tests\n", " for bin_test in bin_tests\n", " for p in get_test_plots(data_test, bin_test)\n", "], ncol=3)" ] }, { "cell_type": "markdown", "id": "7532cfc5-0f8d-47d2-b9ff-438693319abc", "metadata": {}, "source": [ "## Problems" ] }, { "cell_type": "markdown", "id": "1cb6f6ad-b79a-4b73-b423-338fd00f3666", "metadata": {}, "source": [ "### Wrong computations" ] }, { "cell_type": "code", "execution_count": null, "id": 
"004cba4e-80c1-4e6d-83a7-b61005150cff", "metadata": {}, "outputs": [], "source": [ "# Fixed\n", "test_data1 = {\n", " 'x': [0, 0, 1, 1],\n", " 'y': [0, 1, 0, 1],\n", "}\n", "\n", "ggplot(test_data1, aes('x', 'y')) + \\\n", " geom_hex(binwidth=[1, 2]) + \\\n", " geom_point(color=\"red\")" ] }, { "cell_type": "code", "execution_count": null, "id": "c3f40874-3588-4455-b0f1-36c4758cbde7", "metadata": {}, "outputs": [], "source": [ "# Fixed\n", "def get_test2_plot(xcoeff, ycoeff):\n", " test_data2 = {\n", " 'x': [-xcoeff, xcoeff, 0],\n", " 'y': [-6*ycoeff/height_coeff, 0, ycoeff/height_coeff],\n", " }\n", " return ggplot(test_data2, aes('x', 'y')) + \\\n", " geom_hex(binwidth=[xcoeff, 2*ycoeff]) + \\\n", " geom_point(color=\"red\") + \\\n", " coord_cartesian() + \\\n", " ggtitle(\"x stretch: {0:.2g}\\ny stretch: {1:.2g}\".format(xcoeff, ycoeff))\n", "\n", "gggrid([\n", " get_test2_plot(1, 1),\n", " get_test2_plot(1e-15, 1e-15),\n", " get_test2_plot(1e-15, 1e15),\n", " get_test2_plot(1e15, 1e-15),\n", " get_test2_plot(1e15, 1e15),\n", "], ncol=2)" ] }, { "cell_type": "code", "execution_count": null, "id": "a1a21a1d-0ca0-48dc-9f92-15cb47a06c6e", "metadata": {}, "outputs": [], "source": [ "# Fixed\n", "test_data3 = {\n", " 'x': [-10, -10, 10, 9, 11],\n", " 'y': [v * height_coeff for v in [-10, 10, -10, 9, 11]],\n", "}\n", "\n", "def get_test3_plot(bins):\n", " return ggplot(test_data3, aes('x', 'y')) + \\\n", " geom_hex(bins=bins, drop=False) + \\\n", " geom_point(color=\"red\") + \\\n", " ggtitle(\"bins={0}\".format(bins))\n", "\n", "gggrid([\n", " get_test3_plot([1, 1]),\n", " get_test3_plot([1, 2]),\n", " get_test3_plot([2, 1]),\n", " get_test3_plot([2, 2]),\n", " get_test3_plot([1, 3]),\n", " get_test3_plot([2, 3]),\n", " get_test3_plot([3, 1]),\n", " get_test3_plot([3, 2]),\n", " get_test3_plot([3, 3]),\n", "], ncol=3)" ] }, { "cell_type": "markdown", "id": "1b1e5f3f-33fb-42d5-80c3-01037278225e", "metadata": {}, "source": [ "### Too much stretching of the hexagons" ] 
}, { "cell_type": "code", "execution_count": null, "id": "550ccc49-ef07-4a9c-8b0e-283dbc93300f", "metadata": {}, "outputs": [], "source": [
 "# Regression case for over-stretched hexagons; fixed when the units are 'identity'.\n",
 "test_data = dict(\n",
 "    x=[-10, -10, 10, 9, 11],\n",
 "    y=[height_coeff * v for v in (-10, 10, -10, 9, 11)],\n",
 ")\n",
 "\n",
 "(\n",
 "    ggplot(test_data, aes('x', 'y'))\n",
 "    + geom_hex(binwidth=[10, 10])\n",
 "    + geom_point(color='red')\n",
 ")"
 ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.16" } }, "nbformat": 4, "nbformat_minor": 5 }