{ "cells": [ { "cell_type": "markdown", "id": "1f802cac-7844-4a05-ae02-483978981980", "metadata": {}, "source": [ "# `geom_pointdensity()`" ] }, { "cell_type": "code", "execution_count": 1, "id": "7bcca330-1a27-418f-a868-508dc6b16702", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "The geodata is provided by © OpenStreetMap contributors and is made available here under the Open Database License (ODbL).\n" ] } ], "source": [ "import numpy as np\n", "import pandas as pd\n", "\n", "from lets_plot import *\n", "from lets_plot.geo_data import *\n", "from lets_plot.bistro import *" ] }, { "cell_type": "code", "execution_count": 2, "id": "1c898b8b-a8e2-40d5-9fa8-b986078e9607", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", " \n", " \n", " " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "LetsPlot.setup_html()" ] }, { "cell_type": "code", "execution_count": 3, "id": "cafdd82d-2be0-4aec-b0d3-03fb403b5dd2", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(53940, 10)\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
caratcutcolorclaritydepthtablepricexyz
00.24IdealGVVS162.156.05593.974.002.47
10.58Very GoodFVVS260.057.022015.445.423.26
20.40IdealEVVS262.155.012384.764.742.95
30.43PremiumEVVS260.857.013044.924.892.98
41.55IdealESI262.355.069017.447.374.61
\n", "
" ], "text/plain": [ " carat cut color clarity depth table price x y z\n", "0 0.24 Ideal G VVS1 62.1 56.0 559 3.97 4.00 2.47\n", "1 0.58 Very Good F VVS2 60.0 57.0 2201 5.44 5.42 3.26\n", "2 0.40 Ideal E VVS2 62.1 55.0 1238 4.76 4.74 2.95\n", "3 0.43 Premium E VVS2 60.8 57.0 1304 4.92 4.89 2.98\n", "4 1.55 Ideal E SI2 62.3 55.0 6901 7.44 7.37 4.61" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sample_size = 1000\n", "df_full = pd.read_csv(\"https://raw.githubusercontent.com/JetBrains/lets-plot-docs/refs/heads/master/data/diamonds.csv\")\n", "print(df_full.shape)\n", "df = df_full.sample(sample_size, random_state=42).reset_index(drop=True)\n", "df.head()" ] }, { "cell_type": "markdown", "id": "c8733b2b-ac7f-43ea-b0ca-7d2abc08b4eb", "metadata": {}, "source": [ "## Default Plot" ] }, { "cell_type": "code", "execution_count": 4, "id": "3945d441-5df9-49eb-a6a9-2fd9c5f9890c", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
\n", "
\n", "
\n", " \n", "
\n", " \n", "
\n", "
\n", " \n", " \n", " \n", " " ], "text/plain": [ "" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ggplot(df_full, aes(\"carat\", \"price\")) + geom_pointdensity()" ] }, { "cell_type": "markdown", "id": "c51dbe84-c2ca-43a1-b9ba-2e5a4cb578c9", "metadata": {}, "source": [ "### Synthetic Datasets" ] }, { "cell_type": "code", "execution_count": 5, "id": "3be23c2e-5e4f-4c25-ab1a-8d30c9bddec6", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
\n", "
\n", "
\n", " \n", "
\n", " \n", "
\n", "
\n", " \n", " \n", " \n", " " ], "text/plain": [ "" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "def get_mn_data(cov=[[1, 0], [0, 1]], size=250, seed=42):\n", " \"\"\"Draw `size` points from a zero-mean bivariate normal with covariance `cov` (reseeds NumPy's global RNG).\"\"\"\n", " np.random.seed(seed)\n", " mean=[0, 0]\n", " x, y = np.random.multivariate_normal(mean, cov, size).T\n", " return {'x': x, 'y': y}\n", "\n", "def get_nu_data(size=250, seed=42):\n", " \"\"\"Sample `size` points: x from np.random.uniform, y from np.random.normal (both with default parameters).\"\"\"\n", " np.random.seed(seed)\n", " return {\n", " 'x': np.random.uniform(size=size),\n", " 'y': np.random.normal(size=size),\n", " }\n", "\n", "def get_np_data(scale, size=250, seed=42):\n", " \"\"\"Sample `size` points: x is a default-rate Poisson draw plus normal noise of std `scale`; y is a default normal draw.\"\"\"\n", " np.random.seed(seed)\n", " return {\n", " 'x': np.random.poisson(size=size) + np.random.normal(scale=scale, size=size),\n", " 'y': np.random.normal(size=size),\n", " }\n", "\n", "def get_pointdensity_plot(data, method):\n", " \"\"\"Build a geom_pointdensity() plot of `data` using `method`, with the method name in the title.\"\"\"\n", " return ggplot(data, aes(\"x\", \"y\")) + geom_pointdensity(method=method) + ggtitle(\"Method: {0}\".format(method))\n", "\n", "gggrid([\n", " get_pointdensity_plot(get_mn_data(), 'neighbours'),\n", " get_pointdensity_plot(get_mn_data(), 'kde2d'),\n", " get_pointdensity_plot(get_mn_data(cov=[[1, 0.9], [0.9, 1]]), 'neighbours'),\n", " get_pointdensity_plot(get_mn_data(cov=[[1, 0.9], [0.9, 1]]), 'kde2d'),\n", " get_pointdensity_plot(get_mn_data(cov=[[1, -0.75], [-0.75, 1]]), 'neighbours'),\n", " get_pointdensity_plot(get_mn_data(cov=[[1, -0.75], [-0.75, 1]]), 'kde2d'),\n", " get_pointdensity_plot(get_nu_data(), 'neighbours'),\n", " get_pointdensity_plot(get_nu_data(), 'kde2d'),\n", " get_pointdensity_plot(get_np_data(.1), 'neighbours'),\n", " get_pointdensity_plot(get_np_data(.1), 'kde2d'),\n", "], ncol=2)" ] }, { "cell_type": "markdown", "id": "20a642e9-4166-4c0e-af2d-757ad8b18cd6", "metadata": {}, "source": [ "## Aesthetics" ] }, { "cell_type": "markdown", "id": "bb3aa8f2-2d8c-4a80-8e1d-627de3ffa9d1", 
"metadata": {}, "source": [ "### `weight`" ] }, { "cell_type": "code", "execution_count": 6, "id": "f027d521-f99d-44a1-9ae7-c39fd1c174ee", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
\n", "
\n", "
\n", " \n", "
\n", " \n", "
\n", "
\n", " \n", " \n", " \n", " " ], "text/plain": [ "" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Three points; the second one carries double weight.\n", "weighted_data = {\n", " 'x': [0, 0, 1],\n", " 'y': [0, 1, 0],\n", " 'w': [1, 2, 1],\n", "}\n", "tooltips_with_weights = layer_tooltips().line(\"@|@..density..\")\\\n", " .line(\"'neighbours count'|@..count..\")\\\n", " .line(\"weight|@w\")\n", "\n", "# Titles spell the method 'neighbours', matching the value passed to method= elsewhere in this notebook.\n", "gggrid([\n", " ggplot(weighted_data, aes('x', 'y')) + \\\n", " geom_pointdensity(adjust=150, tooltips=tooltips_with_weights) + \\\n", " ggtitle(\"method='neighbours', without weights\"),\n", " ggplot(weighted_data, aes('x', 'y')) + \\\n", " geom_pointdensity(aes(weight='w'), adjust=150, tooltips=tooltips_with_weights) + \\\n", " ggtitle(\"method='neighbours', with weights\"),\n", " ggplot(weighted_data, aes('x', 'y')) + \\\n", " geom_pointdensity(method='kde2d', tooltips=tooltips_with_weights) + \\\n", " ggtitle(\"method='kde2d', without weights\"),\n", " ggplot(weighted_data, aes('x', 'y')) + \\\n", " geom_pointdensity(aes(weight='w'), method='kde2d', tooltips=tooltips_with_weights) + \\\n", " ggtitle(\"method='kde2d', with weights\"),\n", "], ncol=2)" ] }, { "cell_type": "markdown", "id": "f919b1ea-f763-4b00-bae1-e56e9862156c", "metadata": {}, "source": [ "### Other aesthetics" ] }, { "cell_type": "code", "execution_count": 7, "id": "c181c9e1-28f7-4062-9728-63e77a6966ed", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
\n", "
\n", "
\n", " \n", "
\n", " \n", "
\n", "
\n", " \n", " \n", " \n", " " ], "text/plain": [ "" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ggplot(df, aes(\"carat\", \"price\")) + \\\n", " geom_pointdensity(aes(alpha=\"..density..\"), color=\"black\") + \\\n", " ggtitle(\"Custom color and alpha\")" ] }, { "cell_type": "code", "execution_count": 8, "id": "f5c972f2-342c-4ea9-a595-44b4959d6301", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
\n", "
\n", "
\n", " \n", "
\n", " \n", "
\n", "
\n", " \n", " \n", " \n", " " ], "text/plain": [ "" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ggplot(df, aes(\"carat\", \"price\")) + \\\n", " geom_pointdensity(aes(fill=\"..density..\"), method='kde2d', color=\"black\", shape=21) + \\\n", " ggtitle(\"Custom shape, color and fill\")" ] }, { "cell_type": "code", "execution_count": 9, "id": "1004f407-e068-4c8d-b41b-b740c68e713b", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
\n", "
\n", "
\n", " \n", "
\n", " \n", "
\n", "
\n", " \n", " \n", " \n", " " ], "text/plain": [ "" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ggplot(df, aes(\"carat\", \"price\")) + \\\n", " geom_pointdensity(method='kde2d', shape=17, angle=90) + \\\n", " ggtitle(\"Custom shape and angle\")" ] }, { "cell_type": "code", "execution_count": 10, "id": "a5363818-bc16-45a8-a2c0-973e27176258", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
\n", "
\n", "
\n", " \n", "
\n", " \n", "
\n", "
\n", " \n", " \n", " \n", " " ], "text/plain": [ "" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ggplot(df, aes(\"carat\", \"price\")) + \\\n", " geom_pointdensity(aes(size=\"depth\"), method='kde2d', alpha=.2) + \\\n", " scale_size(range=[1, 5]) + \\\n", " ggtitle(\"Custom size and alpha\")" ] }, { "cell_type": "code", "execution_count": 11, "id": "fcba1777-5a85-4461-aa97-8e8e86106002", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
\n", "
\n", "
\n", " \n", "
\n", " \n", "
\n", "
\n", " \n", " \n", " \n", " " ], "text/plain": [ "" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ggplot(df, aes(\"carat\", \"price\")) + \\\n", " geom_pointdensity(method='kde2d', shape=1, stroke=2, alpha=.5) + \\\n", " ggtitle(\"Custom shape, stroke and alpha\")" ] }, { "cell_type": "markdown", "id": "545ea0e4-3266-4229-bbef-3e593f9d7cd3", "metadata": {}, "source": [ "## Parameters" ] }, { "cell_type": "markdown", "id": "08fd378d-5090-42f6-a776-3350fb811cca", "metadata": {}, "source": [ "### Own Parameters" ] }, { "cell_type": "markdown", "id": "24e290b7-f891-4494-8bd3-a4d33c305f7a", "metadata": {}, "source": [ "#### Unique Parameters" ] }, { "cell_type": "code", "execution_count": 12, "id": "d5abeec4-bcd8-4c83-8f99-1f24aad3f648", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
\n", "
\n", "
\n", " \n", "
\n", " \n", "
\n", "
\n", " \n", " \n", " \n", " " ], "text/plain": [ "" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "gggrid([\n", " ggplot(df, aes(\"carat\", \"price\")) + geom_pointdensity() + ggtitle(\"Default method (auto)\"),\n", " ggplot(df, aes(\"carat\", \"price\")) + geom_pointdensity(method='neighbours') + ggtitle(\"method='neighbours'\"),\n", " ggplot(df, aes(\"carat\", \"price\")) + geom_pointdensity(method='kde2d') + ggtitle(\"method='kde2d'\"),\n", "], ncol=3)" ] }, { "cell_type": "markdown", "id": "56fb9a45-4a9b-4ce3-a559-236ca9b165a4", "metadata": {}, "source": [ "If `method='auto'` (default), the method is selected based on the size of the dataset.\n", "\n", "**Note:** If grouping is used, the method is selected independently for each group:" ] }, { "cell_type": "code", "execution_count": 13, "id": "490e07d3-aef9-4370-8443-d19f6486c375", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
\n", "
\n", "
\n", " \n", "
\n", " \n", "
\n", "
\n", " \n", " \n", " \n", " " ], "text/plain": [ "" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ideal_df = df_full.assign(is_ideal_quality=(df_full[\"cut\"] == \"Ideal\").map({True: \"Quality: ideal\", False: \"Quality: not ideal\"}))\n", "\n", "ggplot(ideal_df, aes(\"carat\", \"price\")) + \\\n", " geom_pointdensity() + \\\n", " facet_grid(x=\"is_ideal_quality\")" ] }, { "cell_type": "code", "execution_count": 14, "id": "6fe4e59c-4e9c-4f6b-ae57-e35ce2c55b06", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "is_ideal_quality\n", "Quality: not ideal 32389\n", "Quality: ideal 21551\n", "Name: count, dtype: int64" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ideal_df[\"is_ideal_quality\"].value_counts()" ] }, { "cell_type": "markdown", "id": "b11a9e24-7349-4b4d-a38d-463adb69355b", "metadata": {}, "source": [ "#### Density-like Parameters" ] }, { "cell_type": "code", "execution_count": 15, "id": "b6900a2a-efd0-4952-aa64-45ac28d4624a", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
\n", "
\n", "
\n", " \n", "
\n", " \n", "
\n", "
\n", " \n", " \n", " \n", " " ], "text/plain": [ "" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "g = ggplot(df, aes(\"carat\", \"price\")) + scale_color_continuous(low=\"darkgreen\", high=\"tomato\")\n", "\n", "gggrid([\n", " g + geom_pointdensity(method='kde2d') + ggtitle(\"Default kernel\"),\n", " g + geom_pointdensity(method='kde2d', kernel='cosine') + ggtitle(\"kernel='cosine'\"),\n", " g + geom_pointdensity(method='kde2d', kernel='optcosine') + ggtitle(\"kernel='optcosine'\"),\n", " g + geom_pointdensity(method='kde2d', kernel='rectangular') + ggtitle(\"kernel='rectangular'\"),\n", " g + geom_pointdensity(method='kde2d', kernel='triangular') + ggtitle(\"kernel='triangular'\"),\n", " g + geom_pointdensity(method='kde2d', kernel='biweight') + ggtitle(\"kernel='biweight'\"),\n", " g + geom_pointdensity(method='kde2d', kernel='epanechikov') + ggtitle(\"kernel='epanechikov'\"),\n", "], ncol=3)" ] }, { "cell_type": "code", "execution_count": 16, "id": "f663dc03-63e1-45e9-93d8-5923e87f10e5", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
\n", "
\n", "
\n", " \n", "
\n", " \n", "
\n", "
\n", " \n", " \n", " \n", " " ], "text/plain": [ "" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "gggrid([\n", " ggplot(df, aes(\"carat\", \"price\")) + geom_pointdensity(method='kde2d') + ggtitle(\"Default adjust\"),\n", " ggplot(df, aes(\"carat\", \"price\")) + geom_pointdensity(method='kde2d', adjust=10) + ggtitle(\"adjust=10\"),\n", " ggplot(df, aes(\"carat\", \"price\")) + geom_pointdensity(method='kde2d', adjust=.1) + ggtitle(\"adjust=0.1\"),\n", "], ncol=2)" ] }, { "cell_type": "code", "execution_count": 17, "id": "81f132a8-bd01-430b-8d70-2165baf7a0e5", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
\n", "
\n", "
\n", " \n", "
\n", " \n", "
\n", "
\n", " \n", " \n", " \n", " " ], "text/plain": [ "" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "gggrid([\n", " ggplot(df, aes(\"carat\", \"price\")) + geom_pointdensity(method='kde2d') + ggtitle(\"Default bw\"),\n", " ggplot(df, aes(\"carat\", \"price\")) + geom_pointdensity(method='kde2d', bw='nrd0') + ggtitle(\"bw='nrd0'\"),\n", " ggplot(df, aes(\"carat\", \"price\")) + geom_pointdensity(method='kde2d', bw=[1, 5000]) + ggtitle(\"bw=[1, 5000]\"),\n", " ggplot(df, aes(\"carat\", \"price\")) + geom_pointdensity(method='kde2d', bw=[.02, 500]) + ggtitle(\"bw=[.02, 500]\"),\n", "], ncol=2)" ] }, { "cell_type": "code", "execution_count": 18, "id": "9d0cb45a-c8e5-41a5-bdbd-6f14a7acefc7", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
\n", "
\n", "
\n", " \n", "
\n", " \n", "
\n", "
\n", " \n", " \n", " \n", " " ], "text/plain": [ "" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "gggrid([\n", " ggplot(df, aes(\"carat\", \"price\")) + geom_pointdensity(method='kde2d') + ggtitle(\"Default n\"),\n", " ggplot(df, aes(\"carat\", \"price\")) + geom_pointdensity(method='kde2d', n=3) + ggtitle(\"n=3\"),\n", " ggplot(df, aes(\"carat\", \"price\")) + geom_pointdensity(method='kde2d', n=999) + ggtitle(\"n=999\"),\n", "], ncol=2)" ] }, { "cell_type": "markdown", "id": "d6e09504-15bd-492c-896d-ab0518d2bbfb", "metadata": {}, "source": [ "### Standard Parameters" ] }, { "cell_type": "code", "execution_count": 19, "id": "76cb1240-1a0e-488a-9707-e34e745e1ada", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
\n", "
\n", "
\n", " \n", "
\n", " \n", "
\n", "
\n", " \n", " \n", " \n", " " ], "text/plain": [ "" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "gggrid([\n", " ggplot(df, aes(\"carat\", \"price\")) + geom_pointdensity(method='kde2d') + ggtitle(\"Default position\"),\n", " ggplot(df, aes(\"carat\", \"price\")) + geom_pointdensity(method='kde2d', position=position_nudge(y=5_000)) + \\\n", " coord_cartesian(ylim=[5_000, 25_000]) + ggtitle(\"position=position_nudge(y=5_000)\"),\n", "])" ] }, { "cell_type": "code", "execution_count": 20, "id": "89f845e5-98dc-4641-9750-143e962bc7bf", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
\n", "
\n", "
\n", " \n", "
\n", " \n", "
\n", "
\n", " \n", " \n", " \n", " " ], "text/plain": [ "" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "gggrid([\n", " ggplot(df, aes(\"carat\", \"price\")) + geom_pointdensity(method='kde2d') + ggtitle(\"Default show_legend\"),\n", " ggplot(df, aes(\"carat\", \"price\")) + geom_pointdensity(method='kde2d', show_legend=False) + ggtitle(\"show_legend=False\"),\n", "])" ] }, { "cell_type": "code", "execution_count": 21, "id": "dfb94554-8c9a-4f63-bdac-bf0c41b08dec", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
\n", "
\n", "
\n", " \n", "
\n", " \n", "
\n", "
\n", " \n", " \n", " \n", " " ], "text/plain": [ "" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "gggrid([\n", " ggplot(df, aes(alpha=\"..density..\")) + \\\n", " geom_pointdensity(aes(\"carat\", \"price\"), method='kde2d') + \\\n", " ggtitle(\"Default inherit_aes\"),\n", " ggplot(df, aes(alpha=\"..density..\")) + \\\n", " geom_pointdensity(aes(\"carat\", \"price\"), method='kde2d', inherit_aes=False) + \\\n", " ggtitle(\"inherit_aes=False\"),\n", "])" ] }, { "cell_type": "code", "execution_count": 22, "id": "31c463aa-896d-4881-a4ae-13d410479ad4", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
\n", "
\n", "
\n", " \n", "
\n", " \n", "
\n", "
\n", " \n", " \n", " \n", " " ], "text/plain": [ "" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "gggrid([\n", " ggplot(df, aes(\"carat\", \"price\")) + geom_pointdensity(method='kde2d') + ggtitle(\"Default manual_key\"),\n", " ggplot(df, aes(\"carat\", \"price\")) + geom_pointdensity(method='kde2d', manual_key=\"point density\") + ggtitle('manual_key=\"point density\"'),\n", "])" ] }, { "cell_type": "code", "execution_count": 23, "id": "d7626da8-947f-4fdb-a498-bea2dceb5d89", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
\n", "
\n", "
\n", " \n", "
\n", " \n", "
\n", "
\n", " \n", " \n", " \n", " " ], "text/plain": [ "" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "gggrid([\n", " ggplot(df, aes(\"carat\", \"price\")) + \\\n", " geom_pointdensity(method='kde2d') + \\\n", " coord_cartesian(xlim=[0, 5], ylim=[0, 20_000]) + \\\n", " ggtitle(\"Default sampling\"),\n", " # The title is built from the same expression as the sampling size, so the two cannot drift apart.\n", " ggplot(df, aes(\"carat\", \"price\")) + \\\n", " geom_pointdensity(method='kde2d', sampling=sampling_random(int(sample_size / 10), seed=42)) + \\\n", " coord_cartesian(xlim=[0, 5], ylim=[0, 20_000]) + \\\n", " ggtitle(\"sampling=sampling_random({0}, seed=42)\".format(int(sample_size / 10))),\n", "])" ] }, { "cell_type": "code", "execution_count": 24, "id": "0891eebe-4155-4a91-9681-bcac6913eb68", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
\n", "
\n", "
\n", " \n", "
\n", " \n", "
\n", "
\n", " \n", " \n", " \n", " " ], "text/plain": [ "" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pointdensity_tooltips = layer_tooltips().title(\"@cut diamond\")\\\n", " .line(\"position|(^x, ^y)\")\\\n", " .line(\"@|@..density..\")\\\n", " .line(\"count|@..count..\")\\\n", " .line(\"scaled|@..scaled..\")\n", "ggplot(df, aes(\"carat\", \"price\")) + \\\n", " geom_pointdensity(method='kde2d', tooltips=pointdensity_tooltips) + \\\n", " ggtitle(\"Custom tooltips\")" ] }, { "cell_type": "code", "execution_count": 25, "id": "e2bbf9cf-fa4d-44e7-a162-f620902632ba", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
\n", "
\n", "
\n", " \n", "
\n", " \n", "
\n", "
\n", " \n", " \n", " \n", " " ], "text/plain": [ "" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ggplot(df, aes(\"carat\", \"price\")) + \\\n", " geom_pointdensity(aes(paint_a=\"..density..\", paint_b=\"..density..\"),\n", " method='kde2d', shape=21,\n", " color_by='paint_a', fill_by='paint_b') + \\\n", " scale_brewer('paint_a', palette=\"Reds\") + \\\n", " scale_brewer('paint_b', palette=\"Oranges\") + \\\n", " ggtitle(\"Use of color_by and fill_by\")" ] }, { "cell_type": "markdown", "id": "9427ed0f-a0e8-4db1-b8ab-535e5e67a2a3", "metadata": {}, "source": [ "#### `map`, `map_join`, `use_crs`" ] }, { "cell_type": "code", "execution_count": 26, "id": "fbb84821-2810-4b14-8b66-dae352a545c6", "metadata": {}, "outputs": [], "source": [ "def get_border_gdf(country_name):\n", " return geocode_countries(country_name).get_boundaries(15)\n", "\n", "def get_cities_gdf(country_name, border_gdf):\n", " result = geocode_cities().scope(country_name).get_centroids()\n", " return result[result[\"geometry\"].intersects(border_gdf[\"geometry\"].iloc[0])]\n", "\n", "def get_cities_df(cities_gdf, *, value_limit=1_000_000, seed=42):\n", " np.random.seed(seed)\n", " return pd.DataFrame({\n", " \"name\": cities_gdf[\"found name\"],\n", " \"value\": np.random.randint(value_limit, size=cities_gdf.shape[0])\n", " })" ] }, { "cell_type": "code", "execution_count": 27, "id": "8ecb4bdb-f629-40f6-80de-72133d198c26", "metadata": {}, "outputs": [], "source": [ "country_name = \"Greece\"" ] }, { "cell_type": "code", "execution_count": 28, "id": "e82599c5-e418-4470-a202-702e1a3e5a04", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "EPSG:4326\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
countryfound namegeometry
0GreeceGreeceMULTIPOLYGON (((19.41069 39.83865, 19.40278 39...
\n", "
" ], "text/plain": [ " country found name geometry\n", "0 Greece Greece MULTIPOLYGON (((19.41069 39.83865, 19.40278 39..." ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "border_gdf = get_border_gdf(country_name)\n", "print(border_gdf.crs)\n", "border_gdf" ] }, { "cell_type": "code", "execution_count": 29, "id": "4f5696dd-b5fd-49b2-86c8-bccf73b2e7cb", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(562, 3)\n", "EPSG:4326\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
cityfound namegeometry
0AthensAthensPOINT (23.74147 37.99086)
1StonařovStonařovPOINT (23.74147 37.99086)
2VoulaVoulaPOINT (23.77161 37.84796)
3Municipal Unit of NikaiaMunicipal Unit of NikaiaPOINT (23.63583 37.97915)
4Municipal Unit of Nea FiladelfeiaMunicipal Unit of Nea FiladelfeiaPOINT (23.74095 38.0449)
\n", "
" ], "text/plain": [ " city found name \\\n", "0 Athens Athens \n", "1 Stonařov Stonařov \n", "2 Voula Voula \n", "3 Municipal Unit of Nikaia Municipal Unit of Nikaia \n", "4 Municipal Unit of Nea Filadelfeia Municipal Unit of Nea Filadelfeia \n", "\n", " geometry \n", "0 POINT (23.74147 37.99086) \n", "1 POINT (23.74147 37.99086) \n", "2 POINT (23.77161 37.84796) \n", "3 POINT (23.63583 37.97915) \n", "4 POINT (23.74095 38.0449) " ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cities_gdf = get_cities_gdf(country_name, border_gdf)\n", "print(cities_gdf.shape)\n", "print(cities_gdf.crs)\n", "cities_gdf.head()" ] }, { "cell_type": "code", "execution_count": 30, "id": "5fd35d3a-a6a4-43a3-aa64-2e98c3a1a3e6", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(562, 2)\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
namevalue
0Athens121958
1Stonařov671155
2Voula131932
3Municipal Unit of Nikaia365838
4Municipal Unit of Nea Filadelfeia259178
\n", "
" ], "text/plain": [ " name value\n", "0 Athens 121958\n", "1 Stonařov 671155\n", "2 Voula 131932\n", "3 Municipal Unit of Nikaia 365838\n", "4 Municipal Unit of Nea Filadelfeia 259178" ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cities_df = get_cities_df(cities_gdf)\n", "print(cities_df.shape)\n", "cities_df.head()" ] }, { "cell_type": "code", "execution_count": 31, "id": "a5c1f732-0ced-4d3a-8dfb-f28da68540c5", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
\n", "
\n", "
\n", " \n", "
\n", " \n", "
\n", "
\n", " \n", " \n", " \n", " " ], "text/plain": [ "" ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "gggrid([\n", " ggplot() + \\\n", " geom_map(data=border_gdf) + \\\n", " geom_pointdensity(map=cities_gdf) + \\\n", " scale_color_viridis(guide='none') + \\\n", " ggtitle(\"map=cities_gdf\"),\n", " ggplot() + \\\n", " geom_map(data=border_gdf) + \\\n", " geom_pointdensity(aes(size=\"value\"), data=cities_df, map=cities_gdf, map_join=[\"name\", \"found name\"]) + \\\n", " scale_color_viridis(guide='none') + \\\n", " scale_size(range=[1, 3]) + \\\n", " ggtitle('map_join=[\"name\", \"found name\"]'),\n", " ggplot() + \\\n", " geom_map(data=border_gdf, use_crs=3857) + \\\n", " geom_pointdensity(aes(size=\"value\"), data=cities_df, map=cities_gdf, map_join=[\"name\", \"found name\"], use_crs=3857) + \\\n", " scale_color_viridis(guide='none') + \\\n", " scale_size(range=[1, 3]) + \\\n", " ggtitle(\"use_crs=3857\"),\n", "]) + ggtb()" ] }, { "cell_type": "markdown", "id": "65e3b540-47f2-431a-8009-9452d5825585", "metadata": {}, "source": [ "## Stat" ] }, { "cell_type": "markdown", "id": "bc053d4e-b7d3-40b9-8364-fc061b481725", "metadata": {}, "source": [ "### `stat='identity'`" ] }, { "cell_type": "code", "execution_count": 32, "id": "0bcd0840-ce58-4614-bb7e-a33b6e0206fe", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
\n", "
\n", "
\n", " \n", "
\n", " \n", "
\n", "
\n", " \n", " \n", " \n", " " ], "text/plain": [ "" ] }, "execution_count": 32, "metadata": {}, "output_type": "execute_result" } ], "source": [ "gggrid([\n", " ggplot(df, aes(\"carat\", \"price\")) + geom_pointdensity(method='kde2d') + ggtitle(\"Default stat\"),\n", " ggplot(df, aes(\"carat\", \"price\")) + geom_pointdensity(stat='identity') + ggtitle(\"stat='identity'\"),\n", "])" ] }, { "cell_type": "markdown", "id": "84081cff-1ed3-4a9e-bf83-375d4975c187", "metadata": {}, "source": [ "### `'pointdensity'` Stat" ] }, { "cell_type": "code", "execution_count": 33, "id": "51e61173-0c99-404f-b602-efcbf1db94b5", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
\n", "
\n", "
\n", " \n", "
\n", " \n", "
\n", "
\n", " \n", " \n", " \n", " " ], "text/plain": [ "" ] }, "execution_count": 33, "metadata": {}, "output_type": "execute_result" } ], "source": [ "gggrid([\n", " ggplot(df, aes(\"carat\", \"price\")) + geom_bin2d() + ggtitle(\"geom_bin2d() with default stat\"),\n", " ggplot(df, aes(\"carat\", \"price\")) + geom_bin2d(aes(fill=\"..density..\"), stat='pointdensity', method='kde2d', width=3, height=3, size=3) + ggtitle(\"geom_bin2d(stat='pointdensity')\"),\n", "])" ] }, { "cell_type": "markdown", "id": "f3be237b-6a7b-4632-9791-2692f7ebfb68", "metadata": {}, "source": [ "## Interaction with other layers" ] }, { "cell_type": "markdown", "id": "b39dcee2-7210-4c41-8b76-71371d842531", "metadata": {}, "source": [ "### `ggmarginal()`" ] }, { "cell_type": "code", "execution_count": 34, "id": "04c02645-a939-4d9a-8c91-3ff64d9b2bd6", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
\n", "
\n", "
\n", " \n", "
\n", " \n", "
\n", "
\n", " \n", " \n", " \n", " " ], "text/plain": [ "" ] }, "execution_count": 34, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ggplot(df, aes(\"carat\", \"price\")) + geom_pointdensity(method='kde2d') + ggmarginal(\"tr\", layer=geom_density())" ] }, { "cell_type": "markdown", "id": "1a26b5ec-bfab-4a94-9687-27905e143376", "metadata": {}, "source": [ "### `ggtb()`" ] }, { "cell_type": "code", "execution_count": 35, "id": "478e39d5-9a57-4763-bdc7-67599bbd6332", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
\n", "
\n", "
\n", " \n", "
\n", " \n", "
\n", "
\n", " \n", " \n", " \n", " " ], "text/plain": [ "" ] }, "execution_count": 35, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ggplot(df, aes(\"carat\", \"price\")) + geom_pointdensity(method='kde2d') + ggtb()" ] }, { "cell_type": "markdown", "id": "729d7529-d468-44db-b289-d23cfeb57d2a", "metadata": {}, "source": [ "### Facets" ] }, { "cell_type": "code", "execution_count": 36, "id": "66263f24-19d2-4abf-b8e3-3cd5efda28f4", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
\n", "
\n", "
\n", " \n", "
\n", " \n", "
\n", "
\n", " \n", " \n", " \n", " " ], "text/plain": [ "" ] }, "execution_count": 36, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ggplot(df, aes(\"carat\", \"price\")) + geom_pointdensity(method='kde2d') + facet_grid(x=\"cut\")" ] }, { "cell_type": "markdown", "id": "0fa5051a-b900-40ca-90c7-576e8db42455", "metadata": {}, "source": [ "### Coordinate Systems" ] }, { "cell_type": "code", "execution_count": 37, "id": "b16c3f5b-13be-4db8-93a9-7d0aafd0179d", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
\n", "
\n", "
\n", " \n", "
\n", " \n", "
\n", "
\n", " \n", " \n", " \n", " " ], "text/plain": [ "" ] }, "execution_count": 37, "metadata": {}, "output_type": "execute_result" } ], "source": [ "gggrid([\n", " ggplot(df, aes(\"carat\", \"price\")) + geom_pointdensity(method='kde2d') + ggtitle(\"Default coordinate system\"),\n", " ggplot(df, aes(\"carat\", \"price\")) + geom_pointdensity(method='kde2d') + coord_flip() + ggtitle(\"coord_flip()\"),\n", " ggplot(df, aes(\"carat\", \"price\")) + geom_pointdensity(method='kde2d') + coord_polar() + ggtitle(\"coord_polar()\"),\n", "])" ] }, { "cell_type": "markdown", "id": "6e7020de-0425-4a47-983e-17e9b020dd3d", "metadata": {}, "source": [ "### Livemap" ] }, { "cell_type": "code", "execution_count": 38, "id": "bf93c0fa-ffd2-4db7-bc1f-2344bd47c0b9", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
\n", "
\n", "
\n", " \n", "
\n", " \n", "
\n", "
\n", " \n", " \n", " \n", " " ], "text/plain": [ "" ] }, "execution_count": 38, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ggplot() + \\\n", " geom_livemap(data_size_zoomin=2) + \\\n", " geom_pointdensity(aes(size=\"value\"), data=cities_df, map=cities_gdf, map_join=[\"name\", \"found name\"]) + \\\n", " scale_color_viridis(guide='none') + \\\n", " scale_size(range=[1, 3])" ] }, { "cell_type": "markdown", "id": "80225ee4-26a0-4c09-9e9b-241445791edf", "metadata": {}, "source": [ "### Themes" ] }, { "cell_type": "code", "execution_count": 39, "id": "8b96cd30-d860-4670-b9ff-3261244343c5", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
\n", "
\n", "
\n", " \n", "
\n", " \n", "
\n", "
\n", " \n", " \n", " \n", " " ], "text/plain": [ "" ] }, "execution_count": 39, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ggplot(df, aes(\"carat\", \"price\")) + \\\n", " geom_pointdensity(method='kde2d') + \\\n", " scale_color_brewer(palette=\"Oranges\") + \\\n", " theme_minimal() + \\\n", " theme(legend_position='bottom') + \\\n", " flavor_darcula()" ] }, { "cell_type": "markdown", "id": "f990d3b5-e715-4461-a8c5-00b3901673db", "metadata": {}, "source": [ "### Joint Plot" ] }, { "cell_type": "code", "execution_count": 40, "id": "03f3ab85-3009-4de8-b373-685e028a1aaf", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
\n", "
\n", "
\n", " \n", "
\n", " \n", "
\n", "
\n", " \n", " \n", " \n", " " ], "text/plain": [ "" ] }, "execution_count": 40, "metadata": {}, "output_type": "execute_result" } ], "source": [ "joint_plot(df, \"carat\", \"price\", geom='pointdensity', marginal=\"box:tr:.02\")" ] }, { "cell_type": "markdown", "id": "062c06d9-6cfa-429e-b5f0-7d3c712d0caf", "metadata": {}, "source": [ "### Residual Plot" ] }, { "cell_type": "code", "execution_count": 41, "id": "d8342ad4-8bb8-4c2a-8074-5bef18a271f7", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
\n", "
\n", "
\n", " \n", "
\n", " \n", "
\n", "
\n", " \n", " \n", " \n", " " ], "text/plain": [ "" ] }, "execution_count": 41, "metadata": {}, "output_type": "execute_result" } ], "source": [ "residual_plot(df, \"carat\", \"price\", geom='pointdensity')" ] }, { "cell_type": "markdown", "id": "3d5987ce-6869-4b24-9153-5e25289ffbbe", "metadata": {}, "source": [ "## Tests" ] }, { "cell_type": "markdown", "id": "71679921-fe85-423b-a998-da85681f7e5f", "metadata": {}, "source": [ "### Boundary Tests" ] }, { "cell_type": "code", "execution_count": 42, "id": "2e4e163a-6998-47b7-beb4-4753c60b00b8", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
\n", "
\n", "
\n", " \n", "
\n", " \n", "
\n", "
\n", " \n", " \n", " \n", " " ], "text/plain": [ "" ] }, "execution_count": 42, "metadata": {}, "output_type": "execute_result" } ], "source": [ "tests = [\n", " {\n", " 'title': \"Empty data\",\n", " 'data': {\n", " 'x': [],\n", " 'y': [],\n", " }\n", " },\n", " {\n", " 'title': \"One element\",\n", " 'data': {\n", " 'x': [0],\n", " 'y': [0],\n", " }\n", " },\n", " {\n", " 'title': \"NaN's in data\",\n", " 'data': {\n", " 'x': [0, np.nan, None, 1, 2],\n", " 'y': [0, 3, 4, np.nan, None],\n", " }\n", " },\n", "]\n", "\n", "gggrid([\n", " ggplot(t['data'], aes('x', 'y')) + \\\n", " geom_pointdensity() + \\\n", " ggtitle(t['title'])\n", " for t in tests\n", "], ncol=2)" ] }, { "cell_type": "markdown", "id": "836be9b1-115a-4b27-902c-1b701c58bdfd", "metadata": {}, "source": [ "### Emptiness Tests" ] }, { "cell_type": "code", "execution_count": 43, "id": "21c558b2-1ae7-4478-813a-26b27468d319", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
\n", "
\n", "
\n", " \n", "
\n", " \n", "
\n", "
\n", " \n", " \n", " \n", " " ], "text/plain": [ "" ] }, "execution_count": 43, "metadata": {}, "output_type": "execute_result" } ], "source": [ "def get_mapping(var):\n", " if var is None:\n", " return aes('x', 'y')\n", " else:\n", " return aes('x', 'y', color=var, fill=var)\n", "\n", "p_working = lambda var: ggplot({'x': [0], 'y': [0]}, get_mapping(var))\n", "\n", "p_no_data = lambda var: ggplot(mapping=aes('x', 'y', color=var, fill=var)) if var is not None else ggplot(mapping=aes('x', 'y'))\n", "\n", "p_empty = lambda var: ggplot({'x': [], 'y': []}, get_mapping(var))\n", "\n", "p_facet_nan = lambda var: ggplot({'x': [0, np.nan], 'y': [0, 0], 'g': [\"A\", \"B\"]}, get_mapping(var)) + facet_grid(x='g')\n", "\n", "p_facet_cross = lambda var: ggplot({'x': [0, 0], 'y': [0, 0], 'g1': [\"A\", \"B\"], 'g2': [\"C\", \"D\"]}, get_mapping(var)) + facet_grid(x='g1', y='g2')\n", "\n", "p_group = ggplot({'x': [0, np.nan], 'y': [0, 0], 'g': [\"A\", \"B\"]}, aes('x', 'y', color='g', fill='g'))\n", "\n", "p_nan = lambda var: ggplot({'x': [np.nan], 'y': [np.nan]}, get_mapping(var))\n", "\n", "p_lim = lambda var: ggplot({'x': [0], 'y': [0]}, get_mapping(var)) + xlim(1, 2)\n", "\n", "gggrid([\n", " p_working(\"..density..\") + geom_pointdensity(),\n", " p_no_data(\"..density..\") + geom_pointdensity(),\n", " p_empty(\"..density..\") + geom_pointdensity(),\n", " p_facet_nan(\"..density..\") + geom_pointdensity(),\n", " p_facet_cross(\"..density..\") + geom_pointdensity(),\n", " p_group + geom_pointdensity(),\n", " p_nan(\"..density..\") + geom_pointdensity(),\n", " p_lim(\"..density..\") + geom_pointdensity(),\n", "], ncol=2)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.16" } }, 
"nbformat": 4, "nbformat_minor": 5 }