{ "cells": [ { "cell_type": "markdown", "id": "24039ebc-170b-41bb-a0aa-95d223486c29", "metadata": {}, "source": [ "# Messages About Data Points Removed by Sampling\n", "\n", "Small examples for checking computation messages from sampling." ] }, { "cell_type": "code", "execution_count": 1, "id": "07eaa2cd-b165-4af5-8aea-1f1284d627a3", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", " \n", " \n", " " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "import numpy as np\n", "import pandas as pd\n", "\n", "from lets_plot import *\n", "LetsPlot.setup_html()" ] }, { "cell_type": "code", "execution_count": 4, "id": "d59908e1-fc2e-4fad-9713-9a54cb893f13", "metadata": {}, "outputs": [], "source": [ "def data1(n, seed=123):\n", " np.random.seed(seed)\n", " cov = [[1, -.8], \n", " [-.8, 1]]\n", " x, y = np.random.multivariate_normal(mean=[0, 0], cov=cov, size=n).T\n", " return {\"x\": x, \"y\": y}" ] }, { "cell_type": "markdown", "id": "f2287203-2ba1-40f7-b344-f10b8f455fc4", "metadata": {}, "source": [ "## Message format" ] }, { "cell_type": "markdown", "id": "aeb5d1ae-fb8b-4aac-b697-f8760daec7aa", "metadata": {}, "source": [ "One sampling function" ] }, { "cell_type": "code", "execution_count": 8, "id": "b80cbd8b-e254-4d30-9f76-ca75dbaa3dc6", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
\n", "
\n", "
\n", " \n", "
\n", " \n", "
\n", "
\n", " \n", " \n", " \n", " " ], "text/plain": [ "" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ggplot(data1(75000), aes('x', 'y')) + \\\n", " geom_point(sampling=sampling_random(500, seed=42)) " ] }, { "cell_type": "markdown", "id": "829b7646-adda-4645-b279-d155203ed32d", "metadata": {}, "source": [ "Multiple sampling functions" ] }, { "cell_type": "code", "execution_count": 9, "id": "e408acce-f301-46b6-901b-fab9f94e715d", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
\n", "
\n", "
\n", " \n", "
\n", " \n", "
\n", "
\n", " \n", " \n", " \n", " " ], "text/plain": [ "" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ggplot(data1(75000), aes('x', 'y')) + \\\n", " geom_point(sampling=sampling_random(500, seed=42) + sampling_systematic(100) + sampling_pick(50)) " ] }, { "cell_type": "markdown", "id": "ef9b141b-0b2c-48fd-a5d7-cbad809f75be", "metadata": {}, "source": [ "## Hide messages" ] }, { "cell_type": "markdown", "id": "eb6c1aef-10f7-4d48-8608-b54dfebe296f", "metadata": {}, "source": [ "The `na_rm` parameter hides messages for a single layer" ] }, { "cell_type": "code", "execution_count": 10, "id": "2492004e-c713-4b8a-9757-b05b885d19e3", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
\n", "
\n", "
\n", " \n", "
\n", " \n", "
\n", "
\n", " \n", " \n", " \n", " " ], "text/plain": [ "" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ggplot(data1(75000), aes('x', 'y')) + \\\n", " geom_point(sampling=sampling_random(200, seed=42), na_rm=True) + \\\n", " geom_point(sampling=sampling_random(500, seed=42) + sampling_systematic(100) + sampling_pick(50)) " ] }, { "cell_type": "markdown", "id": "a98b3830-e736-41d4-a832-6b7740125778", "metadata": {}, "source": [ "`theme(plot_message=element_blank())` hides all messages on the plot" ] }, { "cell_type": "code", "execution_count": 12, "id": "e84bd77e-2a49-4fa7-9324-2bed55e60ec1", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
\n", "
\n", "
\n", " \n", "
\n", " \n", "
\n", "
\n", " \n", " \n", " \n", " " ], "text/plain": [ "" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ggplot(data1(75000), aes('x', 'y')) + \\\n", " geom_point(sampling=sampling_random(200, seed=42)) + \\\n", " geom_point(sampling=sampling_random(500, seed=42) + sampling_systematic(100) + sampling_pick(50)) + \\\n", " theme(plot_message=element_blank())" ] }, { "cell_type": "code", "execution_count": 6, "id": "03d50424-acec-4c1b-8163-504d95cb8322", "metadata": {}, "outputs": [], "source": [ "df = pd.DataFrame({\n", " \"id\": list(range(1, 11)),\n", " \"x\": [4, np.nan, 1, 9, 6, 2, 10, np.nan, 7, 5],\n", " \"y\": [7, 1, 9, 10, 4, np.nan, 3, np.nan, 6, 5],\n", " \"start\": [0,0,0,0,0,0,0,0,0,0]\n", "})" ] }, { "cell_type": "code", "execution_count": 7, "id": "e1207bd9-39bd-4196-a045-37f8033bb843", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
\n", "
\n", "
\n", " \n", "
\n", " \n", "
\n", "
\n", " \n", " \n", " \n", " " ], "text/plain": [ "" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ggplot(df, aes('x', 'y')) + \\\n", " geom_point(stat='count', color = 'red') + \\\n", " geom_point(sampling=sampling_random(7, 42), color='blue')" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.20" } }, "nbformat": 4, "nbformat_minor": 5 }