{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## Stratified sampling\n", "\n", "In a large dataset, a relatively small group of points might be overplotted by the dominant group. In this case, **stratified** sampling can help." ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "execution": { "iopub.execute_input": "2024-04-17T07:36:19.442338Z", "iopub.status.busy": "2024-04-17T07:36:19.441932Z", "iopub.status.idle": "2024-04-17T07:36:19.765711Z", "shell.execute_reply": "2024-04-17T07:36:19.765421Z" } }, "outputs": [ { "data": { "text/html": [ "\n", "
\n", " \n", " " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "import numpy as np\n", "import pandas as pd\n", "from lets_plot import *\n", "\n", "LetsPlot.setup_html()" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "execution": { "iopub.execute_input": "2024-04-17T07:36:19.779642Z", "iopub.status.busy": "2024-04-17T07:36:19.779377Z", "iopub.status.idle": "2024-04-17T07:36:19.781708Z", "shell.execute_reply": "2024-04-17T07:36:19.781479Z" } }, "outputs": [], "source": [ "# Population size: a handful of 'A' points among a large majority of 'B' points.\n", "N = 5000\n", "small_group = 3\n", "large_group = N - small_group\n", "\n", "# Seed the global NumPy generator so the sample is the same on every run.\n", "np.random.seed(123)\n", "data = {\n", "    'x': np.random.normal(0, 1, N),\n", "    'y': np.random.normal(0, 1, N),\n", "    # First `small_group` labels are 'A', the remaining `large_group` are 'B'.\n", "    'cond': ['A'] * small_group + ['B'] * large_group,\n", "}" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "execution": { "iopub.execute_input": "2024-04-17T07:36:19.782743Z", "iopub.status.busy": "2024-04-17T07:36:19.782665Z", "iopub.status.idle": "2024-04-17T07:36:19.837983Z", "shell.execute_reply": "2024-04-17T07:36:19.837776Z" } }, "outputs": [ { "data": { "text/html": [ " \n", " " ], "text/plain": [ "