{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## Pick Sampling" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "execution": { "iopub.execute_input": "2024-03-26T14:31:04.380867Z", "iopub.status.busy": "2024-03-26T14:31:04.380468Z", "iopub.status.idle": "2024-03-26T14:31:04.704998Z", "shell.execute_reply": "2024-03-26T14:31:04.704763Z" } }, "outputs": [], "source": [ "import random, string\n", "\n", "import numpy as np\n", "\n", "from lets_plot import *" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "execution": { "iopub.execute_input": "2024-03-26T14:31:04.706447Z", "iopub.status.busy": "2024-03-26T14:31:04.706333Z", "iopub.status.idle": "2024-03-26T14:31:04.708434Z", "shell.execute_reply": "2024-03-26T14:31:04.708261Z" } }, "outputs": [ { "data": { "text/html": [ "\n", "
\n", " \n", " " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "LetsPlot.setup_html()" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "execution": { "iopub.execute_input": "2024-03-26T14:31:04.721513Z", "iopub.status.busy": "2024-03-26T14:31:04.721331Z", "iopub.status.idle": "2024-03-26T14:31:04.722713Z", "shell.execute_reply": "2024-03-26T14:31:04.722540Z" } }, "outputs": [], "source": [ "np.random.seed(42)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "execution": { "iopub.execute_input": "2024-03-26T14:31:04.723837Z", "iopub.status.busy": "2024-03-26T14:31:04.723723Z", "iopub.status.idle": "2024-03-26T14:31:04.725904Z", "shell.execute_reply": "2024-03-26T14:31:04.725706Z" } }, "outputs": [], "source": [ "def gen_word(length):\n", "    \"\"\"Return a random word made of `length` lowercase ASCII letters.\"\"\"\n", "    letters = string.ascii_lowercase\n", "    return ''.join(random.choice(letters) for _ in range(length))\n", "\n", "def gen_word_set(n, word_length=5):\n", "    \"\"\"Return a set of `n` distinct random words of `word_length` letters.\n", "\n", "    The stdlib `random` generator is re-seeded with 1 on every call, so the\n", "    same arguments always yield the same words.\n", "\n", "    Raises ValueError when fewer than `n` distinct words exist.\n", "    \"\"\"\n", "    max_words = len(string.ascii_lowercase) ** word_length\n", "    if n > max_words:\n", "        # The loop below could never finish in this case.\n", "        raise ValueError('cannot generate {} distinct words of length {}'.format(n, word_length))\n", "    random.seed(1)\n", "    word_set = set()\n", "    while len(word_set) < n:\n", "        word_set.add(gen_word(word_length))\n", "    return word_set\n", "\n", "def data(n, word_set):\n", "    \"\"\"Draw `n` random observations and return them as dict(word=..., g=...).\n", "\n", "    Each observation pairs a word picked from `word_set` with a group label\n", "    picked from 'a', 'b', 'c'. Draws use NumPy's global generator, so call\n", "    `np.random.seed` beforehand for repeatable data.\n", "    \"\"\"\n", "    # Sort first: a set of strings iterates in a hash-dependent order that\n", "    # changes between interpreter sessions, which would map the same seeded\n", "    # draws to different words on every kernel restart.\n", "    word_choice = sorted(word_set)\n", "    words = []\n", "    groups = []\n", "    for _ in range(n):\n", "        words.append(np.random.choice(word_choice))\n", "        groups.append(np.random.choice(['a', 'b', 'c']))\n", "\n", "    return dict(word=words, g=groups)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "execution": { "iopub.execute_input": "2024-03-26T14:31:04.727048Z", "iopub.status.busy": "2024-03-26T14:31:04.726935Z", "iopub.status.idle": "2024-03-26T14:31:04.761694Z", "shell.execute_reply": "2024-03-26T14:31:04.761509Z" } }, "outputs": [], "source": [ "N = 500\n", "word_set = gen_word_set(N)\n", "dat = data(1000, word_set)\n", "p = ggplot(dat, aes(x='word'))" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "execution": { "iopub.execute_input": "2024-03-26T14:31:04.762777Z", "iopub.status.busy": "2024-03-26T14:31:04.762700Z", "iopub.status.idle": 
"2024-03-26T14:31:04.796818Z", "shell.execute_reply": "2024-03-26T14:31:04.796617Z" } }, "outputs": [ { "data": { "text/html": [ " \n", " " ], "text/plain": [ "