{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "### Pick Sampling" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "%useLatestDescriptors\n", "%use lets-plot" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "fun genWord(length:Int):String {\n", " val letters = ('a'..'z')\n", " return List(length) { letters.random() }.joinToString(\"\")\n", "} \n", "\n", "fun genWordSet(n:Int):Set<String> {\n", " val words = HashSet<String>()\n", " while(words.size < n) {\n", " words.add(genWord(5))\n", " }\n", " return words\n", "}\n", "\n", "fun data(n:Int, words:Set<String>):Map<String, Any> {\n", " return mapOf(\n", " \"word\" to List(n) { words.random() },\n", " \"g\" to List(n) { listOf('a','b','c').random() }\n", " )\n", "}" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "// Number of unique words exceeds threshold (50) of default 'pick' sampling on bar chart.\n", "val words = genWordSet(500)\n", "val dat = data(1000, words)\n", "val p = ggplot(dat) { x = \"word\" }" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "// Disable sampling to see the overplotting.\n", "p + geomBar(sampling = samplingNone, size = 0)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "// Draw plot with default sampling.\n", "p + geomBar(size = 0)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "// 'pick' sampling preserves groups on bar chart.\n", "p + geomBar(size = 0) { fill = \"g\" }" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "// Ordering words by `count` might be a good idea.\n", "p + geomBar(size = 0) {\n", " fill = \"g\"\n", " x = asDiscrete(\"word\", orderBy = \"..count..\")\n", "}" ] } ], "metadata": { "kernelspec": { "display_name": "Kotlin", "language": "kotlin", "name": "kotlin" }, "language_info": { "codemirror_mode": "text/x-kotlin", "file_extension": ".kt", "mimetype": "text/x-kotlin", "name": "kotlin", "nbconvert_exporter": "", "pygments_lexer": "kotlin", "version": "1.8.0-dev-3517" } }, "nbformat": 4, "nbformat_minor": 4 }