{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "This page is available as an executable or viewable Jupyter Notebook:\n", "

\n", " \n", " \n", "\n", " \n", " \n", "\n", "
\n", "
" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "%use lets-plot\n", "import java.util.Random" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "// Synthetic sample: n ratings for each of the conditions A and B.\n", "// The fixed seed keeps every plot below reproducible; B is scaled by 1.5 and shifted by 1.5.\n", "val rand = Random(123)\n", "val n = 200\n", "val data = mapOf(\n", " \"cond\" to List(n) { \"A\" } + List(n) { \"B\" },\n", " \"rating\" to List(n) { rand.nextGaussian() } + List(n) { rand.nextGaussian() * 1.5 + 1.5 },\n", ")" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "// Basic histogram of \"rating\"\n", "val p = lets_plot(data) { x = \"rating\" } + ggsize(500, 250)\n", "p + geom_histogram(binWidth=0.5)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "// Histogram overlaid with kernel density curve\n", "// - histogram with density instead of count on y-axis\n", "// - overlay with transparent density plot\n", "\n", "p + geom_histogram(binWidth=0.5, color=\"black\", fill=\"white\") { y = \"..density..\" } +\n", " geom_density(alpha=0.2, fill=0xFF6666)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "// Histogram with a dashed red line at the mean rating\n", "// data maps to lists of different element types, so the column is cast to List<Double> before averaging\n", "p + geom_histogram(binWidth=.5, color=\"black\", fill=\"white\") +\n", " geom_vline(xintercept=(data[\"rating\"] as List<Double>).average(), color=\"red\", linetype=\"dashed\", size=1.0)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Histogram and density plots with multiple groups" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "val p1 = lets_plot(data) {x = \"rating\"; fill=\"cond\"} + ggsize(500, 250)\n", "\n", "// Default histogram (stacked)\n", "p1 + geom_histogram(binWidth=0.5, alpha=.5)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "// Overlaid histograms\n", "p1 + geom_histogram(binWidth=0.5, alpha=0.5, position=Pos.identity)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "// Interleaved histograms\n", "p1 + geom_histogram(binWidth=0.5, alpha=.5, position=Pos.dodge)" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "// Density plot\n", "val p2 = ggplot(data) {x=\"rating\"; color=\"cond\"} + ggsize(500, 250)\n", "p2 + geom_density()" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "// Density plot with semi-transparent fill\n", "p2 + geom_density(alpha=.3) {fill=\"cond\"} " ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{cond=[A, B], rating=[-0.011843241476365302, 1.5547269440141214]}" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "// Find the mean of each group:\n", "// pair every condition with its rating, collect the ratings per condition, then average each group\n", "val means = (data[\"cond\"] as List<String>).zip(data[\"rating\"] as List<Double>)\n", " .groupBy(keySelector = { it.first }, valueTransform = { it.second })\n", " .mapValues { it.value.average() }\n", "val cdat = mapOf(\n", " \"cond\" to means.keys,\n", " \"rating\" to means.values\n", ")\n", "cdat" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "// Overlaid histograms with means\n", "p2 + geom_histogram(alpha=.3, position=Pos.identity, size=0.0, bins=10) {fill=\"cond\"} +\n", " geom_vline(data=cdat, linetype=\"dashed\", size=1.0) {xintercept=\"rating\"; color=\"cond\"}\n" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "// Use freqpoly instead of histogram\n", "p2 + geom_freqpoly(bins=10) {color=\"cond\"} +\n", " geom_vline(data=cdat, linetype=\"dashed\", size=1.0) {xintercept=\"rating\"; color=\"cond\"}\n" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "// Density plots with means\n", "p2 + geom_density() +\n", " geom_vline(data=cdat, linetype=\"dashed\", size=1.0) {xintercept=\"rating\"; color=\"cond\"}" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Using facets" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ggplot(data) {x=\"rating\"} + \n", " geom_histogram(binWidth=.5, color=\"black\", fill=\"white\") +\n", " facet_grid(\"cond\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Box plots" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "// A basic box plot\n", "val p3 = ggplot(data) {x=\"cond\"; y=\"rating\"} + ggsize(300, 200)\n", "p3 + geom_boxplot()" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "// A basic box plot with the conditions colored\n", "p3 + geom_boxplot {fill=\"cond\"}" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "// Style outliers\n", "p3 + geom_boxplot(outlierColor=\"red\", outlierShape=8, outlierSize=5)" ] } ], "metadata": { "kernelspec": { "display_name": "Kotlin", "language": "kotlin", "name": "kotlin" }, "language_info": { "codemirror_mode": "text/x-kotlin", "file_extension": ".kt", "mimetype": "text/x-kotlin", "name": "kotlin", "pygments_lexer": "kotlin", "version": "1.4.20-dev-1121" }, "pycharm": { "stem_cell": { "cell_type": "raw", "source": [], "metadata": { "collapsed": false } } } }, "nbformat": 4, "nbformat_minor": 4 }