<em><sub>This page is available as an executable or viewable <strong>Jupyter Notebook</strong>:</sub></em>
<br/><br/>
<a href="https://mybinder.org/v2/gh/JetBrains/lets-plot-kotlin/v0.0.22demos2?filepath=docs%2Fexamples%2Fjupyter-notebooks%2Fdistributions.ipynb"
   target="_parent"> 
   <img align="left" 
        src="https://mybinder.org/badge_logo.svg">
</a>
<a href="https://nbviewer.jupyter.org/github/JetBrains/lets-plot-kotlin/blob/master/docs/examples/jupyter-notebooks/distributions.ipynb" 
   target="_parent"> 
   <img align="right" 
        src="https://raw.githubusercontent.com/jupyter/design/master/logos/Badges/nbviewer_badge.png" 
        width="109" height="20">
</a>
<br/>
<br/>

In [1]:
%use lets-plot
import java.util.Random

In [2]:
// Synthetic data set with two conditions of n observations each:
//  - "A": raw Gaussian draws
//  - "B": Gaussian draws multiplied by 1.5 and shifted by 1.5
// The generator is seeded, so the same values are produced on every run.
val rand = java.util.Random(123)
val n = 200
val data = mapOf<String, Any>(
    // First n labels are "A", the remaining n are "B".
    "cond" to List(2 * n) { i -> if (i < n) "A" else "B" },
    // Draws are consumed in index order: the first n feed "A", the next n feed "B".
    "rating" to List(2 * n) { i ->
        val draw = rand.nextGaussian()
        if (i < n) draw else draw * 1.5 + 1.5
    },
)

In [3]:
// Base plot reused by the following cells: "rating" on the x-axis, sized 500 x 250.
val p = lets_plot(data) { x = "rating" } + ggsize(500, 250)

// Basic histogram of "rating" with a bin width of 0.5.
p + geom_histogram(binWidth = 0.5)

In [4]:
// Histogram with a kernel density curve drawn over it:
//  - the histogram maps y to "..density.." instead of the default count
//  - the density layer is filled and mostly transparent (alpha = 0.2)
p +
    geom_histogram(binWidth = 0.5, color = "black", fill = "white") { y = "..density.." } +
    geom_density(alpha = 0.2, fill = 0xFF6666)

In [5]:
// Histogram with a dashed red vertical line at the mean of all "rating" values.
val meanRating = (data["rating"] as List<Double>).average()
p +
    geom_histogram(binWidth = 0.5, color = "black", fill = "white") +
    geom_vline(xintercept = meanRating, color = "red", linetype = "dashed", size = 1.0)

### Histogram and density plots with multiple groups

In [6]:
// Base plot for the grouped histograms: "rating" on x, fill taken from "cond".
val p1 = lets_plot(data) {
    x = "rating"
    fill = "cond"
} + ggsize(500, 250)

// Histogram with the default position (stacked), half-transparent bars.
p1 + geom_histogram(binWidth = 0.5, alpha = 0.5)

In [7]:
// Overlaid histograms: same layer as before, but positioned with Pos.identity.
p1 + geom_histogram(
    binWidth = 0.5,
    alpha = 0.5,
    position = Pos.identity
)

In [8]:
// Interleaved histograms: same layer again, this time positioned with Pos.dodge.
p1 + geom_histogram(
    binWidth = 0.5,
    alpha = 0.5,
    position = Pos.dodge
)

In [9]:
// Base plot for the remaining grouped examples: "rating" on x, line color taken from "cond".
val p2 = ggplot(data) {
    x = "rating"
    color = "cond"
} + ggsize(500, 250)

// Density plot, one curve per condition.
p2 + geom_density()

In [10]:
// Density plot whose area is filled by "cond" and drawn semi-transparent (alpha = 0.3).
p2 + geom_density(alpha = 0.3) { fill = "cond" }

In [11]:
// Compute the mean "rating" of each "cond" group:
// pair every label with its rating, collect the ratings per label, then average them.
val means = (data["cond"] as List<String>)
    .zip(data["rating"] as List<Double>)
    .groupBy({ (cond, _) -> cond }, { (_, rating) -> rating })
    .mapValues { (_, ratings) -> ratings.average() }

// Column-oriented data for the mean lines: labels in "cond", their means in "rating".
val cdat = mapOf(
    "cond" to means.keys,
    "rating" to means.values
)

// Last expression of the cell, so the map is shown as the cell output.
cdat

{cond=[A, B], rating=[-0.011843241476365302, 1.5547269440141214]}

In [12]:
// Overlaid 10-bin histograms filled by "cond", plus a dashed vertical line
// at each group's mean taken from cdat.
p2 +
    geom_histogram(alpha = 0.3, position = Pos.identity, size = 0.0, bins = 10) { fill = "cond" } +
    geom_vline(data = cdat, linetype = "dashed", size = 1.0) {
        xintercept = "rating"
        color = "cond"
    }


In [13]:
// Same idea with geom_freqpoly instead of geom_histogram:
// 10-bin frequency polygons colored by "cond", plus the dashed group-mean lines from cdat.
p2 +
    geom_freqpoly(bins = 10) { color = "cond" } +
    geom_vline(data = cdat, linetype = "dashed", size = 1.0) {
        xintercept = "rating"
        color = "cond"
    }


In [14]:
// Density curves per condition, plus the dashed group-mean lines from cdat.
p2 +
    geom_density() +
    geom_vline(data = cdat, linetype = "dashed", size = 1.0) {
        xintercept = "rating"
        color = "cond"
    }

### Using facets

In [15]:
// Histogram of "rating" faceted by "cond" via facet_grid.
ggplot(data) { x = "rating" } +
    geom_histogram(binWidth = 0.5, color = "black", fill = "white") +
    facet_grid("cond")

### Box plots

In [16]:
// Base plot for the box plots: "cond" on x, "rating" on y, sized 300 x 200.
val p3 = ggplot(data) {
    x = "cond"
    y = "rating"
} + ggsize(300, 200)

// Basic box plot with default settings.
p3 + geom_boxplot()

In [17]:
// Basic box plot with each box filled according to its "cond" value.
p3 + geom_boxplot() { fill = "cond" }

In [18]:
// Box plot with customized outlier points: red, shape 8, size 5.
p3 + geom_boxplot(
    outlierColor = "red",
    outlierShape = 8,
    outlierSize = 5
)