# Simple Benchmark Analysis
This notebook demonstrates how you can analyze and plot benchmark results from a single benchmark run.
Several projects exist in the `examples` folder; this notebook assumes we are working on the
JVM part of the `kotlin-multiplatform` project, but the same approach can be used for the other projects.

First, you need to run the benchmark. This can be done by running this command from the root of the project:

```shell
./gradlew :examples:kotlin-multiplatform:jvmBenchmark
```

Once it is completed, run this notebook, and it will automatically find the latest result.

In [1]:
%use serialization, dataframe, kandy

In [2]:
// Serialization classes matching the JMH-like JSON format.
// We define these classes manually so that we can keep `params` as a JsonObject, which lets us handle
// them in a generic manner. If your benchmarks have fixed params, using `"<jsonText>".deserializeThis()`
// is faster and easier.

/**
 * A single benchmark entry as it appears in the JSON result report.
 *
 * [forks] falls back to `1` and [params] to `null` when the report omits them.
 * [params] is deliberately kept as a raw [JsonObject] so that arbitrary parameter
 * sets can be processed without a dedicated class per benchmark.
 */
@Serializable
data class Benchmark(
    val benchmark: String,
    val mode: String,
    val forks: Int = 1,
    val warmupIterations: Int,
    val warmupTime: String,
    val measurementIterations: Int,
    val measurementTime: String,
    val primaryMetric: PrimaryMetric,
    val secondaryMetrics: Map<String, PrimaryMetric>,
    val params: JsonObject? = null,
)

/**
 * Score information for one metric of a benchmark entry.
 *
 * The same shape is used for [Benchmark.primaryMetric] and for every value in
 * [Benchmark.secondaryMetrics].
 */
@Serializable
data class PrimaryMetric(
    val score: Double,
    val scoreError: Double,
    val scoreConfidence: List<Double>,
    val scorePercentiles: Map<String, Double>,
    val scoreUnit: String,
    val rawData: List<List<Double>>,
)

In [3]:
import java.nio.file.Files
import java.nio.file.attribute.BasicFileAttributes
import kotlin.io.path.exists
import kotlin.io.path.forEachDirectoryEntry
import kotlin.io.path.isDirectory
import kotlin.io.path.listDirectoryEntries
import kotlin.io.path.readText

// Find the latest result file, based on the creation time of the run directories.
// The lookup expects `runsDir` to contain one sub-directory per benchmark run, each
// holding a `jvm.json` report.
val runsDir = notebook.workingDir.resolve("kotlin-multiplatform/build/reports/benchmarks/main")

// Fail with an actionable message instead of an opaque file-system exception
// when the benchmark has not been run yet.
check(runsDir.exists()) {
    "Benchmark report directory not found: $runsDir. Run the benchmark before executing this notebook."
}

// Pick the most recently created run directory; `maxByOrNull` avoids sorting the whole list
// and lets us report an empty directory explicitly.
val lastRunDir = runsDir.listDirectoryEntries()
    .filter { it.isDirectory() }
    .maxByOrNull { dir -> Files.readAttributes(dir, BasicFileAttributes::class.java).creationTime() }
    ?: error("No benchmark runs found in $runsDir. Run the benchmark before executing this notebook.")

val outputFile = lastRunDir.resolve("jvm.json")
check(outputFile.exists()) {
    "Benchmark result file not found: $outputFile."
}

// Only the fields declared on `Benchmark` / `PrimaryMetric` are needed, so any other keys
// in the report are ignored.
val json = Json { ignoreUnknownKeys = true }
val benchmarkData = json.decodeFromString<List<Benchmark>>(outputFile.readText())

In [4]:
import kotlinx.serialization.json.encodeToJsonElement

/**
 * Flattened view of one benchmark result, holding only the values needed for plotting.
 *
 * The property names become the column names once a list of these is converted
 * to a DataFrame.
 *
 * NOTE(review): this class has the same name as the `@Serializable` `Benchmark` declared in an
 * earlier cell. Presumably later references to `Benchmark` resolve to this declaration, so
 * re-running the result-loading cell after this one may fail — confirm.
 */
data class Benchmark(
    val name: String,
    val params: String,
    val score: Double,
    val error: Double,
    val unit: String,
)

// Split the benchmark results into groups. Generally, each group consists of all tests whose
// names share the same prefix before the last ".". The exception is a parameterized test:
// there, each test (with all its parameter variants) is put in its own group.
val benchmarkGroups = benchmarkData
    .groupBy { result ->
        // Results with params are keyed by their full name; all others by the name
        // with its last segment stripped.
        if (result.params == null) result.benchmark.substringBeforeLast(".") else result.benchmark
    }
    .mapValues { (_, results) ->
        results.map { result ->
            // Sort the parameters by key so the rendered string is stable across runs.
            val sortedParams = result.params?.entries.orEmpty().sortedBy { entry -> entry.key }
            val metric = result.primaryMetric
            Benchmark(
                name = result.benchmark,
                params = sortedParams.joinToString(",") { (key, value) -> "$key=${value.jsonPrimitive.content}" },
                score = metric.score,
                error = metric.scoreError,
                unit = metric.scoreUnit,
            )
        }.toDataFrame()
    }

// Uncomment this to see the benchmark data as DataFrames
// benchmarkGroups.forEach {
//     DISPLAY(it.value)
// }

In [5]:
// Prepare the data frames for plotting:
// - Add the calculated columns errorMin / errorMax (score -/+ error).
// - Insert a "label" column as the first column:
//     - tests with parameters use the parameter values as the label,
//     - tests without parameters use the test name as the label.
// - Drop the "name" and "params" columns, which are no longer needed.
val plotData = benchmarkGroups.mapValues { (_, results) ->
    results
        .add("errorMin") { row -> row.getValue<Double>("score") - row.getValue<Double>("error") }
        .add("errorMax") { row -> row.getValue<Double>("score") + row.getValue<Double>("error") }
        .insert("label") { row ->
            // Re-format the benchmark labels to make them look "nicer".
            val params = row.getValue<String>("params")
            if (params.isBlank()) {
                // Keep only the last name segment and strip a trailing "Benchmark".
                row.getValue<String>("name").substringAfterLast(".").removeSuffix("Benchmark")
            } else {
                // One parameter per line.
                params.replace(",", "\n")
            }
        }.at(0)
        .remove("name", "params")
}

In [6]:
import org.jetbrains.letsPlot.Geom
import org.jetbrains.letsPlot.core.spec.plotson.coord
import org.jetbrains.letsPlot.themes.margin

// Render every group as a bar plot, with the score error drawn as error bars.
// All tests in a group share one axis, so this works best when their scores are
// roughly on the same "scale". If they are not, some plots might look very squished;
// in that case, try a LOG10 scale or adjust the axis limits to focus on the changes.
plotData.forEach { (groupName, group) ->
    // The unit shown on the axis is taken from the first row of the group.
    val scoreUnit = group.first().getValue<String>("unit")
    // Adjust the height of the Kandy plot based on the number of tests.
    val plotHeight = (50 * group.size().nrow) + 100

    val plot = group.plot {
        bars {
            x("label") {
                axis.name = ""
            }
            y("score")
        }
        errorBars {
            x("label")
            y("score")
            yMin("errorMin")
            yMax("errorMax")
        }
        coordinatesTransformation = CoordinatesTransformation.cartesianFlipped()
        // y.axis.limits = group.min("errorMin")..group.max("errorMax")
        layout {
            this.yAxisLabel = scoreUnit
            style {
                global {
                    title {
                        margin(10.0, 0.0)
                    }
                    text {
                        fontFamily = FontFamily.MONO
                    }
                }
            }
            size = 800 to plotHeight
        }
    }

    // Show the group name as a heading, followed by its plot.
    DISPLAY(HTML("<h4 >$groupName</h4>"))
    DISPLAY(plot)
}