{ "cells": [ { "cell_type": "markdown", "id": "greater-request", "metadata": {}, "source": [ "# Q-Q Plots" ] }, { "cell_type": "code", "execution_count": 1, "id": "funny-secretariat", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", " " ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ " " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "%useLatestDescriptors\n", "%use lets-plot\n", "%use dataframe" ] }, { "cell_type": "code", "execution_count": 2, "id": "amateur-dress", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Lets-Plot Kotlin API v.4.4.2. Frontend: Notebook with dynamically loaded JS. Lets-Plot JS v.4.0.0." ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "LetsPlot.getInfo()" ] }, { "cell_type": "code", "execution_count": 3, "id": "intermediate-screening", "metadata": {}, "outputs": [ { "data": { "application/kotlindataframe+json": "{\"nrow\":3,\"ncol\":12,\"columns\":[\"untitled\",\"manufacturer\",\"model\",\"displ\",\"year\",\"cyl\",\"trans\",\"drv\",\"cty\",\"hwy\",\"fl\",\"class\"],\"kotlin_dataframe\":[{\"untitled\":1,\"manufacturer\":\"audi\",\"model\":\"a4\",\"displ\":1.8,\"year\":1999,\"cyl\":4,\"trans\":\"auto(l5)\",\"drv\":\"f\",\"cty\":18,\"hwy\":29,\"fl\":\"p\",\"class\":\"compact\"},{\"untitled\":2,\"manufacturer\":\"audi\",\"model\":\"a4\",\"displ\":1.8,\"year\":1999,\"cyl\":4,\"trans\":\"manual(m5)\",\"drv\":\"f\",\"cty\":21,\"hwy\":29,\"fl\":\"p\",\"class\":\"compact\"},{\"untitled\":3,\"manufacturer\":\"audi\",\"model\":\"a4\",\"displ\":2.0,\"year\":2008,\"cyl\":4,\"trans\":\"manual(m6)\",\"drv\":\"f\",\"cty\":20,\"hwy\":31,\"fl\":\"p\",\"class\":\"compact\"}]}", "text/html": [ " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "\n", "

DataFrame: rowsCount = 3, columnsCount = 12

\n", " \n", " \n", " " ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "val mpg = DataFrame.readCSV(\"https://raw.githubusercontent.com/JetBrains/lets-plot-kotlin/master/docs/examples/data/mpg.csv\")\n", "mpg.head(3)\n" ] }, { "cell_type": "code", "execution_count": 4, "id": "italic-black", "metadata": {}, "outputs": [], "source": [ "val hwy = \"hwy\"\n", "val cty = \"cty\"\n", "val drv = \"drv\"\n" ] }, { "cell_type": "markdown", "id": "pending-gender", "metadata": {}, "source": [ "## Two types of Q-Q plots" ] }, { "cell_type": "markdown", "id": "varied-kidney", "metadata": {}, "source": [ "### `geomQQ()` and `geomQQLine()` functions." ] }, { "cell_type": "code", "execution_count": 5, "id": "cordless-boston", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "letsPlot(mpg.toMap()) {sample = hwy} + \n", " geomQQ(size = 5, alpha = .3) + \n", " geomQQLine(size = 1) + \n", " ggtitle(\"Distribution of highway miles per gallon\",\n", " \"Comparison of sample quantiles with normal distribution quantiles\")" ] }, { "cell_type": "markdown", "id": "underlying-knock", "metadata": {}, "source": [ "### `geomQQ2()` and `geomQQ2Line()` functions." ] }, { "cell_type": "code", "execution_count": 6, "id": "freelance-audit", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "letsPlot(mpg.toMap()) {x = cty; y = hwy} + \n", " geomQQ2(size = 5, alpha = .3) + \n", " geomQQ2Line(size = 1) + \n", " ggtitle(\"City miles vs. highway miles (per gallon)\",\n", " \"Comparison of quantiles of two sample distributions\")" ] }, { "cell_type": "markdown", "id": "narrative-honey", "metadata": {}, "source": [ "## Quick Q-Q plot: the `qqPlot()` function\n", "\n" ] }, { "cell_type": "code", "execution_count": 7, "id": "arabic-wyoming", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "qqPlot(mpg.toMap(), sample = hwy) + \n", " ggtitle(\"Distribution of highway miles per gallon\",\n", " \"Comparison of sample quantiles with normal distribution quantiles\")" ] }, { "cell_type": "code", "execution_count": 8, "id": "supported-dairy", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "qqPlot(mpg.toMap(), x = cty, y = hwy) + \n", " ggtitle(\"City miles vs. highway miles (per gallon)\",\n", " \"Comparison of quantiles of two sample distributions\")" ] }, { "cell_type": "markdown", "id": "blessed-worcester", "metadata": {}, "source": [ "## Comparison with other 'theoretical' distributions\n", "The `distribution` parameter of the `qqPlot()` function." ] }, { "cell_type": "code", "execution_count": 9, "id": "vulnerable-flavor", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "val p1 = qqPlot(mpg.toMap(), hwy, distribution = \"norm\", quantiles = .1 to .9) + \n", " ggtitle(\"Normal distribution\")\n", "val p2 = qqPlot(mpg.toMap(), hwy, distribution = \"uniform\", quantiles = .1 to .9) + \n", " ggtitle(\"Uniform distribution\")\n", "val p3 = qqPlot(mpg.toMap(), hwy, distribution = \"t\", quantiles = .1 to .9) + \n", " ggtitle(\"Student's t-distribution\")\n", "val p4 = qqPlot(mpg.toMap(), hwy, distribution = \"exp\", quantiles = .1 to .9) + \n", " ggtitle(\"Exponential distribution\")\n", "\n", " \n", "gggrid(listOf(p1, p2, p3, p4), 2, 400, 250) " ] }, { "cell_type": "markdown", "id": "occupational-fever", "metadata": {}, "source": [ "## Q-Q stats with other geometries" ] }, { "cell_type": "code", "execution_count": 10, "id": "referenced-black", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "letsPlot(mpg.toMap()) {x = cty; y = hwy; color = drv} + \n", " geomLine(stat = Stat.qq2()) + \n", " geomPoint(stat = Stat.qq2(), shape = 15) +\n", " geomLine(stat = Stat.qq2Line(), color = \"#636363\", linetype = 5) +\n", " facetGrid(x = drv, scales = \"free\") +\n", " xlab(\"cty quantiles\") + ylab(\"hwy quantiles\")" ] } ], "metadata": { "kernelspec": { "display_name": "Kotlin", "language": "kotlin", "name": "kotlin" }, "language_info": { "codemirror_mode": "text/x-kotlin", "file_extension": ".kt", "mimetype": "text/x-kotlin", "name": "kotlin", "nbconvert_exporter": "", "pygments_lexer": "kotlin", "version": "1.8.20" } }, "nbformat": 4, "nbformat_minor": 5 }