{ "cells": [ { "cell_type": "markdown", "id": "1016084f-3daa-4692-8121-994d09387b23", "metadata": {}, "source": [ "# Regression and Other Stories: Heights and weights\n", "\n", "Height and weight distributions of women and men, illustrating the central limit theorem and the normal distribution. See Chapter 3 in Regression and Other Stories.\n", "\n", "----" ] }, { "cell_type": "markdown", "id": "6a595253-68cf-48d1-a147-ad192f4b1dbe", "metadata": {}, "source": [ "### Load packages" ] }, { "cell_type": "code", "execution_count": 1, "id": "21ce06f2-9c6e-48e6-a474-f665b8aff16f", "metadata": {}, "outputs": [], "source": [ "using StatsPlots, Distributions, KernelDensity" ] }, { "cell_type": "markdown", "id": "c6f7671d-b190-4590-9624-2fdc01031b08", "metadata": {}, "source": [ "### Summary data of height and weight distributions of women and men" ] }, { "cell_type": "code", "execution_count": 2, "id": "bbada44e-ba6c-4dee-8846-7b088f8aecbb", "metadata": {}, "outputs": [], "source": [ "# Binned counts, each rescaled by a fixed ratio (10339/74167 for women, 9983/67552 for men).\n", "height_counts_women = [80, 107, 296, 695, 1612, 2680, 4645, 8201, 9948, 11733, 10270, 9942, 6181, 3990, 2131, 1154, 245, 257, 0, 0, 0, 0] * 10339 / 74167\n", "weight_counts_women = [362, 1677, 4572, 9363, 11420, 12328, 9435, 7023, 5047, 3621, 2753, 2081, 1232, 887, 2366] * 10339 / 74167\n", "height_counts_men = [0, 0, 0, 0, 0, 0, 0, 542, 668, 1221, 2175, 4213, 5535, 7980, 9566, 9578, 8867, 6716, 5019, 2745, 1464, 1263] * 9983 / 67552;" ] }, { "cell_type": "markdown", "id": "bdc8cef5-a5e6-4ab7-a435-3e7a60f7bf13", "metadata": {}, "source": [ "### Height distribution for all adults" ] }, { "cell_type": "code", "execution_count": 3, "id": "4cf43c1f-2182-4814-ad98-de16a44aced1", "metadata": {}, "outputs": [], "source": [ "# Element-wise total of the men's and women's height counts.\n", "height_counts = height_counts_men .+ height_counts_women;" ] }, { "cell_type": "markdown", "id": "1b640f2a-e848-4983-9684-5872b1252a4e", "metadata": {}, "source": [ "### Tick labels for heights in inches\n" ] }, { "cell_type": "code", "execution_count": 4, "id": "a893c228-0161-404c-9d43-05e25c508ab1", "metadata": {}, "outputs": [], "source": [
"height_hist_names = [\"\",\"55\",\"\",\"\",\"\",\"\",\"60\",\"\",\"\",\"\",\"\",\"65\",\"\",\"\",\"\",\"\",\"70\",\"\",\"\",\"\",\"\",\"75\"];" ] }, { "cell_type": "markdown", "id": "76009adc-ad92-427e-87bb-5f42921e20d7", "metadata": {}, "source": [ "### Bar plots" ] }, { "cell_type": "code", "execution_count": 5, "id": "cac86c6e-089f-4917-974a-19bab0d3c23b", "metadata": {}, "outputs": [ { "data": { "image/svg+xml": [ "\n", "\n", "\n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "bar(\n", " height_counts_women,\n", " legend=false,\n", " color=:grey80,\n", " title=\"heights of women\\n(histogram)\",\n", " xlab=\"height\",\n", " ylab=\"Count\",\n", " xticks=(1:22,height_hist_names),\n", " xaxis=false,\n", " grid=false\n", ")" ] }, { "cell_type": "code", "execution_count": 6, "id": "7d98df31-cc7d-4f51-9db8-99977d2b3cae", "metadata": {}, "outputs": [ { "data": { "image/svg+xml": [ "\n", "\n", "\n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "bar(\n", " height_counts,\n", " legend=false,\n", " color=:grey80,\n", " title=\"heights of all adults\\n(histogram)\",\n", " 
" xlab=\"height\",\n", " ylab=\"Count\",\n", " xticks=(1:22,height_hist_names),\n", " xaxis=false, grid=false\n", ")" ] }, { "cell_type": "code", "execution_count": 7, "id": "557bf999-ba2e-472a-a354-dc84f77b4788", "metadata": {}, "outputs": [ { "data": { "image/svg+xml": [ "\n", "\n", "\n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "bar(\n", " weight_counts_women,\n", " legend=false,\n", " color=:grey80,\n", " title=\"weights of women\\n(histogram)\",\n", " xlab=\"weight\",\n", " ylab=\"Count\",\n", " xticks=false,\n", " xaxis=false, grid=false\n", ")" ] }, { "cell_type": "markdown", "id": "51ac41fd-f393-4b95-830e-fbeb86f1ae26", "metadata": {}, "source": [ "### Normal distribution for heights of women" ] }, { "cell_type": "code", "execution_count": 8, "id": "e44e4977-9b5d-45de-b5ca-9c350705a438", "metadata": {}, "outputs": [ { "data": { "image/svg+xml": [ "\n", "\n", "\n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Women's heights: mean 63.7 in, sd 2.7 in. These must match the women's\n", "# component of the all-adults mixture plotted further down.\n", "plot(\n", " 52:0.1:81,\n", " Normal(63.7,2.7),\n", " legend=false, grid=false, yaxis=false, yticks=false,\n", " xlabel=\"height (inches)\",\n", " title=\"heights of women\\n(normal distribution)\",\n", " ylim=(0,Inf),\n", " color=:black\n", ")" ] }, { "cell_type": "markdown", "id": "f5e132c7-cb7f-4f86-999c-de9fa77b39c3", "metadata": {}, "source": [ "### Normal distribution for heights of men" ] }, { "cell_type": "code", "execution_count": 9, "id": "b1a97058-3ddf-4d42-b796-0ccabd7a2876",
"metadata": {}, "outputs": [ { "data": { "image/svg+xml": [ "\n", "\n", "\n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "plot(\n", " 52:0.1:81,\n", " Normal(69.1,2.9),\n", " legend=false, grid=false, yaxis=false, yticks=false,\n", " xlabel=\"height (inches)\",\n", " title=\"heights of men\\n(normal distribution)\",\n", " ylim=(0,Inf),\n", " color=:black\n", ")" ] }, { "cell_type": "markdown", "id": "2df56d86-9ccb-490d-9a85-9ef914ec698e", "metadata": {}, "source": [ "### Mixture of normals distribution for heights of all adults" ] }, { "cell_type": "code", "execution_count": 10, "id": "14a227a9-e354-42d7-928f-1c91fa6c582c", "metadata": {}, "outputs": [ { "data": { "image/svg+xml": [ "\n", "\n", "\n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 52/48 mixture of the women's and men's normal height distributions.\n", "height_mixture = MixtureModel(\n", "    [Normal(63.7,2.7), Normal(69.1,2.9)],\n", "    [0.52,0.48]\n", ")\n", "\n", "# Plot the exact mixture density on the same grid as the two panels above.\n", "# Smoothing a million unseeded random draws with a KDE gave a slightly\n", "# different, bandwidth-blurred curve on every run.\n", "height_grid = 52:0.1:81\n", "plot(\n", "    height_grid,\n", "    pdf.(height_mixture, height_grid),\n", "    legend=false, grid=false, yaxis=false, yticks=false,\n", "    xlabel=\"height (inches)\",\n", "    title=\"heights of all adults\\n(not a normal distribution)\",\n", "    ylim=(0,Inf),\n", "    color=:black\n", ")" ] }, { "cell_type": "markdown", "id": "c1396a3f-120f-4e0a-8a40-c314c4dfe154", "metadata": {}, "source": [ "### Normal distribution for log weights of men" ] }, { "cell_type": "code", "execution_count": 11, "id": "821585d2-012f-4493-9451-42994dbec609", "metadata": {}, "outputs": [ { "data": { "image/svg+xml": [ "\n", "\n", "\n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", "\n", "\n", "\n", "\n", "\n",
"\n", "\n", "\n", "\n" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Density of the normal distribution used for log weights (mean 5.13, sd 0.17).\n", "plot(\n", "    4:0.01:6,\n", "    Normal(5.13, 0.17);\n", "    xlabel=\"logarithm of weight in pounds\",\n", "    title=\"log weights of men\\n(normal distribution)\",\n", "    ylims=(0, Inf),\n", "    color=:black,\n", "    legend=false,\n", "    grid=false,\n", "    yaxis=false,\n", "    yticks=false,\n", ")" ] }, { "cell_type": "markdown", "id": "e0ec1436-6ab6-453f-a027-d794b5016d68", "metadata": {}, "source": [ "### Log-normal distribution for weights of men" ] }, { "cell_type": "code", "execution_count": 12, "id": "8abdccce-5e8d-47e6-b326-21f2d347c5f2", "metadata": {}, "outputs": [ { "data": { "image/svg+xml": [ "\n", "\n", "\n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Same parameters as the log-weight curve, shown on the original pounds scale.\n", "plot(\n", "    50:0.5:350,\n", "    LogNormal(5.13, 0.17);\n", "    xlabel=\"weight in pounds\",\n", "    title=\"weights of men\\n(lognormal distribution)\",\n", "    ylims=(0, Inf),\n", "    color=:black,\n", "    legend=false,\n", "    grid=false,\n", "    yaxis=false,\n", "    yticks=false,\n", ")" ] }, { "cell_type": "code", "execution_count": 13, "id": "3e299756-ac7a-48e4-904c-72fd4159006b", "metadata": {}, "outputs": [ { "data": { "image/svg+xml": [ "\n", "\n", "\n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Styling shared by every layer of the figure; splatted into each plot call.\n", "kwargs = (\n", "    color=:black,\n", "    grid=false,\n", "    yaxis=false,\n", "    yticks=false,\n", "    legend=false,\n", "    ylims=(0, Inf),\n", "    xlims=(-4, 4),\n", "    title=\"normal distribution\",\n", "    xticks=-3:1:3,\n", ")\n", "# Base layer: the full curve from -4 to 4 with a white fill.\n", "plot(-4:0.01:4, Normal(), fill=(0, :white); kwargs...)\n",
"# Overlay progressively narrower, lighter bands: within 3, 2 and 1 standard deviations.\n", "for (halfwidth, shade) in ((3, :gray30), (2, :gray50), (1, :gray70))\n", "    plot!(-halfwidth:0.01:halfwidth, Normal(), fill=(0, shade); kwargs...)\n", "end\n", "# Both side labels sit at the same height, 30% of the density at 1.5.\n", "side_label_height = 0.3*pdf(Normal(), 1.5)\n", "annotate!(0, 0.35*pdf(Normal(), 0), \"68%\")\n", "annotate!(-1.5, side_label_height, \"13.5%\")\n", "annotate!(1.5, side_label_height, \"13.5%\")" ] } ], "metadata": { "kernelspec": { "display_name": "Julia 1.6.0", "language": "julia", "name": "julia-1.6" }, "language_info": { "file_extension": ".jl", "mimetype": "application/julia", "name": "julia", "version": "1.6.0" } }, "nbformat": 4, "nbformat_minor": 5 }