{ "cells": [
{ "cell_type": "markdown", "metadata": {}, "source": [ "# Simpson's paradox" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "**[Sex Bias in Graduate Admissions: Data from Berkeley](https://pdfs.semanticscholar.org/b704/3d57d399bd28b2d3e84fb9d342a307472458.pdf)**\n", "\n", "*[https://pdfs.semanticscholar.org/b704/3d57d399bd28b2d3e84fb9d342a307472458.pdf](https://pdfs.semanticscholar.org/b704/3d57d399bd28b2d3e84fb9d342a307472458.pdf)*" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "## Example from article" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "### Actual data" ] },
{ "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.4099349682676487" ] }, "execution_count": 1, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Pooled admission rate: admitted applicants (3738 + 1494) over all applicants.\n", "# The denominator is written out from its four counts (it sums to 12763).\n", "admission_rate = (3738 + 1494) / (3738 + 4704 + 1494 + 2827)\n", "admission_rate" ] },
{ "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "8442" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Total men: the 3738 counted as admitted above plus the remaining 4704.\n", "total_men = 3738 + 4704\n", "total_men" ] },
{ "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "3460.6710021154904" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Number of men that would be admitted at the pooled admission rate.\n", "expect_men_admitted = total_men * admission_rate\n", "expect_men_admitted" ] },
{ "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "4321" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Total women: the 1494 counted as admitted above plus the remaining 2827.\n", "total_women = 1494 + 2827\n", "total_women" ] },
{ "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "1771.32899788451" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Number of women that would be admitted at the pooled admission rate.\n", "expect_women_admitted = total_women * admission_rate\n", "expect_women_admitted" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "### Data in the example" ] },
{ "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.4166666666666667" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Same pooled-rate calculation for the second set of counts.\n", "# NOTE: this cell and the ones after it reuse the variable names from the cells\n", "# above, so the earlier values are overwritten from here on.\n", "admission_rate = (250 + 250) / (250 + 250 + 300 + 400)\n", "admission_rate" ] },
{ "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "550" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Total men: the 250 counted as admitted above plus the remaining 300.\n", "total_men = 250 + 300\n", "total_men" ] },
{ "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "650" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Total women: the 250 counted as admitted above plus the remaining 400.\n", "total_women = 250 + 400\n", "total_women" ] },
{ "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "229.16666666666669" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Number of men that would be admitted at the pooled admission rate.\n", "expect_men_admitted = total_men * admission_rate\n", "expect_men_admitted" ] },
{ "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "270.83333333333337" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Number of women that would be admitted at the pooled admission rate.\n", "expect_women_admitted = total_women * admission_rate\n", "expect_women_admitted" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "## Regression example" ] },
{ "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import seaborn as sns\n", "import numpy as np\n", "\n", "# Seed NumPy's global RNG so the np.random draws that follow produce the same\n", "# synthetic samples on every Restart & Run All.\n", "np.random.seed(42)" ] },
{ "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "# Synthetic sample of 100 rows: height ~ N(mean 1.8, sd 0.05) and\n", "# weight ~ N(mean 85, sd 1), drawn independently of each other.\n", "men = pd.DataFrame({\"height\": np.random.normal(1.8, 0.05, 100), \"weight\": np.random.normal(85, 1, 100)})" ] },
{ "cell_type": "code",
"execution_count": 13, "metadata": {}, "outputs": [], "source": [ "men['bmi'] = men['weight'] / men['height']**2" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", " | height | \n", "weight | \n", "bmi | \n", "
---|---|---|---|
0 | \n", "1.846326 | \n", "84.350208 | \n", "24.743971 | \n", "
1 | \n", "1.705580 | \n", "83.648994 | \n", "28.755193 | \n", "
2 | \n", "1.756489 | \n", "84.277095 | \n", "27.316110 | \n", "
3 | \n", "1.848740 | \n", "84.762614 | \n", "24.800061 | \n", "
4 | \n", "1.828322 | \n", "85.054365 | \n", "25.444335 | \n", "
5 | \n", "1.792602 | \n", "85.289775 | \n", "26.541740 | \n", "
6 | \n", "1.790928 | \n", "86.048167 | \n", "26.827812 | \n", "
7 | \n", "1.809917 | \n", "83.658219 | \n", "25.538256 | \n", "
8 | \n", "1.804939 | \n", "84.539923 | \n", "25.949953 | \n", "
9 | \n", "1.755725 | \n", "86.369401 | \n", "28.018621 | \n", "
10 | \n", "1.817654 | \n", "85.530689 | \n", "25.888063 | \n", "
11 | \n", "1.798073 | \n", "84.488833 | \n", "26.132729 | \n", "
12 | \n", "1.789017 | \n", "85.597579 | \n", "26.744371 | \n", "
13 | \n", "1.736679 | \n", "85.642032 | \n", "28.395390 | \n", "
14 | \n", "1.717038 | \n", "84.934525 | \n", "28.808750 | \n", "
15 | \n", "1.754468 | \n", "85.220435 | \n", "27.685526 | \n", "
16 | \n", "1.745762 | \n", "86.236397 | \n", "28.295705 | \n", "
17 | \n", "1.738966 | \n", "83.296604 | \n", "27.545156 | \n", "
18 | \n", "1.744043 | \n", "84.484906 | \n", "27.775682 | \n", "
19 | \n", "1.814423 | \n", "87.287624 | \n", "26.514010 | \n", "
20 | \n", "1.696548 | \n", "85.193880 | \n", "29.598924 | \n", "
21 | \n", "1.834508 | \n", "86.174038 | \n", "25.605727 | \n", "
22 | \n", "1.847978 | \n", "85.316498 | \n", "24.982692 | \n", "
23 | \n", "1.838084 | \n", "84.265763 | \n", "24.941385 | \n", "
24 | \n", "1.791247 | \n", "86.197007 | \n", "26.864651 | \n", "
25 | \n", "1.738719 | \n", "85.308274 | \n", "28.218388 | \n", "
26 | \n", "1.810138 | \n", "84.111496 | \n", "25.670353 | \n", "
27 | \n", "1.828684 | \n", "84.670985 | \n", "25.319619 | \n", "
28 | \n", "1.822241 | \n", "85.705984 | \n", "25.810675 | \n", "
29 | \n", "1.727342 | \n", "87.253849 | \n", "29.243388 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "
70 | \n", "1.812735 | \n", "85.353152 | \n", "25.974731 | \n", "
71 | \n", "1.820731 | \n", "85.452422 | \n", "25.777023 | \n", "
72 | \n", "1.850460 | \n", "84.517401 | \n", "24.682373 | \n", "
73 | \n", "1.758177 | \n", "84.955599 | \n", "27.483171 | \n", "
74 | \n", "1.720298 | \n", "83.389695 | \n", "28.177659 | \n", "
75 | \n", "1.818238 | \n", "84.249254 | \n", "25.483829 | \n", "
76 | \n", "1.801602 | \n", "85.523859 | \n", "26.349323 | \n", "
77 | \n", "1.738942 | \n", "85.662463 | \n", "28.328297 | \n", "
78 | \n", "1.828085 | \n", "84.353179 | \n", "25.241129 | \n", "
79 | \n", "1.853273 | \n", "84.966655 | \n", "24.738281 | \n", "
80 | \n", "1.779977 | \n", "84.804354 | \n", "26.766354 | \n", "
81 | \n", "1.851053 | \n", "84.969054 | \n", "24.798354 | \n", "
82 | \n", "1.807441 | \n", "84.043159 | \n", "25.726102 | \n", "
83 | \n", "1.856138 | \n", "85.852565 | \n", "24.919121 | \n", "
84 | \n", "1.837212 | \n", "85.128751 | \n", "25.220738 | \n", "
85 | \n", "1.794949 | \n", "86.020067 | \n", "26.699028 | \n", "
86 | \n", "1.771492 | \n", "84.545193 | \n", "26.940797 | \n", "
87 | \n", "1.882290 | \n", "86.189506 | \n", "24.326592 | \n", "
88 | \n", "1.817829 | \n", "85.770484 | \n", "25.955643 | \n", "
89 | \n", "1.799267 | \n", "85.202430 | \n", "26.318481 | \n", "
90 | \n", "1.846834 | \n", "84.033850 | \n", "24.637605 | \n", "
91 | \n", "1.773097 | \n", "85.051595 | \n", "27.053130 | \n", "
92 | \n", "1.732057 | \n", "84.835168 | \n", "28.278197 | \n", "
93 | \n", "1.800811 | \n", "84.196582 | \n", "25.963185 | \n", "
94 | \n", "1.850443 | \n", "85.347839 | \n", "24.925330 | \n", "
95 | \n", "1.721525 | \n", "83.980465 | \n", "28.336855 | \n", "
96 | \n", "1.798503 | \n", "83.329209 | \n", "25.761735 | \n", "
97 | \n", "1.835831 | \n", "86.217741 | \n", "25.581795 | \n", "
98 | \n", "1.790648 | \n", "85.394169 | \n", "26.632243 | \n", "
99 | \n", "1.855777 | \n", "84.535031 | \n", "24.546248 | \n", "
100 rows × 3 columns
\n", "\n", " | height | \n", "weight | \n", "bmi | \n", "
---|---|---|---|
0 | \n", "1.635764 | \n", "60.580269 | \n", "22.640706 | \n", "
1 | \n", "1.561134 | \n", "59.842627 | \n", "24.554452 | \n", "
2 | \n", "1.600291 | \n", "58.834190 | \n", "22.973758 | \n", "
3 | \n", "1.541475 | \n", "59.004968 | \n", "24.832234 | \n", "
4 | \n", "1.617897 | \n", "60.302164 | \n", "23.037285 | \n", "
5 | \n", "1.614042 | \n", "61.837639 | \n", "23.736871 | \n", "
6 | \n", "1.593266 | \n", "59.645419 | \n", "23.496357 | \n", "
7 | \n", "1.590521 | \n", "60.987380 | \n", "24.107989 | \n", "
8 | \n", "1.619571 | \n", "58.195426 | \n", "22.186507 | \n", "
9 | \n", "1.634805 | \n", "59.926340 | \n", "22.422582 | \n", "
10 | \n", "1.526086 | \n", "60.773087 | \n", "26.094743 | \n", "
11 | \n", "1.636303 | \n", "60.233596 | \n", "22.496313 | \n", "
12 | \n", "1.585557 | \n", "59.798904 | \n", "23.786453 | \n", "
13 | \n", "1.604561 | \n", "60.725505 | \n", "23.586247 | \n", "
14 | \n", "1.618125 | \n", "60.008555 | \n", "22.918658 | \n", "
15 | \n", "1.609263 | \n", "61.205441 | \n", "23.633928 | \n", "
16 | \n", "1.614334 | \n", "60.998740 | \n", "23.406365 | \n", "
17 | \n", "1.692908 | \n", "59.084046 | \n", "20.615958 | \n", "
18 | \n", "1.545380 | \n", "58.278846 | \n", "24.402824 | \n", "
19 | \n", "1.572084 | \n", "59.759869 | \n", "24.180088 | \n", "
20 | \n", "1.543847 | \n", "59.237603 | \n", "24.853593 | \n", "
21 | \n", "1.574731 | \n", "60.450504 | \n", "24.377392 | \n", "
22 | \n", "1.581154 | \n", "58.665411 | \n", "23.465723 | \n", "
23 | \n", "1.627137 | \n", "60.157900 | \n", "22.721890 | \n", "
24 | \n", "1.573138 | \n", "60.682653 | \n", "24.520576 | \n", "
25 | \n", "1.567199 | \n", "60.141314 | \n", "24.486370 | \n", "
26 | \n", "1.584278 | \n", "60.423229 | \n", "24.073614 | \n", "
27 | \n", "1.603280 | \n", "60.133212 | \n", "23.393512 | \n", "
28 | \n", "1.648226 | \n", "59.678417 | \n", "21.967655 | \n", "
29 | \n", "1.600238 | \n", "58.983817 | \n", "23.033699 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "
70 | \n", "1.652888 | \n", "57.802395 | \n", "21.157243 | \n", "
71 | \n", "1.596816 | \n", "62.396018 | \n", "24.470728 | \n", "
72 | \n", "1.657528 | \n", "61.250404 | \n", "22.293958 | \n", "
73 | \n", "1.673148 | \n", "58.042489 | \n", "20.733715 | \n", "
74 | \n", "1.507153 | \n", "60.806251 | \n", "26.769101 | \n", "
75 | \n", "1.619881 | \n", "59.094099 | \n", "22.520506 | \n", "
76 | \n", "1.622517 | \n", "59.199302 | \n", "22.487330 | \n", "
77 | \n", "1.595906 | \n", "60.075127 | \n", "23.587412 | \n", "
78 | \n", "1.659761 | \n", "58.601985 | \n", "21.272637 | \n", "
79 | \n", "1.711083 | \n", "59.453638 | \n", "20.306556 | \n", "
80 | \n", "1.632763 | \n", "58.053691 | \n", "21.776266 | \n", "
81 | \n", "1.612295 | \n", "59.658539 | \n", "22.950060 | \n", "
82 | \n", "1.624213 | \n", "60.653229 | \n", "22.991522 | \n", "
83 | \n", "1.542336 | \n", "59.840166 | \n", "25.155607 | \n", "
84 | \n", "1.591212 | \n", "59.762734 | \n", "23.603385 | \n", "
85 | \n", "1.575955 | \n", "59.963829 | \n", "24.143583 | \n", "
86 | \n", "1.623107 | \n", "60.997220 | \n", "23.153438 | \n", "
87 | \n", "1.658766 | \n", "60.111609 | \n", "21.846824 | \n", "
88 | \n", "1.561854 | \n", "60.627608 | \n", "24.853624 | \n", "
89 | \n", "1.577502 | \n", "60.144334 | \n", "24.168787 | \n", "
90 | \n", "1.623968 | \n", "59.850311 | \n", "22.694030 | \n", "
91 | \n", "1.597063 | \n", "61.065658 | \n", "23.941591 | \n", "
92 | \n", "1.682761 | \n", "59.906006 | \n", "21.155602 | \n", "
93 | \n", "1.641190 | \n", "60.122483 | \n", "22.321282 | \n", "
94 | \n", "1.581411 | \n", "60.415878 | \n", "24.158047 | \n", "
95 | \n", "1.546597 | \n", "60.029364 | \n", "25.096279 | \n", "
96 | \n", "1.591066 | \n", "60.472437 | \n", "23.888081 | \n", "
97 | \n", "1.490416 | \n", "59.096371 | \n", "26.603924 | \n", "
98 | \n", "1.555316 | \n", "59.681624 | \n", "24.671940 | \n", "
99 | \n", "1.515912 | \n", "61.311259 | \n", "26.680384 | \n", "
100 rows × 3 columns
\n", "\n", " | height | \n", "weight | \n", "bmi | \n", "gender | \n", "
---|---|---|---|---|
0 | \n", "1.846326 | \n", "84.350208 | \n", "24.743971 | \n", "male | \n", "
1 | \n", "1.705580 | \n", "83.648994 | \n", "28.755193 | \n", "male | \n", "
2 | \n", "1.756489 | \n", "84.277095 | \n", "27.316110 | \n", "male | \n", "
3 | \n", "1.848740 | \n", "84.762614 | \n", "24.800061 | \n", "male | \n", "
4 | \n", "1.828322 | \n", "85.054365 | \n", "25.444335 | \n", "male | \n", "
5 | \n", "1.792602 | \n", "85.289775 | \n", "26.541740 | \n", "male | \n", "
6 | \n", "1.790928 | \n", "86.048167 | \n", "26.827812 | \n", "male | \n", "
7 | \n", "1.809917 | \n", "83.658219 | \n", "25.538256 | \n", "male | \n", "
8 | \n", "1.804939 | \n", "84.539923 | \n", "25.949953 | \n", "male | \n", "
9 | \n", "1.755725 | \n", "86.369401 | \n", "28.018621 | \n", "male | \n", "
10 | \n", "1.817654 | \n", "85.530689 | \n", "25.888063 | \n", "male | \n", "
11 | \n", "1.798073 | \n", "84.488833 | \n", "26.132729 | \n", "male | \n", "
12 | \n", "1.789017 | \n", "85.597579 | \n", "26.744371 | \n", "male | \n", "
13 | \n", "1.736679 | \n", "85.642032 | \n", "28.395390 | \n", "male | \n", "
14 | \n", "1.717038 | \n", "84.934525 | \n", "28.808750 | \n", "male | \n", "
15 | \n", "1.754468 | \n", "85.220435 | \n", "27.685526 | \n", "male | \n", "
16 | \n", "1.745762 | \n", "86.236397 | \n", "28.295705 | \n", "male | \n", "
17 | \n", "1.738966 | \n", "83.296604 | \n", "27.545156 | \n", "male | \n", "
18 | \n", "1.744043 | \n", "84.484906 | \n", "27.775682 | \n", "male | \n", "
19 | \n", "1.814423 | \n", "87.287624 | \n", "26.514010 | \n", "male | \n", "
20 | \n", "1.696548 | \n", "85.193880 | \n", "29.598924 | \n", "male | \n", "
21 | \n", "1.834508 | \n", "86.174038 | \n", "25.605727 | \n", "male | \n", "
22 | \n", "1.847978 | \n", "85.316498 | \n", "24.982692 | \n", "male | \n", "
23 | \n", "1.838084 | \n", "84.265763 | \n", "24.941385 | \n", "male | \n", "
24 | \n", "1.791247 | \n", "86.197007 | \n", "26.864651 | \n", "male | \n", "
25 | \n", "1.738719 | \n", "85.308274 | \n", "28.218388 | \n", "male | \n", "
26 | \n", "1.810138 | \n", "84.111496 | \n", "25.670353 | \n", "male | \n", "
27 | \n", "1.828684 | \n", "84.670985 | \n", "25.319619 | \n", "male | \n", "
28 | \n", "1.822241 | \n", "85.705984 | \n", "25.810675 | \n", "male | \n", "
29 | \n", "1.727342 | \n", "87.253849 | \n", "29.243388 | \n", "male | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
70 | \n", "1.652888 | \n", "57.802395 | \n", "21.157243 | \n", "female | \n", "
71 | \n", "1.596816 | \n", "62.396018 | \n", "24.470728 | \n", "female | \n", "
72 | \n", "1.657528 | \n", "61.250404 | \n", "22.293958 | \n", "female | \n", "
73 | \n", "1.673148 | \n", "58.042489 | \n", "20.733715 | \n", "female | \n", "
74 | \n", "1.507153 | \n", "60.806251 | \n", "26.769101 | \n", "female | \n", "
75 | \n", "1.619881 | \n", "59.094099 | \n", "22.520506 | \n", "female | \n", "
76 | \n", "1.622517 | \n", "59.199302 | \n", "22.487330 | \n", "female | \n", "
77 | \n", "1.595906 | \n", "60.075127 | \n", "23.587412 | \n", "female | \n", "
78 | \n", "1.659761 | \n", "58.601985 | \n", "21.272637 | \n", "female | \n", "
79 | \n", "1.711083 | \n", "59.453638 | \n", "20.306556 | \n", "female | \n", "
80 | \n", "1.632763 | \n", "58.053691 | \n", "21.776266 | \n", "female | \n", "
81 | \n", "1.612295 | \n", "59.658539 | \n", "22.950060 | \n", "female | \n", "
82 | \n", "1.624213 | \n", "60.653229 | \n", "22.991522 | \n", "female | \n", "
83 | \n", "1.542336 | \n", "59.840166 | \n", "25.155607 | \n", "female | \n", "
84 | \n", "1.591212 | \n", "59.762734 | \n", "23.603385 | \n", "female | \n", "
85 | \n", "1.575955 | \n", "59.963829 | \n", "24.143583 | \n", "female | \n", "
86 | \n", "1.623107 | \n", "60.997220 | \n", "23.153438 | \n", "female | \n", "
87 | \n", "1.658766 | \n", "60.111609 | \n", "21.846824 | \n", "female | \n", "
88 | \n", "1.561854 | \n", "60.627608 | \n", "24.853624 | \n", "female | \n", "
89 | \n", "1.577502 | \n", "60.144334 | \n", "24.168787 | \n", "female | \n", "
90 | \n", "1.623968 | \n", "59.850311 | \n", "22.694030 | \n", "female | \n", "
91 | \n", "1.597063 | \n", "61.065658 | \n", "23.941591 | \n", "female | \n", "
92 | \n", "1.682761 | \n", "59.906006 | \n", "21.155602 | \n", "female | \n", "
93 | \n", "1.641190 | \n", "60.122483 | \n", "22.321282 | \n", "female | \n", "
94 | \n", "1.581411 | \n", "60.415878 | \n", "24.158047 | \n", "female | \n", "
95 | \n", "1.546597 | \n", "60.029364 | \n", "25.096279 | \n", "female | \n", "
96 | \n", "1.591066 | \n", "60.472437 | \n", "23.888081 | \n", "female | \n", "
97 | \n", "1.490416 | \n", "59.096371 | \n", "26.603924 | \n", "female | \n", "
98 | \n", "1.555316 | \n", "59.681624 | \n", "24.671940 | \n", "female | \n", "
99 | \n", "1.515912 | \n", "61.311259 | \n", "26.680384 | \n", "female | \n", "
200 rows × 4 columns
\n", "