{ "metadata": { "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5-final" }, "orig_nbformat": 2, "kernelspec": { "name": "Python 3.8.5 64-bit ('bigquery': conda)", "display_name": "Python 3.8.5 64-bit ('bigquery': conda)", "metadata": { "interpreter": { "hash": "8e6f8fd53d913fe50345f9e659ed342277121f637d2311273da0eef260503de3" } } } }, "nbformat": 4, "nbformat_minor": 2, "cells": [ { "source": [ "# Computational checking of the girl/boy probability problem" ], "cell_type": "markdown", "metadata": {} }, { "source": [ "Here I replay the classical [two child problem](https://en.wikipedia.org/wiki/Boy_or_Girl_paradox) and code it up in Python.\n", "\n", "In a nutshell, the solution changes depending on if and how we differentiate between the children (for example, whether we talk about the 'older' or 'younger' child or just refer to them as 'either').\n", "\n", "I also chart the solutions while I run the simulation to see how the probabilities tend to converge to their values." 
], "cell_type": "markdown", "metadata": {} },
{ "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [
"import enum\n",
"import random" ] },
{ "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [
"class Kid(enum.Enum):\n",
"    \"\"\"The two possible outcomes for a single simulated child.\"\"\"\n",
"\n",
"    Boy = 0\n",
"    Girl = 1" ] },
{ "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [
"def random_kid() -> Kid:\n",
"    \"\"\"Return Kid.Boy or Kid.Girl, chosen uniformly at random.\"\"\"\n",
"    return random.choice([Kid.Boy, Kid.Girl])\n",
"\n",
"\n",
"def running_ratio(numerator: int, denominator: int) -> float:\n",
"    \"\"\"Return numerator / denominator, or 0 when the denominator is 0.\n",
"\n",
"    The 0 fallback covers the first steps of the simulation, before the\n",
"    conditioning event has been observed even once.\n",
"    \"\"\"\n",
"    return numerator / denominator if denominator else 0" ] },
{ "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [
"N_TRIALS = 1000  # number of simulated two-child families\n",
"\n",
"random.seed(42)  # fixed seed so every run draws the same families" ] },
{ "cell_type": "code", "execution_count": 5, "metadata": { "tags": [] }, "outputs": [], "source": [
"both_girls = 0\n",
"older_girl = 0\n",
"either_girl = 0\n",
"\n",
"results = []\n",
"\n",
"for _ in range(N_TRIALS):\n",
"    # The younger child is drawn first, then the older one; keeping this order\n",
"    # keeps the seeded sequence of families unchanged.\n",
"    younger = random_kid()\n",
"    older = random_kid()\n",
"\n",
"    if older == Kid.Girl:\n",
"        older_girl += 1\n",
"\n",
"    if older == Kid.Girl and younger == Kid.Girl:\n",
"        both_girls += 1\n",
"\n",
"    if older == Kid.Girl or younger == Kid.Girl:\n",
"        either_girl += 1\n",
"\n",
"    # Running estimates of P(both girls | older is a girl) and\n",
"    # P(both girls | at least one girl) after this family.\n",
"    p_both_older = running_ratio(both_girls, older_girl)\n",
"    p_both_either = running_ratio(both_girls, either_girl)\n",
"\n",
"    results.append([younger.name, older.name, both_girls, older_girl, either_girl, p_both_either, p_both_older])" ] },
{ "cell_type": "code", "execution_count": 6, "metadata": { "tags": [] }, "outputs": [], "source": [
"import altair as alt\n",
"import pandas as pd" ] },
{ "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [
"# One row per simulated family; reset_index() exposes the step number as an 'index' column.\n",
"df_results = pd.DataFrame(\n",
"    results,\n",
"    columns=['younger', 'older', 'both girls', 'older girl', 'either girl', 'P(Both|Either)', 'P(Both|Older)'],\n",
").reset_index()" ] },
{ "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": " index younger older 
both girls older girl either girl P(Both|Either) \\\n0 0 Boy Boy 0 0 0 0.000000 \n1 1 Girl Boy 0 0 1 0.000000 \n2 2 Boy Boy 0 0 1 0.000000 \n3 3 Boy Boy 0 0 1 0.000000 \n4 4 Girl Boy 0 0 2 0.000000 \n.. ... ... ... ... ... ... ... \n995 995 Girl Boy 263 513 763 0.344692 \n996 996 Girl Girl 264 514 764 0.345550 \n997 997 Girl Girl 265 515 765 0.346405 \n998 998 Girl Boy 265 515 766 0.345953 \n999 999 Girl Girl 266 516 767 0.346806 \n\n P(Both|Older) \n0 0.000000 \n1 0.000000 \n2 0.000000 \n3 0.000000 \n4 0.000000 \n.. ... \n995 0.512671 \n996 0.513619 \n997 0.514563 \n998 0.514563 \n999 0.515504 \n\n[1000 rows x 8 columns]", "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
indexyoungerolderboth girlsolder girleither girlP(Both|Either)P(Both|Older)
00BoyBoy0000.0000000.000000
11GirlBoy0010.0000000.000000
22BoyBoy0010.0000000.000000
33BoyBoy0010.0000000.000000
44GirlBoy0020.0000000.000000
...........................
995995GirlBoy2635137630.3446920.512671
996996GirlGirl2645147640.3455500.513619
997997GirlGirl2655157650.3464050.514563
998998GirlBoy2655157660.3459530.514563
999999GirlGirl2665167670.3468060.515504
\n

1000 rows × 8 columns

\n
" }, "metadata": {}, "execution_count": 8 } ], "source": [ "df_results" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": " index variable value type\n0 0 younger Boy NaN\n1 1 younger Girl NaN\n2 2 younger Boy NaN\n3 3 younger Boy NaN\n4 4 younger Girl NaN\n... ... ... ... ...\n6995 995 P(Both|Older) 0.512671 probability\n6996 996 P(Both|Older) 0.513619 probability\n6997 997 P(Both|Older) 0.514563 probability\n6998 998 P(Both|Older) 0.514563 probability\n6999 999 P(Both|Older) 0.515504 probability\n\n[7000 rows x 4 columns]", "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
indexvariablevaluetype
00youngerBoyNaN
11youngerGirlNaN
22youngerBoyNaN
33youngerBoyNaN
44youngerGirlNaN
...............
6995995P(Both|Older)0.512671probability
6996996P(Both|Older)0.513619probability
6997997P(Both|Older)0.514563probability
6998998P(Both|Older)0.514563probability
6999999P(Both|Older)0.515504probability
\n

7000 rows × 4 columns

\n
" }, "metadata": {}, "execution_count": 9 } ], "source": [
"# Long form: one row per (step, variable) pair, so each series can be selected by name.\n",
"count_columns = ['both girls', 'older girl', 'either girl']\n",
"probability_columns = ['P(Both|Either)', 'P(Both|Older)']\n",
"\n",
"to_plot = df_results.melt(id_vars='index')\n",
"is_count = to_plot['variable'].isin(count_columns)\n",
"is_probability = to_plot['variable'].isin(probability_columns)\n",
"\n",
"# Rows for the 'younger' / 'older' variables match neither group and keep a missing 'type'.\n",
"to_plot.loc[is_count, 'type'] = 'count'\n",
"to_plot.loc[is_probability, 'type'] = 'probability'\n",
"to_plot" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "output_type": "execute_result", "data": { "text/html": "\n
\n", "text/plain": "alt.LayerChart(...)" }, "metadata": {}, "execution_count": 10 } ], "source": [
"# Hovering picks the nearest step along the x axis; nothing is selected until the mouse moves over the chart.\n",
"label = alt.selection_single(encodings=['x'], on='mouseover', nearest=True, empty='none')\n",
"\n",
"count_chart = alt.Chart(data=to_plot[to_plot['type'] == 'count']).mark_line().encode(\n",
"    alt.X('index:Q'), alt.Y('value:Q'), color=alt.Color('variable:N'),\n",
")\n",
"\n",
"probability_chart = alt.Chart(to_plot[to_plot['type'] == 'probability']).mark_line().encode(\n",
"    alt.X('index:Q'), alt.Y('value:Q'), color=alt.Color('variable:N')\n",
")\n",
"\n",
"# Layers: the probability lines (which carry the hover selection), a gray rule\n",
"# filtered to the hovered step, and the raw counts. The y scales are resolved independently.\n",
"values_chart = alt.layer(\n",
"    probability_chart.add_selection(label),\n",
"    probability_chart.mark_rule(color='gray').encode(alt.X('index:Q')).transform_filter(label),\n",
"    count_chart,\n",
").resolve_scale(y='independent').properties(width=600)\n",
"\n",
"values_chart" ] } ] }