{
"cells": [
{
"cell_type": "markdown",
"id": "9e8b40d2",
"metadata": {},
"source": [
"**Notes**:\n",
"This notebook prepares the [example sleep data - sleep.csv](https://github.com/LSYS/pyforestplot/blob/main/examples/data/sleep.csv).\n",
"\n",
"The resulting output csv file ([sleep.csv](https://github.com/LSYS/pyforestplot/blob/main/examples/data/sleep.csv)) that indicates how certain individual characteristics correlates to the amount of sleep an one gets per week.\n",
"Rows are the variables correlating with sleep. Columns included the computed pearson correlation coefficient, sample size, p-value, confidence interval (95%), etc.\n",
"The `pingouin` is used to compute correlations.\n",
"\n",
"**Raw src**:\n",
"* `sleep75.csv` (/wooldridge/sleep75) from https://vincentarelbundock.github.io/Rdatasets/articles/data.html\n",
"* See https://rdrr.io/cran/wooldridge/man/sleep75.html for variable labels to the variables in `sleep75.csv`.\n",
"\n",
"\n",
"\n",
"**Requirements**: Mainly `pingouin`. See first cell of imports for requirements"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "e05019fe",
"metadata": {
"ExecuteTime": {
"end_time": "2022-09-18T05:01:11.859723Z",
"start_time": "2022-09-18T05:01:04.107411Z"
}
},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" age | \n",
" black | \n",
" clerical | \n",
" construc | \n",
" educ | \n",
" earns74 | \n",
" gdhlth | \n",
" inlf | \n",
" smsa | \n",
" lhrwage | \n",
" ... | \n",
" spwrk75 | \n",
" totwrk | \n",
" union | \n",
" worknrm | \n",
" workscnd | \n",
" exper | \n",
" yngkid | \n",
" yrsmarr | \n",
" hrwage | \n",
" agesq | \n",
"
\n",
" \n",
" \n",
" \n",
" 1 | \n",
" 32 | \n",
" 0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 12 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
" 0 | \n",
" 1.955861 | \n",
" ... | \n",
" 0 | \n",
" 3438 | \n",
" 0 | \n",
" 3438 | \n",
" 0 | \n",
" 14 | \n",
" 0 | \n",
" 13 | \n",
" 7.070004 | \n",
" 1024 | \n",
"
\n",
" \n",
" 2 | \n",
" 31 | \n",
" 0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 14 | \n",
" 9500 | \n",
" 1 | \n",
" 1 | \n",
" 0 | \n",
" 0.357674 | \n",
" ... | \n",
" 0 | \n",
" 5020 | \n",
" 0 | \n",
" 5020 | \n",
" 0 | \n",
" 11 | \n",
" 0 | \n",
" 0 | \n",
" 1.429999 | \n",
" 961 | \n",
"
\n",
" \n",
" 3 | \n",
" 44 | \n",
" 0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 17 | \n",
" 42500 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 3.021887 | \n",
" ... | \n",
" 1 | \n",
" 2815 | \n",
" 0 | \n",
" 2815 | \n",
" 0 | \n",
" 21 | \n",
" 0 | \n",
" 0 | \n",
" 20.529997 | \n",
" 1936 | \n",
"
\n",
" \n",
"
\n",
"
3 rows × 30 columns
\n",
"
"
],
"text/plain": [
" age black clerical construc educ earns74 gdhlth inlf smsa \\\n",
"1 32 0 0.0 0.0 12 0 0 1 0 \n",
"2 31 0 0.0 0.0 14 9500 1 1 0 \n",
"3 44 0 0.0 0.0 17 42500 1 1 1 \n",
"\n",
" lhrwage ... spwrk75 totwrk union worknrm workscnd exper yngkid \\\n",
"1 1.955861 ... 0 3438 0 3438 0 14 0 \n",
"2 0.357674 ... 0 5020 0 5020 0 11 0 \n",
"3 3.021887 ... 1 2815 0 2815 0 21 0 \n",
"\n",
" yrsmarr hrwage agesq \n",
"1 13 7.070004 1024 \n",
"2 0 1.429999 961 \n",
"3 0 20.529997 1936 \n",
"\n",
"[3 rows x 30 columns]"
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import pingouin as pg\n",
"import warnings\n",
"warnings.filterwarnings('ignore')\n",
"\n",
"_url = \"https://vincentarelbundock.github.io/Rdatasets/csv/wooldridge/sleep75.csv\"\n",
"drop_var = ['case', 'leis1', 'leis2', 'leis3']\n",
"df = (pd.read_csv(_url, index_col=0)\n",
" .drop(drop_var, axis=1)\n",
" )\n",
"df.head(3)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "e66b467a",
"metadata": {
"ExecuteTime": {
"end_time": "2022-09-18T05:01:11.892027Z",
"start_time": "2022-09-18T05:01:11.862006Z"
},
"code_folding": [],
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" var | \n",
" group | \n",
" label | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" age | \n",
" age | \n",
" in years | \n",
"
\n",
" \n",
" 1 | \n",
" black | \n",
" other factors | \n",
" =1 if black | \n",
"
\n",
" \n",
" 2 | \n",
" clerical | \n",
" occupation | \n",
" =1 if clerical worker | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" var group label\n",
"0 age age in years\n",
"1 black other factors =1 if black\n",
"2 clerical occupation =1 if clerical worker"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Prep variable lablels (fold cell)\n",
"# varlabels: http://fmwww.bc.edu/ec-p/data/wooldridge/sleep75.des\n",
"df_label = (pd.read_csv('data/sleep75-des.csv', encoding=\"ISO-8859-1\")\n",
" .assign(label=lambda df: df['des'].str.encode('ascii', 'ignore').str.decode('ascii'))\n",
" .drop(['des'], axis=1)\n",
" .set_index('var')\n",
" .drop(drop_var)\n",
" .reset_index()\n",
" )\n",
"\n",
"df_label.head(3)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "efdb6621",
"metadata": {
"ExecuteTime": {
"end_time": "2022-09-18T05:01:14.801972Z",
"start_time": "2022-09-18T05:01:11.897008Z"
},
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" n | \n",
" r | \n",
" CI95% | \n",
" p-val | \n",
" BF10 | \n",
" power | \n",
" var | \n",
" hl | \n",
" ll | \n",
" moerror | \n",
" group | \n",
" label | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 706 | \n",
" 0.090373 | \n",
" [0.02, 0.16] | \n",
" 1.630887e-02 | \n",
" 0.839 | \n",
" 0.67 | \n",
" age | \n",
" 0.16 | \n",
" 0.02 | \n",
" 0.069627 | \n",
" age | \n",
" in years | \n",
"
\n",
" \n",
" 1 | \n",
" 706 | \n",
" -0.027057 | \n",
" [-0.1, 0.05] | \n",
" 4.728889e-01 | \n",
" 0.061 | \n",
" 0.11 | \n",
" black | \n",
" 0.05 | \n",
" -0.10 | \n",
" 0.077057 | \n",
" other factors | \n",
" =1 if black | \n",
"
\n",
" \n",
" 2 | \n",
" 706 | \n",
" 0.048081 | \n",
" [-0.03, 0.12] | \n",
" 2.019484e-01 | \n",
" 0.106 | \n",
" 0.25 | \n",
" clerical | \n",
" 0.12 | \n",
" -0.03 | \n",
" 0.071919 | \n",
" occupation | \n",
" =1 if clerical worker | \n",
"
\n",
" \n",
" 3 | \n",
" 706 | \n",
" 0.041229 | \n",
" [-0.03, 0.11] | \n",
" 2.739475e-01 | \n",
" 0.086 | \n",
" 0.19 | \n",
" construc | \n",
" 0.11 | \n",
" -0.03 | \n",
" 0.068771 | \n",
" occupation | \n",
" =1 if construction worker | \n",
"
\n",
" \n",
" 4 | \n",
" 706 | \n",
" -0.095004 | \n",
" [-0.17, -0.02] | \n",
" 1.155151e-02 | \n",
" 1.137 | \n",
" 0.72 | \n",
" educ | \n",
" -0.02 | \n",
" -0.17 | \n",
" 0.075004 | \n",
" labor factors | \n",
" years of schooling | \n",
"
\n",
" \n",
" 5 | \n",
" 706 | \n",
" -0.076890 | \n",
" [-0.15, -0.0] | \n",
" 4.110934e-02 | \n",
" 0.378 | \n",
" 0.53 | \n",
" earns74 | \n",
" -0.00 | \n",
" -0.15 | \n",
" 0.076890 | \n",
" labor factors | \n",
" total earnings, 1974 | \n",
"
\n",
" \n",
" 6 | \n",
" 706 | \n",
" -0.102825 | \n",
" [-0.18, -0.03] | \n",
" 6.246660e-03 | \n",
" 1.967 | \n",
" 0.78 | \n",
" gdhlth | \n",
" -0.03 | \n",
" -0.18 | \n",
" 0.072825 | \n",
" health factors | \n",
" =1 if in good or excel. health | \n",
"
\n",
" \n",
" 7 | \n",
" 706 | \n",
" -0.027126 | \n",
" [-0.1, 0.05] | \n",
" 4.717698e-01 | \n",
" 0.061 | \n",
" 0.11 | \n",
" inlf | \n",
" 0.05 | \n",
" -0.10 | \n",
" 0.077126 | \n",
" labor factors | \n",
" =1 if in labor force | \n",
"
\n",
" \n",
" 8 | \n",
" 706 | \n",
" -0.066997 | \n",
" [-0.14, 0.01] | \n",
" 7.524015e-02 | \n",
" 0.229 | \n",
" 0.43 | \n",
" smsa | \n",
" 0.01 | \n",
" -0.14 | \n",
" 0.076997 | \n",
" area of residence | \n",
" =1 if live in smsa | \n",
"
\n",
" \n",
" 9 | \n",
" 532 | \n",
" -0.067197 | \n",
" [-0.15, 0.02] | \n",
" 1.216222e-01 | \n",
" 0.179 | \n",
" 0.34 | \n",
" lhrwage | \n",
" 0.02 | \n",
" -0.15 | \n",
" 0.087197 | \n",
" labor factors | \n",
" log hourly wage | \n",
"
\n",
" \n",
" 10 | \n",
" 706 | \n",
" 0.036661 | \n",
" [-0.04, 0.11] | \n",
" 3.306971e-01 | \n",
" 0.076 | \n",
" 0.16 | \n",
" lothinc | \n",
" 0.11 | \n",
" -0.04 | \n",
" 0.073339 | \n",
" labor factors | \n",
" log othinc, unless othinc < 0 | \n",
"
\n",
" \n",
" 11 | \n",
" 706 | \n",
" -0.035909 | \n",
" [-0.11, 0.04] | \n",
" 3.407214e-01 | \n",
" 0.074 | \n",
" 0.16 | \n",
" male | \n",
" 0.04 | \n",
" -0.11 | \n",
" 0.075909 | \n",
" other factors | \n",
" =1 if male | \n",
"
\n",
" \n",
" 12 | \n",
" 706 | \n",
" 0.053757 | \n",
" [-0.02, 0.13] | \n",
" 1.536188e-01 | \n",
" 0.13 | \n",
" 0.30 | \n",
" marr | \n",
" 0.13 | \n",
" -0.02 | \n",
" 0.076243 | \n",
" family factors | \n",
" =1 if married | \n",
"
\n",
" \n",
" 13 | \n",
" 706 | \n",
" 0.027147 | \n",
" [-0.05, 0.1] | \n",
" 4.714176e-01 | \n",
" 0.061 | \n",
" 0.11 | \n",
" prot | \n",
" 0.10 | \n",
" -0.05 | \n",
" 0.072853 | \n",
" other factors | \n",
" =1 if Protestant | \n",
"
\n",
" \n",
" 14 | \n",
" 706 | \n",
" 0.867744 | \n",
" [0.85, 0.88] | \n",
" 6.051022e-216 | \n",
" 6.697e+211 | \n",
" 1.00 | \n",
" rlxall | \n",
" 0.88 | \n",
" 0.85 | \n",
" 0.012256 | \n",
" other sleep factors | \n",
" slpnaps + personal activs | \n",
"
\n",
" \n",
" 15 | \n",
" 706 | \n",
" 0.001782 | \n",
" [-0.07, 0.08] | \n",
" 9.623058e-01 | \n",
" 0.047 | \n",
" 0.05 | \n",
" selfe | \n",
" 0.08 | \n",
" -0.07 | \n",
" 0.078218 | \n",
" labor factors | \n",
" =1 if self employed | \n",
"
\n",
" \n",
" 16 | \n",
" 706 | \n",
" 0.893043 | \n",
" [0.88, 0.91] | \n",
" 2.339108e-246 | \n",
" 1.38e+242 | \n",
" 1.00 | \n",
" slpnaps | \n",
" 0.91 | \n",
" 0.88 | \n",
" 0.016957 | \n",
" other sleep factors | \n",
" minutes sleep, inc. naps | \n",
"
\n",
" \n",
" 17 | \n",
" 706 | \n",
" 0.078600 | \n",
" [0.0, 0.15] | \n",
" 3.679946e-02 | \n",
" 0.415 | \n",
" 0.55 | \n",
" south | \n",
" 0.15 | \n",
" 0.00 | \n",
" 0.071400 | \n",
" area of residence | \n",
" =1 if live in south | \n",
"
\n",
" \n",
" 18 | \n",
" 706 | \n",
" 0.007881 | \n",
" [-0.07, 0.08] | \n",
" 8.344125e-01 | \n",
" 0.048 | \n",
" 0.06 | \n",
" spsepay | \n",
" 0.08 | \n",
" -0.07 | \n",
" 0.072119 | \n",
" other factors | \n",
" spousal wage income | \n",
"
\n",
" \n",
" 19 | \n",
" 706 | \n",
" 0.007868 | \n",
" [-0.07, 0.08] | \n",
" 8.346888e-01 | \n",
" 0.048 | \n",
" 0.05 | \n",
" spwrk75 | \n",
" 0.08 | \n",
" -0.07 | \n",
" 0.072132 | \n",
" other factors | \n",
" =1 if spouse works | \n",
"
\n",
" \n",
" 20 | \n",
" 706 | \n",
" -0.321384 | \n",
" [-0.39, -0.25] | \n",
" 1.994095e-18 | \n",
" 1.961e+15 | \n",
" 1.00 | \n",
" totwrk | \n",
" -0.25 | \n",
" -0.39 | \n",
" 0.071384 | \n",
" labor factors | \n",
" mins worked per week | \n",
"
\n",
" \n",
" 21 | \n",
" 706 | \n",
" 0.009965 | \n",
" [-0.06, 0.08] | \n",
" 7.915440e-01 | \n",
" 0.049 | \n",
" 0.06 | \n",
" union | \n",
" 0.08 | \n",
" -0.06 | \n",
" 0.070035 | \n",
" labor factors | \n",
" =1 if belong to union | \n",
"
\n",
" \n",
" 22 | \n",
" 706 | \n",
" -0.322300 | \n",
" [-0.39, -0.25] | \n",
" 1.577335e-18 | \n",
" 2.471e+15 | \n",
" 1.00 | \n",
" worknrm | \n",
" -0.25 | \n",
" -0.39 | \n",
" 0.072300 | \n",
" labor factors | \n",
" mins work main job | \n",
"
\n",
" \n",
" 23 | \n",
" 706 | \n",
" 0.001139 | \n",
" [-0.07, 0.07] | \n",
" 9.759034e-01 | \n",
" 0.047 | \n",
" 0.05 | \n",
" workscnd | \n",
" 0.07 | \n",
" -0.07 | \n",
" 0.068861 | \n",
" labor factors | \n",
" mins work second job | \n",
"
\n",
" \n",
" 24 | \n",
" 706 | \n",
" 0.104191 | \n",
" [0.03, 0.18] | \n",
" 5.587422e-03 | \n",
" 2.175 | \n",
" 0.79 | \n",
" exper | \n",
" 0.18 | \n",
" 0.03 | \n",
" 0.075809 | \n",
" labor factors | \n",
" age - educ - 6 | \n",
"
\n",
" \n",
" 25 | \n",
" 706 | \n",
" -0.013262 | \n",
" [-0.09, 0.06] | \n",
" 7.250012e-01 | \n",
" 0.05 | \n",
" 0.06 | \n",
" yngkid | \n",
" 0.06 | \n",
" -0.09 | \n",
" 0.073262 | \n",
" family factors | \n",
" =1 if children < 3 present | \n",
"
\n",
" \n",
" 26 | \n",
" 706 | \n",
" 0.063997 | \n",
" [-0.01, 0.14] | \n",
" 8.928507e-02 | \n",
" 0.199 | \n",
" 0.40 | \n",
" yrsmarr | \n",
" 0.14 | \n",
" -0.01 | \n",
" 0.076003 | \n",
" family factors | \n",
" years married | \n",
"
\n",
" \n",
" 27 | \n",
" 532 | \n",
" -0.049450 | \n",
" [-0.13, 0.04] | \n",
" 2.548774e-01 | \n",
" 0.104 | \n",
" 0.21 | \n",
" hrwage | \n",
" 0.04 | \n",
" -0.13 | \n",
" 0.089450 | \n",
" labor factors | \n",
" hourly wage | \n",
"
\n",
" \n",
" 28 | \n",
" 706 | \n",
" 0.099722 | \n",
" [0.03, 0.17] | \n",
" 8.010946e-03 | \n",
" 1.574 | \n",
" 0.76 | \n",
" agesq | \n",
" 0.17 | \n",
" 0.03 | \n",
" 0.070278 | \n",
" age | \n",
" age^2 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" n r CI95% p-val BF10 power var \\\n",
"0 706 0.090373 [0.02, 0.16] 1.630887e-02 0.839 0.67 age \n",
"1 706 -0.027057 [-0.1, 0.05] 4.728889e-01 0.061 0.11 black \n",
"2 706 0.048081 [-0.03, 0.12] 2.019484e-01 0.106 0.25 clerical \n",
"3 706 0.041229 [-0.03, 0.11] 2.739475e-01 0.086 0.19 construc \n",
"4 706 -0.095004 [-0.17, -0.02] 1.155151e-02 1.137 0.72 educ \n",
"5 706 -0.076890 [-0.15, -0.0] 4.110934e-02 0.378 0.53 earns74 \n",
"6 706 -0.102825 [-0.18, -0.03] 6.246660e-03 1.967 0.78 gdhlth \n",
"7 706 -0.027126 [-0.1, 0.05] 4.717698e-01 0.061 0.11 inlf \n",
"8 706 -0.066997 [-0.14, 0.01] 7.524015e-02 0.229 0.43 smsa \n",
"9 532 -0.067197 [-0.15, 0.02] 1.216222e-01 0.179 0.34 lhrwage \n",
"10 706 0.036661 [-0.04, 0.11] 3.306971e-01 0.076 0.16 lothinc \n",
"11 706 -0.035909 [-0.11, 0.04] 3.407214e-01 0.074 0.16 male \n",
"12 706 0.053757 [-0.02, 0.13] 1.536188e-01 0.13 0.30 marr \n",
"13 706 0.027147 [-0.05, 0.1] 4.714176e-01 0.061 0.11 prot \n",
"14 706 0.867744 [0.85, 0.88] 6.051022e-216 6.697e+211 1.00 rlxall \n",
"15 706 0.001782 [-0.07, 0.08] 9.623058e-01 0.047 0.05 selfe \n",
"16 706 0.893043 [0.88, 0.91] 2.339108e-246 1.38e+242 1.00 slpnaps \n",
"17 706 0.078600 [0.0, 0.15] 3.679946e-02 0.415 0.55 south \n",
"18 706 0.007881 [-0.07, 0.08] 8.344125e-01 0.048 0.06 spsepay \n",
"19 706 0.007868 [-0.07, 0.08] 8.346888e-01 0.048 0.05 spwrk75 \n",
"20 706 -0.321384 [-0.39, -0.25] 1.994095e-18 1.961e+15 1.00 totwrk \n",
"21 706 0.009965 [-0.06, 0.08] 7.915440e-01 0.049 0.06 union \n",
"22 706 -0.322300 [-0.39, -0.25] 1.577335e-18 2.471e+15 1.00 worknrm \n",
"23 706 0.001139 [-0.07, 0.07] 9.759034e-01 0.047 0.05 workscnd \n",
"24 706 0.104191 [0.03, 0.18] 5.587422e-03 2.175 0.79 exper \n",
"25 706 -0.013262 [-0.09, 0.06] 7.250012e-01 0.05 0.06 yngkid \n",
"26 706 0.063997 [-0.01, 0.14] 8.928507e-02 0.199 0.40 yrsmarr \n",
"27 532 -0.049450 [-0.13, 0.04] 2.548774e-01 0.104 0.21 hrwage \n",
"28 706 0.099722 [0.03, 0.17] 8.010946e-03 1.574 0.76 agesq \n",
"\n",
" hl ll moerror group label \n",
"0 0.16 0.02 0.069627 age in years \n",
"1 0.05 -0.10 0.077057 other factors =1 if black \n",
"2 0.12 -0.03 0.071919 occupation =1 if clerical worker \n",
"3 0.11 -0.03 0.068771 occupation =1 if construction worker \n",
"4 -0.02 -0.17 0.075004 labor factors years of schooling \n",
"5 -0.00 -0.15 0.076890 labor factors total earnings, 1974 \n",
"6 -0.03 -0.18 0.072825 health factors =1 if in good or excel. health \n",
"7 0.05 -0.10 0.077126 labor factors =1 if in labor force \n",
"8 0.01 -0.14 0.076997 area of residence =1 if live in smsa \n",
"9 0.02 -0.15 0.087197 labor factors log hourly wage \n",
"10 0.11 -0.04 0.073339 labor factors log othinc, unless othinc < 0 \n",
"11 0.04 -0.11 0.075909 other factors =1 if male \n",
"12 0.13 -0.02 0.076243 family factors =1 if married \n",
"13 0.10 -0.05 0.072853 other factors =1 if Protestant \n",
"14 0.88 0.85 0.012256 other sleep factors slpnaps + personal activs \n",
"15 0.08 -0.07 0.078218 labor factors =1 if self employed \n",
"16 0.91 0.88 0.016957 other sleep factors minutes sleep, inc. naps \n",
"17 0.15 0.00 0.071400 area of residence =1 if live in south \n",
"18 0.08 -0.07 0.072119 other factors spousal wage income \n",
"19 0.08 -0.07 0.072132 other factors =1 if spouse works \n",
"20 -0.25 -0.39 0.071384 labor factors mins worked per week \n",
"21 0.08 -0.06 0.070035 labor factors =1 if belong to union \n",
"22 -0.25 -0.39 0.072300 labor factors mins work main job \n",
"23 0.07 -0.07 0.068861 labor factors mins work second job \n",
"24 0.18 0.03 0.075809 labor factors age - educ - 6 \n",
"25 0.06 -0.09 0.073262 family factors =1 if children < 3 present \n",
"26 0.14 -0.01 0.076003 family factors years married \n",
"27 0.04 -0.13 0.089450 labor factors hourly wage \n",
"28 0.17 0.03 0.070278 age age^2 "
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Compute correlations\n",
"df_corr = (pg.pairwise_corr(df)\n",
" .rename(columns={'p-unc': 'p-val'})\n",
" .query('Y==\"sleep\"|X==\"sleep\"')\n",
" .assign(var=lambda df: df['X'])\n",
" .assign(var=lambda df: np.where(df['var']==\"sleep\", df['Y'], df['var']))\n",
" .drop([\"Y\", \"X\", \"method\", \"alternative\"], axis=1)\n",
" .assign(\n",
" hl=lambda df: [float(ci[1]) for ci in df['CI95%']],\n",
" ll=lambda df: [float(ci[0]) for ci in df['CI95%']],\n",
" moerror=lambda df: df['hl'] - df['r'],\n",
" power=lambda df: df.power.round(decimals=2),\n",
" n=lambda df: df.n.map(str)\n",
" )\n",
" # Get labels\n",
" .merge(df_label, how='left', on='var', validate='1:1')\n",
" .reset_index(drop=True)\n",
" )\n",
"df_corr"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "c988902c",
"metadata": {
"ExecuteTime": {
"end_time": "2022-09-18T05:01:14.852081Z",
"start_time": "2022-09-18T05:01:14.801972Z"
}
},
"outputs": [],
"source": [
"df_corr.to_csv('data/sleep-untruncated.csv', index=False)\n",
"\n",
"_drop = ['earns74', 'inlf', 'lothinc', 'workscnd', 'lhrwage', 'worknrm', \n",
" 'spwrk75', 'marr', 'black', 'agesq', 'union', 'exper', 'rlxall', 'slpnaps']\n",
"df_corr.query('var not in @_drop').to_csv('data/sleep.csv', index=False)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "d0b63fbd",
"metadata": {
"ExecuteTime": {
"end_time": "2022-09-18T05:01:14.876068Z",
"start_time": "2022-09-18T05:01:14.852081Z"
},
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"| | var | r | moerror | label | group | ll | hl | n | power | p-val |\n",
"|---:|:---------|-----------:|----------:|:----------------------|:--------------|------:|-----:|----:|--------:|----------:|\n",
"| 0 | age | 0.0903729 | 0.0696271 | in years | age | 0.02 | 0.16 | 706 | 0.67 | 0.0163089 |\n",
"| 1 | black | -0.0270573 | 0.0770573 | =1 if black | other factors | -0.1 | 0.05 | 706 | 0.11 | 0.472889 |\n",
"| 2 | clerical | 0.0480811 | 0.0719189 | =1 if clerical worker | occupation | -0.03 | 0.12 | 706 | 0.25 | 0.201948 |\n"
]
}
],
"source": [
"_cols = ['var', 'r', 'moerror', 'label', 'group', 'll', 'hl', 'n', 'power', 'p-val']\n",
"print(df_corr[_cols].head(3).to_markdown())"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d8a67137",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.8"
},
"toc": {
"base_numbering": 1,
"nav_menu": {},
"number_sections": true,
"sideBar": true,
"skip_h1_title": false,
"title_cell": "Table of Contents",
"title_sidebar": "Contents",
"toc_cell": false,
"toc_position": {},
"toc_section_display": true,
"toc_window_display": false
},
"varInspector": {
"cols": {
"lenName": 16,
"lenType": 16,
"lenVar": 40
},
"kernels_config": {
"python": {
"delete_cmd_postfix": "",
"delete_cmd_prefix": "del ",
"library": "var_list.py",
"varRefreshCmd": "print(var_dic_list())"
},
"r": {
"delete_cmd_postfix": ") ",
"delete_cmd_prefix": "rm(",
"library": "var_list.r",
"varRefreshCmd": "cat(var_dic_list()) "
}
},
"types_to_exclude": [
"module",
"function",
"builtin_function_or_method",
"instance",
"_Feature"
],
"window_display": false
}
},
"nbformat": 4,
"nbformat_minor": 5
}