{ "cells": [ { "cell_type": "code", "execution_count": 120, "metadata": { "ExecuteTime": { "end_time": "2021-08-02T19:05:44.521662Z", "start_time": "2021-08-02T19:05:44.515478Z" } }, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "import seaborn as sns\n", "sns.set_style(\"whitegrid\")\n", "import matplotlib as mpl\n", "import matplotlib.pyplot as plt\n", "from scipy.stats import kurtosis, skew\n", "\n", "import sys\n", "sys.path.insert(0, '..')\n", "from mlconfound.stats import _binom_ci\n", "from mlconfound.simulate import sinh_arcsinh\n" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "ExecuteTime": { "end_time": "2021-08-02T09:43:13.175485Z", "start_time": "2021-08-02T09:43:13.172183Z" } }, "outputs": [], "source": [ "alpha=0.05" ] }, { "cell_type": "code", "execution_count": 32, "metadata": { "ExecuteTime": { "end_time": "2021-08-02T10:19:20.190511Z", "start_time": "2021-08-02T10:19:19.854056Z" } }, "outputs": [ { "data": { "text/html": [ "
| \n", " | p | \n", "r2_y_c | \n", "r2_yhat_c | \n", "r2_y_yhat | \n", "n | \n", "c_to_y_ratio_in_yhat | \n", "yc_in_yhat | \n", "cov_y_c | \n", "num_perms | \n", "random_seed | \n", "y_ratio | \n", "c_ratio | \n", "noise_ratio | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "0.330 | \n", "0.018327 | \n", "0.018327 | \n", "0.033823 | \n", "50 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "1000 | \n", "1019282514 | \n", "0.00 | \n", "0.00 | \n", "1.0 | \n", "
| 1 | \n", "0.525 | \n", "0.007353 | \n", "0.007353 | \n", "0.018785 | \n", "50 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "1000 | \n", "1679872250 | \n", "0.00 | \n", "0.00 | \n", "1.0 | \n", "
| 2 | \n", "0.472 | \n", "0.010726 | \n", "0.010726 | \n", "0.034227 | \n", "50 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "1000 | \n", "1837932928 | \n", "0.00 | \n", "0.00 | \n", "1.0 | \n", "
| 3 | \n", "0.093 | \n", "0.059060 | \n", "0.059060 | \n", "0.034390 | \n", "50 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "1000 | \n", "1442006727 | \n", "0.00 | \n", "0.00 | \n", "1.0 | \n", "
| 4 | \n", "0.854 | \n", "0.000768 | \n", "0.000768 | \n", "0.039799 | \n", "50 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "1000 | \n", "221822463 | \n", "0.00 | \n", "0.00 | \n", "1.0 | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
| 39995 | \n", "0.000 | \n", "0.896217 | \n", "0.896217 | \n", "0.888008 | \n", "1000 | \n", "1.0 | \n", "0.9 | \n", "0.8 | \n", "1000 | \n", "301235933 | \n", "0.45 | \n", "0.45 | \n", "0.1 | \n", "
| 39996 | \n", "0.000 | \n", "0.888342 | \n", "0.888342 | \n", "0.891314 | \n", "1000 | \n", "1.0 | \n", "0.9 | \n", "0.8 | \n", "1000 | \n", "1011369396 | \n", "0.45 | \n", "0.45 | \n", "0.1 | \n", "
| 39997 | \n", "0.000 | \n", "0.884954 | \n", "0.884954 | \n", "0.890494 | \n", "1000 | \n", "1.0 | \n", "0.9 | \n", "0.8 | \n", "1000 | \n", "1369868937 | \n", "0.45 | \n", "0.45 | \n", "0.1 | \n", "
| 39998 | \n", "0.000 | \n", "0.893212 | \n", "0.893212 | \n", "0.889125 | \n", "1000 | \n", "1.0 | \n", "0.9 | \n", "0.8 | \n", "1000 | \n", "1775743152 | \n", "0.45 | \n", "0.45 | \n", "0.1 | \n", "
| 39999 | \n", "0.000 | \n", "0.884304 | \n", "0.884304 | \n", "0.881556 | \n", "1000 | \n", "1.0 | \n", "0.9 | \n", "0.8 | \n", "1000 | \n", "18004435 | \n", "0.45 | \n", "0.45 | \n", "0.1 | \n", "
40000 rows × 13 columns
\n", "| \n", " | p | \n", "r2_y_c | \n", "r2_yhat_c | \n", "r2_y_yhat | \n", "n | \n", "c_to_y_ratio_in_yhat | \n", "yc_in_yhat | \n", "cov_y_c | \n", "num_perms | \n", "random_seed | \n", "y_ratio | \n", "c_ratio | \n", "noise_ratio | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "0.330 | \n", "0.018327 | \n", "0.018327 | \n", "0.033823 | \n", "50 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "1000 | \n", "1019282514 | \n", "0.0 | \n", "0.0 | \n", "1.0 | \n", "
| 1 | \n", "0.525 | \n", "0.007353 | \n", "0.007353 | \n", "0.018785 | \n", "50 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "1000 | \n", "1679872250 | \n", "0.0 | \n", "0.0 | \n", "1.0 | \n", "
| 2 | \n", "0.472 | \n", "0.010726 | \n", "0.010726 | \n", "0.034227 | \n", "50 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "1000 | \n", "1837932928 | \n", "0.0 | \n", "0.0 | \n", "1.0 | \n", "
| 3 | \n", "0.093 | \n", "0.059060 | \n", "0.059060 | \n", "0.034390 | \n", "50 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "1000 | \n", "1442006727 | \n", "0.0 | \n", "0.0 | \n", "1.0 | \n", "
| 4 | \n", "0.854 | \n", "0.000768 | \n", "0.000768 | \n", "0.039799 | \n", "50 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "1000 | \n", "221822463 | \n", "0.0 | \n", "0.0 | \n", "1.0 | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
| 38395 | \n", "0.375 | \n", "0.613132 | \n", "0.613132 | \n", "0.986808 | \n", "1000 | \n", "0.0 | \n", "0.9 | \n", "0.8 | \n", "1000 | \n", "1040193158 | \n", "0.9 | \n", "0.0 | \n", "0.1 | \n", "
| 38396 | \n", "0.505 | \n", "0.607980 | \n", "0.607980 | \n", "0.986165 | \n", "1000 | \n", "0.0 | \n", "0.9 | \n", "0.8 | \n", "1000 | \n", "364082909 | \n", "0.9 | \n", "0.0 | \n", "0.1 | \n", "
| 38397 | \n", "0.498 | \n", "0.622202 | \n", "0.622202 | \n", "0.987334 | \n", "1000 | \n", "0.0 | \n", "0.9 | \n", "0.8 | \n", "1000 | \n", "1449959609 | \n", "0.9 | \n", "0.0 | \n", "0.1 | \n", "
| 38398 | \n", "0.595 | \n", "0.647852 | \n", "0.647852 | \n", "0.986994 | \n", "1000 | \n", "0.0 | \n", "0.9 | \n", "0.8 | \n", "1000 | \n", "1597178978 | \n", "0.9 | \n", "0.0 | \n", "0.1 | \n", "
| 38399 | \n", "0.457 | \n", "0.591136 | \n", "0.591136 | \n", "0.987621 | \n", "1000 | \n", "0.0 | \n", "0.9 | \n", "0.8 | \n", "1000 | \n", "741372999 | \n", "0.9 | \n", "0.0 | \n", "0.1 | \n", "
16000 rows × 13 columns
\n", "| \n", " | p | \n", "r2_y_c | \n", "r2_yhat_c | \n", "r2_y_yhat | \n", "n | \n", "c_to_y_ratio_in_yhat | \n", "yc_in_yhat | \n", "cov_y_c | \n", "num_perms | \n", "random_seed | \n", "
|---|---|---|---|---|---|---|---|---|---|---|
| 2400 | \n", "0.533 | \n", "0.008597 | \n", "0.008597 | \n", "0.190566 | \n", "50 | \n", "0.1 | \n", "0.3 | \n", "0.0 | \n", "1000 | \n", "323672003 | \n", "
| 2401 | \n", "0.962 | \n", "0.000070 | \n", "0.000070 | \n", "0.299623 | \n", "50 | \n", "0.1 | \n", "0.3 | \n", "0.0 | \n", "1000 | \n", "1931310255 | \n", "
| 2402 | \n", "0.422 | \n", "0.014213 | \n", "0.014213 | \n", "0.061297 | \n", "50 | \n", "0.1 | \n", "0.3 | \n", "0.0 | \n", "1000 | \n", "983343362 | \n", "
| 2403 | \n", "0.096 | \n", "0.074110 | \n", "0.074110 | \n", "0.171384 | \n", "50 | \n", "0.1 | \n", "0.3 | \n", "0.0 | \n", "1000 | \n", "18978686 | \n", "
| 2404 | \n", "0.782 | \n", "0.001629 | \n", "0.001629 | \n", "0.064680 | \n", "50 | \n", "0.1 | \n", "0.3 | \n", "0.0 | \n", "1000 | \n", "1136612079 | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
| 39995 | \n", "0.000 | \n", "0.896217 | \n", "0.896217 | \n", "0.888008 | \n", "1000 | \n", "1.0 | \n", "0.9 | \n", "0.8 | \n", "1000 | \n", "301235933 | \n", "
| 39996 | \n", "0.000 | \n", "0.888342 | \n", "0.888342 | \n", "0.891314 | \n", "1000 | \n", "1.0 | \n", "0.9 | \n", "0.8 | \n", "1000 | \n", "1011369396 | \n", "
| 39997 | \n", "0.000 | \n", "0.884954 | \n", "0.884954 | \n", "0.890494 | \n", "1000 | \n", "1.0 | \n", "0.9 | \n", "0.8 | \n", "1000 | \n", "1369868937 | \n", "
| 39998 | \n", "0.000 | \n", "0.893212 | \n", "0.893212 | \n", "0.889125 | \n", "1000 | \n", "1.0 | \n", "0.9 | \n", "0.8 | \n", "1000 | \n", "1775743152 | \n", "
| 39999 | \n", "0.000 | \n", "0.884304 | \n", "0.884304 | \n", "0.881556 | \n", "1000 | \n", "1.0 | \n", "0.9 | \n", "0.8 | \n", "1000 | \n", "18004435 | \n", "
24000 rows × 10 columns
\n", "| \n", " | p | \n", "r2_y_c | \n", "r2_yhat_c | \n", "r2_y_yhat | \n", "n | \n", "c_to_y_ratio_in_yhat | \n", "yc_in_yhat | \n", "cov_y_c | \n", "num_perms | \n", "random_seed | \n", "c_ratio | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|
| 2000 | \n", "0.234 | \n", "0.002637 | \n", "0.001435 | \n", "0.745829 | \n", "50 | \n", "0.0 | \n", "0.3 | \n", "0.0 | \n", "1000 | \n", "1077883300 | \n", "0.30 | \n", "
| 2001 | \n", "0.000 | \n", "0.440270 | \n", "0.105202 | \n", "0.001563 | \n", "50 | \n", "0.0 | \n", "0.3 | \n", "0.0 | \n", "1000 | \n", "94267575 | \n", "0.30 | \n", "
| 2002 | \n", "0.010 | \n", "0.072779 | \n", "0.391289 | \n", "0.172047 | \n", "50 | \n", "0.0 | \n", "0.3 | \n", "0.0 | \n", "1000 | \n", "237191205 | \n", "0.30 | \n", "
| 2003 | \n", "0.879 | \n", "0.007295 | \n", "0.000497 | \n", "0.000417 | \n", "50 | \n", "0.0 | \n", "0.3 | \n", "0.0 | \n", "1000 | \n", "1261102006 | \n", "0.30 | \n", "
| 2004 | \n", "0.153 | \n", "0.001297 | \n", "0.003113 | \n", "0.256454 | \n", "50 | \n", "0.0 | \n", "0.3 | \n", "0.0 | \n", "1000 | \n", "95286841 | \n", "0.30 | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
| 39995 | \n", "0.575 | \n", "0.333095 | \n", "0.581533 | \n", "0.908563 | \n", "1000 | \n", "1.0 | \n", "0.9 | \n", "0.8 | \n", "1000 | \n", "301235933 | \n", "0.45 | \n", "
| 39996 | \n", "0.000 | \n", "0.292999 | \n", "0.687075 | \n", "0.739938 | \n", "1000 | \n", "1.0 | \n", "0.9 | \n", "0.8 | \n", "1000 | \n", "1011369396 | \n", "0.45 | \n", "
| 39997 | \n", "0.000 | \n", "0.257280 | \n", "0.796954 | \n", "0.665060 | \n", "1000 | \n", "1.0 | \n", "0.9 | \n", "0.8 | \n", "1000 | \n", "1369868937 | \n", "0.45 | \n", "
| 39998 | \n", "0.000 | \n", "0.571661 | \n", "0.811419 | \n", "0.872621 | \n", "1000 | \n", "1.0 | \n", "0.9 | \n", "0.8 | \n", "1000 | \n", "1775743152 | \n", "0.45 | \n", "
| 39999 | \n", "0.000 | \n", "0.536587 | \n", "0.867187 | \n", "0.779643 | \n", "1000 | \n", "1.0 | \n", "0.9 | \n", "0.8 | \n", "1000 | \n", "18004435 | \n", "0.45 | \n", "
30000 rows × 11 columns
\n", "