{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# `causalml` - Meta-Learner Example Notebook\n", "This notebook only contains regression examples." ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T18:47:09.085819Z", "start_time": "2020-04-14T18:47:09.066588Z" } }, "outputs": [], "source": [ "%reload_ext autoreload\n", "%autoreload 2" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T18:47:12.227097Z", "start_time": "2020-04-14T18:47:09.088487Z" } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/Users/jeong/.conda/envs/py36/lib/python3.6/site-packages/sklearn/utils/deprecation.py:144: FutureWarning: The sklearn.utils.testing module is deprecated in version 0.22 and will be removed in version 0.24. The corresponding classes / functions should instead be imported from sklearn.utils. Anything that cannot be imported from sklearn.utils is now part of the private API.\n", " warnings.warn(message, FutureWarning)\n" ] } ], "source": [ "import pandas as pd\n", "import numpy as np\n", "from matplotlib import pyplot as plt\n", "from sklearn.linear_model import LinearRegression, LogisticRegression\n", "from sklearn.model_selection import train_test_split\n", "import statsmodels.api as sm\n", "from xgboost import XGBRegressor, XGBClassifier\n", "import warnings\n", "\n", "# from causalml.inference.meta import XGBTLearner, MLPTLearner\n", "from causalml.inference.meta import BaseSRegressor, BaseTRegressor, BaseXRegressor, BaseRRegressor\n", "from causalml.inference.meta import BaseSClassifier, BaseTClassifier, BaseXClassifier, BaseRClassifier\n", "from causalml.inference.meta import LRSRegressor\n", "from causalml.match import NearestNeighborMatch, MatchOptimizer, create_table_one\n", "from causalml.propensity import ElasticNetPropensityModel\n", "from causalml.dataset import *\n", "from causalml.metrics import *\n", "\n", 
"warnings.filterwarnings('ignore')\n", "plt.style.use('fivethirtyeight')\n", "pd.set_option('display.float_format', lambda x: '%.4f' % x)\n", "\n", "# imports from package\n", "import logging\n", "from sklearn.dummy import DummyRegressor\n", "from sklearn.metrics import mean_squared_error as mse\n", "from sklearn.metrics import mean_absolute_error as mae\n", "import statsmodels.api as sm\n", "from copy import deepcopy\n", "\n", "logger = logging.getLogger('causalml')\n", "logging.basicConfig(level=logging.INFO)\n", "\n", "%matplotlib inline" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Single Treatment Case" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Generate synthetic data" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T18:47:12.283931Z", "start_time": "2020-04-14T18:47:12.230839Z" } }, "outputs": [], "source": [ "# Generate synthetic data using mode 1\n", "y, X, treatment, tau, b, e = synthetic_data(mode=1, n=10000, p=8, sigma=1.0)\n", "\n", "treatment = np.array(['treatment_a' if val==1 else 'control' for val in treatment])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## S-Learner" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### ATE" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T18:47:13.857975Z", "start_time": "2020-04-14T18:47:12.286727Z" } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:causalml:Error metrics for group treatment_a\n", "INFO:causalml: RMSE (Control): 0.6622\n", "INFO:causalml: RMSE (Treatment): 0.6941\n", "INFO:causalml: sMAPE (Control): 0.6536\n", "INFO:causalml: sMAPE (Treatment): 0.3721\n", "INFO:causalml: Gini (Control): 0.8248\n", "INFO:causalml: Gini (Treatment): 0.8156\n" ] } ], "source": [ "learner_s = BaseSRegressor(XGBRegressor(), control_name='control')\n", "ate_s = learner_s.estimate_ate(X=X, treatment=treatment, 
y=y, return_ci=False, bootstrap_ci=False)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T18:47:13.912096Z", "start_time": "2020-04-14T18:47:13.861042Z" } }, "outputs": [ { "data": { "text/plain": [ "array([0.57431368])" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ate_s" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### ATE w/ Confidence Intervals" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T18:47:15.541087Z", "start_time": "2020-04-14T18:47:13.914579Z" } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:causalml:Error metrics for group treatment_a\n", "INFO:causalml: RMSE (Control): 0.6622\n", "INFO:causalml: RMSE (Treatment): 0.6941\n", "INFO:causalml: sMAPE (Control): 0.6536\n", "INFO:causalml: sMAPE (Treatment): 0.3721\n", "INFO:causalml: Gini (Control): 0.8248\n", "INFO:causalml: Gini (Treatment): 0.8156\n" ] } ], "source": [ "alpha = 0.05\n", "learner_s = BaseSRegressor(XGBRegressor(), ate_alpha=alpha, control_name='control')\n", "ate_s, ate_s_lb, ate_s_ub = learner_s.estimate_ate(X=X, treatment=treatment, y=y, return_ci=True,\n", " bootstrap_ci=False)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T18:47:15.593203Z", "start_time": "2020-04-14T18:47:15.545759Z" } }, "outputs": [ { "data": { "text/plain": [ "array([[0.54689052],\n", " [0.57431368],\n", " [0.60173684]])" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.vstack((ate_s_lb, ate_s, ate_s_ub))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### ATE w/ Bootstrap Confidence Intervals" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T18:48:31.923961Z", "start_time": "2020-04-14T18:47:15.597096Z" } }, "outputs": [ { "name": 
"stderr", "output_type": "stream", "text": [ "INFO:causalml:Error metrics for group treatment_a\n", "INFO:causalml: RMSE (Control): 0.6622\n", "INFO:causalml: RMSE (Treatment): 0.6941\n", "INFO:causalml: sMAPE (Control): 0.6536\n", "INFO:causalml: sMAPE (Treatment): 0.3721\n", "INFO:causalml: Gini (Control): 0.8248\n", "INFO:causalml: Gini (Treatment): 0.8156\n", "INFO:causalml:Bootstrap Confidence Intervals for ATE\n", "100%|██████████| 100/100 [01:14<00:00, 1.34it/s]\n" ] } ], "source": [ "ate_s_b, ate_s_lb_b, ate_s_ub_b = learner_s.estimate_ate(X=X, treatment=treatment, y=y, return_ci=True,\n", " bootstrap_ci=True, n_bootstraps=100, bootstrap_size=5000)" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T18:48:31.965447Z", "start_time": "2020-04-14T18:48:31.926284Z" } }, "outputs": [ { "data": { "text/plain": [ "array([[0.51141982],\n", " [0.57431368],\n", " [0.64097547]])" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.vstack((ate_s_lb_b, ate_s_b, ate_s_ub_b))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### CATE" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T18:48:33.309900Z", "start_time": "2020-04-14T18:48:31.968542Z" } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:causalml:Error metrics for group treatment_a\n", "INFO:causalml: RMSE (Control): 0.6622\n", "INFO:causalml: RMSE (Treatment): 0.6941\n", "INFO:causalml: sMAPE (Control): 0.6536\n", "INFO:causalml: sMAPE (Treatment): 0.3721\n", "INFO:causalml: Gini (Control): 0.8248\n", "INFO:causalml: Gini (Treatment): 0.8156\n" ] } ], "source": [ "learner_s = BaseSRegressor(XGBRegressor(), control_name='control')\n", "cate_s = learner_s.fit_predict(X=X, treatment=treatment, y=y, return_ci=False)" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "ExecuteTime": { "end_time": 
"2020-04-14T18:48:33.349476Z", "start_time": "2020-04-14T18:48:33.311840Z" } }, "outputs": [ { "data": { "text/plain": [ "array([[0.37674308],\n", " [0.42519259],\n", " [0.60864675],\n", " ...,\n", " [0.19940662],\n", " [0.35013032],\n", " [0.78372002]])" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cate_s" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### CATE w/ Confidence Intervals" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T18:49:37.587994Z", "start_time": "2020-04-14T18:48:33.351595Z" } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:causalml:Error metrics for group treatment_a\n", "INFO:causalml: RMSE (Control): 0.6622\n", "INFO:causalml: RMSE (Treatment): 0.6941\n", "INFO:causalml: sMAPE (Control): 0.6536\n", "INFO:causalml: sMAPE (Treatment): 0.3721\n", "INFO:causalml: Gini (Control): 0.8248\n", "INFO:causalml: Gini (Treatment): 0.8156\n", "INFO:causalml:Bootstrap Confidence Intervals\n", "100%|██████████| 100/100 [01:02<00:00, 1.59it/s]\n" ] } ], "source": [ "alpha = 0.05\n", "learner_s = BaseSRegressor(XGBRegressor(), ate_alpha=alpha, control_name='control')\n", "cate_s, cate_s_lb, cate_s_ub = learner_s.fit_predict(X=X, treatment=treatment, y=y, return_ci=True,\n", " n_bootstraps=100, bootstrap_size=5000)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T18:49:37.669038Z", "start_time": "2020-04-14T18:49:37.591481Z" } }, "outputs": [ { "data": { "text/plain": [ "array([[0.37674308],\n", " [0.42519259],\n", " [0.60864675],\n", " ...,\n", " [0.19940662],\n", " [0.35013032],\n", " [0.78372002]])" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cate_s" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T18:49:37.759221Z", "start_time": 
"2020-04-14T18:49:37.674451Z" } }, "outputs": [ { "data": { "text/plain": [ "array([[-0.18972662],\n", " [ 0.20548496],\n", " [ 0.09983036],\n", " ...,\n", " [-0.62837307],\n", " [-0.19766161],\n", " [-0.07736247]])" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cate_s_lb" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T18:49:37.856614Z", "start_time": "2020-04-14T18:49:37.764939Z" } }, "outputs": [ { "data": { "text/plain": [ "array([[0.8139405 ],\n", " [1.278447 ],\n", " [1.21720439],\n", " ...,\n", " [0.90244564],\n", " [0.9450083 ],\n", " [1.1529291 ]])" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cate_s_ub" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## T-Learner" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### ATE w/ Confidence Intervals" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T18:49:39.262164Z", "start_time": "2020-04-14T18:49:37.860129Z" } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:causalml:Error metrics for group treatment_a\n", "INFO:causalml: RMSE (Control): 0.4868\n", "INFO:causalml: RMSE (Treatment): 0.5434\n", "INFO:causalml: sMAPE (Control): 0.5230\n", "INFO:causalml: sMAPE (Treatment): 0.3114\n", "INFO:causalml: Gini (Control): 0.9216\n", "INFO:causalml: Gini (Treatment): 0.8988\n" ] } ], "source": [ "learner_t = BaseTRegressor(XGBRegressor(), control_name='control')\n", "ate_t, ate_t_lb, ate_t_ub = learner_t.estimate_ate(X=X, treatment=treatment, y=y)" ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T18:49:39.304936Z", "start_time": "2020-04-14T18:49:39.264017Z" } }, "outputs": [ { "data": { "text/plain": [ "array([[0.55534845],\n", " [0.58090983],\n", " [0.60647121]])" ] }, "execution_count": 17, 
"metadata": {}, "output_type": "execute_result" } ], "source": [ "np.vstack((ate_t_lb, ate_t, ate_t_ub))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### ATE w/ Boostrap Confidence Intervals" ] }, { "cell_type": "code", "execution_count": 18, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T18:50:40.797544Z", "start_time": "2020-04-14T18:49:39.307236Z" } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:causalml:Error metrics for group treatment_a\n", "INFO:causalml: RMSE (Control): 0.4868\n", "INFO:causalml: RMSE (Treatment): 0.5434\n", "INFO:causalml: sMAPE (Control): 0.5230\n", "INFO:causalml: sMAPE (Treatment): 0.3114\n", "INFO:causalml: Gini (Control): 0.9216\n", "INFO:causalml: Gini (Treatment): 0.8988\n", "INFO:causalml:Bootstrap Confidence Intervals for ATE\n", "100%|██████████| 100/100 [01:00<00:00, 1.66it/s]\n" ] } ], "source": [ "ate_t_b, ate_t_lb_b, ate_t_ub_b = learner_t.estimate_ate(X=X, treatment=treatment, y=y, bootstrap_ci=True,\n", " n_bootstraps=100, bootstrap_size=5000)" ] }, { "cell_type": "code", "execution_count": 19, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T18:50:40.836006Z", "start_time": "2020-04-14T18:50:40.799256Z" } }, "outputs": [ { "data": { "text/plain": [ "array([[0.51343277],\n", " [0.58090983],\n", " [0.65843097]])" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.vstack((ate_t_lb_b, ate_t_b, ate_t_ub_b))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### CATE" ] }, { "cell_type": "code", "execution_count": 20, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T18:50:41.916753Z", "start_time": "2020-04-14T18:50:40.837869Z" } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:causalml:Error metrics for group treatment_a\n", "INFO:causalml: RMSE (Control): 0.4868\n", "INFO:causalml: RMSE (Treatment): 0.5434\n", "INFO:causalml: sMAPE (Control): 0.5230\n", "INFO:causalml: sMAPE 
(Treatment): 0.3114\n", "INFO:causalml: Gini (Control): 0.9216\n", "INFO:causalml: Gini (Treatment): 0.8988\n" ] } ], "source": [ "learner_t = BaseTRegressor(XGBRegressor(), control_name='control')\n", "cate_t = learner_t.fit_predict(X=X, treatment=treatment, y=y)" ] }, { "cell_type": "code", "execution_count": 21, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T18:50:41.956040Z", "start_time": "2020-04-14T18:50:41.918664Z" } }, "outputs": [ { "data": { "text/plain": [ "array([[ 0.23669004],\n", " [-0.0793891 ],\n", " [-0.10774326],\n", " ...,\n", " [ 0.30539629],\n", " [ 0.50784194],\n", " [ 0.00356007]])" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cate_t" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### CATE w/ Confidence Intervals" ] }, { "cell_type": "code", "execution_count": 22, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T18:51:42.554430Z", "start_time": "2020-04-14T18:50:41.963277Z" } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:causalml:Error metrics for group treatment_a\n", "INFO:causalml: RMSE (Control): 0.4868\n", "INFO:causalml: RMSE (Treatment): 0.5434\n", "INFO:causalml: sMAPE (Control): 0.5230\n", "INFO:causalml: sMAPE (Treatment): 0.3114\n", "INFO:causalml: Gini (Control): 0.9216\n", "INFO:causalml: Gini (Treatment): 0.8988\n", "INFO:causalml:Bootstrap Confidence Intervals\n", "100%|██████████| 100/100 [00:59<00:00, 1.68it/s]\n" ] } ], "source": [ "learner_t = BaseTRegressor(XGBRegressor(), control_name='control')\n", "cate_t, cate_t_lb, cate_t_ub = learner_t.fit_predict(X=X, treatment=treatment, y=y, return_ci=True, n_bootstraps=100,\n", " bootstrap_size=5000)" ] }, { "cell_type": "code", "execution_count": 23, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T18:51:42.599599Z", "start_time": "2020-04-14T18:51:42.559391Z" } }, "outputs": [ { "data": { "text/plain": [ "array([[ 0.23669004],\n", " [-0.0793891 ],\n", " 
[-0.10774326],\n", " ...,\n", " [ 0.30539629],\n", " [ 0.50784194],\n", " [ 0.00356007]])" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cate_t" ] }, { "cell_type": "code", "execution_count": 24, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T18:51:42.639342Z", "start_time": "2020-04-14T18:51:42.601624Z" } }, "outputs": [ { "data": { "text/plain": [ "array([[-0.6752711 ],\n", " [-0.72038152],\n", " [-1.2330182 ],\n", " ...,\n", " [-0.82131582],\n", " [-0.48846376],\n", " [-0.39046848]])" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cate_t_lb" ] }, { "cell_type": "code", "execution_count": 25, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T18:51:42.678368Z", "start_time": "2020-04-14T18:51:42.641296Z" } }, "outputs": [ { "data": { "text/plain": [ "array([[1.66480025],\n", " [1.60697527],\n", " [2.06829221],\n", " ...,\n", " [1.64941401],\n", " [1.59083122],\n", " [1.53139764]])" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cate_t_ub" ] }, { "cell_type": "markdown", "metadata": { "toc-hr-collapsed": false }, "source": [ "## X-Learner" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### ATE w/ Confidence Intervals" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### With Propensity Score Input" ] }, { "cell_type": "code", "execution_count": 26, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T18:51:44.935095Z", "start_time": "2020-04-14T18:51:42.680407Z" } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:causalml:Error metrics for group treatment_a\n", "INFO:causalml: RMSE (Control): 0.4868\n", "INFO:causalml: RMSE (Treatment): 0.5434\n", "INFO:causalml: sMAPE (Control): 0.5230\n", "INFO:causalml: sMAPE (Treatment): 0.3114\n", "INFO:causalml: Gini (Control): 0.9216\n", "INFO:causalml: Gini (Treatment): 0.8988\n" ] } ], "source": [ "learner_x = 
BaseXRegressor(XGBRegressor(), control_name='control')\n", "ate_x, ate_x_lb, ate_x_ub = learner_x.estimate_ate(X=X, treatment=treatment, y=y, p=e)" ] }, { "cell_type": "code", "execution_count": 27, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T18:51:44.972119Z", "start_time": "2020-04-14T18:51:44.936710Z" } }, "outputs": [ { "data": { "text/plain": [ "array([[0.51454586],\n", " [0.53721713],\n", " [0.55988839]])" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.vstack((ate_x_lb, ate_x, ate_x_ub))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Without Propensity Score Input" ] }, { "cell_type": "code", "execution_count": 28, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T18:51:47.712668Z", "start_time": "2020-04-14T18:51:44.974067Z" } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:causalml:Generating propensity score\n", "INFO:causalml:Calibrating propensity scores.\n", "INFO:causalml:Error metrics for group treatment_a\n", "INFO:causalml: RMSE (Control): 0.4868\n", "INFO:causalml: RMSE (Treatment): 0.5434\n", "INFO:causalml: sMAPE (Control): 0.5230\n", "INFO:causalml: sMAPE (Treatment): 0.3114\n", "INFO:causalml: Gini (Control): 0.9216\n", "INFO:causalml: Gini (Treatment): 0.8988\n" ] } ], "source": [ "ate_x_no_p, ate_x_lb_no_p, ate_x_ub_no_p = learner_x.estimate_ate(X=X, treatment=treatment, y=y)" ] }, { "cell_type": "code", "execution_count": 29, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T18:51:47.750441Z", "start_time": "2020-04-14T18:51:47.714685Z" } }, "outputs": [ { "data": { "text/plain": [ "array([[0.51334384],\n", " [0.53600211],\n", " [0.55866038]])" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.vstack((ate_x_lb_no_p, ate_x_no_p, ate_x_ub_no_p))" ] }, { "cell_type": "code", "execution_count": 30, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T18:51:47.793093Z", 
"start_time": "2020-04-14T18:51:47.752418Z" } }, "outputs": [ { "data": { "text/plain": [ "{'treatment_a': {'all training': LogisticRegressionCV(Cs=array([1.00230524, 2.15608891, 4.63802765, 9.97700064]),\n", " class_weight=None,\n", " cv=StratifiedKFold(n_splits=3, random_state=None, shuffle=True),\n", " dual=False, fit_intercept=True, intercept_scaling=1.0,\n", " l1_ratios=array([0.001 , 0.33366667, 0.66633333, 0.999 ]),\n", " max_iter=100, multi_class='auto', n_jobs=None,\n", " penalty='elasticnet', random_state=None, refit=True,\n", " scoring=None, solver='saga', tol=0.0001, verbose=0)}}" ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "learner_x.propensity_model" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### ATE w/ Bootstrap Confidence Intervals" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### With Propensity Score Input" ] }, { "cell_type": "code", "execution_count": 31, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T18:53:45.532120Z", "start_time": "2020-04-14T18:51:47.795412Z" } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:causalml:Error metrics for group treatment_a\n", "INFO:causalml: RMSE (Control): 0.4868\n", "INFO:causalml: RMSE (Treatment): 0.5434\n", "INFO:causalml: sMAPE (Control): 0.5230\n", "INFO:causalml: sMAPE (Treatment): 0.3114\n", "INFO:causalml: Gini (Control): 0.9216\n", "INFO:causalml: Gini (Treatment): 0.8988\n", "INFO:causalml:Bootstrap Confidence Intervals for ATE\n", "100%|██████████| 100/100 [01:55<00:00, 1.15s/it]\n" ] } ], "source": [ "ate_x_b, ate_x_lb_b, ate_x_ub_b = learner_x.estimate_ate(X=X, treatment=treatment, y=y, p=e, bootstrap_ci=True,\n", " n_bootstraps=100, bootstrap_size=5000)" ] }, { "cell_type": "code", "execution_count": 32, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T18:53:45.570961Z", "start_time": "2020-04-14T18:53:45.534229Z" } }, "outputs": [ { "data": { "text/plain": [ 
"array([[0.46262759],\n", " [0.53721713],\n", " [0.59662513]])" ] }, "execution_count": 32, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.vstack((ate_x_lb_b, ate_x_b, ate_x_ub_b))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Without Propensity Score Input" ] }, { "cell_type": "code", "execution_count": 33, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T18:55:44.972969Z", "start_time": "2020-04-14T18:53:45.572878Z" } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:causalml:Generating propensity score\n", "INFO:causalml:Calibrating propensity scores.\n", "INFO:causalml:Error metrics for group treatment_a\n", "INFO:causalml: RMSE (Control): 0.4868\n", "INFO:causalml: RMSE (Treatment): 0.5434\n", "INFO:causalml: sMAPE (Control): 0.5230\n", "INFO:causalml: sMAPE (Treatment): 0.3114\n", "INFO:causalml: Gini (Control): 0.9216\n", "INFO:causalml: Gini (Treatment): 0.8988\n", "INFO:causalml:Bootstrap Confidence Intervals for ATE\n", "100%|██████████| 100/100 [01:56<00:00, 1.17s/it]\n" ] } ], "source": [ "ate_x_b_no_p, ate_x_lb_b_no_p, ate_x_ub_b_no_p = learner_x.estimate_ate(X=X, treatment=treatment, y=y, bootstrap_ci=True,\n", " n_bootstraps=100, bootstrap_size=5000)" ] }, { "cell_type": "code", "execution_count": 34, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T18:55:45.012081Z", "start_time": "2020-04-14T18:55:44.975086Z" } }, "outputs": [ { "data": { "text/plain": [ "array([[0.44360865],\n", " [0.53598752],\n", " [0.59794413]])" ] }, "execution_count": 34, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.vstack((ate_x_lb_b_no_p, ate_x_b_no_p, ate_x_ub_b_no_p))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### CATE" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### With Propensity Score Input" ] }, { "cell_type": "code", "execution_count": 35, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T18:55:47.269808Z", "start_time": 
"2020-04-14T18:55:45.013958Z" } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:causalml:Error metrics for group treatment_a\n", "INFO:causalml: RMSE (Control): 0.4868\n", "INFO:causalml: RMSE (Treatment): 0.5434\n", "INFO:causalml: sMAPE (Control): 0.5230\n", "INFO:causalml: sMAPE (Treatment): 0.3114\n", "INFO:causalml: Gini (Control): 0.9216\n", "INFO:causalml: Gini (Treatment): 0.8988\n" ] } ], "source": [ "learner_x = BaseXRegressor(XGBRegressor(), control_name='control')\n", "cate_x = learner_x.fit_predict(X=X, treatment=treatment, y=y, p=e)" ] }, { "cell_type": "code", "execution_count": 36, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T18:55:47.308060Z", "start_time": "2020-04-14T18:55:47.271872Z" } }, "outputs": [ { "data": { "text/plain": [ "array([[0.05178452],\n", " [0.01907274],\n", " [0.79584839],\n", " ...,\n", " [0.18147876],\n", " [0.34742898],\n", " [0.23145415]])" ] }, "execution_count": 36, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cate_x" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Without Propensity Score Input" ] }, { "cell_type": "code", "execution_count": 37, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T18:55:50.057658Z", "start_time": "2020-04-14T18:55:47.310097Z" } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:causalml:Generating propensity score\n", "INFO:causalml:Calibrating propensity scores.\n", "INFO:causalml:Error metrics for group treatment_a\n", "INFO:causalml: RMSE (Control): 0.4868\n", "INFO:causalml: RMSE (Treatment): 0.5434\n", "INFO:causalml: sMAPE (Control): 0.5230\n", "INFO:causalml: sMAPE (Treatment): 0.3114\n", "INFO:causalml: Gini (Control): 0.9216\n", "INFO:causalml: Gini (Treatment): 0.8988\n" ] } ], "source": [ "cate_x_no_p = learner_x.fit_predict(X=X, treatment=treatment, y=y)" ] }, { "cell_type": "code", "execution_count": 38, "metadata": { "ExecuteTime": { "end_time": 
"2020-04-14T18:55:50.095258Z", "start_time": "2020-04-14T18:55:50.059363Z" } }, "outputs": [ { "data": { "text/plain": [ "array([[0.06426511],\n", " [0.0189166 ],\n", " [0.78233515],\n", " ...,\n", " [0.2237187 ],\n", " [0.29647103],\n", " [0.2359861 ]])" ] }, "execution_count": 38, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cate_x_no_p" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### CATE w/ Confidence Intervals" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### With Propensity Score Input" ] }, { "cell_type": "code", "execution_count": 39, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T18:57:07.153422Z", "start_time": "2020-04-14T18:55:50.097185Z" } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:causalml:Error metrics for group treatment_a\n", "INFO:causalml: RMSE (Control): 0.4868\n", "INFO:causalml: RMSE (Treatment): 0.5434\n", "INFO:causalml: sMAPE (Control): 0.5230\n", "INFO:causalml: sMAPE (Treatment): 0.3114\n", "INFO:causalml: Gini (Control): 0.9216\n", "INFO:causalml: Gini (Treatment): 0.8988\n", "INFO:causalml:Bootstrap Confidence Intervals\n", "100%|██████████| 100/100 [01:14<00:00, 1.34it/s]\n" ] } ], "source": [ "learner_x = BaseXRegressor(XGBRegressor(), control_name='control')\n", "cate_x, cate_x_lb, cate_x_ub = learner_x.fit_predict(X=X, treatment=treatment, y=y, p=e, return_ci=True,\n", " n_bootstraps=100, bootstrap_size=3000)" ] }, { "cell_type": "code", "execution_count": 40, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T18:57:07.202131Z", "start_time": "2020-04-14T18:57:07.155610Z" } }, "outputs": [ { "data": { "text/plain": [ "array([[0.05178452],\n", " [0.01907274],\n", " [0.79584839],\n", " ...,\n", " [0.18147876],\n", " [0.34742898],\n", " [0.23145415]])" ] }, "execution_count": 40, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cate_x" ] }, { "cell_type": "code", "execution_count": 41, "metadata": { "ExecuteTime": { 
"end_time": "2020-04-14T18:57:07.252726Z", "start_time": "2020-04-14T18:57:07.205064Z" } }, "outputs": [ { "data": { "text/plain": [ "array([[-0.71763188],\n", " [-0.79487709],\n", " [-0.329782 ],\n", " ...,\n", " [-0.57672694],\n", " [-0.48450804],\n", " [-0.43157597]])" ] }, "execution_count": 41, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cate_x_lb" ] }, { "cell_type": "code", "execution_count": 42, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T18:57:07.298204Z", "start_time": "2020-04-14T18:57:07.254908Z" } }, "outputs": [ { "data": { "text/plain": [ "array([[1.40320321],\n", " [1.59906792],\n", " [1.59324502],\n", " ...,\n", " [1.07747513],\n", " [1.30836353],\n", " [1.18985624]])" ] }, "execution_count": 42, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cate_x_ub" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Without Propensity Score Input" ] }, { "cell_type": "code", "execution_count": 43, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T18:58:26.822473Z", "start_time": "2020-04-14T18:57:07.300843Z" } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:causalml:Generating propensity score\n", "INFO:causalml:Calibrating propensity scores.\n", "INFO:causalml:Error metrics for group treatment_a\n", "INFO:causalml: RMSE (Control): 0.4868\n", "INFO:causalml: RMSE (Treatment): 0.5434\n", "INFO:causalml: sMAPE (Control): 0.5230\n", "INFO:causalml: sMAPE (Treatment): 0.3114\n", "INFO:causalml: Gini (Control): 0.9216\n", "INFO:causalml: Gini (Treatment): 0.8988\n", "INFO:causalml:Bootstrap Confidence Intervals\n", "100%|██████████| 100/100 [01:16<00:00, 1.31it/s]\n" ] } ], "source": [ "cate_x_no_p, cate_x_lb_no_p, cate_x_ub_no_p = learner_x.fit_predict(X=X, treatment=treatment, y=y, return_ci=True,\n", " n_bootstraps=100, bootstrap_size=3000)" ] }, { "cell_type": "code", "execution_count": 44, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T18:58:26.864389Z", 
"start_time": "2020-04-14T18:58:26.824577Z" } }, "outputs": [ { "data": { "text/plain": [ "array([[0.06430496],\n", " [0.01891659],\n", " [0.78209735],\n", " ...,\n", " [0.22376976],\n", " [0.29645377],\n", " [0.23597794]])" ] }, "execution_count": 44, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cate_x_no_p" ] }, { "cell_type": "code", "execution_count": 45, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T18:58:26.906146Z", "start_time": "2020-04-14T18:58:26.866620Z" } }, "outputs": [ { "data": { "text/plain": [ "array([[-0.62013372],\n", " [-0.90236405],\n", " [-0.31043938],\n", " ...,\n", " [-0.54219561],\n", " [-0.2852425 ],\n", " [-0.37437315]])" ] }, "execution_count": 45, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cate_x_lb_no_p" ] }, { "cell_type": "code", "execution_count": 46, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T18:58:26.945545Z", "start_time": "2020-04-14T18:58:26.908137Z" } }, "outputs": [ { "data": { "text/plain": [ "array([[1.4199368 ],\n", " [1.45096372],\n", " [1.57656827],\n", " ...,\n", " [1.34583137],\n", " [1.37899369],\n", " [1.25074382]])" ] }, "execution_count": 46, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cate_x_ub_no_p" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## R-Learner" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### ATE w/ Confidence Intervals" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### With Propensity Score Input" ] }, { "cell_type": "code", "execution_count": 47, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T18:58:33.047158Z", "start_time": "2020-04-14T18:58:26.947521Z" } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:causalml:generating out-of-fold CV outcome estimates\n", "INFO:causalml:training the treatment effect model for treatment_a with R-loss\n" ] } ], "source": [ "learner_r = BaseRRegressor(XGBRegressor(), control_name='control')\n", 
"ate_r, ate_r_lb, ate_r_ub = learner_r.estimate_ate(X=X, treatment=treatment, y=y, p=e)" ] }, { "cell_type": "code", "execution_count": 48, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T18:58:33.087239Z", "start_time": "2020-04-14T18:58:33.049284Z" } }, "outputs": [ { "data": { "text/plain": [ "array([[0.55904178],\n", " [0.55951123],\n", " [0.55998069]])" ] }, "execution_count": 48, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.vstack((ate_r_lb, ate_r, ate_r_ub))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Without Propensity Score Input" ] }, { "cell_type": "code", "execution_count": 49, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T18:58:38.036497Z", "start_time": "2020-04-14T18:58:33.089093Z" } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:causalml:Generating propensity score\n", "INFO:causalml:Calibrating propensity scores.\n", "INFO:causalml:generating out-of-fold CV outcome estimates\n", "INFO:causalml:training the treatment effect model for treatment_a with R-loss\n" ] } ], "source": [ "ate_r_no_p, ate_r_lb_no_p, ate_r_ub_no_p = learner_r.estimate_ate(X=X, treatment=treatment, y=y)" ] }, { "cell_type": "code", "execution_count": 50, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T18:58:38.072770Z", "start_time": "2020-04-14T18:58:38.038825Z" } }, "outputs": [ { "data": { "text/plain": [ "array([[0.49307912],\n", " [0.49354918],\n", " [0.49401924]])" ] }, "execution_count": 50, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.vstack((ate_r_lb_no_p, ate_r_no_p, ate_r_ub_no_p))" ] }, { "cell_type": "code", "execution_count": 51, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T18:58:38.109243Z", "start_time": "2020-04-14T18:58:38.074501Z" } }, "outputs": [ { "data": { "text/plain": [ "{'treatment_a': {'all training': LogisticRegressionCV(Cs=array([1.00230524, 2.15608891, 4.63802765, 9.97700064]),\n", " class_weight=None,\n", " 
cv=KFold(n_splits=5, random_state=None, shuffle=True),\n", " dual=False, fit_intercept=True, intercept_scaling=1.0,\n", " l1_ratios=array([0.001 , 0.33366667, 0.66633333, 0.999 ]),\n", " max_iter=100, multi_class='auto', n_jobs=None,\n", " penalty='elasticnet', random_state=None, refit=True,\n", " scoring=None, solver='saga', tol=0.0001, verbose=0)}}" ] }, "execution_count": 51, "metadata": {}, "output_type": "execute_result" } ], "source": [ "learner_r.propensity_model" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### ATE w/ Bootstrap Confidence Intervals" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### With Propensity Score Input" ] }, { "cell_type": "code", "execution_count": 52, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T19:00:38.045754Z", "start_time": "2020-04-14T18:58:38.111041Z" } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:causalml:generating out-of-fold CV outcome estimates\n", "INFO:causalml:training the treatment effect model for treatment_a with R-loss\n", "INFO:causalml:Bootstrap Confidence Intervals for ATE\n", "100%|██████████| 100/100 [01:56<00:00, 1.17s/it]\n" ] } ], "source": [ "ate_r_b, ate_r_lb_b, ate_r_ub_b = learner_r.estimate_ate(X=X, treatment=treatment, y=y, p=e, bootstrap_ci=True,\n", " n_bootstraps=100, bootstrap_size=5000)" ] }, { "cell_type": "code", "execution_count": 53, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T19:00:38.089296Z", "start_time": "2020-04-14T19:00:38.047834Z" } }, "outputs": [ { "data": { "text/plain": [ "array([[0.37951505],\n", " [0.54612646],\n", " [0.53701368]])" ] }, "execution_count": 53, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.vstack((ate_r_lb_b, ate_r_b, ate_r_ub_b))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Without Propensity Score Input" ] }, { "cell_type": "code", "execution_count": 54, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T19:03:24.546297Z",
"start_time": "2020-04-14T19:00:38.091485Z" } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:causalml:Generating propensity score\n", "INFO:causalml:Calibrating propensity scores.\n", "INFO:causalml:generating out-of-fold CV outcome estimates\n", "INFO:causalml:training the treatment effect model for treatment_a with R-loss\n", "INFO:causalml:Bootstrap Confidence Intervals for ATE\n", "100%|██████████| 100/100 [02:42<00:00, 1.63s/it]\n" ] } ], "source": [ "ate_r_b_no_p, ate_r_lb_b_no_p, ate_r_ub_b_no_p = learner_r.estimate_ate(X=X, treatment=treatment, y=y, bootstrap_ci=True,\n", " n_bootstraps=100, bootstrap_size=5000)" ] }, { "cell_type": "code", "execution_count": 55, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T19:03:24.617403Z", "start_time": "2020-04-14T19:03:24.549832Z" } }, "outputs": [ { "data": { "text/plain": [ "array([[0.37126915],\n", " [0.50635052],\n", " [0.51400059]])" ] }, "execution_count": 55, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.vstack((ate_r_lb_b_no_p, ate_r_b_no_p, ate_r_ub_b_no_p))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### CATE" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### With Propensity Score Input" ] }, { "cell_type": "code", "execution_count": 56, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T19:03:29.033458Z", "start_time": "2020-04-14T19:03:24.621209Z" } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:causalml:generating out-of-fold CV outcome estimates\n", "INFO:causalml:training the treatment effect model for treatment_a with R-loss\n" ] } ], "source": [ "learner_r = BaseRRegressor(XGBRegressor(), control_name='control')\n", "cate_r = learner_r.fit_predict(X=X, treatment=treatment, y=y, p=e)" ] }, { "cell_type": "code", "execution_count": 57, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T19:03:29.087607Z", "start_time": "2020-04-14T19:03:29.036023Z" } }, "outputs": [ { 
"data": { "text/plain": [ "array([[ 1.57365084],\n", " [-0.63619554],\n", " [-0.05320793],\n", " ...,\n", " [ 0.56346375],\n", " [ 0.56288183],\n", " [ 0.87085617]])" ] }, "execution_count": 57, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cate_r" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Without Propensity Score Input" ] }, { "cell_type": "code", "execution_count": 58, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T19:03:33.641108Z", "start_time": "2020-04-14T19:03:29.090259Z" } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:causalml:Generating propensity score\n", "INFO:causalml:Calibrating propensity scores.\n", "INFO:causalml:generating out-of-fold CV outcome estimates\n", "INFO:causalml:training the treatment effect model for treatment_a with R-loss\n" ] } ], "source": [ "cate_r_no_p = learner_r.fit_predict(X=X, treatment=treatment, y=y)" ] }, { "cell_type": "code", "execution_count": 59, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T19:03:33.692456Z", "start_time": "2020-04-14T19:03:33.644376Z" } }, "outputs": [ { "data": { "text/plain": [ "array([[-0.19582933],\n", " [-0.29006499],\n", " [ 0.46513131],\n", " ...,\n", " [ 0.89712083],\n", " [ 0.81002617],\n", " [ 0.82598114]])" ] }, "execution_count": 59, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cate_r_no_p" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### CATE w/ Confidence Intervals" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### With Propensity Score Input" ] }, { "cell_type": "code", "execution_count": 60, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T19:04:23.515500Z", "start_time": "2020-04-14T19:03:33.694879Z" } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:causalml:generating out-of-fold CV outcome estimates\n", "INFO:causalml:training the treatment effect model for treatment_a with R-loss\n", 
"INFO:causalml:Bootstrap Confidence Intervals\n", "100%|██████████| 100/100 [00:46<00:00, 2.15it/s]\n" ] } ], "source": [ "learner_r = BaseRRegressor(XGBRegressor(), control_name='control')\n", "cate_r, cate_r_lb, cate_r_ub = learner_r.fit_predict(X=X, treatment=treatment, y=y, p=e, return_ci=True,\n", " n_bootstraps=100, bootstrap_size=1000)" ] }, { "cell_type": "code", "execution_count": 61, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T19:04:23.561881Z", "start_time": "2020-04-14T19:04:23.517576Z" } }, "outputs": [ { "data": { "text/plain": [ "array([[ 0.43967736],\n", " [-0.27467608],\n", " [-0.36704457],\n", " ...,\n", " [ 1.70213294],\n", " [ 0.53581667],\n", " [ 0.67119908]])" ] }, "execution_count": 61, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cate_r" ] }, { "cell_type": "code", "execution_count": 62, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T19:04:23.608087Z", "start_time": "2020-04-14T19:04:23.564124Z" } }, "outputs": [ { "data": { "text/plain": [ "array([[-2.36270347],\n", " [-2.10110987],\n", " [-3.33190218],\n", " ...,\n", " [-2.25005704],\n", " [-2.08611215],\n", " [-1.89283199]])" ] }, "execution_count": 62, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cate_r_lb" ] }, { "cell_type": "code", "execution_count": 63, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T19:04:23.655535Z", "start_time": "2020-04-14T19:04:23.610212Z" } }, "outputs": [ { "data": { "text/plain": [ "array([[3.23361461],\n", " [4.39421365],\n", " [3.95620847],\n", " ...,\n", " [3.15905744],\n", " [3.23586204],\n", " [2.31788745]])" ] }, "execution_count": 63, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cate_r_ub" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Without Propensity Score Input" ] }, { "cell_type": "code", "execution_count": 64, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T19:04:58.689399Z", "start_time": "2020-04-14T19:04:23.658096Z" } }, 
"outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:causalml:Generating propensity score\n", "INFO:causalml:Calibrating propensity scores.\n", "INFO:causalml:generating out-of-fold CV outcome estimates\n", "INFO:causalml:training the treatment effect model for treatment_a with R-loss\n", "INFO:causalml:Bootstrap Confidence Intervals\n", "100%|██████████| 100/100 [00:31<00:00, 3.14it/s]\n" ] } ], "source": [ "learner_r = BaseRRegressor(XGBRegressor(), control_name='control')\n", "cate_r_no_p, cate_r_lb_no_p, cate_r_ub_no_p = learner_r.fit_predict(X=X, treatment=treatment, y=y, return_ci=True,\n", " n_bootstraps=100, bootstrap_size=1000)" ] }, { "cell_type": "code", "execution_count": 65, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T19:04:58.736814Z", "start_time": "2020-04-14T19:04:58.691749Z" } }, "outputs": [ { "data": { "text/plain": [ "array([[-0.14972556],\n", " [ 0.18446118],\n", " [ 0.23380044],\n", " ...,\n", " [ 0.55917108],\n", " [-0.16540062],\n", " [ 0.62050438]])" ] }, "execution_count": 65, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cate_r_no_p" ] }, { "cell_type": "code", "execution_count": 66, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T19:04:58.783181Z", "start_time": "2020-04-14T19:04:58.739229Z" } }, "outputs": [ { "data": { "text/plain": [ "array([[-2.37674593],\n", " [-1.66803797],\n", " [-3.47868801],\n", " ...,\n", " [-1.95877534],\n", " [-2.32770172],\n", " [-1.68704787]])" ] }, "execution_count": 66, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cate_r_lb_no_p" ] }, { "cell_type": "code", "execution_count": 67, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T19:04:58.843766Z", "start_time": "2020-04-14T19:04:58.798145Z" } }, "outputs": [ { "data": { "text/plain": [ "array([[2.9130644 ],\n", " [3.99895564],\n", " [3.61212277],\n", " ...,\n", " [3.174209 ],\n", " [3.38644627],\n", " [2.62858756]])" ] }, "execution_count": 67, "metadata": {}, 
"output_type": "execute_result" } ], "source": [ "cate_r_ub_no_p" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Visualize" ] }, { "cell_type": "code", "execution_count": 68, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T19:04:59.305207Z", "start_time": "2020-04-14T19:04:58.849620Z" } }, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "groups = learner_r._classes\n", "\n", "alpha = 1\n", "linewidth = 2\n", "bins = 30\n", "for group, idx in sorted(groups.items(), key=lambda x: x[1]):\n", "    # Draw everything on one explicit Axes. A bare plt.axes() call adds a new Axes on\n", "    # current Matplotlib, so its y-limits would not be those of the histograms.\n", "    fig, ax = plt.subplots(figsize=(12, 8))\n", "    ax.hist(cate_t[:, idx], alpha=alpha, bins=bins, label='T Learner ({})'.format(group),\n", "            histtype='step', linewidth=linewidth, density=True)\n", "    ax.hist(cate_x[:, idx], alpha=alpha, bins=bins, label='X Learner ({})'.format(group),\n", "            histtype='step', linewidth=linewidth, density=True)\n", "    ax.hist(cate_r[:, idx], alpha=alpha, bins=bins, label='R Learner ({})'.format(group),\n", "            histtype='step', linewidth=linewidth, density=True)\n", "    ax.hist(tau, alpha=alpha, bins=bins, label='Actual ATE distr',\n", "            histtype='step', linewidth=linewidth, color='green', density=True)\n", "\n", "    # Read the upper y-limit once, after the histograms, so both markers span the same height.\n", "    y_top = ax.get_ylim()[1]\n", "    ax.vlines(cate_s[0, idx], 0, y_top, label='S Learner ({})'.format(group),\n", "              linestyles='dotted', linewidth=linewidth)\n", "    ax.vlines(tau.mean(), 0, y_top, label='Actual ATE',\n", "              linestyles='dotted', linewidth=linewidth, color='green')\n", "\n", "    ax.set_title('Distribution of CATE Predictions for {}'.format(group))\n", "    ax.set_xlabel('Individual Treatment Effect (ITE/CATE)')\n", "    # The histograms use density=True, so the y-axis is a density, not a sample count.\n", "    ax.set_ylabel('Density')\n", "    ax.legend()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "---\n", "# Multiple Treatment Case" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Generate synthetic data\n", "Note: we randomize the assignment of treatment flag AFTER the synthetic data generation process, so it doesn't make sense to measure accuracy metrics here. Next steps would be to include multi-treatment in the DGP itself."
] }, { "cell_type": "code", "execution_count": 69, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T19:04:59.357345Z", "start_time": "2020-04-14T19:04:59.307042Z" } }, "outputs": [], "source": [ "# Generate synthetic data using mode 1\n", "y, X, treatment, tau, b, e = synthetic_data(mode=1, n=10000, p=8, sigma=1.0)\n", "\n", "treatment = np.array([('treatment_a' if np.random.random() > 0.2 else 'treatment_b') \n", " if val==1 else 'control' for val in treatment])\n", "\n", "e = {group: e for group in np.unique(treatment)}" ] }, { "cell_type": "code", "execution_count": 70, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T19:04:59.412822Z", "start_time": "2020-04-14T19:04:59.359396Z" } }, "outputs": [ { "data": { "text/plain": [ "control 4768\n", "treatment_a 4146\n", "treatment_b 1086\n", "dtype: int64" ] }, "execution_count": 70, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.Series(treatment).value_counts()" ] }, { "cell_type": "markdown", "metadata": { "toc-hr-collapsed": true }, "source": [ "## S-Learner" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### ATE" ] }, { "cell_type": "code", "execution_count": 71, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T19:05:01.278019Z", "start_time": "2020-04-14T19:04:59.415228Z" } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:causalml:Error metrics for group treatment_a\n", "INFO:causalml: RMSE (Control): 0.6339\n", "INFO:causalml: RMSE (Treatment): 0.6447\n", "INFO:causalml: sMAPE (Control): 0.6148\n", "INFO:causalml: sMAPE (Treatment): 0.3498\n", "INFO:causalml: Gini (Control): 0.8528\n", "INFO:causalml: Gini (Treatment): 0.8492\n", "INFO:causalml:Error metrics for group treatment_b\n", "INFO:causalml: RMSE (Control): 0.5584\n", "INFO:causalml: RMSE (Treatment): 0.4771\n", "INFO:causalml: sMAPE (Control): 0.5699\n", "INFO:causalml: sMAPE (Treatment): 0.2768\n", "INFO:causalml: Gini (Control): 0.8921\n", "INFO:causalml: Gini 
(Treatment): 0.9227\n" ] } ], "source": [ "learner_s = BaseSRegressor(XGBRegressor(), control_name='control')\n", "ate_s = learner_s.estimate_ate(X=X, treatment=treatment, y=y, return_ci=False, bootstrap_ci=False)" ] }, { "cell_type": "code", "execution_count": 72, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T19:05:01.320962Z", "start_time": "2020-04-14T19:05:01.279909Z" } }, "outputs": [ { "data": { "text/plain": [ "array([0.58349553, 0.58778215])" ] }, "execution_count": 72, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ate_s" ] }, { "cell_type": "code", "execution_count": 73, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T19:05:01.368038Z", "start_time": "2020-04-14T19:05:01.323307Z" } }, "outputs": [ { "data": { "text/plain": [ "{'treatment_a': 0, 'treatment_b': 1}" ] }, "execution_count": 73, "metadata": {}, "output_type": "execute_result" } ], "source": [ "learner_s._classes" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### ATE w/ Confidence Intervals" ] }, { "cell_type": "code", "execution_count": 74, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T19:05:03.211605Z", "start_time": "2020-04-14T19:05:01.370785Z" } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:causalml:Error metrics for group treatment_a\n", "INFO:causalml: RMSE (Control): 0.6339\n", "INFO:causalml: RMSE (Treatment): 0.6447\n", "INFO:causalml: sMAPE (Control): 0.6148\n", "INFO:causalml: sMAPE (Treatment): 0.3498\n", "INFO:causalml: Gini (Control): 0.8528\n", "INFO:causalml: Gini (Treatment): 0.8492\n", "INFO:causalml:Error metrics for group treatment_b\n", "INFO:causalml: RMSE (Control): 0.5584\n", "INFO:causalml: RMSE (Treatment): 0.4771\n", "INFO:causalml: sMAPE (Control): 0.5699\n", "INFO:causalml: sMAPE (Treatment): 0.2768\n", "INFO:causalml: Gini (Control): 0.8921\n", "INFO:causalml: Gini (Treatment): 0.9227\n" ] } ], "source": [ "alpha = 0.05\n", "learner_s = BaseSRegressor(XGBRegressor(), 
ate_alpha=alpha, control_name='control')\n", "ate_s, ate_s_lb, ate_s_ub = learner_s.estimate_ate(X=X, treatment=treatment, y=y, return_ci=True,\n", " bootstrap_ci=False)" ] }, { "cell_type": "code", "execution_count": 75, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T19:05:03.255641Z", "start_time": "2020-04-14T19:05:03.213558Z" } }, "outputs": [ { "data": { "text/plain": [ "array([[0.5555693 , 0.55278018],\n", " [0.58349553, 0.58778215],\n", " [0.61142176, 0.62278413]])" ] }, "execution_count": 75, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.vstack((ate_s_lb, ate_s, ate_s_ub))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### ATE w/ Bootstrap Confidence Intervals" ] }, { "cell_type": "code", "execution_count": 76, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T19:06:45.403405Z", "start_time": "2020-04-14T19:05:03.258090Z" } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:causalml:Error metrics for group treatment_a\n", "INFO:causalml: RMSE (Control): 0.6339\n", "INFO:causalml: RMSE (Treatment): 0.6447\n", "INFO:causalml: sMAPE (Control): 0.6148\n", "INFO:causalml: sMAPE (Treatment): 0.3498\n", "INFO:causalml: Gini (Control): 0.8528\n", "INFO:causalml: Gini (Treatment): 0.8492\n", "INFO:causalml:Error metrics for group treatment_b\n", "INFO:causalml: RMSE (Control): 0.5584\n", "INFO:causalml: RMSE (Treatment): 0.4771\n", "INFO:causalml: sMAPE (Control): 0.5699\n", "INFO:causalml: sMAPE (Treatment): 0.2768\n", "INFO:causalml: Gini (Control): 0.8921\n", "INFO:causalml: Gini (Treatment): 0.9227\n", "INFO:causalml:Bootstrap Confidence Intervals for ATE\n", "100%|██████████| 100/100 [01:40<00:00, 1.00s/it]\n" ] } ], "source": [ "ate_s_b, ate_s_lb_b, ate_s_ub_b = learner_s.estimate_ate(X=X, treatment=treatment, y=y, return_ci=True,\n", " bootstrap_ci=True, n_bootstraps=100, bootstrap_size=5000)" ] }, { "cell_type": "code", "execution_count": 77, "metadata": { "ExecuteTime": {
"end_time": "2020-04-14T19:06:45.442749Z", "start_time": "2020-04-14T19:06:45.405407Z" } }, "outputs": [ { "data": { "text/plain": [ "array([[0.52550035, 0.52550035],\n", " [0.58349553, 0.58778215],\n", " [0.64944596, 0.64944596]])" ] }, "execution_count": 77, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.vstack((ate_s_lb_b, ate_s_b, ate_s_ub_b))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### CATE" ] }, { "cell_type": "code", "execution_count": 78, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T19:06:47.149107Z", "start_time": "2020-04-14T19:06:45.444724Z" } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:causalml:Error metrics for group treatment_a\n", "INFO:causalml: RMSE (Control): 0.6339\n", "INFO:causalml: RMSE (Treatment): 0.6447\n", "INFO:causalml: sMAPE (Control): 0.6148\n", "INFO:causalml: sMAPE (Treatment): 0.3498\n", "INFO:causalml: Gini (Control): 0.8528\n", "INFO:causalml: Gini (Treatment): 0.8492\n", "INFO:causalml:Error metrics for group treatment_b\n", "INFO:causalml: RMSE (Control): 0.5584\n", "INFO:causalml: RMSE (Treatment): 0.4771\n", "INFO:causalml: sMAPE (Control): 0.5699\n", "INFO:causalml: sMAPE (Treatment): 0.2768\n", "INFO:causalml: Gini (Control): 0.8921\n", "INFO:causalml: Gini (Treatment): 0.9227\n" ] } ], "source": [ "learner_s = BaseSRegressor(XGBRegressor(), control_name='control')\n", "cate_s = learner_s.fit_predict(X=X, treatment=treatment, y=y, return_ci=False)" ] }, { "cell_type": "code", "execution_count": 79, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T19:06:47.187393Z", "start_time": "2020-04-14T19:06:47.150866Z" } }, "outputs": [ { "data": { "text/plain": [ "array([[ 0.91381967, 0.82956386],\n", " [-0.17692167, -0.15709245],\n", " [ 0.90877771, 0.92332006],\n", " ...,\n", " [ 0.86159408, 0.53687155],\n", " [ 0.66541922, 0.78590739],\n", " [ 1.05691028, 1.03345728]])" ] }, "execution_count": 79, "metadata": {}, "output_type": 
"execute_result" } ], "source": [ "cate_s" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### CATE w/ Confidence Intervals" ] }, { "cell_type": "code", "execution_count": 80, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T19:07:52.420017Z", "start_time": "2020-04-14T19:06:47.189370Z" } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:causalml:Error metrics for group treatment_a\n", "INFO:causalml: RMSE (Control): 0.6339\n", "INFO:causalml: RMSE (Treatment): 0.6447\n", "INFO:causalml: sMAPE (Control): 0.6148\n", "INFO:causalml: sMAPE (Treatment): 0.3498\n", "INFO:causalml: Gini (Control): 0.8528\n", "INFO:causalml: Gini (Treatment): 0.8492\n", "INFO:causalml:Error metrics for group treatment_b\n", "INFO:causalml: RMSE (Control): 0.5584\n", "INFO:causalml: RMSE (Treatment): 0.4771\n", "INFO:causalml: sMAPE (Control): 0.5699\n", "INFO:causalml: sMAPE (Treatment): 0.2768\n", "INFO:causalml: Gini (Control): 0.8921\n", "INFO:causalml: Gini (Treatment): 0.9227\n", "INFO:causalml:Bootstrap Confidence Intervals\n", "100%|██████████| 100/100 [01:03<00:00, 1.58it/s]\n" ] } ], "source": [ "alpha = 0.05\n", "learner_s = BaseSRegressor(XGBRegressor(), ate_alpha=alpha, control_name='control')\n", "cate_s, cate_s_lb, cate_s_ub = learner_s.fit_predict(X=X, treatment=treatment, y=y, return_ci=True,\n", " n_bootstraps=100, bootstrap_size=3000)" ] }, { "cell_type": "code", "execution_count": 81, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T19:07:52.463305Z", "start_time": "2020-04-14T19:07:52.422192Z" } }, "outputs": [ { "data": { "text/plain": [ "array([[ 0.91381967, 0.82956386],\n", " [-0.17692167, -0.15709245],\n", " [ 0.90877771, 0.92332006],\n", " ...,\n", " [ 0.86159408, 0.53687155],\n", " [ 0.66541922, 0.78590739],\n", " [ 1.05691028, 1.03345728]])" ] }, "execution_count": 81, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cate_s" ] }, { "cell_type": "code", "execution_count": 82, "metadata": { 
"ExecuteTime": { "end_time": "2020-04-14T19:07:52.503242Z", "start_time": "2020-04-14T19:07:52.465394Z" } }, "outputs": [ { "data": { "text/plain": [ "array([[ 0.23816384, -0.32713253],\n", " [-0.44141183, -0.42676411],\n", " [-0.00206863, -0.43860602],\n", " ...,\n", " [ 0.29240462, -0.16563866],\n", " [-0.01797467, -0.10772878],\n", " [-0.51486325, -0.31691882]])" ] }, "execution_count": 82, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cate_s_lb" ] }, { "cell_type": "code", "execution_count": 83, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T19:07:52.543787Z", "start_time": "2020-04-14T19:07:52.505112Z" } }, "outputs": [ { "data": { "text/plain": [ "array([[1.40557503, 1.1807412 ],\n", " [1.06860972, 1.55298753],\n", " [1.38529261, 1.6596471 ],\n", " ...,\n", " [1.56729684, 1.47052228],\n", " [1.16166003, 1.1144281 ],\n", " [1.68127107, 1.58984778]])" ] }, "execution_count": 83, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cate_s_ub" ] }, { "cell_type": "markdown", "metadata": { "toc-hr-collapsed": true }, "source": [ "## T-Learner" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### ATE w/ Confidence Intervals" ] }, { "cell_type": "code", "execution_count": 84, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T19:07:54.253387Z", "start_time": "2020-04-14T19:07:52.545793Z" } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:causalml:Error metrics for group treatment_a\n", "INFO:causalml: RMSE (Control): 0.4743\n", "INFO:causalml: RMSE (Treatment): 0.4669\n", "INFO:causalml: sMAPE (Control): 0.5062\n", "INFO:causalml: sMAPE (Treatment): 0.2675\n", "INFO:causalml: Gini (Control): 0.9280\n", "INFO:causalml: Gini (Treatment): 0.9297\n", "INFO:causalml:Error metrics for group treatment_b\n", "INFO:causalml: RMSE (Control): 0.4743\n", "INFO:causalml: RMSE (Treatment): 0.0747\n", "INFO:causalml: sMAPE (Control): 0.5062\n", "INFO:causalml: sMAPE (Treatment): 0.0568\n", 
"INFO:causalml: Gini (Control): 0.9280\n", "INFO:causalml: Gini (Treatment): 0.9984\n" ] } ], "source": [ "learner_t = BaseTRegressor(XGBRegressor(), control_name='control')\n", "ate_t, ate_t_lb, ate_t_ub = learner_t.estimate_ate(X=X, treatment=treatment, y=y)" ] }, { "cell_type": "code", "execution_count": 85, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T19:07:54.292831Z", "start_time": "2020-04-14T19:07:54.255519Z" } }, "outputs": [ { "data": { "text/plain": [ "array([[0.53107041, 0.5296616 ],\n", " [0.55739303, 0.55794811],\n", " [0.58371565, 0.58623463]])" ] }, "execution_count": 85, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.vstack((ate_t_lb, ate_t, ate_t_ub))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### ATE w/ Bootstrap Confidence Intervals" ] }, { "cell_type": "code", "execution_count": 86, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T19:09:28.986981Z", "start_time": "2020-04-14T19:07:54.294826Z" } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:causalml:Error metrics for group treatment_a\n", "INFO:causalml: RMSE (Control): 0.4743\n", "INFO:causalml: RMSE (Treatment): 0.4669\n", "INFO:causalml: sMAPE (Control): 0.5062\n", "INFO:causalml: sMAPE (Treatment): 0.2675\n", "INFO:causalml: Gini (Control): 0.9280\n", "INFO:causalml: Gini (Treatment): 0.9297\n", "INFO:causalml:Error metrics for group treatment_b\n", "INFO:causalml: RMSE (Control): 0.4743\n", "INFO:causalml: RMSE (Treatment): 0.0747\n", "INFO:causalml: sMAPE (Control): 0.5062\n", "INFO:causalml: sMAPE (Treatment): 0.0568\n", "INFO:causalml: Gini (Control): 0.9280\n", "INFO:causalml: Gini (Treatment): 0.9984\n", "INFO:causalml:Bootstrap Confidence Intervals for ATE\n", "100%|██████████| 100/100 [01:32<00:00, 1.08it/s]\n" ] } ], "source": [ "ate_t_b, ate_t_lb_b, ate_t_ub_b = learner_t.estimate_ate(X=X, treatment=treatment, y=y, bootstrap_ci=True,\n", " n_bootstraps=100, bootstrap_size=5000)" ] }, {
"cell_type": "code", "execution_count": 87, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T19:09:29.025336Z", "start_time": "2020-04-14T19:09:28.988777Z" } }, "outputs": [ { "data": { "text/plain": [ "array([[0.51777538, 0.51777538],\n", " [0.55739303, 0.55794811],\n", " [0.67471492, 0.67471492]])" ] }, "execution_count": 87, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.vstack((ate_t_lb_b, ate_t_b, ate_t_ub_b))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### CATE" ] }, { "cell_type": "code", "execution_count": 88, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T19:09:30.687586Z", "start_time": "2020-04-14T19:09:29.027317Z" } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:causalml:Error metrics for group treatment_a\n", "INFO:causalml: RMSE (Control): 0.4743\n", "INFO:causalml: RMSE (Treatment): 0.4669\n", "INFO:causalml: sMAPE (Control): 0.5062\n", "INFO:causalml: sMAPE (Treatment): 0.2675\n", "INFO:causalml: Gini (Control): 0.9280\n", "INFO:causalml: Gini (Treatment): 0.9297\n", "INFO:causalml:Error metrics for group treatment_b\n", "INFO:causalml: RMSE (Control): 0.4743\n", "INFO:causalml: RMSE (Treatment): 0.0747\n", "INFO:causalml: sMAPE (Control): 0.5062\n", "INFO:causalml: sMAPE (Treatment): 0.0568\n", "INFO:causalml: Gini (Control): 0.9280\n", "INFO:causalml: Gini (Treatment): 0.9984\n" ] } ], "source": [ "learner_t = BaseTRegressor(XGBRegressor(), control_name='control')\n", "cate_t = learner_t.fit_predict(X=X, treatment=treatment, y=y)" ] }, { "cell_type": "code", "execution_count": 89, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T19:09:30.724632Z", "start_time": "2020-04-14T19:09:30.689302Z" } }, "outputs": [ { "data": { "text/plain": [ "array([[ 1.47525787, -0.06651461],\n", " [ 1.26169336, 1.14718354],\n", " [ 1.68760026, 0.75878632],\n", " ...,\n", " [ 0.37292147, 0.20537615],\n", " [ 0.84290075, 0.80045319],\n", " [ 1.64227223, 1.91352534]])" ] }, 
"execution_count": 89, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cate_t" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### CATE w/ Confidence Intervals" ] }, { "cell_type": "code", "execution_count": 90, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T19:10:38.696792Z", "start_time": "2020-04-14T19:09:30.726511Z" } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:causalml:Error metrics for group treatment_a\n", "INFO:causalml: RMSE (Control): 0.4743\n", "INFO:causalml: RMSE (Treatment): 0.4669\n", "INFO:causalml: sMAPE (Control): 0.5062\n", "INFO:causalml: sMAPE (Treatment): 0.2675\n", "INFO:causalml: Gini (Control): 0.9280\n", "INFO:causalml: Gini (Treatment): 0.9297\n", "INFO:causalml:Error metrics for group treatment_b\n", "INFO:causalml: RMSE (Control): 0.4743\n", "INFO:causalml: RMSE (Treatment): 0.0747\n", "INFO:causalml: sMAPE (Control): 0.5062\n", "INFO:causalml: sMAPE (Treatment): 0.0568\n", "INFO:causalml: Gini (Control): 0.9280\n", "INFO:causalml: Gini (Treatment): 0.9984\n", "INFO:causalml:Bootstrap Confidence Intervals\n", "100%|██████████| 100/100 [01:06<00:00, 1.51it/s]\n" ] } ], "source": [ "learner_t = BaseTRegressor(XGBRegressor(), control_name='control')\n", "cate_t, cate_t_lb, cate_t_ub = learner_t.fit_predict(X=X, treatment=treatment, y=y, return_ci=True, n_bootstraps=100,\n", " bootstrap_size=3000)" ] }, { "cell_type": "code", "execution_count": 91, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T19:10:38.738058Z", "start_time": "2020-04-14T19:10:38.698876Z" } }, "outputs": [ { "data": { "text/plain": [ "array([[ 1.47525787, -0.06651461],\n", " [ 1.26169336, 1.14718354],\n", " [ 1.68760026, 0.75878632],\n", " ...,\n", " [ 0.37292147, 0.20537615],\n", " [ 0.84290075, 0.80045319],\n", " [ 1.64227223, 1.91352534]])" ] }, "execution_count": 91, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cate_t" ] }, { "cell_type": "code", "execution_count": 92, 
"metadata": { "ExecuteTime": { "end_time": "2020-04-14T19:10:38.778042Z", "start_time": "2020-04-14T19:10:38.739946Z" } }, "outputs": [ { "data": { "text/plain": [ "array([[-0.18706408, -0.84940575],\n", " [-1.01419897, -0.7311732 ],\n", " [-0.0427315 , -0.16378173],\n", " ...,\n", " [-0.39076423, -0.16869925],\n", " [-0.17401927, -0.19503389],\n", " [-0.61903974, -1.15808628]])" ] }, "execution_count": 92, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cate_t_lb" ] }, { "cell_type": "code", "execution_count": 93, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T19:10:38.817236Z", "start_time": "2020-04-14T19:10:38.780066Z" } }, "outputs": [ { "data": { "text/plain": [ "array([[2.47563672, 1.69891493],\n", " [2.04089584, 1.76605188],\n", " [2.3567108 , 2.40833322],\n", " ...,\n", " [2.17926003, 2.26919731],\n", " [2.15714553, 1.91076722],\n", " [2.27031788, 2.03901908]])" ] }, "execution_count": 93, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cate_t_ub" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## X-Learner" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### ATE w/ Confidence Intervals" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### With Propensity Score Input" ] }, { "cell_type": "code", "execution_count": 94, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T19:10:42.153573Z", "start_time": "2020-04-14T19:10:38.819233Z" } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:causalml:Error metrics for group treatment_a\n", "INFO:causalml: RMSE (Control): 0.4743\n", "INFO:causalml: RMSE (Treatment): 0.4669\n", "INFO:causalml: sMAPE (Control): 0.5062\n", "INFO:causalml: sMAPE (Treatment): 0.2675\n", "INFO:causalml: Gini (Control): 0.9280\n", "INFO:causalml: Gini (Treatment): 0.9297\n", "INFO:causalml:Error metrics for group treatment_b\n", "INFO:causalml: RMSE (Control): 0.4743\n", "INFO:causalml: RMSE (Treatment): 0.0747\n", 
"INFO:causalml: sMAPE (Control): 0.5062\n", "INFO:causalml: sMAPE (Treatment): 0.0568\n", "INFO:causalml: Gini (Control): 0.9280\n", "INFO:causalml: Gini (Treatment): 0.9984\n" ] } ], "source": [ "learner_x = BaseXRegressor(XGBRegressor(), control_name='control')\n", "ate_x, ate_x_lb, ate_x_ub = learner_x.estimate_ate(X=X, treatment=treatment, y=y, p=e)" ] }, { "cell_type": "code", "execution_count": 95, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T19:10:42.191367Z", "start_time": "2020-04-14T19:10:42.155488Z" } }, "outputs": [ { "data": { "text/plain": [ "array([[0.49573269, 0.54002602],\n", " [0.51860246, 0.56163457],\n", " [0.54147223, 0.58324311]])" ] }, "execution_count": 95, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.vstack((ate_x_lb, ate_x, ate_x_ub))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Without Propensity Score Input" ] }, { "cell_type": "code", "execution_count": 96, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T19:10:46.431322Z", "start_time": "2020-04-14T19:10:42.193271Z" } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:causalml:Generating propensity score\n", "INFO:causalml:Calibrating propensity scores.\n", "INFO:causalml:Calibrating propensity scores.\n", "INFO:causalml:Error metrics for group treatment_a\n", "INFO:causalml: RMSE (Control): 0.4743\n", "INFO:causalml: RMSE (Treatment): 0.4669\n", "INFO:causalml: sMAPE (Control): 0.5062\n", "INFO:causalml: sMAPE (Treatment): 0.2675\n", "INFO:causalml: Gini (Control): 0.9280\n", "INFO:causalml: Gini (Treatment): 0.9297\n", "INFO:causalml:Error metrics for group treatment_b\n", "INFO:causalml: RMSE (Control): 0.4743\n", "INFO:causalml: RMSE (Treatment): 0.0747\n", "INFO:causalml: sMAPE (Control): 0.5062\n", "INFO:causalml: sMAPE (Treatment): 0.0568\n", "INFO:causalml: Gini (Control): 0.9280\n", "INFO:causalml: Gini (Treatment): 0.9984\n" ] } ], "source": [ "ate_x_no_p, ate_x_lb_no_p, ate_x_ub_no_p = 
learner_x.estimate_ate(X=X, treatment=treatment, y=y)" ] }, { "cell_type": "code", "execution_count": 97, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T19:10:46.467980Z", "start_time": "2020-04-14T19:10:46.433128Z" } }, "outputs": [ { "data": { "text/plain": [ "array([[0.50418298, 0.56976992],\n", " [0.52706595, 0.59243233],\n", " [0.54994892, 0.61509475]])" ] }, "execution_count": 97, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.vstack((ate_x_lb_no_p, ate_x_no_p, ate_x_ub_no_p))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### ATE w/ Bootstrap Confidence Intervals" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### With Propensity Score Input" ] }, { "cell_type": "code", "execution_count": 98, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T19:13:45.310480Z", "start_time": "2020-04-14T19:10:46.469940Z" } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:causalml:Error metrics for group treatment_a\n", "INFO:causalml: RMSE (Control): 0.4743\n", "INFO:causalml: RMSE (Treatment): 0.4669\n", "INFO:causalml: sMAPE (Control): 0.5062\n", "INFO:causalml: sMAPE (Treatment): 0.2675\n", "INFO:causalml: Gini (Control): 0.9280\n", "INFO:causalml: Gini (Treatment): 0.9297\n", "INFO:causalml:Error metrics for group treatment_b\n", "INFO:causalml: RMSE (Control): 0.4743\n", "INFO:causalml: RMSE (Treatment): 0.0747\n", "INFO:causalml: sMAPE (Control): 0.5062\n", "INFO:causalml: sMAPE (Treatment): 0.0568\n", "INFO:causalml: Gini (Control): 0.9280\n", "INFO:causalml: Gini (Treatment): 0.9984\n", "INFO:causalml:Bootstrap Confidence Intervals for ATE\n", "100%|██████████| 100/100 [02:55<00:00, 1.75s/it]\n" ] } ], "source": [ "ate_x_b, ate_x_lb_b, ate_x_ub_b = learner_x.estimate_ate(X=X, treatment=treatment, y=y, p=e, bootstrap_ci=True,\n", " n_bootstraps=100, bootstrap_size=5000)" ] }, { "cell_type": "code", "execution_count": 99, "metadata": { "ExecuteTime": { "end_time":
"2020-04-14T19:13:45.355233Z", "start_time": "2020-04-14T19:13:45.312425Z" } }, "outputs": [ { "data": { "text/plain": [ "array([[0.49600789, 0.49600789],\n", " [0.51860246, 0.56163457],\n", " [0.63696386, 0.63696386]])" ] }, "execution_count": 99, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.vstack((ate_x_lb_b, ate_x_b, ate_x_ub_b))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Without Propensity Score Input" ] }, { "cell_type": "code", "execution_count": 100, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T19:16:44.130037Z", "start_time": "2020-04-14T19:13:45.357393Z" } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:causalml:Generating propensity score\n", "INFO:causalml:Calibrating propensity scores.\n", "INFO:causalml:Calibrating propensity scores.\n", "INFO:causalml:Error metrics for group treatment_a\n", "INFO:causalml: RMSE (Control): 0.4743\n", "INFO:causalml: RMSE (Treatment): 0.4669\n", "INFO:causalml: sMAPE (Control): 0.5062\n", "INFO:causalml: sMAPE (Treatment): 0.2675\n", "INFO:causalml: Gini (Control): 0.9280\n", "INFO:causalml: Gini (Treatment): 0.9297\n", "INFO:causalml:Error metrics for group treatment_b\n", "INFO:causalml: RMSE (Control): 0.4743\n", "INFO:causalml: RMSE (Treatment): 0.0747\n", "INFO:causalml: sMAPE (Control): 0.5062\n", "INFO:causalml: sMAPE (Treatment): 0.0568\n", "INFO:causalml: Gini (Control): 0.9280\n", "INFO:causalml: Gini (Treatment): 0.9984\n", "INFO:causalml:Bootstrap Confidence Intervals for ATE\n", "100%|██████████| 100/100 [02:54<00:00, 1.74s/it]\n" ] } ], "source": [ "ate_x_b_no_p, ate_x_lb_b_no_p, ate_x_ub_b_no_p = learner_x.estimate_ate(X=X, treatment=treatment, y=y, bootstrap_ci=True,\n", " n_bootstraps=100, bootstrap_size=5000)" ] }, { "cell_type": "code", "execution_count": 101, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T19:16:44.187331Z", "start_time": "2020-04-14T19:16:44.132067Z" } }, "outputs": [ { "data": { 
"text/plain": [ "array([[0.50100288, 0.50100288],\n", " [0.52706414, 0.59242806],\n", " [0.66020792, 0.66020792]])" ] }, "execution_count": 101, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.vstack((ate_x_lb_b_no_p, ate_x_b_no_p, ate_x_ub_b_no_p))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### CATE" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### With Propensity Score Input" ] }, { "cell_type": "code", "execution_count": 102, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T19:16:47.515109Z", "start_time": "2020-04-14T19:16:44.189448Z" } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:causalml:Error metrics for group treatment_a\n", "INFO:causalml: RMSE (Control): 0.4743\n", "INFO:causalml: RMSE (Treatment): 0.4669\n", "INFO:causalml: sMAPE (Control): 0.5062\n", "INFO:causalml: sMAPE (Treatment): 0.2675\n", "INFO:causalml: Gini (Control): 0.9280\n", "INFO:causalml: Gini (Treatment): 0.9297\n", "INFO:causalml:Error metrics for group treatment_b\n", "INFO:causalml: RMSE (Control): 0.4743\n", "INFO:causalml: RMSE (Treatment): 0.0747\n", "INFO:causalml: sMAPE (Control): 0.5062\n", "INFO:causalml: sMAPE (Treatment): 0.0568\n", "INFO:causalml: Gini (Control): 0.9280\n", "INFO:causalml: Gini (Treatment): 0.9984\n" ] } ], "source": [ "learner_x = BaseXRegressor(XGBRegressor(), control_name='control')\n", "cate_x = learner_x.fit_predict(X=X, treatment=treatment, y=y, p=e)" ] }, { "cell_type": "code", "execution_count": 103, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T19:16:47.556487Z", "start_time": "2020-04-14T19:16:47.516863Z" } }, "outputs": [ { "data": { "text/plain": [ "array([[ 0.57149441, 0.10240081],\n", " [-0.43192272, 1.48913118],\n", " [ 1.13622262, 0.65923928],\n", " ...,\n", " [ 0.44651704, -0.23119723],\n", " [ 0.93875551, 0.77003003],\n", " [ 0.96697381, 0.99990004]])" ] }, "execution_count": 103, "metadata": {}, "output_type": "execute_result" } ], 
"source": [ "cate_x" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Without Propensity Score Input" ] }, { "cell_type": "code", "execution_count": 104, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T19:16:51.907370Z", "start_time": "2020-04-14T19:16:47.558866Z" } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:causalml:Generating propensity score\n", "INFO:causalml:Calibrating propensity scores.\n", "INFO:causalml:Calibrating propensity scores.\n", "INFO:causalml:Error metrics for group treatment_a\n", "INFO:causalml: RMSE (Control): 0.4743\n", "INFO:causalml: RMSE (Treatment): 0.4669\n", "INFO:causalml: sMAPE (Control): 0.5062\n", "INFO:causalml: sMAPE (Treatment): 0.2675\n", "INFO:causalml: Gini (Control): 0.9280\n", "INFO:causalml: Gini (Treatment): 0.9297\n", "INFO:causalml:Error metrics for group treatment_b\n", "INFO:causalml: RMSE (Control): 0.4743\n", "INFO:causalml: RMSE (Treatment): 0.0747\n", "INFO:causalml: sMAPE (Control): 0.5062\n", "INFO:causalml: sMAPE (Treatment): 0.0568\n", "INFO:causalml: Gini (Control): 0.9280\n", "INFO:causalml: Gini (Treatment): 0.9984\n" ] } ], "source": [ "cate_x_no_p = learner_x.fit_predict(X=X, treatment=treatment, y=y)" ] }, { "cell_type": "code", "execution_count": 105, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T19:16:51.951219Z", "start_time": "2020-04-14T19:16:51.909187Z" } }, "outputs": [ { "data": { "text/plain": [ "array([[ 0.62959351, -0.00493521],\n", " [-0.48863166, 1.54109948],\n", " [ 1.17988308, 1.26200671],\n", " ...,\n", " [ 0.41320951, 0.73251634],\n", " [ 0.91104634, 0.82359481],\n", " [ 1.08867931, 1.44193089]])" ] }, "execution_count": 105, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cate_x_no_p" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### CATE w/ Confidence Intervals" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### With Propensity Score Input" ] }, { "cell_type": "code", 
"execution_count": 106, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T19:17:46.988230Z", "start_time": "2020-04-14T19:16:51.953440Z" } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:causalml:Error metrics for group treatment_a\n", "INFO:causalml: RMSE (Control): 0.4743\n", "INFO:causalml: RMSE (Treatment): 0.4669\n", "INFO:causalml: sMAPE (Control): 0.5062\n", "INFO:causalml: sMAPE (Treatment): 0.2675\n", "INFO:causalml: Gini (Control): 0.9280\n", "INFO:causalml: Gini (Treatment): 0.9297\n", "INFO:causalml:Error metrics for group treatment_b\n", "INFO:causalml: RMSE (Control): 0.4743\n", "INFO:causalml: RMSE (Treatment): 0.0747\n", "INFO:causalml: sMAPE (Control): 0.5062\n", "INFO:causalml: sMAPE (Treatment): 0.0568\n", "INFO:causalml: Gini (Control): 0.9280\n", "INFO:causalml: Gini (Treatment): 0.9984\n", "INFO:causalml:Bootstrap Confidence Intervals\n", "100%|██████████| 100/100 [00:51<00:00, 1.94it/s]\n" ] } ], "source": [ "learner_x = BaseXRegressor(XGBRegressor(), control_name='control')\n", "cate_x, cate_x_lb, cate_x_ub = learner_x.fit_predict(X=X, treatment=treatment, y=y, p=e, return_ci=True,\n", " n_bootstraps=100, bootstrap_size=1000)" ] }, { "cell_type": "code", "execution_count": 107, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T19:17:47.035158Z", "start_time": "2020-04-14T19:17:46.990429Z" } }, "outputs": [ { "data": { "text/plain": [ "{'treatment_a': 0, 'treatment_b': 1}" ] }, "execution_count": 107, "metadata": {}, "output_type": "execute_result" } ], "source": [ "learner_x._classes" ] }, { "cell_type": "code", "execution_count": 108, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T19:17:47.080571Z", "start_time": "2020-04-14T19:17:47.037415Z" } }, "outputs": [ { "data": { "text/plain": [ "array([[ 0.57149441, 0.10240081],\n", " [-0.43192272, 1.48913118],\n", " [ 1.13622262, 0.65923928],\n", " ...,\n", " [ 0.44651704, -0.23119723],\n", " [ 0.93875551, 0.77003003],\n", " [ 0.96697381, 
0.99990004]])" ] }, "execution_count": 108, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cate_x" ] }, { "cell_type": "code", "execution_count": 109, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T19:17:47.125458Z", "start_time": "2020-04-14T19:17:47.082758Z" } }, "outputs": [ { "data": { "text/plain": [ "array([[-0.23574115, -0.21029023],\n", " [-0.95699419, -1.05203708],\n", " [-0.49402807, -0.48280283],\n", " ...,\n", " [-0.12162789, -0.26408791],\n", " [-0.52562958, -0.19338615],\n", " [-0.40858565, -0.88119588]])" ] }, "execution_count": 109, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cate_x_lb" ] }, { "cell_type": "code", "execution_count": 110, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T19:17:47.171213Z", "start_time": "2020-04-14T19:17:47.127785Z" } }, "outputs": [ { "data": { "text/plain": [ "array([[1.79950407, 2.11258332],\n", " [1.45309225, 1.48831446],\n", " [1.75564219, 2.03222137],\n", " ...,\n", " [2.15191078, 2.30032378],\n", " [1.65228261, 1.40411322],\n", " [1.74815254, 1.68257617]])" ] }, "execution_count": 110, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cate_x_ub" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Without Propensity Score Input" ] }, { "cell_type": "code", "execution_count": 111, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T19:18:43.066112Z", "start_time": "2020-04-14T19:17:47.173533Z" } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:causalml:Generating propensity score\n", "INFO:causalml:Calibrating propensity scores.\n", "INFO:causalml:Calibrating propensity scores.\n", "INFO:causalml:Error metrics for group treatment_a\n", "INFO:causalml: RMSE (Control): 0.4743\n", "INFO:causalml: RMSE (Treatment): 0.4669\n", "INFO:causalml: sMAPE (Control): 0.5062\n", "INFO:causalml: sMAPE (Treatment): 0.2675\n", "INFO:causalml: Gini (Control): 0.9280\n", "INFO:causalml: Gini (Treatment): 0.9297\n", 
"INFO:causalml:Error metrics for group treatment_b\n", "INFO:causalml: RMSE (Control): 0.4743\n", "INFO:causalml: RMSE (Treatment): 0.0747\n", "INFO:causalml: sMAPE (Control): 0.5062\n", "INFO:causalml: sMAPE (Treatment): 0.0568\n", "INFO:causalml: Gini (Control): 0.9280\n", "INFO:causalml: Gini (Treatment): 0.9984\n", "INFO:causalml:Bootstrap Confidence Intervals\n", "100%|██████████| 100/100 [00:51<00:00, 1.94it/s]\n" ] } ], "source": [ "cate_x_no_p, cate_x_lb_no_p, cate_x_ub_no_p = learner_x.fit_predict(X=X, treatment=treatment, y=y, return_ci=True,\n", " n_bootstraps=100, bootstrap_size=1000)" ] }, { "cell_type": "code", "execution_count": 112, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T19:18:43.114297Z", "start_time": "2020-04-14T19:18:43.068442Z" } }, "outputs": [ { "data": { "text/plain": [ "{'treatment_a': 0, 'treatment_b': 1}" ] }, "execution_count": 112, "metadata": {}, "output_type": "execute_result" } ], "source": [ "learner_x._classes" ] }, { "cell_type": "code", "execution_count": 113, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T19:18:43.159279Z", "start_time": "2020-04-14T19:18:43.116452Z" } }, "outputs": [ { "data": { "text/plain": [ "array([[ 0.6294132 , -0.00492528],\n", " [-0.48876998, 1.54111376],\n", " [ 1.17989094, 1.2620318 ],\n", " ...,\n", " [ 0.41319463, 0.73237091],\n", " [ 0.9108665 , 0.82359564],\n", " [ 1.08868219, 1.441931 ]])" ] }, "execution_count": 113, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cate_x_no_p" ] }, { "cell_type": "code", "execution_count": 114, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T19:18:43.206463Z", "start_time": "2020-04-14T19:18:43.162141Z" } }, "outputs": [ { "data": { "text/plain": [ "array([[-0.10073893, -0.38800051],\n", " [-0.81971717, -0.8298923 ],\n", " [-0.18606629, -0.32586878],\n", " ...,\n", " [ 0.18372251, -0.12170252],\n", " [-0.21309623, -0.38600234],\n", " [-0.44863794, -0.39716903]])" ] }, "execution_count": 114, "metadata": {}, 
"output_type": "execute_result" } ], "source": [ "cate_x_lb_no_p" ] }, { "cell_type": "code", "execution_count": 115, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T19:18:43.251400Z", "start_time": "2020-04-14T19:18:43.208825Z" } }, "outputs": [ { "data": { "text/plain": [ "array([[2.00312255, 2.10486085],\n", " [1.59355675, 1.76340695],\n", " [1.77980204, 2.35535097],\n", " ...,\n", " [1.94828429, 1.94720835],\n", " [2.04021647, 1.71337955],\n", " [1.60121219, 1.82820234]])" ] }, "execution_count": 115, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cate_x_ub_no_p" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## R-Learner" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### ATE w/ Confidence Intervals" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### With Propensity Score Input" ] }, { "cell_type": "code", "execution_count": 116, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T19:18:49.522197Z", "start_time": "2020-04-14T19:18:43.253881Z" } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:causalml:generating out-of-fold CV outcome estimates\n", "INFO:causalml:training the treatment effect model for treatment_a with R-loss\n", "INFO:causalml:training the treatment effect model for treatment_b with R-loss\n" ] } ], "source": [ "learner_r = BaseRRegressor(XGBRegressor(), control_name='control')\n", "ate_r, ate_r_lb, ate_r_ub = learner_r.estimate_ate(X=X, treatment=treatment, y=y, p=e)" ] }, { "cell_type": "code", "execution_count": 117, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T19:18:49.569287Z", "start_time": "2020-04-14T19:18:49.524357Z" } }, "outputs": [ { "data": { "text/plain": [ "array([[0.52326968, 0.57744164],\n", " [0.52374892, 0.5781462 ],\n", " [0.52422816, 0.57885076]])" ] }, "execution_count": 117, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.vstack((ate_r_lb, ate_r, ate_r_ub))" ] }, { "cell_type": "markdown", 
"metadata": {}, "source": [ "#### Without Propensity Score Input" ] }, { "cell_type": "code", "execution_count": 118, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T19:18:54.689767Z", "start_time": "2020-04-14T19:18:49.571426Z" } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:causalml:Generating propensity score\n", "INFO:causalml:Calibrating propensity scores.\n", "INFO:causalml:Calibrating propensity scores.\n", "INFO:causalml:generating out-of-fold CV outcome estimates\n", "INFO:causalml:training the treatment effect model for treatment_a with R-loss\n", "INFO:causalml:training the treatment effect model for treatment_b with R-loss\n" ] } ], "source": [ "learner_r = BaseRRegressor(XGBRegressor(), control_name='control')\n", "ate_r_no_p, ate_r_lb_no_p, ate_r_ub_no_p = learner_r.estimate_ate(X=X, treatment=treatment, y=y)" ] }, { "cell_type": "code", "execution_count": 119, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T19:18:54.730346Z", "start_time": "2020-04-14T19:18:54.691652Z" } }, "outputs": [ { "data": { "text/plain": [ "array([[0.44161159, 0.71836119],\n", " [0.44209269, 0.71904979],\n", " [0.44257378, 0.71973838]])" ] }, "execution_count": 119, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.vstack((ate_r_lb_no_p, ate_r_no_p, ate_r_ub_no_p))" ] }, { "cell_type": "code", "execution_count": 120, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T19:18:54.779756Z", "start_time": "2020-04-14T19:18:54.732335Z" } }, "outputs": [ { "data": { "text/plain": [ "{'treatment_a': {'all training': LogisticRegressionCV(Cs=array([1.00230524, 2.15608891, 4.63802765, 9.97700064]),\n", " class_weight=None,\n", " cv=KFold(n_splits=5, random_state=None, shuffle=True),\n", " dual=False, fit_intercept=True, intercept_scaling=1.0,\n", " l1_ratios=array([0.001 , 0.33366667, 0.66633333, 0.999 ]),\n", " max_iter=100, multi_class='auto', n_jobs=None,\n", " penalty='elasticnet', random_state=None, refit=True,\n", 
" scoring=None, solver='saga', tol=0.0001, verbose=0)},\n", " 'treatment_b': {'all training': LogisticRegressionCV(Cs=array([1.00230524, 2.15608891, 4.63802765, 9.97700064]),\n", " class_weight=None,\n", " cv=KFold(n_splits=5, random_state=None, shuffle=True),\n", " dual=False, fit_intercept=True, intercept_scaling=1.0,\n", " l1_ratios=array([0.001 , 0.33366667, 0.66633333, 0.999 ]),\n", " max_iter=100, multi_class='auto', n_jobs=None,\n", " penalty='elasticnet', random_state=None, refit=True,\n", " scoring=None, solver='saga', tol=0.0001, verbose=0)}}" ] }, "execution_count": 120, "metadata": {}, "output_type": "execute_result" } ], "source": [ "learner_r.propensity_model" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### ATE w/ Bootstrap Confidence Intervals" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### With Propensity Score Input" ] }, { "cell_type": "code", "execution_count": 121, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T19:21:17.612601Z", "start_time": "2020-04-14T19:18:54.781916Z" } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:causalml:generating out-of-fold CV outcome estimates\n", "INFO:causalml:training the treatment effect model for treatment_a with R-loss\n", "INFO:causalml:training the treatment effect model for treatment_b with R-loss\n", "INFO:causalml:Bootstrap Confidence Intervals for ATE\n", "100%|██████████| 100/100 [02:19<00:00, 1.39s/it]\n" ] } ], "source": [ "ate_r_b, ate_r_lb_b, ate_r_ub_b = learner_r.estimate_ate(X=X, treatment=treatment, y=y, p=e, bootstrap_ci=True,\n", " n_bootstraps=100, bootstrap_size=5000)" ] }, { "cell_type": "code", "execution_count": 122, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T19:21:17.655865Z", "start_time": "2020-04-14T19:21:17.614542Z" } }, "outputs": [ { "data": { "text/plain": [ "array([[0.40326436, 0.40326436],\n", " [0.50620059, 0.5478152 ],\n", " [0.5697328 , 0.5697328 ]])" ] }, "execution_count": 122, "metadata":
{}, "output_type": "execute_result" } ], "source": [ "np.vstack((ate_r_lb_b, ate_r_b, ate_r_ub_b))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Without Propensity Score Input" ] }, { "cell_type": "code", "execution_count": 123, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T19:23:41.531458Z", "start_time": "2020-04-14T19:21:17.657918Z" } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:causalml:Generating propensity score\n", "INFO:causalml:Calibrating propensity scores.\n", "INFO:causalml:Calibrating propensity scores.\n", "INFO:causalml:generating out-of-fold CV outcome estimates\n", "INFO:causalml:training the treatment effect model for treatment_a with R-loss\n", "INFO:causalml:training the treatment effect model for treatment_b with R-loss\n", "INFO:causalml:Bootstrap Confidence Intervals for ATE\n", "100%|██████████| 100/100 [02:19<00:00, 1.39s/it]\n" ] } ], "source": [ "learner_r = BaseRRegressor(XGBRegressor(), control_name='control')\n", "ate_r_b_no_p, ate_r_lb_b_no_p, ate_r_ub_b_no_p = learner_r.estimate_ate(X=X, treatment=treatment, y=y, bootstrap_ci=True,\n", " n_bootstraps=100, bootstrap_size=5000)" ] }, { "cell_type": "code", "execution_count": 124, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T19:23:41.578488Z", "start_time": "2020-04-14T19:23:41.533496Z" } }, "outputs": [ { "data": { "text/plain": [ "array([[0.45994051, 0.45994051],\n", " [0.44481491, 0.66323246],\n", " [0.68981572, 0.68981572]])" ] }, "execution_count": 124, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.vstack((ate_r_lb_b_no_p, ate_r_b_no_p, ate_r_ub_b_no_p))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### CATE" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### With Propensity Score Input" ] }, { "cell_type": "code", "execution_count": 125, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T19:23:44.255819Z", "start_time": "2020-04-14T19:23:41.580879Z" } 
}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:causalml:generating out-of-fold CV outcome estimates\n", "INFO:causalml:training the treatment effect model for treatment_a with R-loss\n", "INFO:causalml:training the treatment effect model for treatment_b with R-loss\n" ] } ], "source": [ "learner_r = BaseRRegressor(XGBRegressor(), control_name='control')\n", "cate_r = learner_r.fit_predict(X=X, treatment=treatment, y=y, p=e)" ] }, { "cell_type": "code", "execution_count": 126, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T19:23:44.297265Z", "start_time": "2020-04-14T19:23:44.257762Z" } }, "outputs": [ { "data": { "text/plain": [ "array([[ 5.57098567e-01, 1.77359581e-03],\n", " [ 1.08587885e+00, 2.48472750e-01],\n", " [ 3.34437251e-01, 1.69020355e+00],\n", " ...,\n", " [-9.96065974e-01, -8.98482800e-02],\n", " [ 1.70625651e+00, 9.55640435e-01],\n", " [-1.88456130e+00, 6.50659442e-01]])" ] }, "execution_count": 126, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cate_r" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Without Propensity Score Input" ] }, { "cell_type": "code", "execution_count": 127, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T19:23:48.815108Z", "start_time": "2020-04-14T19:23:44.299436Z" } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:causalml:Generating propensity score\n", "INFO:causalml:Calibrating propensity scores.\n", "INFO:causalml:Calibrating propensity scores.\n", "INFO:causalml:generating out-of-fold CV outcome estimates\n", "INFO:causalml:training the treatment effect model for treatment_a with R-loss\n", "INFO:causalml:training the treatment effect model for treatment_b with R-loss\n" ] } ], "source": [ "learner_r = BaseRRegressor(XGBRegressor(), control_name='control')\n", "cate_r_no_p = learner_r.fit_predict(X=X, treatment=treatment, y=y)" ] }, { "cell_type": "code", "execution_count": 128, "metadata": { "ExecuteTime": { 
"end_time": "2020-04-14T19:23:48.859511Z", "start_time": "2020-04-14T19:23:48.817196Z" } }, "outputs": [ { "data": { "text/plain": [ "array([[ 0.55478877, 0.87992519],\n", " [ 1.10120189, 1.29564619],\n", " [ 0.62448621, 0.41555083],\n", " ...,\n", " [-0.53886592, 0.44593787],\n", " [ 1.25231111, 0.79904991],\n", " [-0.64419305, -0.23014426]])" ] }, "execution_count": 128, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cate_r_no_p" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### CATE w/ Confidence Intervals" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### With Propensity Score Input" ] }, { "cell_type": "code", "execution_count": 129, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T19:24:29.398563Z", "start_time": "2020-04-14T19:23:48.862628Z" }, "scrolled": true }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:causalml:generating out-of-fold CV outcome estimates\n", "INFO:causalml:training the treatment effect model for treatment_a with R-loss\n", "INFO:causalml:training the treatment effect model for treatment_b with R-loss\n", "INFO:causalml:Bootstrap Confidence Intervals\n", "100%|██████████| 100/100 [00:37<00:00, 2.65it/s]\n" ] } ], "source": [ "learner_r = BaseRRegressor(XGBRegressor(), control_name='control')\n", "cate_r, cate_r_lb, cate_r_ub = learner_r.fit_predict(X=X, treatment=treatment, y=y, p=e, return_ci=True,\n", " n_bootstraps=100, bootstrap_size=1000)" ] }, { "cell_type": "code", "execution_count": 130, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T19:24:29.445452Z", "start_time": "2020-04-14T19:24:29.400875Z" } }, "outputs": [ { "data": { "text/plain": [ "array([[ 1.75007784, 0.67752302],\n", " [ 0.77257723, 0.12910607],\n", " [ 1.08854032, 0.81679094],\n", " ...,\n", " [-0.92310214, 0.645491 ],\n", " [ 0.92478108, 0.79903334],\n", " [-0.48311949, 1.00291944]])" ] }, "execution_count": 130, "metadata": {}, "output_type": "execute_result" } ], 
"source": [ "cate_r" ] }, { "cell_type": "code", "execution_count": 131, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T19:24:29.493876Z", "start_time": "2020-04-14T19:24:29.447754Z" } }, "outputs": [ { "data": { "text/plain": [ "array([[-0.801657 , -0.48754777],\n", " [-3.05317249, -5.37572038],\n", " [-1.50823961, -1.16822439],\n", " ...,\n", " [-1.27909884, -1.2460175 ],\n", " [-1.42656819, -1.59059022],\n", " [-1.90115855, -2.10247456]])" ] }, "execution_count": 131, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cate_r_lb" ] }, { "cell_type": "code", "execution_count": 132, "metadata": { "ExecuteTime": { "end_time": "2020-04-14T19:24:29.541179Z", "start_time": "2020-04-14T19:24:29.496419Z" } }, "outputs": [ { "data": { "text/plain": [ "array([[4.06750882, 3.68516954],\n", " [4.21587243, 4.50271177],\n", " [4.33370841, 3.79358828],\n", " ...,\n", " [3.53610538, 3.48638564],\n", " [3.71832166, 3.48292163],\n", " [5.01262635, 3.27047309]])" ] }, "execution_count": 132, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cate_r_ub" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Without Propensity Score Input" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "start_time": "2020-04-14T18:47:09.698Z" }, "scrolled": true }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:causalml:generating out-of-fold CV outcome estimates\n", "INFO:causalml:training the treatment effect model for treatment_a with R-loss\n", "INFO:causalml:training the treatment effect model for treatment_b with R-loss\n", "INFO:causalml:Bootstrap Confidence Intervals\n", " 2%|▏ | 2/100 [00:00<00:36, 2.69it/s]" ] } ], "source": [ "learner_r = BaseRRegressor(XGBRegressor(), control_name='control')\n", "cate_r_no_p, cate_r_lb_no_p, cate_r_ub_no_p = learner_r.fit_predict(X=X, treatment=treatment, y=y, p=e, return_ci=True,\n", " n_bootstraps=100, bootstrap_size=1000)" ] }, { 
"cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "start_time": "2020-04-14T18:47:09.702Z" } }, "outputs": [], "source": [ "cate_r_no_p" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "start_time": "2020-04-14T18:47:09.706Z" } }, "outputs": [], "source": [ "cate_r_lb_no_p" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "start_time": "2020-04-14T18:47:09.710Z" } }, "outputs": [], "source": [ "cate_r_ub_no_p" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.0" }, "toc": { "base_numbering": 1, "nav_menu": { "height": "174px", "width": "252px" }, "number_sections": false, "sideBar": true, "skip_h1_title": false, "title_cell": "Table of Contents", "title_sidebar": "Contents", "toc_cell": false, "toc_position": { "height": "calc(100% - 180px)", "left": "10px", "top": "150px", "width": "203px" }, "toc_section_display": "block", "toc_window_display": true } }, "nbformat": 4, "nbformat_minor": 4 }