{ "cells": [ { "cell_type": "markdown", "metadata": { "collapsed": true, "pycharm": { "name": "#%% md\n" } }, "source": [ "# 2SLS Estimation Examples" ] }, { "cell_type": "markdown", "metadata": { "pycharm": { "name": "#%% md\n" } }, "source": [ "We demonstrate the use of 2SLS from the `causalml` package to estimate the average treatment effect on semi-synthetic and fully synthetic data." ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "ExecuteTime": { "end_time": "2020-06-22T18:34:07.556482Z", "start_time": "2020-06-22T18:34:07.075342Z" }, "pycharm": { "name": "#%%\n" } }, "outputs": [], "source": [ "%reload_ext autoreload\n", "%autoreload 2\n", "%matplotlib inline" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "ExecuteTime": { "end_time": "2020-06-22T18:34:08.026503Z", "start_time": "2020-06-22T18:34:07.998418Z" }, "pycharm": { "name": "#%%\n" } }, "outputs": [], "source": [ "import os\n", "\n", "# Work from the parent directory so that relative paths such as\n", "# \"examples/data/card.csv\" resolve. The guard keeps this cell idempotent:\n", "# re-running it must not climb one directory higher on every execution.\n", "if not os.path.isdir(\"examples\"):\n", "    os.chdir(os.path.abspath(\"../\"))\n", "base_path = os.getcwd()" ] }, { "cell_type": "code", "execution_count": 52, "metadata": { "ExecuteTime": { "end_time": "2020-06-22T19:20:57.201672Z", "start_time": "2020-06-22T19:20:57.102105Z" }, "pycharm": { "name": "#%%\n" } }, "outputs": [], "source": [ "import logging\n", "from matplotlib import pyplot as plt\n", "import numpy as np\n", "import pandas as pd\n", "import sys\n", "from scipy import stats" ] }, { "cell_type": "code", "execution_count": 39, "metadata": { "ExecuteTime": { "end_time": "2020-06-22T19:04:33.015381Z", "start_time": "2020-06-22T19:04:32.964582Z" }, "pycharm": { "name": "#%%\n" } }, "outputs": [], "source": [ "import causalml\n", "from causalml.inference.iv import IVRegressor\n", "from sklearn.preprocessing import StandardScaler\n", "import statsmodels.api as sm" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Semi-Synthetic Data from NLSYM" ] }, { "cell_type": "markdown", "metadata": { "ExecuteTime": { "end_time": "2020-06-22T19:32:40.653280Z", "start_time": 
"2020-06-22T19:32:40.595806Z" } }, "source": [ "The data generation mechanism is described in Syrgkanis et al. \"*Machine Learning Estimation of Heterogeneous Treatment Effects with Instruments*\" (2019)." ] }, { "cell_type": "markdown", "metadata": { "pycharm": { "name": "#%% md\n" } }, "source": [ "### Data Loading" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "ExecuteTime": { "end_time": "2020-06-22T18:34:16.787310Z", "start_time": "2020-06-22T18:34:16.720144Z" }, "pycharm": { "name": "#%%\n" } }, "outputs": [], "source": [ "df = pd.read_csv(\"examples/data/card.csv\")" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "ExecuteTime": { "end_time": "2020-06-22T18:34:17.310674Z", "start_time": "2020-06-22T18:34:17.231429Z" }, "pycharm": { "name": "#%%\n" } }, "outputs": [ { "data": { "text/html": [ "
| \n", " | id | \n", "nearc2 | \n", "nearc4 | \n", "educ | \n", "age | \n", "fatheduc | \n", "motheduc | \n", "weight | \n", "momdad14 | \n", "sinmom14 | \n", "... | \n", "smsa66 | \n", "wage | \n", "enroll | \n", "kww | \n", "iq | \n", "married | \n", "libcrd14 | \n", "exper | \n", "lwage | \n", "expersq | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "2 | \n", "0 | \n", "0 | \n", "7 | \n", "29 | \n", "NaN | \n", "NaN | \n", "158413 | \n", "1 | \n", "0 | \n", "... | \n", "1 | \n", "548 | \n", "0 | \n", "15.0 | \n", "NaN | \n", "1.0 | \n", "0.0 | \n", "16 | \n", "6.306275 | \n", "256 | \n", "
| 1 | \n", "3 | \n", "0 | \n", "0 | \n", "12 | \n", "27 | \n", "8.0 | \n", "8.0 | \n", "380166 | \n", "1 | \n", "0 | \n", "... | \n", "1 | \n", "481 | \n", "0 | \n", "35.0 | \n", "93.0 | \n", "1.0 | \n", "1.0 | \n", "9 | \n", "6.175867 | \n", "81 | \n", "
| 2 | \n", "4 | \n", "0 | \n", "0 | \n", "12 | \n", "34 | \n", "14.0 | \n", "12.0 | \n", "367470 | \n", "1 | \n", "0 | \n", "... | \n", "1 | \n", "721 | \n", "0 | \n", "42.0 | \n", "103.0 | \n", "1.0 | \n", "1.0 | \n", "16 | \n", "6.580639 | \n", "256 | \n", "
| 3 | \n", "5 | \n", "1 | \n", "1 | \n", "11 | \n", "27 | \n", "11.0 | \n", "12.0 | \n", "380166 | \n", "1 | \n", "0 | \n", "... | \n", "1 | \n", "250 | \n", "0 | \n", "25.0 | \n", "88.0 | \n", "1.0 | \n", "1.0 | \n", "10 | \n", "5.521461 | \n", "100 | \n", "
| 4 | \n", "6 | \n", "1 | \n", "1 | \n", "12 | \n", "34 | \n", "8.0 | \n", "7.0 | \n", "367470 | \n", "1 | \n", "0 | \n", "... | \n", "1 | \n", "729 | \n", "0 | \n", "34.0 | \n", "108.0 | \n", "1.0 | \n", "0.0 | \n", "16 | \n", "6.591674 | \n", "256 | \n", "
5 rows × 34 columns
\n", "| \n", " | fatheduc | \n", "motheduc | \n", "momdad14 | \n", "sinmom14 | \n", "reg661 | \n", "reg662 | \n", "reg663 | \n", "reg664 | \n", "reg665 | \n", "reg666 | \n", "reg667 | \n", "reg668 | \n", "reg669 | \n", "south66 | \n", "black | \n", "smsa | \n", "south | \n", "smsa66 | \n", "exper | \n", "expersq | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | \n", "2.991000e+03 | \n", "2.991000e+03 | \n", "2991.000000 | \n", "2991.000000 | \n", "2991.000000 | \n", "2991.000000 | \n", "2991.000000 | \n", "2991.000000 | \n", "2991.000000 | \n", "2991.000000 | \n", "2991.000000 | \n", "2991.000000 | \n", "2991.000000 | \n", "2991.000000 | \n", "2991.000000 | \n", "2991.000000 | \n", "2991.000000 | \n", "2991.000000 | \n", "2.991000e+03 | \n", "2.991000e+03 | \n", "
| mean | \n", "-3.529069e-16 | \n", "-1.704346e-15 | \n", "0.790371 | \n", "0.100301 | \n", "0.046807 | \n", "0.161484 | \n", "0.196924 | \n", "0.064527 | \n", "0.205951 | \n", "0.094952 | \n", "0.109997 | \n", "0.028419 | \n", "0.090939 | \n", "0.410899 | \n", "0.231361 | \n", "0.715145 | \n", "0.400201 | \n", "0.651622 | \n", "4.285921e-16 | \n", "3.040029e-17 | \n", "
| std | \n", "1.000167e+00 | \n", "1.000167e+00 | \n", "0.407112 | \n", "0.300451 | \n", "0.211261 | \n", "0.368039 | \n", "0.397741 | \n", "0.245730 | \n", "0.404463 | \n", "0.293197 | \n", "0.312938 | \n", "0.166193 | \n", "0.287571 | \n", "0.492079 | \n", "0.421773 | \n", "0.451421 | \n", "0.490021 | \n", "0.476536 | \n", "1.000167e+00 | \n", "1.000167e+00 | \n", "
| min | \n", "-3.101056e+00 | \n", "-3.502453e+00 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "-2.159127e+00 | \n", "-1.147691e+00 | \n", "
| 25% | \n", "-6.303764e-01 | \n", "-4.656485e-01 | \n", "1.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "-6.858865e-01 | \n", "-7.077287e-01 | \n", "
| 50% | \n", "0.000000e+00 | \n", "2.091970e-01 | \n", "1.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "1.000000 | \n", "0.000000 | \n", "1.000000 | \n", "-1.948066e-01 | \n", "-3.655360e-01 | \n", "
| 75% | \n", "6.049634e-01 | \n", "5.466197e-01 | \n", "1.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "1.000000 | \n", "0.000000 | \n", "1.000000 | \n", "1.000000 | \n", "1.000000 | \n", "5.418134e-01 | \n", "3.310707e-01 | \n", "
| max | \n", "2.457973e+00 | \n", "2.571156e+00 | \n", "1.000000 | \n", "1.000000 | \n", "1.000000 | \n", "1.000000 | \n", "1.000000 | \n", "1.000000 | \n", "1.000000 | \n", "1.000000 | \n", "1.000000 | \n", "1.000000 | \n", "1.000000 | \n", "1.000000 | \n", "1.000000 | \n", "1.000000 | \n", "1.000000 | \n", "1.000000 | \n", "3.242753e+00 | \n", "4.767355e+00 | \n", "