{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "WARNING:root:Missing Data: 3620 rows removed from AdultDataset.\n",
      "/home/hirzel/python3.6venv/lib/python3.6/site-packages/aif360/datasets/standard_dataset.py:121: FutureWarning: outer method for ufunc <ufunc 'equal'> is not implemented on pandas objects. Returning an ndarray, but in the future this will raise a 'NotImplementedError'. Consider explicitly converting the Series to an array with '.array' first.\n",
      "  priv = np.logical_or.reduce(np.equal.outer(vals, df[attr]))\n",
      "/home/hirzel/python3.6venv/lib/python3.6/site-packages/aif360/datasets/standard_dataset.py:142: FutureWarning: outer method for ufunc <ufunc 'equal'> is not implemented on pandas objects. Returning an ndarray, but in the future this will raise a 'NotImplementedError'. Consider explicitly converting the Series to an array with '.array' first.\n",
      "  df[label_name]))\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "{'favorable_label': 1.0,\n",
       " 'unfavorable_label': 0.0,\n",
       " 'protected_attribute_names': ['race', 'sex'],\n",
       " 'unprivileged_groups': [{'race': 0.0, 'sex': 0.0}],\n",
       " 'privileged_groups': [{'race': 1.0, 'sex': 1.0}]}"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Load the Adult dataset shipped with AIF360 and derive its fairness\n",
    "# metadata (favorable/unfavorable labels, protected attributes, groups).\n",
    "# fairness_info is passed to the scorers and post-processors further down.\n",
    "import aif360.datasets\n",
    "import lale.lib.aif360\n",
    "from lale.lib.aif360 import dataset_fairness_info\n",
    "\n",
    "orig_ds = aif360.datasets.AdultDataset()\n",
    "fairness_info = dataset_fairness_info(orig_ds)\n",
    "fairness_info"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "shapes: train_X (7913, 98), train_y (7913,), test_X (3392, 98), test_y (3392,)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>income-per-year</th>\n",
       "      <th>age</th>\n",
       "      <th>education-num</th>\n",
       "      <th>race</th>\n",
       "      <th>sex</th>\n",
       "      <th>capital-gain</th>\n",
       "      <th>capital-loss</th>\n",
       "      <th>hours-per-week</th>\n",
       "      <th>workclass=Federal-gov</th>\n",
       "      <th>workclass=Local-gov</th>\n",
       "      <th>...</th>\n",
       "      <th>native-country=Portugal</th>\n",
       "      <th>native-country=Puerto-Rico</th>\n",
       "      <th>native-country=Scotland</th>\n",
       "      <th>native-country=South</th>\n",
       "      <th>native-country=Taiwan</th>\n",
       "      <th>native-country=Thailand</th>\n",
       "      <th>native-country=Trinadad&amp;Tobago</th>\n",
       "      <th>native-country=United-States</th>\n",
       "      <th>native-country=Vietnam</th>\n",
       "      <th>native-country=Yugoslavia</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1.0</td>\n",
       "      <td>28.0</td>\n",
       "      <td>14.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>45.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.0</td>\n",
       "      <td>60.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>45.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.0</td>\n",
       "      <td>43.0</td>\n",
       "      <td>10.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1669.0</td>\n",
       "      <td>45.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.0</td>\n",
       "      <td>35.0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>40.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1.0</td>\n",
       "      <td>36.0</td>\n",
       "      <td>10.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>40.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 99 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   income-per-year   age  education-num  race  sex  capital-gain  \\\n",
       "0              1.0  28.0           14.0   1.0  1.0           0.0   \n",
       "1              0.0  60.0            5.0   1.0  1.0           0.0   \n",
       "2              0.0  43.0           10.0   1.0  1.0           0.0   \n",
       "3              0.0  35.0            9.0   1.0  1.0           0.0   \n",
       "4              1.0  36.0           10.0   1.0  1.0           0.0   \n",
       "\n",
       "   capital-loss  hours-per-week  workclass=Federal-gov  workclass=Local-gov  \\\n",
       "0           0.0            45.0                    0.0                  0.0   \n",
       "1           0.0            45.0                    0.0                  0.0   \n",
       "2        1669.0            45.0                    0.0                  0.0   \n",
       "3           0.0            40.0                    0.0                  0.0   \n",
       "4           0.0            40.0                    0.0                  0.0   \n",
       "\n",
       "   ...  native-country=Portugal  native-country=Puerto-Rico  \\\n",
       "0  ...                      0.0                         0.0   \n",
       "1  ...                      0.0                         0.0   \n",
       "2  ...                      0.0                         0.0   \n",
       "3  ...                      0.0                         0.0   \n",
       "4  ...                      0.0                         0.0   \n",
       "\n",
       "   native-country=Scotland  native-country=South  native-country=Taiwan  \\\n",
       "0                      0.0                   0.0                    0.0   \n",
       "1                      0.0                   0.0                    0.0   \n",
       "2                      0.0                   0.0                    0.0   \n",
       "3                      0.0                   0.0                    0.0   \n",
       "4                      0.0                   0.0                    0.0   \n",
       "\n",
       "   native-country=Thailand  native-country=Trinadad&Tobago  \\\n",
       "0                      0.0                             0.0   \n",
       "1                      0.0                             0.0   \n",
       "2                      0.0                             0.0   \n",
       "3                      0.0                             0.0   \n",
       "4                      0.0                             0.0   \n",
       "\n",
       "   native-country=United-States  native-country=Vietnam  \\\n",
       "0                           0.0                     0.0   \n",
       "1                           1.0                     0.0   \n",
       "2                           1.0                     0.0   \n",
       "3                           1.0                     0.0   \n",
       "4                           1.0                     0.0   \n",
       "\n",
       "   native-country=Yugoslavia  \n",
       "0                        0.0  \n",
       "1                        0.0  \n",
       "2                        0.0  \n",
       "3                        0.0  \n",
       "4                        0.0  \n",
       "\n",
       "[5 rows x 99 columns]"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "from lale.lib.aif360 import dataset_to_pandas\n",
    "\n",
    "# Work on a shuffled 25% sample so this notebook runs faster; the\n",
    "# remaining 75% ends up in ignore_ds.\n",
    "used_ds, ignore_ds = orig_ds.split([0.25], shuffle=True, seed=42)\n",
    "# Split the sample 70/30 into train and test (it was shuffled above).\n",
    "train_ds, test_ds = used_ds.split([0.7])\n",
    "train_X, train_y = dataset_to_pandas(train_ds)\n",
    "test_X, test_y = dataset_to_pandas(test_ds)\n",
    "shape_report = (\n",
    "    f'shapes: train_X {train_X.shape}, train_y {train_y.shape}, '\n",
    "    f'test_X {test_X.shape}, test_y {test_y.shape}'\n",
    ")\n",
    "print(shape_report)\n",
    "# Preview the first rows: label column first, then the features.\n",
    "pd.concat([train_y.head(), train_X.head()], axis=1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Search Initial Pipeline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Operators for the planned pipeline; LR and XGBoost are short aliases\n",
    "# for LogisticRegression and XGBClassifier.\n",
    "import lale\n",
    "from lale.lib.lale import NoOp\n",
    "from sklearn.decomposition import PCA\n",
    "from sklearn.linear_model import LogisticRegression as LR\n",
    "from xgboost import XGBClassifier as XGBoost\n",
    "\n",
    "# Swap the imported sklearn/xgboost classes for their Lale wrappers so\n",
    "# they can be combined with the | and >> combinators below.\n",
    "lale.wrap_imported_operators()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/svg+xml": [
       "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n",
       "<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n",
       " \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n",
       "<!-- Generated by graphviz version 2.38.0 (20140413.2041)\n",
       " -->\n",
       "<!-- Title: cluster:(root) Pages: 1 -->\n",
       "<svg width=\"184pt\" height=\"146pt\"\n",
       " viewBox=\"0.00 0.00 184.00 146.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n",
       "<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 142)\">\n",
       "<title>cluster:(root)</title>\n",
       "<g id=\"a_graph0\"><a xlink:title=\"(root) = ...\">\n",
       "<polygon fill=\"white\" stroke=\"none\" points=\"-4,4 -4,-142 180,-142 180,4 -4,4\"/>\n",
       "</a>\n",
       "</g>\n",
       "<g id=\"clust1\" class=\"cluster\"><title>cluster:choice_0</title>\n",
       "<g id=\"a_clust1\"><a xlink:title=\"choice_0 = pca | no_op\">\n",
       "<polygon fill=\"#7ec0ee\" stroke=\"black\" points=\"8,-8 8,-130 78,-130 78,-8 8,-8\"/>\n",
       "<text text-anchor=\"middle\" x=\"43\" y=\"-114.8\" font-family=\"Times,serif\" font-size=\"14.00\">Choice</text>\n",
       "</a>\n",
       "</g>\n",
       "</g>\n",
       "<g id=\"clust2\" class=\"cluster\"><title>cluster:choice_1</title>\n",
       "<g id=\"a_clust2\"><a xlink:title=\"choice_1 = lr | xg_boost\">\n",
       "<polygon fill=\"#7ec0ee\" stroke=\"black\" points=\"98,-8 98,-130 168,-130 168,-8 98,-8\"/>\n",
       "<text text-anchor=\"middle\" x=\"133\" y=\"-114.8\" font-family=\"Times,serif\" font-size=\"14.00\">Choice</text>\n",
       "</a>\n",
       "</g>\n",
       "</g>\n",
       "<!-- pca -->\n",
       "<g id=\"node1\" class=\"node\"><title>pca</title>\n",
       "<g id=\"a_node1\"><a xlink:href=\"https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.pca.html\" xlink:title=\"pca = PCA\">\n",
       "<ellipse fill=\"#7ec0ee\" stroke=\"black\" cx=\"43\" cy=\"-81\" rx=\"27\" ry=\"18\"/>\n",
       "<text text-anchor=\"middle\" x=\"43\" y=\"-78.2\" font-family=\"Times,serif\" font-size=\"11.00\">PCA</text>\n",
       "</a>\n",
       "</g>\n",
       "</g>\n",
       "<!-- lr -->\n",
       "<g id=\"node3\" class=\"node\"><title>lr</title>\n",
       "<g id=\"a_node3\"><a xlink:href=\"https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.logistic_regression.html\" xlink:title=\"lr = LR\">\n",
       "<ellipse fill=\"#7ec0ee\" stroke=\"black\" cx=\"133\" cy=\"-81\" rx=\"27\" ry=\"18\"/>\n",
       "<text text-anchor=\"middle\" x=\"133\" y=\"-78.2\" font-family=\"Times,serif\" font-size=\"11.00\">LR</text>\n",
       "</a>\n",
       "</g>\n",
       "</g>\n",
       "<!-- pca&#45;&gt;lr -->\n",
       "<g id=\"edge1\" class=\"edge\"><title>pca&#45;&gt;lr</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M77.7296,-81C83.6523,-81 89.838,-81 95.8241,-81\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"88.0002,-84.5005 98,-81 87.9998,-77.5005 88.0002,-84.5005\"/>\n",
       "</g>\n",
       "<!-- no_op -->\n",
       "<g id=\"node2\" class=\"node\"><title>no_op</title>\n",
       "<g id=\"a_node2\"><a xlink:href=\"https://lale.readthedocs.io/en/latest/modules/lale.lib.lale.no_op.html\" xlink:title=\"no_op = NoOp\">\n",
       "<ellipse fill=\"white\" stroke=\"black\" cx=\"43\" cy=\"-36\" rx=\"27\" ry=\"19.6\"/>\n",
       "<text text-anchor=\"middle\" x=\"43\" y=\"-39.2\" font-family=\"Times,serif\" font-size=\"11.00\">No&#45;</text>\n",
       "<text text-anchor=\"middle\" x=\"43\" y=\"-27.2\" font-family=\"Times,serif\" font-size=\"11.00\">Op</text>\n",
       "</a>\n",
       "</g>\n",
       "</g>\n",
       "<!-- xg_boost -->\n",
       "<g id=\"node4\" class=\"node\"><title>xg_boost</title>\n",
       "<g id=\"a_node4\"><a xlink:href=\"https://xgboost.readthedocs.io/en/latest/python/python_api.html#module-xgboost.sklearn\" xlink:title=\"xg_boost = XGBoost\">\n",
       "<ellipse fill=\"#7ec0ee\" stroke=\"black\" cx=\"133\" cy=\"-36\" rx=\"27\" ry=\"19.6\"/>\n",
       "<text text-anchor=\"middle\" x=\"133\" y=\"-39.2\" font-family=\"Times,serif\" font-size=\"11.00\">XG&#45;</text>\n",
       "<text text-anchor=\"middle\" x=\"133\" y=\"-27.2\" font-family=\"Times,serif\" font-size=\"11.00\">Boost</text>\n",
       "</a>\n",
       "</g>\n",
       "</g>\n",
       "</g>\n",
       "</svg>\n"
      ],
      "text/plain": [
       "<graphviz.dot.Digraph at 0x7ff103d367b8>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Planned pipeline: a choice of preprocessing step (PCA or nothing)\n",
    "# followed by a choice of classifier (LR or XGBoost).\n",
    "preprocessing_choice = PCA | NoOp\n",
    "classifier_choice = LR | XGBoost\n",
    "initial_planned = preprocessing_choice >> classifier_choice\n",
    "initial_planned.visualize()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "100%|██████████| 3/3 [00:34<00:00, 14.73s/trial, best loss: -0.813349862426007]\n"
     ]
    }
   ],
   "source": [
    "from lale.lib.lale import Hyperopt\n",
    "\n",
    "# Deliberately small search budget: 3 Hyperopt trials with 3-fold CV.\n",
    "search_settings = dict(optimizer=Hyperopt, cv=3, max_evals=3)\n",
    "initial_trained = initial_planned.auto_configure(\n",
    "    train_X, train_y, **search_settings)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/svg+xml": [
       "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n",
       "<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n",
       " \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n",
       "<!-- Generated by graphviz version 2.38.0 (20140413.2041)\n",
       " -->\n",
       "<!-- Title: cluster:(root) Pages: 1 -->\n",
       "<svg width=\"152pt\" height=\"48pt\"\n",
       " viewBox=\"0.00 0.00 152.00 47.60\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n",
       "<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 43.598)\">\n",
       "<title>cluster:(root)</title>\n",
       "<g id=\"a_graph0\"><a xlink:title=\"(root) = ...\">\n",
       "<polygon fill=\"white\" stroke=\"none\" points=\"-4,4 -4,-43.598 148,-43.598 148,4 -4,4\"/>\n",
       "</a>\n",
       "</g>\n",
       "<!-- no_op -->\n",
       "<g id=\"node1\" class=\"node\"><title>no_op</title>\n",
       "<g id=\"a_node1\"><a xlink:href=\"https://lale.readthedocs.io/en/latest/modules/lale.lib.lale.no_op.html\" xlink:title=\"no_op = NoOp()\">\n",
       "<ellipse fill=\"white\" stroke=\"black\" cx=\"27\" cy=\"-19.799\" rx=\"27\" ry=\"19.6\"/>\n",
       "<text text-anchor=\"middle\" x=\"27\" y=\"-22.999\" font-family=\"Times,serif\" font-size=\"11.00\">No&#45;</text>\n",
       "<text text-anchor=\"middle\" x=\"27\" y=\"-10.999\" font-family=\"Times,serif\" font-size=\"11.00\">Op</text>\n",
       "</a>\n",
       "</g>\n",
       "</g>\n",
       "<!-- lr -->\n",
       "<g id=\"node2\" class=\"node\"><title>lr</title>\n",
       "<g id=\"a_node2\"><a xlink:href=\"https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.logistic_regression.html\" xlink:title=\"lr = LR(C=1327.7214911678875, dual=True, tol=0.0856386428135445)\">\n",
       "<ellipse fill=\"white\" stroke=\"black\" cx=\"117\" cy=\"-19.799\" rx=\"27\" ry=\"18\"/>\n",
       "<text text-anchor=\"middle\" x=\"117\" y=\"-16.999\" font-family=\"Times,serif\" font-size=\"11.00\">LR</text>\n",
       "</a>\n",
       "</g>\n",
       "</g>\n",
       "<!-- no_op&#45;&gt;lr -->\n",
       "<g id=\"edge1\" class=\"edge\"><title>no_op&#45;&gt;lr</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M54.4029,-19.799C62.3932,-19.799 71.3106,-19.799 79.8241,-19.799\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"79.919,-23.2991 89.919,-19.799 79.919,-16.2991 79.919,-23.2991\"/>\n",
       "</g>\n",
       "</g>\n",
       "</svg>\n"
      ],
      "text/plain": [
       "<graphviz.dot.Digraph at 0x7ff0e51b36a0>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/markdown": [
       "```python\n",
       "lr = LR(C=1327.7214911678875, dual=True, tol=0.0856386428135445)\n",
       "pipeline = NoOp() >> lr\n",
       "```"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Show the pipeline selected by the search, first as a graph and then\n",
    "# as the equivalent Python code (imports omitted).\n",
    "initial_trained.visualize()\n",
    "initial_trained.pretty_print(\n",
    "    ipython_display=True,\n",
    "    show_imports=False,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Evaluate Initial Pipeline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "accuracy 83.1%\n"
     ]
    }
   ],
   "source": [
    "# Accuracy: ideal is 100%; higher values indicate better predictive\n",
    "# performance. Measured on the held-out test split.\n",
    "import sklearn.metrics\n",
    "\n",
    "accuracy_scorer = sklearn.metrics.make_scorer(sklearn.metrics.accuracy_score)\n",
    "initial_accuracy = accuracy_scorer(initial_trained, test_X, test_y)\n",
    "print(f'accuracy {initial_accuracy:.1%}')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "statistical parity difference -0.26\n"
     ]
    }
   ],
   "source": [
    "# Statistical parity difference: ideal is 0; values between -0.1 and\n",
    "# +0.1 are considered fair.\n",
    "stat_par_scorer = lale.lib.aif360.statistical_parity_difference(**fairness_info)\n",
    "initial_stat_par = stat_par_scorer(initial_trained, test_X, test_y)\n",
    "print(f'statistical parity difference {initial_stat_par:.2f}')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "disparate impact 0.25\n"
     ]
    }
   ],
   "source": [
    "# Disparate impact: ideal is 1; values between 0.8 and 1.2 are\n",
    "# considered fair.\n",
    "disparate_impact_scorer = lale.lib.aif360.disparate_impact(**fairness_info)\n",
    "initial_disparate_impact = disparate_impact_scorer(initial_trained, test_X, test_y)\n",
    "print(f'disparate impact {initial_disparate_impact:.2f}')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Equalized Odds Postprocessing"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/svg+xml": [
       "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n",
       "<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n",
       " \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n",
       "<!-- Generated by graphviz version 2.38.0 (20140413.2041)\n",
       " -->\n",
       "<!-- Title: cluster:(root) Pages: 1 -->\n",
       "<svg width=\"200pt\" height=\"119pt\"\n",
       " viewBox=\"0.00 0.00 200.00 119.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n",
       "<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 115)\">\n",
       "<title>cluster:(root)</title>\n",
       "<polygon fill=\"white\" stroke=\"none\" points=\"-4,4 -4,-115 196,-115 196,4 -4,4\"/>\n",
       "<g id=\"clust1\" class=\"cluster\"><title>cluster:(root)</title>\n",
       "<g id=\"a_clust1\"><a xlink:href=\"https://lale.readthedocs.io/en/latest/modules/lale.lib.aif360.eq_odds_postprocessing.html\" xlink:title=\"(root) = EqOddsPostprocessing(estimator=pipeline, favorable_label=1.0, unfavorable_label=0.0, protected_attribute_names=[&#39;race&#39;, &#39;sex&#39;], unprivileged_groups=[{&#39;race&#39;: 0.0, &#39;sex&#39;: 0.0}], privileged_groups=[{&#39;sex&#39;: 1.0, &#39;race&#39;: 1.0}])\">\n",
       "<polygon fill=\"#b0e2ff\" stroke=\"black\" points=\"8,-8 8,-103 184,-103 184,-8 8,-8\"/>\n",
       "<text text-anchor=\"middle\" x=\"96\" y=\"-87.8\" font-family=\"Times,serif\" font-size=\"14.00\">EqOddsPostprocessing</text>\n",
       "</a>\n",
       "</g>\n",
       "</g>\n",
       "<g id=\"clust2\" class=\"cluster\"><title>cluster:pipeline</title>\n",
       "<g id=\"a_clust2\"><a xlink:href=\"https://lale.readthedocs.io/en/latest/modules/lale.lib.aif360.eq_odds_postprocessing.html\" xlink:title=\"pipeline = ...\">\n",
       "<path fill=\"white\" stroke=\"black\" d=\"M28,-16C28,-16 164,-16 164,-16 170,-16 176,-22 176,-28 176,-28 176,-60 176,-60 176,-66 170,-72 164,-72 164,-72 28,-72 28,-72 22,-72 16,-66 16,-60 16,-60 16,-28 16,-28 16,-22 22,-16 28,-16\"/>\n",
       "</a>\n",
       "</g>\n",
       "</g>\n",
       "<!-- no_op -->\n",
       "<g id=\"node1\" class=\"node\"><title>no_op</title>\n",
       "<g id=\"a_node1\"><a xlink:href=\"https://lale.readthedocs.io/en/latest/modules/lale.lib.lale.no_op.html\" xlink:title=\"no_op = NoOp()\">\n",
       "<ellipse fill=\"white\" stroke=\"black\" cx=\"51\" cy=\"-44\" rx=\"27\" ry=\"19.6\"/>\n",
       "<text text-anchor=\"middle\" x=\"51\" y=\"-47.2\" font-family=\"Times,serif\" font-size=\"11.00\">No&#45;</text>\n",
       "<text text-anchor=\"middle\" x=\"51\" y=\"-35.2\" font-family=\"Times,serif\" font-size=\"11.00\">Op</text>\n",
       "</a>\n",
       "</g>\n",
       "</g>\n",
       "<!-- lr -->\n",
       "<g id=\"node2\" class=\"node\"><title>lr</title>\n",
       "<g id=\"a_node2\"><a xlink:href=\"https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.logistic_regression.html\" xlink:title=\"lr = LR(C=1327.7214911678875, dual=True, tol=0.0856386428135445)\">\n",
       "<ellipse fill=\"white\" stroke=\"black\" cx=\"141\" cy=\"-44\" rx=\"27\" ry=\"18\"/>\n",
       "<text text-anchor=\"middle\" x=\"141\" y=\"-41.2\" font-family=\"Times,serif\" font-size=\"11.00\">LR</text>\n",
       "</a>\n",
       "</g>\n",
       "</g>\n",
       "<!-- no_op&#45;&gt;lr -->\n",
       "<g id=\"edge1\" class=\"edge\"><title>no_op&#45;&gt;lr</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M78.4029,-44C86.3932,-44 95.3106,-44 103.824,-44\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"103.919,-47.5001 113.919,-44 103.919,-40.5001 103.919,-47.5001\"/>\n",
       "</g>\n",
       "</g>\n",
       "</svg>\n"
      ],
      "text/plain": [
       "<graphviz.dot.Digraph at 0x7ff103996470>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "from lale.lib.aif360 import EqOddsPostprocessing\n",
    "\n",
    "# Wrap the tuned pipeline in an equalized-odds post-processor that is\n",
    "# configured with the dataset's fairness metadata.\n",
    "eqodds_hyperparams = {'estimator': initial_trained, **fairness_info}\n",
    "eqodds_trainable = EqOddsPostprocessing(**eqodds_hyperparams)\n",
    "eqodds_trainable.visualize()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 1.73 s, sys: 172 ms, total: 1.91 s\n",
      "Wall time: 2.21 s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "# Fit the equalized-odds post-processor on the training split.\n",
    "# (%%time has to stay on the first line of the cell.)\n",
    "eqodds_trained = eqodds_trainable.fit(\n",
    "    train_X, train_y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "accuracy 73.6%\n",
      "statistical parity difference 0.01\n",
      "disparate impact 1.02\n"
     ]
    }
   ],
   "source": [
    "# Same three test-set metrics as for the initial pipeline, now for the\n",
    "# equalized-odds model.\n",
    "eqodds_accuracy = accuracy_scorer(eqodds_trained, test_X, test_y)\n",
    "print(f'accuracy {eqodds_accuracy:.1%}')\n",
    "eqodds_stat_par = stat_par_scorer(eqodds_trained, test_X, test_y)\n",
    "print(f'statistical parity difference {eqodds_stat_par:.2f}')\n",
    "eqodds_disparate_impact = disparate_impact_scorer(eqodds_trained, test_X, test_y)\n",
    "print(f'disparate impact {eqodds_disparate_impact:.2f}')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Calibrated Equalized Odds Postprocessing"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/svg+xml": [
       "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n",
       "<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n",
       " \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n",
       "<!-- Generated by graphviz version 2.38.0 (20140413.2041)\n",
       " -->\n",
       "<!-- Title: cluster:(root) Pages: 1 -->\n",
       "<svg width=\"205pt\" height=\"119pt\"\n",
       " viewBox=\"0.00 0.00 205.00 119.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n",
       "<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 115)\">\n",
       "<title>cluster:(root)</title>\n",
       "<polygon fill=\"white\" stroke=\"none\" points=\"-4,4 -4,-115 201,-115 201,4 -4,4\"/>\n",
       "<g id=\"clust1\" class=\"cluster\"><title>cluster:(root)</title>\n",
       "<g id=\"a_clust1\"><a xlink:href=\"https://lale.readthedocs.io/en/latest/modules/lale.lib.aif360.calibrated_eq_odds_postprocessing.html\" xlink:title=\"(root) = CalibratedEqOddsPostprocessing(estimator=pipeline, favorable_label=1.0, unfavorable_label=0.0, protected_attribute_names=[&#39;race&#39;, &#39;sex&#39;], unprivileged_groups=[{&#39;race&#39;: 0.0, &#39;sex&#39;: 0.0}], privileged_groups=[{&#39;sex&#39;: 1.0, &#39;race&#39;: 1.0}])\">\n",
       "<polygon fill=\"#b0e2ff\" stroke=\"black\" points=\"0,-8 0,-103 197,-103 197,-8 0,-8\"/>\n",
       "<text text-anchor=\"middle\" x=\"98.5\" y=\"-87.8\" font-family=\"Times,serif\" font-size=\"14.00\">CalibratedEqOddsPostprocessing</text>\n",
       "</a>\n",
       "</g>\n",
       "</g>\n",
       "<g id=\"clust2\" class=\"cluster\"><title>cluster:pipeline</title>\n",
       "<g id=\"a_clust2\"><a xlink:href=\"https://lale.readthedocs.io/en/latest/modules/lale.lib.aif360.calibrated_eq_odds_postprocessing.html\" xlink:title=\"pipeline = ...\">\n",
       "<path fill=\"white\" stroke=\"black\" d=\"M30,-16C30,-16 166,-16 166,-16 172,-16 178,-22 178,-28 178,-28 178,-60 178,-60 178,-66 172,-72 166,-72 166,-72 30,-72 30,-72 24,-72 18,-66 18,-60 18,-60 18,-28 18,-28 18,-22 24,-16 30,-16\"/>\n",
       "</a>\n",
       "</g>\n",
       "</g>\n",
       "<!-- no_op -->\n",
       "<g id=\"node1\" class=\"node\"><title>no_op</title>\n",
       "<g id=\"a_node1\"><a xlink:href=\"https://lale.readthedocs.io/en/latest/modules/lale.lib.lale.no_op.html\" xlink:title=\"no_op = NoOp()\">\n",
       "<ellipse fill=\"white\" stroke=\"black\" cx=\"53\" cy=\"-44\" rx=\"27\" ry=\"19.6\"/>\n",
       "<text text-anchor=\"middle\" x=\"53\" y=\"-47.2\" font-family=\"Times,serif\" font-size=\"11.00\">No&#45;</text>\n",
       "<text text-anchor=\"middle\" x=\"53\" y=\"-35.2\" font-family=\"Times,serif\" font-size=\"11.00\">Op</text>\n",
       "</a>\n",
       "</g>\n",
       "</g>\n",
       "<!-- lr -->\n",
       "<g id=\"node2\" class=\"node\"><title>lr</title>\n",
       "<g id=\"a_node2\"><a xlink:href=\"https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.logistic_regression.html\" xlink:title=\"lr = LR(C=1327.7214911678875, dual=True, tol=0.0856386428135445)\">\n",
       "<ellipse fill=\"white\" stroke=\"black\" cx=\"143\" cy=\"-44\" rx=\"27\" ry=\"18\"/>\n",
       "<text text-anchor=\"middle\" x=\"143\" y=\"-41.2\" font-family=\"Times,serif\" font-size=\"11.00\">LR</text>\n",
       "</a>\n",
       "</g>\n",
       "</g>\n",
       "<!-- no_op&#45;&gt;lr -->\n",
       "<g id=\"edge1\" class=\"edge\"><title>no_op&#45;&gt;lr</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M80.4029,-44C88.3932,-44 97.3106,-44 105.824,-44\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"105.919,-47.5001 115.919,-44 105.919,-40.5001 105.919,-47.5001\"/>\n",
       "</g>\n",
       "</g>\n",
       "</svg>\n"
      ],
      "text/plain": [
       "<graphviz.dot.Digraph at 0x7ff0e5246cc0>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "from lale.lib.aif360 import CalibratedEqOddsPostprocessing\n",
    "\n",
    "# Wrap the tuned pipeline in a calibrated equalized-odds post-processor\n",
    "# that is configured with the dataset's fairness metadata.\n",
    "caleqo_hyperparams = {'estimator': initial_trained, **fairness_info}\n",
    "caleqo_trainable = CalibratedEqOddsPostprocessing(**caleqo_hyperparams)\n",
    "caleqo_trainable.visualize()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 2.23 s, sys: 250 ms, total: 2.48 s\n",
      "Wall time: 2.61 s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "# Fit the post-processor on the training split; the %%time magic above\n",
    "# reports how long the fit takes.\n",
    "caleqo_trained = caleqo_trainable.fit(train_X, train_y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "accuracy 75.5%\n",
      "statistical parity difference 0.02\n",
      "disparate impact inf\n"
     ]
    }
   ],
   "source": [
    "# Score the fitted post-processor on the test split, one metric at a time.\n",
    "caleqo_accuracy = accuracy_scorer(caleqo_trained, test_X, test_y)\n",
    "print(f'accuracy {caleqo_accuracy:.1%}')\n",
    "\n",
    "caleqo_stat_par = stat_par_scorer(caleqo_trained, test_X, test_y)\n",
    "print(f'statistical parity difference {caleqo_stat_par:.2f}')\n",
    "\n",
    "# NOTE(review): the saved output shows `inf` for this metric, which suggests\n",
    "# a zero denominator in the ratio -- confirm against the scorer's definition.\n",
    "caleqo_disp_impact = disparate_impact_scorer(caleqo_trained, test_X, test_y)\n",
    "print(f'disparate impact {caleqo_disp_impact:.2f}')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Reject Option Classification Postprocessing"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/svg+xml": [
       "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n",
       "<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n",
       " \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n",
       "<!-- Generated by graphviz version 2.38.0 (20140413.2041)\n",
       " -->\n",
       "<!-- Title: cluster:(root) Pages: 1 -->\n",
       "<svg width=\"200pt\" height=\"119pt\"\n",
       " viewBox=\"0.00 0.00 200.00 119.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n",
       "<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 115)\">\n",
       "<title>cluster:(root)</title>\n",
       "<polygon fill=\"white\" stroke=\"none\" points=\"-4,4 -4,-115 196,-115 196,4 -4,4\"/>\n",
       "<g id=\"clust1\" class=\"cluster\"><title>cluster:(root)</title>\n",
       "<g id=\"a_clust1\"><a xlink:href=\"https://lale.readthedocs.io/en/latest/modules/lale.lib.aif360.reject_option_classification.html\" xlink:title=\"(root) = RejectOptionClassification(estimator=pipeline, favorable_label=1.0, unfavorable_label=0.0, protected_attribute_names=[&#39;race&#39;, &#39;sex&#39;], unprivileged_groups=[{&#39;race&#39;: 0.0, &#39;sex&#39;: 0.0}], privileged_groups=[{&#39;sex&#39;: 1.0, &#39;race&#39;: 1.0}])\">\n",
       "<polygon fill=\"#b0e2ff\" stroke=\"black\" points=\"8,-8 8,-103 184,-103 184,-8 8,-8\"/>\n",
       "<text text-anchor=\"middle\" x=\"96\" y=\"-87.8\" font-family=\"Times,serif\" font-size=\"14.00\">RejectOptionClassification</text>\n",
       "</a>\n",
       "</g>\n",
       "</g>\n",
       "<g id=\"clust2\" class=\"cluster\"><title>cluster:pipeline</title>\n",
       "<g id=\"a_clust2\"><a xlink:href=\"https://lale.readthedocs.io/en/latest/modules/lale.lib.aif360.reject_option_classification.html\" xlink:title=\"pipeline = ...\">\n",
       "<path fill=\"white\" stroke=\"black\" d=\"M28,-16C28,-16 164,-16 164,-16 170,-16 176,-22 176,-28 176,-28 176,-60 176,-60 176,-66 170,-72 164,-72 164,-72 28,-72 28,-72 22,-72 16,-66 16,-60 16,-60 16,-28 16,-28 16,-22 22,-16 28,-16\"/>\n",
       "</a>\n",
       "</g>\n",
       "</g>\n",
       "<!-- no_op -->\n",
       "<g id=\"node1\" class=\"node\"><title>no_op</title>\n",
       "<g id=\"a_node1\"><a xlink:href=\"https://lale.readthedocs.io/en/latest/modules/lale.lib.lale.no_op.html\" xlink:title=\"no_op = NoOp()\">\n",
       "<ellipse fill=\"white\" stroke=\"black\" cx=\"51\" cy=\"-44\" rx=\"27\" ry=\"19.6\"/>\n",
       "<text text-anchor=\"middle\" x=\"51\" y=\"-47.2\" font-family=\"Times,serif\" font-size=\"11.00\">No&#45;</text>\n",
       "<text text-anchor=\"middle\" x=\"51\" y=\"-35.2\" font-family=\"Times,serif\" font-size=\"11.00\">Op</text>\n",
       "</a>\n",
       "</g>\n",
       "</g>\n",
       "<!-- lr -->\n",
       "<g id=\"node2\" class=\"node\"><title>lr</title>\n",
       "<g id=\"a_node2\"><a xlink:href=\"https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.logistic_regression.html\" xlink:title=\"lr = LR(C=1327.7214911678875, dual=True, tol=0.0856386428135445)\">\n",
       "<ellipse fill=\"white\" stroke=\"black\" cx=\"141\" cy=\"-44\" rx=\"27\" ry=\"18\"/>\n",
       "<text text-anchor=\"middle\" x=\"141\" y=\"-41.2\" font-family=\"Times,serif\" font-size=\"11.00\">LR</text>\n",
       "</a>\n",
       "</g>\n",
       "</g>\n",
       "<!-- no_op&#45;&gt;lr -->\n",
       "<g id=\"edge1\" class=\"edge\"><title>no_op&#45;&gt;lr</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M78.4029,-44C86.3932,-44 95.3106,-44 103.824,-44\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"103.919,-47.5001 113.919,-44 103.919,-40.5001 103.919,-47.5001\"/>\n",
       "</g>\n",
       "</g>\n",
       "</svg>\n"
      ],
      "text/plain": [
       "<graphviz.dot.Digraph at 0x7ff0e4d9c780>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "from lale.lib.aif360 import RejectOptionClassification\n",
    "\n",
    "# Wrap the already-trained `initial_trained` pipeline in the post-processor.\n",
    "# `fairness_info` supplies the favorable/unfavorable labels, the protected\n",
    "# attribute names, and the privileged/unprivileged group definitions.\n",
    "rejopt_trainable = RejectOptionClassification(\n",
    "    estimator=initial_trained,\n",
    "    **fairness_info\n",
    ")\n",
    "rejopt_trainable.visualize()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 56.1 s, sys: 3.89 s, total: 1min\n",
      "Wall time: 1min 8s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "# Fit the post-processor on the training split; the %%time magic above\n",
    "# reports how long the fit takes (about a minute in the saved run).\n",
    "rejopt_trained = rejopt_trainable.fit(train_X, train_y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "accuracy 75.9%\n",
      "statistical parity difference 0.05\n",
      "disparate impact inf\n"
     ]
    }
   ],
   "source": [
    "# Score the fitted post-processor on the test split, one metric at a time.\n",
    "rejopt_accuracy = accuracy_scorer(rejopt_trained, test_X, test_y)\n",
    "print(f'accuracy {rejopt_accuracy:.1%}')\n",
    "\n",
    "rejopt_stat_par = stat_par_scorer(rejopt_trained, test_X, test_y)\n",
    "print(f'statistical parity difference {rejopt_stat_par:.2f}')\n",
    "\n",
    "# NOTE(review): the saved output shows `inf` for this metric, which suggests\n",
    "# a zero denominator in the ratio -- confirm against the scorer's definition.\n",
    "rejopt_disp_impact = disparate_impact_scorer(rejopt_trained, test_X, test_y)\n",
    "print(f'disparate impact {rejopt_disp_impact:.2f}')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}