{ "cells": [ { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "The autoreload extension is already loaded. To reload it, use:\n", " %reload_ext autoreload\n" ] } ], "source": [ "%load_ext autoreload\n", "%autoreload 2\n", "\n", "import sys\n", "sys.path.append(\"../\")\n", "from aif360.datasets import BinaryLabelDataset\n", "from aif360.datasets import AdultDataset, GermanDataset, CompasDataset\n", "from aif360.metrics import BinaryLabelDatasetMetric\n", "from aif360.metrics import ClassificationMetric\n", "from aif360.metrics.utils import compute_boolean_conditioning_vector\n", "from sklearn.linear_model import LogisticRegression\n", "from sklearn.preprocessing import StandardScaler, MaxAbsScaler\n", "from sklearn.metrics import accuracy_score\n", "\n", "from aif360.algorithms.preprocessing.optim_preproc_helpers.data_preproc_functions import load_preproc_data_adult, load_preproc_data_compas, load_preproc_data_german\n", "\n", "from aif360.algorithms.inprocessing.meta_fair_classifier import MetaFairClassifier\n", "from aif360.algorithms.inprocessing.celisMeta.utils import getStats\n", "from IPython.display import Markdown, display\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import pandas as pd\n", "\n" ] }, { "cell_type": "code", "execution_count": 30, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/markdown": [ "### Meta-Algorithm for fair classification." ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/markdown": [ "The fairness metrics to be optimized have to specified as \"input\". Currently we can handle the following fairness metrics." ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/markdown": [ "Statistical Rate, False Positive Rate, True Positive Rate, False Negative Rate, True Negative Rate," ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/markdown": [ "Accuracy Rate, False Discovery Rate, False Omission Rate, Positive Predictive Rate, Negative Predictive Rate." ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/markdown": [ "#### -----------------------------" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/markdown": [ "The example below considers the case of False Discovery Parity." ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "display(Markdown(\"### Meta-Algorithm for fair classification.\"))\n", "display(Markdown(\"The fairness metrics to be optimized have to specified as \\\"input\\\". Currently we can handle the following fairness metrics.\"))\n", "display(Markdown(\"Statistical Rate, False Positive Rate, True Positive Rate, False Negative Rate, True Negative Rate,\"))\n", "display(Markdown(\"Accuracy Rate, False Discovery Rate, False Omission Rate, Positive Predictive Rate, Negative Predictive Rate.\"))\n", "display(Markdown(\"#### -----------------------------\"))\n", "display(Markdown(\"The example below considers the case of False Discovery Parity.\"))\n" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [], "source": [ "dataset_orig = load_preproc_data_adult()\n", "\n", "privileged_groups = [{'sex': 1}]\n", "unprivileged_groups = [{'sex': 0}]\n", "\n", "dataset_orig_train, dataset_orig_test = dataset_orig.split([0.7], shuffle=True)\n" ] }, { "cell_type": "code", "execution_count": 32, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/markdown": [ "#### Training Dataset shape" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "(34189, 18)\n" ] }, { "data": { "text/markdown": [ "#### Favorable and unfavorable labels" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "(1.0, 0.0)\n" ] }, { "data": { "text/markdown": [ "#### Protected attribute names" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "['sex', 'race']\n" ] }, { "data": { "text/markdown": [ "#### Privileged and unprivileged protected attribute values" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "([array([1.]), array([1.])], [array([0.]), array([0.])])\n" ] }, { "data": { "text/markdown": [ "#### Dataset feature names" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "['race', 'sex', 'Age (decade)=10', 'Age (decade)=20', 'Age (decade)=30', 'Age (decade)=40', 'Age (decade)=50', 'Age (decade)=60', 'Age (decade)=>=70', 'Education Years=6', 'Education Years=7', 'Education Years=8', 'Education Years=9', 'Education Years=10', 'Education Years=11', 'Education Years=12', 'Education Years=<6', 'Education Years=>12']\n" ] } ], "source": [ "display(Markdown(\"#### Training Dataset shape\"))\n", "print(dataset_orig_train.features.shape)\n", "display(Markdown(\"#### Favorable and unfavorable labels\"))\n", "print(dataset_orig_train.favorable_label, dataset_orig_train.unfavorable_label)\n", "display(Markdown(\"#### Protected attribute names\"))\n", "print(dataset_orig_train.protected_attribute_names)\n", "display(Markdown(\"#### Privileged and unprivileged protected attribute values\"))\n", "print(dataset_orig_train.privileged_protected_attributes, \n", " dataset_orig_train.unprivileged_protected_attributes)\n", "display(Markdown(\"#### Dataset feature names\"))\n", "print(dataset_orig_train.feature_names)\n" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [ { "data": { "text/markdown": [ "#### Training Dataset shape" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "(34189, 18)\n" ] }, { "data": { "text/markdown": [ "#### Favorable and unfavorable labels" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "(1.0, 0.0)\n" ] }, { "data": { "text/markdown": [ "#### Protected attribute names" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "['sex', 'race']\n" ] }, { "data": { "text/markdown": [ "#### Privileged and unprivileged protected attribute values" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "([array([1.]), array([1.])], [array([0.]), array([0.])])\n" ] }, { "data": { "text/markdown": [ "#### Dataset feature names" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "['race', 'sex', 'Age (decade)=10', 'Age (decade)=20', 'Age (decade)=30', 'Age (decade)=40', 'Age (decade)=50', 'Age (decade)=60', 'Age (decade)=>=70', 'Education Years=6', 'Education Years=7', 'Education Years=8', 'Education Years=9', 'Education Years=10', 'Education Years=11', 'Education Years=12', 'Education Years=<6', 'Education Years=>12']\n" ] } ], "source": [ "display(Markdown(\"#### Training Dataset shape\"))\n", "print(dataset_orig_train.features.shape)\n", "display(Markdown(\"#### Favorable and unfavorable labels\"))\n", "print(dataset_orig_train.favorable_label, dataset_orig_train.unfavorable_label)\n", "display(Markdown(\"#### Protected attribute names\"))\n", "print(dataset_orig_train.protected_attribute_names)\n", "display(Markdown(\"#### Privileged and unprivileged protected attribute values\"))\n", "print(dataset_orig_train.privileged_protected_attributes, \n", " dataset_orig_train.unprivileged_protected_attributes)\n", "display(Markdown(\"#### Dataset feature names\"))\n", "print(dataset_orig_train.feature_names)\n" ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Train set: Difference in mean outcomes between unprivileged and privileged groups = -0.193944\n", "Test set: Difference in mean outcomes between unprivileged and privileged groups = -0.195913\n", "Train set: Difference in mean outcomes between unprivileged and privileged groups = -0.193944\n", "Test set: Difference in mean outcomes between unprivileged and privileged groups = -0.195913\n" ] } ], "source": [ "metric_orig_train = BinaryLabelDatasetMetric(dataset_orig_train, \n", " unprivileged_groups=unprivileged_groups,\n", " privileged_groups=privileged_groups)\n", "#display(Markdown(\"#### Original training dataset\"))\n", "print(\"Train set: Difference in mean outcomes between unprivileged and privileged groups = %f\" % metric_orig_train.mean_difference())\n", "metric_orig_test = BinaryLabelDatasetMetric(dataset_orig_test, \n", " unprivileged_groups=unprivileged_groups,\n", " privileged_groups=privileged_groups)\n", "print(\"Test set: Difference in mean outcomes between unprivileged and privileged groups = %f\" % metric_orig_test.mean_difference())\n", "\n", "\n", "\n", "\n", "min_max_scaler = MaxAbsScaler()\n", "dataset_orig_train.features = min_max_scaler.fit_transform(dataset_orig_train.features)\n", "dataset_orig_test.features = min_max_scaler.transform(dataset_orig_test.features)\n", "metric_scaled_train = BinaryLabelDatasetMetric(dataset_orig_train, \n", " unprivileged_groups=unprivileged_groups,\n", " privileged_groups=privileged_groups)\n", "#display(Markdown(\"#### Scaled dataset - Verify that the scaling does not affect the group label statistics\"))\n", "print(\"Train set: Difference in mean outcomes between unprivileged and privileged groups = %f\" % metric_scaled_train.mean_difference())\n", "metric_scaled_test = BinaryLabelDatasetMetric(dataset_orig_test, \n", " unprivileged_groups=unprivileged_groups,\n", " privileged_groups=privileged_groups)\n", "print(\"Test set: Difference in mean outcomes between unprivileged and privileged groups = %f\" % metric_scaled_test.mean_difference())\n" ] }, { "cell_type": "code", "execution_count": 35, "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 35, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Get classifier without fairness constraints\n", "biased_model = MetaFairClassifier(tau=0, sensitive_attr=\"sex\")\n", "biased_model.fit(dataset_orig_train)" ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "('Accuracy : ', 3148, 14653, 0.7851634477581383)\n", "('SR tau : ', 0.5128381178595508)\n", "('FPR tau : ', 0.7945499159671334)\n", "('FNR tau : ', 0.910501272336843)\n", "('TPR tau : ', 0.7721613485851896)\n", "('TNR tau : ', 0.986749402037707)\n", "('AR tau : ', 0.8525978220135617)\n", "('FDR tau : ', 0.5030017152658662)\n", "('FOR tau : ', 0.3717552930362757)\n", "('PPR tau : ', 0.5485001947798986)\n", "('NPR tau : ', 0.827615343560593)\n", "0.503001715266\n" ] } ], "source": [ "# Apply the unconstrained model to test data\n", "dataset_bias_test = biased_model.predict(dataset_orig_test)\n", "\n", "predictions = [1 if y == dataset_orig_train.favorable_label else -1 for y in list(dataset_bias_test.labels)]\n", "y_test = np.array([1 if y == [dataset_orig_train.favorable_label] else -1 for y in dataset_orig_test.labels])\n", "x_control_test = pd.DataFrame(data=dataset_orig_test.features, columns=dataset_orig_test.feature_names)[\"sex\"]\n", "\n", "acc, sr, unconstrainedFDR = getStats(y_test, predictions, x_control_test)\n", "print(unconstrainedFDR)" ] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "('Training Accuracy: ', 0.7350317353534763, ', Training gamma: ', 0.672899406837947)\n" ] }, { "data": { "text/plain": [ "" ] }, "execution_count": 37, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Learn debiased classifier\n", "tau = 0.8\n", "debiased_model = MetaFairClassifier(tau=tau, sensitive_attr=\"sex\")\n", "debiased_model.fit(dataset_orig_train)\n" ] }, { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [], "source": [ "# Apply the debiased model to test data\n", "dataset_debiasing_train = debiased_model.predict(dataset_orig_train)\n", "dataset_debiasing_test = debiased_model.predict(dataset_orig_test)\n" ] }, { "cell_type": "code", "execution_count": 39, "metadata": {}, "outputs": [ { "data": { "text/markdown": [ "#### Model - with debiasing - dataset metrics" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Train set: Difference in mean outcomes between unprivileged and privileged groups = -0.201319\n", "Test set: Difference in mean outcomes between unprivileged and privileged groups = -0.195210\n" ] } ], "source": [ "# Metrics for the dataset from model with debiasing\n", "display(Markdown(\"#### Model - with debiasing - dataset metrics\"))\n", "metric_dataset_debiasing_train = BinaryLabelDatasetMetric(dataset_debiasing_train, \n", " unprivileged_groups=unprivileged_groups,\n", " privileged_groups=privileged_groups)\n", "\n", "print(\"Train set: Difference in mean outcomes between unprivileged and privileged groups = %f\" % metric_dataset_debiasing_train.mean_difference())\n", "\n", "metric_dataset_debiasing_test = BinaryLabelDatasetMetric(dataset_debiasing_test, \n", " unprivileged_groups=unprivileged_groups,\n", " privileged_groups=privileged_groups)\n", "\n", "print(\"Test set: Difference in mean outcomes between unprivileged and privileged groups = %f\" % metric_dataset_debiasing_test.mean_difference())\n" ] }, { "cell_type": "code", "execution_count": 40, "metadata": {}, "outputs": [ { "data": { "text/markdown": [ "#### Model - with debiasing - classification metrics" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Test set: Classification accuracy = 0.731932\n", "Test set: Balanced classification accuracy = 0.716763\n", "Test set: Disparate impact = 0.539856\n", "Test set: Equal opportunity difference = -0.120467\n", "Test set: Average odds difference = -0.117636\n", "Test set: Theil_index = 0.128652\n" ] } ], "source": [ "display(Markdown(\"#### Model - with debiasing - classification metrics\"))\n", "classified_metric_debiasing_test = ClassificationMetric(dataset_orig_test, \n", " dataset_debiasing_test,\n", " unprivileged_groups=unprivileged_groups,\n", " privileged_groups=privileged_groups)\n", "print(\"Test set: Classification accuracy = %f\" % classified_metric_debiasing_test.accuracy())\n", "TPR = classified_metric_debiasing_test.true_positive_rate()\n", "TNR = classified_metric_debiasing_test.true_negative_rate()\n", "bal_acc_debiasing_test = 0.5*(TPR+TNR)\n", "print(\"Test set: Balanced classification accuracy = %f\" % bal_acc_debiasing_test)\n", "print(\"Test set: Disparate impact = %f\" % classified_metric_debiasing_test.disparate_impact())\n", "print(\"Test set: Equal opportunity difference = %f\" % classified_metric_debiasing_test.equal_opportunity_difference())\n", "print(\"Test set: Average odds difference = %f\" % classified_metric_debiasing_test.average_odds_difference())\n", "print(\"Test set: Theil_index = %f\" % classified_metric_debiasing_test.theil_index())\n" ] }, { "cell_type": "code", "execution_count": 41, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "('Accuracy : ', 3928, 14653, 0.7319320275711458)\n", "('SR tau : ', 0.5398556890759312)\n", "('FPR tau : ', 0.6157226437750696)\n", "('FNR tau : ', 0.7093999136230463)\n", "('TPR tau : ', 0.8293479564733099)\n", "('TNR tau : ', 0.8593163406441414)\n", "('AR tau : ', 0.8892945217528149)\n", "('FDR tau : ', 0.6832866118898019)\n", "('FOR tau : ', 0.3834976405176844)\n", "('PPR tau : ', 0.5596391928376183)\n", "('NPR tau : ', 0.8967236467236467)\n", "(0.6832866118898019, 0.5030017152658662)\n" ] } ], "source": [ "### Testing \n", "predictions = list(dataset_debiasing_test.labels)\n", "predictions = [1 if y == dataset_orig_train.favorable_label else -1 for y in predictions]\n", "y_test = np.array([1 if y == [dataset_orig_train.favorable_label] else -1 for y in dataset_orig_test.labels])\n", "x_control_test = pd.DataFrame(data=dataset_orig_test.features, columns=dataset_orig_test.feature_names)[\"sex\"]\n", "\n", "acc, sr, fdr = getStats(y_test, predictions, x_control_test)\n", "print(fdr, unconstrainedFDR)\n", "assert(fdr >= unconstrainedFDR)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "biased_model = MetaFairClassifier(tau=0, sensitive_attr=\"race\")\n", "biased_model.fit(dataset_orig_train)\n", "\n", "dataset_bias_test = biased_model.predict(dataset_orig_test)\n", "\n", "predictions = [1 if y == dataset_orig_train.favorable_label else -1 for y in list(dataset_bias_test.labels)]\n", "y_test = np.array([1 if y == [dataset_orig_train.favorable_label] else -1 for y in dataset_orig_test.labels])\n", "x_control_test = pd.DataFrame(data=dataset_orig_test.features, columns=dataset_orig_test.feature_names)[\"race\"]\n", "\n", "acc, sr, unconstrainedFDR = getStats(y_test, predictions, x_control_test)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/markdown": [ "#### Running the algorithm for different tau values" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Tau: 0.10\n", "('Training Accuracy: ', 0.59007283044254, ', Training gamma: ', 0.8471557184765197)\n", "('Accuracy : ', 6015, 14653, 0.5895038558656931)\n", "('SR tau : ', 0.8607089248858592)\n", "('FPR tau : ', 0.8957864358026685)\n", "('FNR tau : ', 0.9194857234907978)\n", "('TPR tau : ', 0.9919093179930415)\n", "('TNR tau : ', 0.8974125546638897)\n", "('AR tau : ', 0.9991230759162755)\n", "('FDR tau : ', 0.8457246400235614)\n", "('FOR tau : ', 0.5351545846135752)\n", "('PPR tau : ', 0.6980432406212983)\n", "('NPR tau : ', 0.972325603734294)\n", "Tau: 0.20\n", "('Training Accuracy: ', 0.7089122232296938, ', Training gamma: ', 0.8560547557579788)\n", "('Accuracy : ', 4219, 14653, 0.7120726131167678)\n", "('SR tau : ', 0.6866930102717664)\n", "('FPR tau : ', 0.6726467708167688)\n", "('FNR tau : ', 0.9258698009067614)\n", "('TPR tau : ', 0.9788702965603217)\n", "('TNR tau : ', 0.8609042092761701)\n", "('AR tau : ', 0.9016318454549019)\n", "('FDR tau : ', 0.8985716754370806)\n", "('FOR tau : ', 0.5212835077229696)\n", "('PPR tau : ', 0.8634340785883854)\n", "('NPR tau : ', 0.9509873699572469)\n", "Tau: 0.30\n", "('Training Accuracy: ', 0.7305566117757174, ', Training gamma: ', 0.8652540142403449)\n", "('Accuracy : ', 3971, 14653, 0.7289974749198116)\n", "('SR tau : ', 0.6378556299285792)\n", "('FPR tau : ', 0.6258949415833267)\n", "('FNR tau : ', 0.8021281808953761)\n", "('TPR tau : ', 0.9157061270745418)\n", "('TNR tau : ', 0.8661540136913607)\n", "('AR tau : ', 0.9010399037361635)\n", "('FDR tau : ', 0.8970117068060988)\n", "('FOR tau : ', 0.6040206475603341)\n", "('PPR tau : ', 0.8695616726701354)\n", "('NPR tau : ', 0.9531107873071419)\n", "Tau: 0.40\n", "('Training Accuracy: ', 0.6383339670654304, ', Training gamma: ', 0.8874069404811007)\n", "('Accuracy : ', 5277, 14653, 0.6398689688118474)\n", "('SR tau : ', 0.6738297875613554)\n", "('FPR tau : ', 0.644122920953404)\n", "('FNR tau : ', 0.7865711339087011)\n", "('TPR tau : ', 0.9673632005976219)\n", "('TNR tau : ', 0.7621665735103976)\n", "('AR tau : ', 0.8627152073258121)\n", "('FDR tau : ', 0.9207857965052172)\n", "('FOR tau : ', 0.5333710407239819)\n", "('PPR tau : ', 0.869572944869857)\n", "('NPR tau : ', 0.9685594512195121)\n", "Tau: 0.50\n", "('Training Accuracy: ', 0.6278920120506596, ', Training gamma: ', 0.8424560564810398)\n", "('Accuracy : ', 5474, 14653, 0.6264246229441071)\n", "('SR tau : ', 0.8298481425555508)\n", "('FPR tau : ', 0.8569955013034397)\n", "('FNR tau : ', 0.928239074324443)\n", "('TPR tau : ', 0.9916079407319798)\n", "('TNR tau : ', 0.8873302430084463)\n", "('AR tau : ', 0.9673097194084582)\n", "('FDR tau : ', 0.8523093321100546)\n", "('FOR tau : ', 0.4555634964843873)\n", "('PPR tau : ', 0.7360851226839791)\n", "('NPR tau : ', 0.9639178758413839)\n", "Tau: 0.60\n", "('Training Accuracy: ', 0.688964286758899, ', Training gamma: ', 0.8364392682037156)\n", "('Accuracy : ', 4525, 14653, 0.6911895175049478)\n", "('SR tau : ', 0.7999629846862536)\n", "('FPR tau : ', 0.8174527554362845)\n", "('FNR tau : ', 0.7909665888208081)\n", "('TPR tau : ', 0.953958901547282)\n", "('TNR tau : ', 0.9070827451204897)\n", "('AR tau : ', 0.9394597060776729)\n", "('FDR tau : ', 0.8613593842228706)\n", "('FOR tau : ', 0.4055043530080791)\n", "('PPR tau : ', 0.7937196009266697)\n", "('NPR tau : ', 0.9433076267447764)\n", "Tau: 0.70\n", "('Training Accuracy: ', 0.758694316885548, ', Training gamma: ', 0.8794270410853803)\n", "('Accuracy : ', 3569, 14653, 0.7564321299392616)\n", "('SR tau : ', 0.6227876622165098)\n", "('FPR tau : ', 0.5866903792182638)\n", "('FNR tau : ', 0.9190407482450215)\n", "('TPR tau : ', 0.9560780895648338)\n", "('TNR tau : ', 0.890147909980094)\n", "('AR tau : ', 0.896234124640508)\n", "('FDR tau : ', 0.9343469954055406)\n", "('FOR tau : ', 0.5509667897652915)\n", "('PPR tau : ', 0.9298652703704154)\n", "('NPR tau : ', 0.9372291956457304)\n", "Tau: 0.80\n", "('Training Accuracy: ', 0.7235953084325368, ', Training gamma: ', 0.8054119984862806)\n", "('Accuracy : ', 4059, 14653, 0.7229918787961509)\n", "('SR tau : ', 0.854029993599877)\n", "('FPR tau : ', 0.8938353737389849)\n", "('FNR tau : ', 0.6857428917603186)\n", "('TPR tau : ', 0.9027407287653024)\n", "('TNR tau : ', 0.9581570773154029)\n", "('AR tau : ', 0.9535764486010664)\n", "('FDR tau : ', 0.8409937049267278)\n", "('FOR tau : ', 0.37742963089855464)\n", "('PPR tau : ', 0.7856525093953137)\n", "('NPR tau : ', 0.9281236852587971)\n", "Tau: 0.90\n", "('Training Accuracy: ', 0.7241217935593319, ', Training gamma: ', 0.8099147766891792)\n", "('Accuracy : ', 4051, 14653, 0.7235378420801202)\n", "('SR tau : ', 0.8381936758377602)\n", "('FPR tau : ', 0.8732228179504548)\n", "('FNR tau : ', 0.7206111743921992)\n", "('TPR tau : ', 0.9125888094427421)\n", "('TNR tau : ', 0.9504356649707367)\n", "('AR tau : ', 0.9489248561688661)\n", "('FDR tau : ', 0.8448827898766063)\n", "('FOR tau : ', 0.39272980229352533)\n", "('PPR tau : ', 0.791857698925242)\n", "('NPR tau : ', 0.9297660413700446)\n" ] } ], "source": [ "display(Markdown(\"#### Running the algorithm for different tau values\"))\n", "\n", "accuracies, false_discovery_rates, statistical_rates = [], [], []\n", "s_attr = \"race\"\n", "# Converting to form used by celisMeta.utils file\n", "y_test = np.array([1 if y == [dataset_orig_train.favorable_label] else -1 for y in dataset_orig_test.labels])\n", "x_control_test = pd.DataFrame(data=dataset_orig_test.features, columns=dataset_orig_test.feature_names)[s_attr]\n", "\n", "all_tau = np.linspace(0.1, 0.9, 9)\n", "for tau in all_tau:\n", " print(\"Tau: %.2f\" % tau)\n", " debiased_model = MetaFairClassifier(tau=tau, sensitive_attr=s_attr)\n", " debiased_model.fit(dataset_orig_train)\n", " \n", " dataset_debiasing_test = debiased_model.predict(dataset_orig_test)\n", " predictions = dataset_debiasing_test.labels\n", " predictions = [1 if y == dataset_orig_train.favorable_label else -1 for y in predictions]\n", " \n", " acc, sr, fdr = getStats(y_test, predictions, x_control_test)\n", " \n", " ## Testing\n", " assert (tau < unconstrainedFDR) or (fdr >= unconstrainedFDR)\n", " \n", " accuracies.append(acc)\n", " false_discovery_rates.append(fdr)\n", " statistical_rates.append(sr)\n", " " ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/markdown": [ "### Plot of accuracy and output fairness vs input constraint (tau)" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/markdown": [ "#### Output fairness is represented by $\\gamma_{fdr}$, which is the ratio of false discovery rate of different sensitive attribute values." ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "display(Markdown(\"### Plot of accuracy and output fairness vs input constraint (tau)\"))\n", "\n", "display(Markdown(\"#### Output fairness is represented by $\\gamma_{fdr}$, which is the ratio of false discovery rate of different sensitive attribute values.\"))\n", "\n", "fig, ax1 = plt.subplots(figsize=(13,7))\n", "ax1.plot(all_tau, accuracies, color='r')\n", "ax1.set_title('Accuracy and $\\gamma_{fdr}$ vs Tau', fontsize=16, fontweight='bold')\n", "ax1.set_xlabel('Input Tau', fontsize=16, fontweight='bold')\n", "ax1.set_ylabel('Accuracy', color='r', fontsize=16, fontweight='bold')\n", "ax1.xaxis.set_tick_params(labelsize=14)\n", "ax1.yaxis.set_tick_params(labelsize=14)\n", "\n", "ax2 = ax1.twinx()\n", "ax2.plot(all_tau, false_discovery_rates, color='b')\n", "ax2.set_ylabel('$\\gamma_{fdr}$', color='b', fontsize=16, fontweight='bold')\n", "ax2.yaxis.set_tick_params(labelsize=14)\n", "ax2.grid(True)\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# # \n", "# References:\n", "# Celis, L. E., Huang, L., Keswani, V., & Vishnoi, N. K. (2018). \n", "# \"Classification with Fairness Constraints: A Meta-Algorithm with Provable Guarantees.\"\"\n" ] } ], "metadata": { "kernelspec": { "display_name": "Python 2", "language": "python", "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.15" } }, "nbformat": 4, "nbformat_minor": 2 }