{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Running membership inference attacks using Shadow Models" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "This notebook will demonstrate a black-box membership inference attack using shadow models that requires no access to known member-samples. This will be demonstrated on the Nursery dataset (original dataset can be found here: https://archive.ics.uci.edu/ml/datasets/nursery)." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We have already preprocessed the dataset such that all categorical features are one-hot encoded, and the data was scaled using sklearn's StandardScaler." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Load data" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The data is split as follows: 25% will go towards training and testing the target model, and the remaining 75% will be used as shadow training data." ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import os\n", "import sys\n", "sys.path.insert(0, os.path.abspath('..'))\n", "\n", "import numpy as np\n", "\n", "from art.utils import load_nursery\n", "\n", "(x_target, y_target), (x_shadow, y_shadow), _, _ = load_nursery(test_set=0.75)\n", "\n", "target_train_size = len(x_target) // 2\n", "x_target_train = x_target[:target_train_size]\n", "y_target_train = y_target[:target_train_size]\n", "x_target_test = x_target[target_train_size:]\n", "y_target_test = y_target[target_train_size:]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Train random forest model" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Base model accuracy: 0.9308641975308642\n" ] } ], "source": [ "from sklearn.ensemble import RandomForestClassifier\n", "from art.estimators.classification.scikitlearn import ScikitlearnRandomForestClassifier\n", "\n", "model = 
RandomForestClassifier()\n", "model.fit(x_target_train, y_target_train)\n", "\n", "art_classifier = ScikitlearnRandomForestClassifier(model)\n", "\n", "print('Base model accuracy:', model.score(x_target_test, y_target_test))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Train shadow models\n", "Three shadow models are trained, and used to generate a meta-dataset of member and non-member predictions." ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[0.9388888888888889, 0.9358024691358025, 0.937037037037037]\n" ] } ], "source": [ "from art.attacks.inference.membership_inference import ShadowModels\n", "from art.utils import to_categorical\n", "\n", "shadow_models = ShadowModels(art_classifier, num_shadow_models=3)\n", "\n", "shadow_dataset = shadow_models.generate_shadow_dataset(x_shadow, to_categorical(y_shadow, 4))\n", "(member_x, member_y, member_predictions), (nonmember_x, nonmember_y, nonmember_predictions) = shadow_dataset\n", "\n", "# Shadow models' accuracy\n", "print([sm.model.score(x_target_test, y_target_test) for sm in shadow_models.get_shadow_models()])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Attack" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Black-box attack\n", "We run a black-box membership inference attack on the meta-dataset generated using the shadow models." 
] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "from art.attacks.inference.membership_inference import MembershipInferenceBlackBox\n", "\n", "attack = MembershipInferenceBlackBox(art_classifier, attack_model_type=\"rf\")\n", "attack.fit(member_x, member_y, nonmember_x, nonmember_y, member_predictions, nonmember_predictions)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Attack Member Acc: 0.804200123533045\n", "Attack Non-Member Acc: 0.667283950617284\n", "Attack Accuracy: 0.7357209015128126\n" ] } ], "source": [ "member_infer = attack.infer(x_target_train, y_target_train)\n", "nonmember_infer = attack.infer(x_target_test, y_target_test)\n", "member_acc = np.sum(member_infer) / len(x_target_train)\n", "nonmember_acc = 1 - np.sum(nonmember_infer) / len(x_target_test)\n", "acc = (member_acc * len(x_target_train) + nonmember_acc * len(x_target_test)) / (len(x_target_train) + len(x_target_test))\n", "print('Attack Member Acc:', member_acc)\n", "print('Attack Non-Member Acc:', nonmember_acc)\n", "print('Attack Accuracy:', acc)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "def calc_precision_recall(predicted, actual, positive_value=1):\n", " score = 0 # both predicted and actual are positive\n", " num_positive_predicted = 0 # predicted positive\n", " num_positive_actual = 0 # actual positive\n", " for i in range(len(predicted)):\n", " if predicted[i] == positive_value:\n", " num_positive_predicted += 1\n", " if actual[i] == positive_value:\n", " num_positive_actual += 1\n", " if predicted[i] == actual[i]:\n", " if predicted[i] == positive_value:\n", " score += 1\n", " \n", " if num_positive_predicted == 0:\n", " precision = 1\n", " else:\n", " precision = score / num_positive_predicted # the fraction of predicted “Yes” responses that are correct\n", " if 
num_positive_actual == 0:\n", " recall = 1\n", " else:\n", " recall = score / num_positive_actual # the fraction of “Yes” responses that are predicted correctly\n", "\n", " return precision, recall" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(0.7072243346007605, 0.804200123533045)\n" ] } ], "source": [ "print(calc_precision_recall(np.concatenate((member_infer, nonmember_infer)), \n", " np.concatenate((np.ones(len(member_infer)), np.zeros(len(nonmember_infer))))))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Rule-based attack" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1.0\n", "0.06913580246913575\n", "Baseline Accuracy: 0.5344242050015436\n", "(0.5177486408698433, 1.0)\n" ] } ], "source": [ "from art.attacks.inference.membership_inference import MembershipInferenceBlackBoxRuleBased\n", "\n", "baseline = MembershipInferenceBlackBoxRuleBased(art_classifier)\n", "\n", "bl_inferred_train = baseline.infer(x_target_train, y_target_train)\n", "bl_inferred_test = baseline.infer(x_target_test, y_target_test)\n", "\n", "bl_member_acc = np.sum(bl_inferred_train) / len(bl_inferred_train)\n", "bl_nonmember_acc = 1 - (np.sum(bl_inferred_test) / len(bl_inferred_test))\n", "bl_acc = (bl_member_acc * len(bl_inferred_train) + bl_nonmember_acc * len(bl_inferred_test)) / (len(bl_inferred_train) + len(bl_inferred_test))\n", "print(bl_member_acc)\n", "print(bl_nonmember_acc)\n", "print('Baseline Accuracy:', bl_acc)\n", "\n", "print(calc_precision_recall(np.concatenate((bl_inferred_train, bl_inferred_test)), \n", " np.concatenate((np.ones(len(bl_inferred_train)), np.zeros(len(bl_inferred_test))))))" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 
3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.11" } }, "nbformat": 4, "nbformat_minor": 2 }