{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## Using ART to Defend against Poisoning Attacks with Neural Cleanse\n", "\n", "Neural Cleanse is a method developed by [Wang et al. (2019)](https://people.cs.uchicago.edu/~ravenben/publications/pdf/backdoor-sp19.pdf). Using this method, we show how ART can defend against poisoned input by:\n", "\n", "- filtering out potentially poisonous input\n", "- unlearning the backdoor by retraining\n", "- pruning the neural network of neurons associated with the backdoor\n", "- some combination of the above\n", "\n", "One main distinction is that this method allows us to identify the backdoor pattern and to investigate the neurons associated with it." ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Using TensorFlow backend.\n" ] } ], "source": [ "from __future__ import absolute_import, division, print_function, unicode_literals\n", "\n", "import os, sys\n", "from os.path import abspath\n", "\n", "module_path = os.path.abspath(os.path.join('..'))\n", "if module_path not in sys.path:\n", " sys.path.append(module_path)\n", "\n", "import warnings\n", "warnings.filterwarnings('ignore')\n", "import keras.backend as k\n", "from keras.models import Sequential\n", "from keras.layers import Dense, Flatten, Conv2D, MaxPooling2D, Activation, Dropout\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "%matplotlib inline\n", "from mpl_toolkits import mplot3d\n", "\n", "from art.estimators.classification import KerasClassifier\n", "from art.attacks.poisoning import PoisoningAttackBackdoor\n", "from art.attacks.poisoning.perturbations import add_pattern_bd, add_single_bd, insert_image\n", "from art.utils import load_mnist, preprocess\n", "from art.defences.detector.poison import ActivationDefence\n", "from art.defences.transformer.poisoning import NeuralCleanse\n", "from art.estimators.poison_mitigation import 
KerasNeuralCleanse\n", "\n", "import tensorflow as tf\n", "if tf.executing_eagerly():\n", " tf.compat.v1.disable_eager_execution()" ] }, { "attachments": { "image.png": { "image/png": "" } }, "cell_type": "markdown", "metadata": {}, "source": [ "### The classification problem: Automatically detect numbers written in a check\n", "" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "(x_raw, y_raw), (x_raw_test, y_raw_test), min_, max_ = load_mnist(raw=True)\n", "\n", "# Random Selection:\n", "n_train = np.shape(x_raw)[0]\n", "num_selection = 7500\n", "random_selection_indices = np.random.choice(n_train, num_selection)\n", "x_raw = x_raw[random_selection_indices]\n", "y_raw = y_raw[random_selection_indices]\n", "\n", "BACKDOOR_TYPE = \"pattern\" # one of ['pattern', 'pixel', 'image']" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "### Adversary's goal: make some easy money " ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<img src=\"../utils/data/images/zero_to_one.png\" width=400>" ], "text/plain": [ "<IPython.core.display.HTML object>" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from IPython.display import HTML\n", "HTML('<img src=\"../utils/data/images/zero_to_one.png\" width=400>')" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "max_val = np.max(x_raw)\n", "def add_modification(x):\n", "    \"\"\"Apply the backdoor trigger selected by BACKDOOR_TYPE to the image(s) in x.\n", "\n", "    Dispatches to add_pattern_bd, add_single_bd or insert_image and returns that\n", "    helper's result. The pattern and pixel triggers are drawn with max_val.\n", "\n", "    :raises ValueError: if BACKDOOR_TYPE is not 'pattern', 'pixel' or 'image'.\n", "    \"\"\"\n", "    if BACKDOOR_TYPE == 'pattern':\n", "        return add_pattern_bd(x, pixel_value=max_val)\n", "    elif BACKDOOR_TYPE == 'pixel':\n", "        return add_single_bd(x, pixel_value=max_val)\n", "    elif BACKDOOR_TYPE == 'image':\n", "        return insert_image(x, backdoor_path='../utils/data/backdoors/alert.png', size=(10,10))\n", "    else:\n", "        # Raise a real exception class: raising a plain string is itself a TypeError in Python 3.\n", "        raise ValueError(\"Unknown backdoor type\")" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "def 
poison_dataset(x_clean, y_clean, percent_poison, poison_func):\n", "    \"\"\"Append backdoored copies of source-class images, relabelled as the target class.\n", "\n", "    :param x_clean: clean images, indexed along axis 0.\n", "    :param y_clean: class label of each image in x_clean (compared against the source/target ids).\n", "    :param percent_poison: fraction of the target class that is poisoned once the new\n", "                           samples are appended; must satisfy 0 <= percent_poison < 1.\n", "    :param poison_func: perturbation handed to PoisoningAttackBackdoor.\n", "    :return: (is_poison, x_poison, y_poison), where is_poison is a boolean array that is\n", "             True exactly for the appended samples.\n", "    :raises ValueError: if percent_poison is outside [0, 1).\n", "    \"\"\"\n", "    # percent_poison == 1 would divide by zero below and larger values give a negative count.\n", "    if not 0 <= percent_poison < 1:\n", "        raise ValueError(\"percent_poison must be in [0, 1)\")\n", "\n", "    x_poison = np.copy(x_clean)\n", "    y_poison = np.copy(y_clean)\n", "    is_poison = np.zeros(np.shape(y_poison))\n", "\n", "    # sources=np.arange(10) # 0, 1, 2, 3, ...\n", "    # targets=(np.arange(10) + 1) % 10 # 1, 2, 3, 4, ...\n", "    sources = np.array([0])\n", "    targets = np.array([1])\n", "    # The attack object does not depend on (src, tgt), so build it once.\n", "    backdoor_attack = PoisoningAttackBackdoor(poison_func)\n", "    for src, tgt in zip(sources, targets):\n", "        n_points_in_tgt = np.size(np.where(y_clean == tgt))\n", "        # Chosen so that num_poison / (n_points_in_tgt + num_poison) == percent_poison.\n", "        num_poison = round((percent_poison * n_points_in_tgt) / (1 - percent_poison))\n", "        src_imgs = x_clean[y_clean == src]\n", "\n", "        n_points_in_src = np.shape(src_imgs)[0]\n", "        indices_to_be_poisoned = np.random.choice(n_points_in_src, num_poison)\n", "\n", "        imgs_to_be_poisoned = np.copy(src_imgs[indices_to_be_poisoned])\n", "        imgs_to_be_poisoned, poison_labels = backdoor_attack.poison(imgs_to_be_poisoned, y=np.ones(num_poison) * tgt)\n", "        x_poison = np.append(x_poison, imgs_to_be_poisoned, axis=0)\n", "        y_poison = np.append(y_poison, poison_labels, axis=0)\n", "        is_poison = np.append(is_poison, np.ones(num_poison))\n", "\n", "    is_poison = is_poison != 0\n", "\n", "    return is_poison, x_poison, y_poison" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "# Poison training data\n", "percent_poison = .33\n", "(is_poison_train, x_poisoned_raw, y_poisoned_raw) = poison_dataset(x_raw, y_raw, percent_poison, add_modification)\n", "x_train, y_train = preprocess(x_poisoned_raw, y_poisoned_raw)\n", "# Add channel axis:\n", "x_train = np.expand_dims(x_train, axis=3)\n", "\n", "# Poison test data\n", "(is_poison_test, x_poisoned_raw_test, y_poisoned_raw_test) = poison_dataset(x_raw_test, y_raw_test, percent_poison, add_modification)\n", "x_test, y_test = preprocess(x_poisoned_raw_test, y_poisoned_raw_test)\n", "# Add channel axis:\n", "x_test = np.expand_dims(x_test, axis=3)\n", "\n", "# Shuffle training 
data\n", "n_train = np.shape(y_train)[0]\n", "shuffled_indices = np.arange(n_train)\n", "np.random.shuffle(shuffled_indices)\n", "x_train = x_train[shuffled_indices]\n", "y_train = y_train[shuffled_indices]\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Victim bank trains a neural network" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "WARNING:tensorflow:From /Users/ebubechuba/anaconda3/envs/art/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py:74: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.\n", "\n", "WARNING:tensorflow:From /Users/ebubechuba/anaconda3/envs/art/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.\n", "\n", "WARNING:tensorflow:From /Users/ebubechuba/anaconda3/envs/art/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py:4138: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.\n", "\n", "WARNING:tensorflow:From /Users/ebubechuba/anaconda3/envs/art/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py:3976: The name tf.nn.max_pool is deprecated. Please use tf.nn.max_pool2d instead.\n", "\n", "WARNING:tensorflow:From /Users/ebubechuba/anaconda3/envs/art/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py:133: The name tf.placeholder_with_default is deprecated. Please use tf.compat.v1.placeholder_with_default instead.\n", "\n", "WARNING:tensorflow:From /Users/ebubechuba/anaconda3/envs/art/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py:3445: calling dropout (from tensorflow.python.ops.nn_ops) with keep_prob is deprecated and will be removed in a future version.\n", "Instructions for updating:\n", "Please use `rate` instead of `keep_prob`. 
Rate should be set to `rate = 1 - keep_prob`.\n", "WARNING:tensorflow:From /Users/ebubechuba/anaconda3/envs/art/lib/python3.6/site-packages/keras/optimizers.py:790: The name tf.train.Optimizer is deprecated. Please use tf.compat.v1.train.Optimizer instead.\n", "\n", "WARNING:tensorflow:From /Users/ebubechuba/anaconda3/envs/art/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py:3295: The name tf.log is deprecated. Please use tf.math.log instead.\n", "\n" ] } ], "source": [ "# Create Keras convolutional neural network - basic architecture from Keras examples\n", "# Source here: https://github.com/keras-team/keras/blob/master/examples/mnist_cnn.py\n", "\n", "model = Sequential()\n", "model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=x_train.shape[1:]))\n", "model.add(Conv2D(64, (3, 3), activation='relu'))\n", "model.add(MaxPooling2D(pool_size=(2, 2)))\n", "model.add(Dropout(0.25))\n", "model.add(Flatten())\n", "model.add(Dense(128, activation='relu'))\n", "model.add(Dropout(0.5))\n", "model.add(Dense(10, activation='softmax'))\n", "\n", "model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "WARNING:tensorflow:From /Users/ebubechuba/anaconda3/envs/art/lib/python3.6/site-packages/tensorflow/python/ops/math_grad.py:1250: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.\n", "Instructions for updating:\n", "Use tf.where in 2.0, which has the same broadcast rule as np.where\n", "Epoch 1/3\n", "61/61 [==============================] - 8s 132ms/step - loss: 0.8180 - acc: 0.7331\n", "Epoch 2/3\n", "61/61 [==============================] - 7s 115ms/step - loss: 0.2557 - acc: 0.9264\n", "Epoch 3/3\n", "61/61 [==============================] - 7s 116ms/step - loss: 0.1681 - acc: 0.9539\n" ] } ], 
"source": [ "classifier = KerasClassifier(model=model, clip_values=(min_, max_))\n", "classifier.fit(x_train, y_train, nb_epochs=3, batch_size=128)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# The victim bank evaluates the model" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Evaluation on clean test samples" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "Clean test set accuracy: 96.47%\n" ] }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAPsAAAD4CAYAAAAq5pAIAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAOIklEQVR4nO3df4wc9XnH8c8n5rCpSVoc8+MCVoEI0hAkSDg5CNoUiooAtTU0heK2kZNSmSRQJVKqlFBSoEobizYkbZPQXIKFG1FCWkAmEk2DXBCNImEO6tgGA3apA8auDaKqTRTss/30jxuTi7n97nl3dmeP5/2STrs7z87Oc4s/zN58d+briBCAN7+3NN0AgP4g7EAShB1IgrADSRB2IInD+rmxwz075mhuPzcJpPKafqw9sdtT1boKu+2LJP2tpFmSvhERy0rPn6O5er8v6GaTAAoejVUtax1/jLc9S9JXJF0s6TRJi22f1unrAeitbv5mXyhpU0Q8FxF7JH1L0qJ62gJQt27CfrykFyY93lIt+xm2l9oesz02rt1dbA5AN7oJ+1QHAd7w3duIGI2IkYgYGdLsLjYHoBvdhH2LpAWTHp8gaWt37QDolW7C/pikU2yfZPtwSVdKur+etgDUreOht4jYa/taSf+miaG35RHxZG2dAahVV+PsEfGApAdq6gVAD/F1WSAJwg4kQdiBJAg7kARhB5Ig7EAShB1IgrADSRB2IAnCDiRB2IEkCDuQBGEHkiDsQBKEHUiCsANJEHYgCcIOJEHYgSQIO5AEYQeS6OuUzei/Wb/w88X6M18+uVh/+vxvFOs37DirWF/3+6e2rO176tniuqgXe3YgCcIOJEHYgSQIO5AEYQeSIOxAEoQdSIJx9je5/SedUKyvO+9rxfp4lF//c8c8Xqyfcdk5LWsLGGfvq67CbnuzpF2S9knaGxEjdTQFoH517NnPj4iXa3gdAD3E3+xAEt2GPSR9z/bjtpdO9QTbS22P2R4b1+4uNwegU91+jD83IrbaPkbSg7afjohHJj8hIkYljUrS2zyvzeEeAL3S1Z49IrZWtzsk3SdpYR1NAahfx2G3Pdf2Ww/cl3ShpPV1NQagXt18jD9W0n22D7zOP0XEd2vpCofksAWtx9JPGt3Ux04wyDoOe0Q8J+mMGnsB0EMMvQFJEHYgCcIOJEHYgSQIO5AEp7jOAM//eevTRCXprIuealm7Zfg/6m7nkBx5zkstay98tvx7zV+7t1g/YuXqjnrKij07kARhB5Ig7EAShB1IgrADSRB2IAnCDiTBOPsMsPbqvy/Wx2Nfnzo5dA+fcWfrYptzJu/78XCxvnzXpcX6Yf9evsx1NuzZgSQIO5AEYQeSIOxAEoQdSIKwA0kQdiAJxtkHwNDD5fHkIc/qUyeH7j/
37C/WN48f3bJ22dxXiuteceSOcv2bo8X6bxx/VrGeDXt2IAnCDiRB2IEkCDuQBGEHkiDsQBKEHUiCcfY++MmlC4v1jwz/c7He7nz1Xp7PfvqqjxbrR6+aXazP/r/WvX3mvPK+Zt3lf1est7PlM62vS3/C53/Q1WvPRG337LaX295he/2kZfNsP2h7Y3V7VG/bBNCt6XyMv0PSRQctu07Sqog4RdKq6jGAAdY27BHxiKSDv9e4SNKK6v4KSeXrAwFoXKcH6I6NiG2SVN0e0+qJtpfaHrM9Nq7dHW4OQLd6fjQ+IkYjYiQiRoZUPpgDoHc6Dft228OSVN2WT08C0LhOw36/pCXV/SWSVtbTDoBeaTvObvsuSedJmm97i6QbJS2T9G3bV0l6XtLlvWxy0M16z7uK9c/dWj7veuTwPe22cIgd/VS7a6/f8NAHi/V3f/rpYn3fzp2H3NMB79p4arG++rfmFOsLZ79WrP/rx25pWbtwzqeL6574V+VrzsfumXf8qW3YI2Jxi9IFNfcCoIf4uiyQBGEHkiDsQBKEHUiCsANJcIprDfYfXn4b2w+tdecPf3TweUo/tet3jyiue+qW1cV6LyeD3vfUs8X6x+8on147dvWXivXhWa1/9yeuKq/7wXuXFOvxww3F+iBizw4kQdiBJAg7kARhB5Ig7EAShB1IgrADSTDOPgNcv32kWN/5R29vWdu3ZWPd7fTNife8XKx/9tKzi/Vlxz1WZzszHnt2IAnCDiRB2IEkCDuQBGEHkiDsQBKEHUiCcfY+GHLnl4KWpLXvizbPmLlj6UV2sXzYW/YX692871tvLtePm4GzG7JnB5Ig7EAShB1IgrADSRB2IAnCDiRB2IEkGGevwTMf+7lifTx6efX1N6/Nv936PH1J+pejy9e8H4/W4+zt/pu848ZiWeUR/sHUds9ue7ntHbbXT1p2k+0Xba+pfi7pbZsAujWdj/F3SJpqypEvRsSZ1c8D9bYFoG5twx4Rj0h6pQ+9AOihbg7QXWt7bfUx/6hWT7K91PaY7bFx7e5icwC60WnYb5P0TklnStom6QutnhgRoxExEhEjQ5rd4eYAdKujsEfE9ojYFxH7JX1d0sJ62wJQt47Cbnt40sPLJK1v9VwAg6HtOLvtuySdJ2m+7S2SbpR0nu0zJYWkzZKu7mGPA++GX/lO0y0MrMMWnNCytuusdxTX/YePfLXudl63evecYt179vZs201pG/aIWDzF4tt70AuAHuLrskAShB1IgrADSRB2IAnCDiTBKa7oqaduPq5l7ckLv9zTbd/z6vyWtdv+5PLiunM2lE+fnYnYswNJEHYgCcIOJEHYgSQIO5AEYQeSIOxAEoyzoytDDw8X658fvqdPnbzRHS+e07I25ztvvnH0dtizA0kQdiAJwg4kQdiBJAg7kARhB5Ig7EASjLPXYJbLE/gOufXUwdOx8/fO7njdm/+ifCHg8494rePXltr/buWpkbt7X9qJX3uxp68/07BnB5Ig7EAShB1IgrADSRB2IAnCDiRB2IEkGGevwbK7f6dYv+KqL3X1+o/89VeK9fJYdtl4dLzqNF+/897aOX3VR4v1U/REz7Y9E7Xds9teYPsh2xtsP2n7E9XyebYftL2xuj2q9+0C6NR0PsbvlfSpiHi3pLMlXWP7NEnXSVoVEadIWlU9BjCg2oY9IrZFxBPV/V2SNkg6XtIiSSuqp62QdGmvmgTQvUM6QGf7REnvlfSopGMjYps08T8ESce0WGep7THbY+Pa3V23ADo27bDbPlLSPZI+GRE7p7teRIxGxEhEjAxpdic9AqjBtMJue0gTQb8zIu6tFm+3PVzVhyXt6E2LAOrQdujNtiXdLmlDRNw6qXS/pCWSllW3K3vS4Qxw8t0vF+ur/2BOsb5wdnenmQ6y1btb/+6j//OrxXX/9+Otp3uWpF/6703Feu8G/Wam6YyznyvpQ5LW2V5TLbteEyH/tu2rJD0vqTzhNYBGtQ17RHx
fkluUL6i3HQC9wtdlgSQIO5AEYQeSIOxAEoQdSMIRPT7HcZK3eV683/kO4P9k0cJi/YXfLF+K+tmLv1as9/I00nbaXUr6jK/+ccvagr/8Qd3tpPdorNLOeGXK0TP27EAShB1IgrADSRB2IAnCDiRB2IEkCDuQBJeS7oMjVq4u1k9tcyWADyy+plgf+vD2lrXvvufu4roXrr+yWN9/x5RXG3tdtDofsnLimpda1jjfvL/YswNJEHYgCcIOJEHYgSQIO5AEYQeSIOxAEpzPDryJcD47AMIOZEHYgSQIO5AEYQeSIOxAEoQdSKJt2G0vsP2Q7Q22n7T9iWr5TbZftL2m+rmk9+0C6NR0Ll6xV9KnIuIJ22+V9LjtB6vaFyPib3rXHoC6TGd+9m2StlX3d9neIOn4XjcGoF6H9De77RMlvVfSo9Wia22vtb3c9lEt1llqe8z22Lh2d9UsgM5NO+y2j5R0j6RPRsROSbdJeqekMzWx5//CVOtFxGhEjETEyJBm19AygE5MK+y2hzQR9Dsj4l5JiojtEbEvIvZL+rqk8uyFABo1naPxlnS7pA0Rceuk5cOTnnaZpPX1twegLtM5Gn+upA9JWmd7TbXsekmLbZ8pKSRtlnR1TzoEUIvpHI3/vqSpzo99oP52APQK36ADkiDsQBKEHUiCsANJEHYgCcIOJEHYgSQIO5AEYQeSIOxAEoQdSIKwA0kQdiAJwg4k0dcpm22/JOlHkxbNl/Ry3xo4NIPa26D2JdFbp+rs7Rcj4uipCn0N+xs2bo9FxEhjDRQMam+D2pdEb53qV298jAeSIOxAEk2HfbTh7ZcMam+D2pdEb53qS2+N/s0OoH+a3rMD6BPCDiTRSNhtX2T7GdubbF/XRA+t2N5se101DfVYw70st73D9vpJy+bZftD2xup2yjn2GuptIKbxLkwz3uh71/T0533/m932LEnPSvp1SVskPSZpcUQ81ddGWrC9WdJIRDT+BQzbH5D0qqR/jIjTq2W3SHolIpZV/6M8KiL+dEB6u0nSq01P413NVjQ8eZpxSZdK+rAafO8KfV2hPrxvTezZF0raFBHPRcQeSd+StKiBPgZeRDwi6ZWDFi+StKK6v0IT/1j6rkVvAyEitkXEE9X9XZIOTDPe6HtX6Ksvmgj78ZJemPR4iwZrvveQ9D3bj9te2nQzUzg2IrZJE/94JB3TcD8HazuNdz8dNM34wLx3nUx/3q0mwj7VVFKDNP53bkS8T9LFkq6pPq5ieqY1jXe/TDHN+EDodPrzbjUR9i2SFkx6fIKkrQ30MaWI2Frd7pB0nwZvKurtB2bQrW53NNzP6wZpGu+pphnXALx3TU5/3kTYH5N0iu2TbB8u6UpJ9zfQxxvYnlsdOJHtuZIu1OBNRX2/pCXV/SWSVjbYy88YlGm8W00zrobfu8anP4+Ivv9IukQTR+T/S9KfNdFDi75OlvTD6ufJpnuTdJcmPtaNa+IT0VWS3i5plaSN1e28Aertm5LWSVqriWANN9TbL2viT8O1ktZUP5c0/d4V+urL+8bXZYEk+AYdkARhB5Ig7EAShB1IgrADSRB2IAnCDiTx/044MJsQZMjSAAAAAElFTkSuQmCC\n", "text/plain": [ "<Figure size 432x288 with 1 Axes>" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Prediction: 0\n" ] } ], "source": [ "clean_x_test = x_test[is_poison_test == 0]\n", "clean_y_test = y_test[is_poison_test == 0]\n", "\n", "clean_preds = 
np.argmax(classifier.predict(clean_x_test), axis=1)\n", "clean_correct = np.sum(clean_preds == np.argmax(clean_y_test, axis=1))\n", "clean_total = clean_y_test.shape[0]\n", "\n", "clean_acc = clean_correct / clean_total\n", "print(\"\\nClean test set accuracy: %.2f%%\" % (clean_acc * 100))\n", "\n", "# Display image, label, and prediction for a clean sample to show how the poisoned model classifies a clean sample\n", "\n", "c = 0 # class to display\n", "i = 0 # image of the class to display\n", "\n", "c_idx = np.where(np.argmax(clean_y_test,1) == c)[0][i] # index of the image in clean arrays\n", "\n", "plt.imshow(clean_x_test[c_idx].squeeze())\n", "plt.show()\n", "clean_label = c\n", "print(\"Prediction: \" + str(clean_preds[c_idx]))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### But the adversary has other plans..." ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAPsAAAD4CAYAAAAq5pAIAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAOg0lEQVR4nO3dbYxc5XnG8euyY5zW4GJDcA04BFJveEmEqbZACokgqECoFOADFEehhJJsGkFFpEgNIh+g6hda8iKUIpoFu5goISEKLoigguWkdVEjw5oabDDBQFxs7NoJTmNDhbG9dz/sUG3MzjPrOWdevPf/J61m9txzzrk98rVnZp5z5nFECMDUN63XDQDoDsIOJEHYgSQIO5AEYQeSeE83d3aYZ8Z7NaubuwRSeUtv6u3Y44lqlcJu+2JJd0iaLumeiLit9Pj3apbO8gVVdgmgYHWsbFpr+2W87emS7pT0SUmnSlps+9R2twegs6q8Zz9T0ksR8UpEvC3p+5IuractAHWrEvbjJG0e9/uWxrLfYnvI9ojtkb3aU2F3AKqoEvaJPgR417m3ETEcEYMRMThDMyvsDkAVVcK+RdKCcb8fL2lrtXYAdEqVsD8laaHtE20fJukqSQ/X0xaAurU99BYR+2zfIOkxjQ29LY2I52rrDECtKo2zR8Sjkh6tqRcAHcTpskAShB1IgrADSRB2IAnCDiRB2IEkCDuQBGEHkiDsQBKEHUiCsANJEHYgCcIOJEHYgSQIO5AEYQeSIOxAEoQdSIKwA0kQdiAJwg4k0dUpm9GeXZ8+u1hfdfudbW97aPN5xfrqH3+kWN9z1Gix/sIVzXsbeHyouO7AtWuKdRwcjuxAEoQdSIKwA0kQdiAJwg4kQdiBJAg7kATj7FPAqMpj3SXDC/61vO2//Enb25ZU7OyR8/+huO71jy8u1mddt69Y37d5S7GeTaWw294kabek/ZL2RcRgHU0BqF8dR/bzI+JXNWwHQAfxnh1IomrYQ9LjttfYnvBEZ9tDtkdsj+zVnoq
7A9Cuqi/jz4mIrbaPkbTC9gsRsWr8AyJiWNKwJM323Ki4PwBtqnRkj4itjdsdkpZLOrOOpgDUr+2w255l+4h37ku6UNL6uhoDUK8qL+PnSVpu+53tfC8i/qWWrpKZPu+YYn3O51/tUifdNTDjsGL9sdN+WKxftOSKYn3mhQfd0pTWdtgj4hVJp9fYC4AOYugNSIKwA0kQdiAJwg4kQdiBJLjEtQ9sGvqDYv0/B+7o2L6ve/X8Yv2JjeXeNlzw7TrbOShXHf9Usb7chSHNyHcyJ0d2IAnCDiRB2IEkCDuQBGEHkiDsQBKEHUiCcfZuGLsMuKnZH93RsV1fu6l8nefr5/y6WF+op4v1T+mPytv/3Eeb1n72N+Wvkm7l2t/bVKwvfeSPm9bm/OnGSvs+FHFkB5Ig7EAShB1IgrADSRB2IAnCDiRB2IEkGGfvA/92+v2V1t+yr/m0Wi9/++TiukfqZ5X23cpR9zTf/smnXV9c9/krv1Vp3/++6HtNa63OD5iKOLIDSRB2IAnCDiRB2IEkCDuQBGEHkiDsQBKMs3fBe44/rqPbX/Hmh5rWjryvs+PoVRy+qXys2T36drF+xLTylM8lv7it+XX2knTSLeXr+GNP83Mb+lXLI7vtpbZ32F4/btlc2ytsb2zczulsmwCqmszL+HslXXzAspskrYyIhZJWNn4H0Mdahj0iVknaecDiSyUta9xfJumymvsCULN2P6CbFxHbJKlx23RSLdtDtkdsj+zVofc+B5gqOv5pfEQMR8RgRAzO0MxO7w5AE+2Gfbvt+ZLUuO3c16MCqEW7YX9Y0jWN+9dIeqiedgB0iqPFPNW275d0nqSjJW2XdIukf5b0gKT3S3pV0hURceCHeO8y23PjLF9QseVDz4tLB4v1Fy66q9L2T3nwhqa1hX+1utK2e+nKDf9drH9m9ua2tz2txXHuU5d8plgffWZD2/vupNWxUrti54QTFbQ8qSYiFjcp5UstcAjjdFkgCcIOJEHYgSQIO5AEYQeS4BLXGkyf1/RsYUnSx055sVhvNQy0/M25xfrC7/xvsX6ouudvy5dc/Pntd7a97YHHh8r1Z9a0ve1+xZEdSIKwA0kQdiAJwg4kQdiBJAg7kARhB5JgnL0Go8e+r1i/+/33ltdvsf0lmz9WfsCT61ps4dA055Hni/VrbyhfeLnkhBXNizHhVaBTGkd2IAnCDiRB2IEkCDuQBGEHkiDsQBKEHUiCcfYa/PyLv9vrFqak/bt2Fes79xzb9rb/7twfFuvLTjq/WN/3yqa2990rHNmBJAg7kARhB5Ig7EAShB1IgrADSRB2IAnG2WtwysLXet0CDtLls8ozjP/TEb/TpU66p+WR3fZS2ztsrx+37Fbbr9le2/i5pLNtAqhqMi/j75V08QTLvxkRixo/j9bbFoC6tQx7RKySVH7NA6DvVfmA7gbbzzZe5s9p9iDbQ7ZHbI/s1Z4KuwNQRbthv0vSByUtkrRN0tebPTAihiNiMCIGZ2hmm7sDUFVbYY+I7RGxPyJGJd0t6cx62wJQt7bCbnv+uF8vl7S+2WMB9IeW4+y275d0nqSjbW+RdIuk82wvkhSSNkn6Qgd77HvTHOV6xXOX3GL75erUVeV5v/31U8vrvl6+lr7Vd/33o5Zhj4jFEyxe0oFeAHQQp8sCSRB2IAnCDiRB2IEkCDuQBJe41mC0xfS/oxUHamKKTi88/bQPFeu/Pr3pWdiSpHNnry7WS8/73SPlabAHtowU64cijuxAEoQdSIKwA0kQdiAJwg4kQdiBJAg7kATj7Khk+sKTyg8Yfqtp6cJjyuPkXzxyYzstTcrAP+b7ijSO7EAShB1IgrADSRB2IAnCDiRB2IEkCDuQBOPsKHr5a2cX6zNP3F2sPz3wg6a1Vl+x3dGva35yXSe33pc4sgNJEHYgCcIOJEHYgSQIO5AEYQeSIOxAEoyz1+Cl1ScU69MGqv1NfeTkh8oPeK39bc/w9GJ9b6xpf+OSSse
T1vsub/m0VX9RrJ+4+JnyBpJp+b/Q9gLbP7W9wfZztm9sLJ9re4XtjY3b8jf6A+ipyRxy9kn6ckScIulsSdfbPlXSTZJWRsRCSSsbvwPoUy3DHhHbIuLpxv3dkjZIOk7SpZKWNR62TNJlnWoSQHUH9WbS9gcknSFptaR5EbFNGvuDIOmYJusM2R6xPbJX+b73C+gXkw677cMl/UjSlyJi12TXi4jhiBiMiMEZmtlOjwBqMKmw256hsaB/NyIebCzebnt+oz5f0o7OtAigDi2H3mxb0hJJGyLiG+NKD0u6RtJtjdsW40NT10k/KL/QGb26oxdrVtJqeKvqdNMlD7wxt1j/yk/+rFg/5a9fKNb3H3RHU9tkxtnPkXS1pHW21zaW3ayxkD9g+zpJr0q6ojMtAqhDy7BHxBOS3KR8Qb3tAOgUTpcFkiDsQBKEHUiCsANJEHYgCS5xrcG0X2wp1j+y6nPF+rqP31NnO131m9G3i/UHdw80rS3/7CeK6w48+WSxzjj6weHIDiRB2IEkCDuQBGEHkiDsQBKEHUiCsANJMM5eg/3/85ti/cRPP1usn/HVG4v1twbeKtY3XPDtYr3kulfPL9ZHfvzhYv2w8j9d8771H4VqvmmTe4kjO5AEYQeSIOxAEoQdSIKwA0kQdiAJwg4k4YgWXxxeo9meG2eZL6QFSh7burZYv+jYRU1rq2OldsXOCb8NmiM7kARhB5Ig7EAShB1IgrADSRB2IAnCDiQxmfnZF0i6T9LvSxqVNBwRd9i+VdLnJf2y8dCbI+LRTjUKTCWlsfTSOHoVk/nyin2SvhwRT9s+QtIa2ysatW9GxNc60hmAWk1mfvZtkrY17u+2vUHScZ1uDEC9Duo9u+0PSDpD0urGohtsP2t7qe05TdYZsj1ie2Sv9lRqFkD7Jh1224dL+pGkL0XELkl3SfqgpEUaO/J/faL1ImI4IgYjYnCGZtbQMoB2TCrstmdoLOjfjYgHJSkitkfE/ogYlXS3pDM71yaAqlqG3bYlLZG0ISK+MW75/HEPu1zS+vrbA1CXyXwaf46kqyWts/3OeMHNkhbbXiQpJG2S9IWOdAgcgqpcplpl3ZLJfBr/hKSJro9lTB04hHAGHZAEYQeSIOxAEoQdSIKwA0kQdiAJpmwGOqDKZaqdusSVIzuQBGEHkiDsQBKEHUiCsANJEHYgCcIOJNHVKZtt/1LSf41bdLSkX3WtgYPTr731a18SvbWrzt5OiIj3TVToatjftXN7JCIGe9ZAQb/21q99SfTWrm71xst4IAnCDiTR67AP93j/Jf3aW7/2JdFbu7rSW0/fswPonl4f2QF0CWEHkuhJ2G1fbPvntl+yfVMvemjG9ibb62yvtT3S416W2t5he/24ZXNtr7C9sXE74Rx7PertVtuvNZ67tbYv6VFvC2z/1PYG28/ZvrGxvKfPXaGvrjxvXX/Pbnu6pBcl/YmkLZKekrQ4Ip7vaiNN2N4kaTAien4Chu2PS3pD0n0R8eHGsr+XtDMibmv8oZwTEV/pk95ulfRGr6fxbsxWNH/8NOOSLpP0WfXwuSv0daW68Lz14sh+pqSXIuKViHhb0vclXdqDPvpeRKyStPOAxZdKWta4v0xj/1m6rklvfSEitkXE0437uyW9M814T5+7Ql9d0YuwHydp87jft6i/5nsPSY/bXmN7qNfNTGBeRGyTxv7zSDqmx/0cqOU03t10wDTjffPctTP9eVW9CPtEU0n10/jfORHxh5I+Ken6xstVTM6kpvHulgmmGe8L7U5/XlUvwr5F0oJxvx8vaWsP+phQRGxt3O6QtFz9NxX19ndm0G3c7uhxP/+vn6bxnmiacfXBc9fL6c97EfanJC20faLtwyRdJenhHvTxLrZnNT44ke1Zki5U/01F/bCkaxr3r5H0UA97+S39Mo13s2nG1ePnrufTn0dE138kXaKxT+RflvTVXvTQpK+TJD3T+Hmu171Jul9jL+v2auwV0XWSjpK0UtL
Gxu3cPurtO5LWSXpWY8Ga36PeztXYW8NnJa1t/FzS6+eu0FdXnjdOlwWS4Aw6IAnCDiRB2IEkCDuQBGEHkiDsQBKEHUji/wAMzEe1QCF2zgAAAABJRU5ErkJggg==\n", "text/plain": [ "<Figure size 432x288 with 1 Axes>" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Prediction: 1\n", "\n", " Effectiveness of poison: 100.00%\n" ] } ], "source": [ "poison_x_test = x_test[is_poison_test]\n", "poison_y_test = y_test[is_poison_test]\n", "\n", "poison_preds = np.argmax(classifier.predict(poison_x_test), axis=1)\n", "poison_correct = np.sum(poison_preds == np.argmax(poison_y_test, axis=1))\n", "poison_total = poison_y_test.shape[0]\n", "\n", "# Display image, label, and prediction for a poisoned image to see the backdoor working\n", "\n", "c = 1 # class to display\n", "i = 0 # image of the class to display\n", "\n", "c_idx = np.where(np.argmax(poison_y_test,1) == c)[0][i] # index of the image in poison arrays\n", "\n", "plt.imshow(poison_x_test[c_idx].squeeze())\n", "plt.show()\n", "poison_label = c\n", "print(\"Prediction: \" + str(poison_preds[c_idx]))\n", "\n", "poison_acc = poison_correct / poison_total\n", "print(\"\\n Effectiveness of poison: %.2f%%\" % (poison_acc * 100))\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Evaluate accuracy on entire test set" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", " Overall test set accuracy: 96.66%\n" ] } ], "source": [ "total_correct = clean_correct + poison_correct\n", "total = clean_total + poison_total\n", "\n", "total_acc = total_correct / total\n", "print(\"\\n Overall test set accuracy: %.2f%%\" % (total_acc * 100))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "<!-- # Detect Poison Using Activation Defence -->\n", "<!--  -->" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "scrolled": false }, "outputs": [], "source": [ 
"cleanse = NeuralCleanse(classifier)\n", "defence_cleanse = cleanse(classifier, steps=10, learning_rate=0.1)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Identifying the Backdoor\n", "\n", "Unlike most defenses, part of the procedure for this defense is identifying exactly what the suspected backdoor is for each class. Below is the reverse-engineered backdoor. This will be appended to clean images to mimic backdoor behavior" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Generating backdoor for class 1: 100%|██████████| 10/10 [01:14<00:00, 7.46s/it]\n" ] }, { "data": { "text/plain": [ "<matplotlib.image.AxesImage at 0x7feb9433c3c8>" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAPsAAAD4CAYAAAAq5pAIAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAUGUlEQVR4nO3dbYxc1XkH8P9/Zt/wev2yGJvFNuCAEbihMbAxEFIUSgOGNAKqpsFSKa1QHDUgkSoqpUQqqO0HhJLQfGgjmWBhUgpKSyioIiGOC6KorcMajLFjbC/GL2sb29iYXdvsy8w8/bBDuoE9zx3PnZk79vn/pNXszjPn3rN355k7O88959DMICKnvlzWHRCRxlCyi0RCyS4SCSW7SCSU7CKRaGnkztrYbh3obOQuK0f68RRVCyZs+2SuiDCXcL5oa61+46NjbthKpao3zRb/qW+FQtXbztIwjmHURiZ9wqVKdpJLAXwfQB7AD83sQe/xHejE5bw2/IBc3t9hqXjCfawUW9vcuI2NVr3tXEeHGy8ND1e97azlpna5cc47Mxws+S9yNrDPjZeOHXPjnvys2W68uP9A1dvO0lpbE4xV/TaeZB7APwK4AcAiAMtILqp2eyJSX2n+Z18CoN/MtpvZKICnANxUm26JSK2lSfa5AHZP+HmgfN9vILmcZB/JvjGMpNidiKSRJtkn+xDgE/+EmdkKM+s1s95WtKfYnYikkSbZBwDMn/DzPAB703VHROolTbK/CmAhyQUk2wDcCuC52nRLRGqt6tKbmRVI3gXgBYyX3laa2aY0nWFrQu1zpPrSW8v8eW58+II5brz1cLg8ltv9rr/zpJptQj25niXHRAnXCJSGhtx4PndWMHbsvOlu2yk7B9x4klxXuCx4spbWAPh/E6eamarObmbPA3g+zTZEpDF0uaxIJJTsIpFQsotEQskuEgklu0gklOwikWjoeHaS7nBPzuvx2xfD45ft0PtuWzt+3I13bPFr5aPnnhGM5bpnuG1LSfViq35cdlpJ1x8UdqerdRc3bQnGOhKuykh7VLxrAEqfX+y2zb2y3o0fue1KNz7jR//jxlOpcv4DndlFIqFkF4mEkl0kEkp2kUgo2UUioWQXiURD
S28Gf9pkHjritz+jO9x2pj9c0jr8WXIKW/rdeG5gTzBWrOM01GnlZ53uxrf/2dlu/Oy/TVd6y9KRPwmXx2Y87pfGzvjvhHLqPR+68a2PfNaNX/C1V924y5uF2RkNrTO7SCSU7CKRULKLRELJLhIJJbtIJJTsIpFQsotEoqF1dpjBRsJLQBWdGADg/fAw1vz5C9ymxYQ6eirNvORywjTVw/MTprFuYvkZ/rUVXi39onX+U3/zZf41H6v3PubGe//mz914GrnTnGHix8Pnb53ZRSKhZBeJhJJdJBJKdpFIKNlFIqFkF4mEkl0kEo2ts9dRcfsuN86WhOWgSwm18iyXTU7h6DUXuvGp2+r7FMgvuiAYK/5qa6ptF4984Mbtc58JxjZf9obbNjdlihu//ix/KurZiw65ce/ZlPRc5VnO8uI7w21T/aVJ7gAwhPG+F8ysN832RKR+avGyfo2ZvVeD7YhIHel/dpFIpE12A/BzkutILp/sASSXk+wj2TeGhGvfRaRu0r6Nv8rM9pKcDWA1ybfM7OWJDzCzFQBWAMA0djfxiBGRU1uqM7uZ7S3fHgDwDIAlteiUiNRe1clOspNk10ffA7gOwMZadUxEaivN2/g5AJ7h+JzpLQD+xcx+VpNeBXjLPZeGhxMaJ/yqJ2kdPclpz/7SjZf+8PJU2891dbnxtLV0T0vPmf4D9hwOhuz08BoEAFA8FG5biTS/d36uv3S5HXbG2hfDz+Oqk93MtgMIX7UgIk1FpTeRSCjZRSKhZBeJhJJdJBJKdpFINNUQ13xCOYTTp4VjQ8fctmlLKaeqzn9b68YLv3tZqu23/Oe6VO09pcEhN56bFX4+NfPzobBzt/8AZ8lmc0pvOrOLRELJLhIJJbtIJJTsIpFQsotEQskuEgklu0gkmqrO7tUPAfjLD+foNs1Pm+rGk6Ylrie7yp+WuHXnQX8Dba3BUGH7jip69P/2/k67Gz/3Gb9eXUq1d1/pmH9tRW7OGcFYfs5st21x/4Gq+tQQVQ7H1pldJBJKdpFIKNlFIqFkF4mEkl0kEkp2kUgo2UUi0VR1dkuom6JQCMfyCa9bzthmAMgnxIv97/jbd4xe7y9uW+zw+966I2k56eqr2Uk1/rFp/rZ3fck/bvM2nHCXaibVNQb0r9uAnXyLG+nMLhIJJbtIJJTsIpFQsotEQskuEgklu0gklOwikWiqOntivdiZE7t01K/RW8I84Ulz1rvLA7e3uW1H3ShwaJH/ZxiZfo4bP/2FtxP2EJZf95Yb//uVW9z4Qw/fWvW+s5Rf+Ck3PjJ/phuv53z49ZJ4Zie5kuQBkhsn3NdNcjXJbeVb/8iISOYqeRv/GIClH7vvXgBrzGwhgDXln0WkiSUmu5m9DODj74FvArCq/P0qADfXuF8iUmPVfkA3x8z2AUD5NjihF8nlJPtI9o1hpMrdiUhadf803sxWmFmvmfW2wp+8UETqp9pk30+yBwDKt008FaeIANUn+3MAbi9/fzuAZ2vTHRGpl8Q6O8knAXwBwCySAwDuB/AggB+TvAPALgBfqUVnSt688ABYDNfhbSyhmp0wJz1b/ENh3dPDwTFnnD2A3df52566ww1j6oD/u6WZ49wuXujG//qVT/sbuNT/3cMzt2eLR4+78YFr5rjxs8cuceO5/3r9hPtUqVxHRzDG4fA4/MRkN7NlgdC1ib0Skaahy2VFIqFkF4mEkl0kEkp2kUgo2UUi0WRDXP2laM2JJ5bOvGmoAQzcep4bn/tkfzBWmu8v/zsroQrTccgvOba/Ft43AFS3gO+4/Lvvu/Ff/N5TbvwP/uGeFHvPTmlwyI2fdcVeNz5w4TQ3vuANP14cHHTjntLwcDBmzhTXOrOLRELJLhIJJbtIJJTsIpFQsotEQskuEgklu0gkmqvOnkJSHb1w7WVufEa/337H8vODsQWP7XLbjlzh11xnbPGnwS4e+cCNp7H3y2e78aVP/qUb73nHP27NiufMdeNH/n2WG/+P
ex5y43eONN+gUJ3ZRSKhZBeJhJJdJBJKdpFIKNlFIqFkF4mEkl0kEo2vs3tTOieMZ/e0zPPrpljjL7H71c3+dMxPLwpPLbz3zivdtvnh8BhjACi1+K+54cmB0+v5121u/OCXw9cXAEDndn9cdsIi3Jmx7f61EUPf7nTjD+3/ohs/+vu/7cY7n17rxj3e8uE8GE5pndlFIqFkF4mEkl0kEkp2kUgo2UUioWQXiYSSXSQSja+zW30qr4WBPW585IbPuvGn7vZr4cNfDR+qnhcPuW3f/8xMN54bS5gv342mU1wQrtkCQNtRf+/FrvDywUB9rxFIg11dbnzGS/7v9XdX/8KN3/b0hyfcp4/kz1/gxgtv7wjGrBSeXyDxzE5yJckDJDdOuO8BkntIri9/3Zi0HRHJViVv4x8DsHSS+x82s8Xlr+dr2y0RqbXEZDezlwEcbkBfRKSO0nxAdxfJDeW3+cF/SkkuJ9lHsm8MIyl2JyJpVJvsPwBwHoDFAPYB+G7ogWa2wsx6zay3Fe1V7k5E0qoq2c1sv5kVzawE4BEAS2rbLRGptaqSnWTPhB9vAbAx9FgRaQ6JdXaSTwL4AoBZJAcA3A/gCyQXY7wEvAPA1+vYx5po/+mrbnxw2RVuvG0ofH2A0a8md+4bdeOWMJ69nvJHjrvx1mN+vbml31/HPM3a8Unyp3f7D/DmTkgweK0/l/+VL93lxs/H61Xvu+jU0QGg5czw3AreePbEZDezZZPc/WhSOxFpLrpcViQSSnaRSCjZRSKhZBeJhJJdJBKnzJLNac3YdMSNF6eGr/7jiF9aaxlMKAHl/Nfceg5xfWdZuIwDADPf8ockd7a21rI7J6Yl4ek7Er48u3jhOW5T9vtTSdv8+l36ffxm/xq1Kc+Ep6E2SzHEVURODUp2kUgo2UUioWQXiYSSXSQSSnaRSCjZRSKRwVTS9awaV6+04S03PrY0PBX1kYV+rXp0uj8EtnXIPybd/uhc18iX/Cm0p/f7dfRSQhk9aQrvVBKGDhf3+8tse3Lrt7rx8/fMcuOlIx/48YT9t5wzPxjresuf8rHU2hYOjoWPmc7sIpFQsotEQskuEgklu0gklOwikVCyi0RCyS4SiYbW2Uki1xGemrjkjD9OVOf6PUvh7edH/X2z4NeLTzucbsJlb4nf9p++5rYd+OeL3fjCh/yx+mkW4PamRAYAm+4vq1zc0l/1vkvDw35890DV265EYefuYIxeHR2AFcacYPi5qDO7SCSU7CKRULKLRELJLhIJJbtIJJTsIpFQsotEorHj2XM5sCM8/zpGnfohAJTquQCwrzAl/LrYetyvNlvCtPFdG/b7+/abo9j/TjC29Z/8Ocgvuv89Nz508Rlu/LTOxW78+Jnhv/fwDP9cM3PLh248t8UNn7Tys/2x9HY8fFz4QfjJlnhmJzmf5IskN5PcRPLu8v3dJFeT3Fa+nZm0LRHJTiVv4wsAvmVmFwG4AsCdJBcBuBfAGjNbCGBN+WcRaVKJyW5m+8zstfL3QwA2A5gL4CYAq8oPWwXg5np1UkTSO6EP6EieC+ASAGsBzDGzfcD4CwKA2YE2y0n2kewbNf9/MBGpn4qTneRUAE8D+KaZDVbazsxWmFmvmfW28bRq+igiNVBRspNsxXiiP2FmPynfvZ9kTzneA6D6qT5FpO4SS28kCeBRAJvN7HsTQs8BuB3Ag+XbZxP3RgLt4VJMrs0f4loaDpfeclOm+PvO+/Wv0m+Fh4kCQOtgeN/tu/ypfzF0zA0XUkyJnOSCb/zSjQ/dcrkbH5viD889fLV/3EvOaM1Cpz80uPOAP9QzPFj65JY0Rbb/XA8f00rq7FcBuA3AmyTXl++7D+NJ/mOSdwDYBeArFWxLRDKSmOxm9gqA0Mv7tbXtjojUiy6XFYmEkl0kEkp2kUgo2UUioWQXiURjh7haCfgwPIWvFaufmLh0/Lgb3/b4pW6863W/altyjlR3pz8MtP29aW48f9Svw5eO+XFP
0rTEnTuOuvEt3/Dr6Lkhf/+lmeFhy/nD/nrQrUMJg3tzCWOHMxwSncbPdvW58evPCg8rNgvnkM7sIpFQsotEQskuEgklu0gklOwikVCyi0RCyS4SiYbW2a1YQtGpKTNhzLln6NYr3DgThpz/8R0vuPGXll4YjI0s9Jcefm+xX6vueWeqG09TZ7cxf8nl/NvhpYMB4KLvTDrb2K8dWexPe9wyHP6bDs/0x8q3rtvmxktOTbnZvbB3fTDm1dEB4N2/+FwwNvbE/wZjOrOLRELJLhIJJbtIJJTsIpFQsotEQskuEgklu0gkaObP3V1L09htl1MT0n7CKTouO62ksfj5Of48AoWBPVXvO2kdgqT5E7w6OuDX0tO0XWtrMGiHJ72AQWd2kUgo2UUioWQXiYSSXSQSSnaRSCjZRSKhZBeJRCXrs88H8DiAMwGUAKwws++TfADA1wAcLD/0PjN7vl4dPaVFWkdPkjQWv7j/oBv3sMV/6tezjp7UPk3bJdeH+13J5BUFAN8ys9dIdgFYR3J1OfawmX2ngm2ISMYqWZ99H4B95e+HSG4GMLfeHROR2jqh/9lJngvgEgBry3fdRXIDyZUkZwbaLCfZR7JvDCOpOisi1as42UlOBfA0gG+a2SCAHwA4D8BijJ/5vztZOzNbYWa9ZtbbivYadFlEqlFRspNsxXiiP2FmPwEAM9tvZkUbX0nuEQBL6tdNEUkrMdlJEsCjADab2fcm3N8z4WG3ANhY++6JSK1U8mn8VQBuA/AmyY8+878PwDKSiwEYgB0Avl6XHjaJ/LTwssvFwUG3ba6z042nmSo6ZlYILwed3NZfDvqet99042nKY0nt07TdaoeCsUo+jX8FwGTjY1VTFzmJ6Ao6kUgo2UUioWQXiYSSXSQSSnaRSCjZRSLRXFNJp5lSOaFt0nLQ7Ei4lHcsXNMtjSRc89/AY3xKob+kM1ta3XjSENlTkaaSFhElu0gslOwikVCyi0RCyS4SCSW7SCSU7CKRaGidneRBADsn3DULwHsN68CJada+NWu/APWtWrXs2zlmNula1g1N9k/snOwzs97MOuBo1r41a78A9a1ajeqb3saLRELJLhKJrJN9Rcb79zRr35q1X4D6Vq2G9C3T/9lFpHGyPrOLSIMo2UUikUmyk1xKcgvJfpL3ZtGHEJI7SL5Jcj3Jvoz7spLkAZIbJ9zXTXI1yW3l20nX2Muobw+Q3FM+dutJ3phR3+aTfJHkZpKbSN5dvj/TY+f0qyHHreH/s5PMA9gK4IsABgC8CmCZmf2qoR0JILkDQK+ZZX4BBsmrARwF8LiZfbp830MADpvZg+UXyplm9ldN0rcHABzNehnv8mpFPROXGQdwM4A/RYbHzunXH6EBxy2LM/sSAP1mtt3MRgE8BeCmDPrR9MzsZQCHP3b3TQBWlb9fhfEnS8MF+tYUzGyfmb1W/n4IwEfLjGd67Jx+NUQWyT4XwO4JPw+gudZ7NwA/J7mO5PKsOzOJOWa2Dxh/8gCYnXF/Pi5xGe9G+tgy401z7KpZ/jytLJJ9svmxmqn+d5WZXQrgBgB3lt+uSmUqWsa7USZZZrwpVLv8eVpZJPsAgPkTfp4HYG8G/ZiUme0t3x4A8Ayabynq/R+toFu+PZBxf36tmZbxnmyZcTTBscty+fMskv1VAAtJLiDZBuBWAM9l0I9PINlZ/uAEJDsBXIfmW4r6OQC3l7+/HcCzGfblNzTLMt6hZcaR8bHLfPlzM2v4F4AbMf6J/NsAvp1FHwL9+hSAN8pfm7LuG4AnMf62bgzj74juAHA6gDUAtpVvu5uobz8C8CaADRhPrJ6M+vZ5jP9ruAHA+vLXjVkfO6dfDTluulxWJBK6gk4kEkp2kUgo2UUioWQXiYSSXSQSSnaRSCjZRSLxf/HjQnXJQgiqAAAAAElFTkSuQmCC\n", "text/plain": [ "<Figure size 432x288 with 1 Axes>" ] }, 
"metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "pattern, mask = defence_cleanse.generate_backdoor(x_test, y_test, np.array([0, 1, 0, 0, 0, 0, 0, 0, 0, 0]))\n", "plt.imshow(np.squeeze(mask * pattern))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Usually `generate_backdoor` is called as a result of calling `mitigate`. During this process, this defense generates a suspected backdoor for each class visualized above. The `mitigate` method also performs the mitigation types presented below." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Mitigation Types\n", "\n", "There are different mitigation methods that are described below." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Filtering\n", "\n", "Filtering is the process of abstaining from potentially poisonous predictions at runtime. When this method is set, neurons are ranked by their association with the backdoor, and when neural activations are higher than normal, the classifier abstains from predication (output is all zeros)." 
] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Generating backdoor for class 0: 100%|██████████| 10/10 [01:10<00:00, 7.04s/it]\n", "Generating backdoor for class 1: 100%|██████████| 10/10 [01:09<00:00, 6.95s/it]\n", "Generating backdoor for class 2: 100%|██████████| 10/10 [01:09<00:00, 7.00s/it]\n", "Generating backdoor for class 3: 100%|██████████| 10/10 [01:10<00:00, 7.02s/it]\n", "Generating backdoor for class 4: 100%|██████████| 10/10 [01:09<00:00, 6.95s/it]\n", "Generating backdoor for class 5: 100%|██████████| 10/10 [01:09<00:00, 6.96s/it]\n", "Generating backdoor for class 6: 100%|██████████| 10/10 [01:09<00:00, 6.97s/it]\n", "Generating backdoor for class 7: 100%|██████████| 10/10 [01:11<00:00, 7.16s/it]\n", "Generating backdoor for class 8: 100%|██████████| 10/10 [01:14<00:00, 7.43s/it]\n", "Generating backdoor for class 9: 100%|██████████| 10/10 [01:11<00:00, 7.17s/it]\n" ] } ], "source": [ "defence_cleanse = cleanse(classifier, steps=10, learning_rate=0.1)\n", "defence_cleanse.mitigate(clean_x_test, clean_y_test, mitigation_types=[\"filtering\"])" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Filtered 500/559 poison samples (89.45% effective)\n" ] } ], "source": [ "poison_pred = defence_cleanse.predict(poison_x_test)\n", "num_filtered = np.sum(np.all(poison_pred == np.zeros(10), axis=1))\n", "num_poison = len(poison_pred)\n", "effectiveness = float(num_filtered) / num_poison * 100\n", "print(\"Filtered {}/{} poison samples ({:.2f}% effective)\".format(num_filtered, num_poison, effectiveness))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Unlearning\n", "\n", "Unlearning is the process of retraining the model for one epoch on backdoored inputs paired with their correct labels, so that it stops associating the backdoor with the target class. This works best for Trojan-style triggers that react to a specific neuron configuration."
] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Generating backdoor for class 0: 100%|██████████| 10/10 [01:16<00:00, 7.68s/it]\n", "Generating backdoor for class 1: 100%|██████████| 10/10 [01:11<00:00, 7.17s/it]\n", "Generating backdoor for class 2: 100%|██████████| 10/10 [01:13<00:00, 7.39s/it]\n", "Generating backdoor for class 3: 100%|██████████| 10/10 [01:12<00:00, 7.24s/it]\n", "Generating backdoor for class 4: 100%|██████████| 10/10 [01:12<00:00, 7.28s/it]\n", "Generating backdoor for class 5: 100%|██████████| 10/10 [01:09<00:00, 7.00s/it]\n", "Generating backdoor for class 6: 100%|██████████| 10/10 [01:10<00:00, 7.01s/it]\n", "Generating backdoor for class 7: 100%|██████████| 10/10 [01:11<00:00, 7.18s/it]\n", "Generating backdoor for class 8: 100%|██████████| 10/10 [01:11<00:00, 7.13s/it]\n", "Generating backdoor for class 9: 100%|██████████| 10/10 [01:10<00:00, 7.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Epoch 1/1\n", " 5/4129 [..............................] 
- ETA: 1:03 - loss: 2.7731 - acc: 0.6000 " ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "4129/4129 [==============================] - 70s 17ms/step - loss: 0.0115 - acc: 0.9981\n" ] } ], "source": [ "defence_cleanse = cleanse(classifier, steps=10, learning_rate=0.1)\n", "defence_cleanse.mitigate(clean_x_test, clean_y_test, mitigation_types=[\"unlearning\"])" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", " Effectiveness of poison after unlearning: 5.19% (previously 100.00%)\n", "\n", " Clean test set accuracy: 54.14% (previously 96.47%)\n" ] } ], "source": [ "poison_preds = np.argmax(classifier.predict(poison_x_test), axis=1)\n", "poison_correct = np.sum(poison_preds == np.argmax(poison_y_test, axis=1))\n", "poison_total = poison_y_test.shape[0]\n", "new_poison_acc = poison_correct / poison_total\n", "print(\"\\n Effectiveness of poison after unlearning: %.2f%% (previously %.2f%%)\" % (new_poison_acc * 100, poison_acc * 100))\n", "clean_preds = np.argmax(classifier.predict(clean_x_test), axis=1)\n", "clean_correct = np.sum(clean_preds == np.argmax(clean_y_test, axis=1))\n", "clean_total = clean_y_test.shape[0]\n", "\n", "new_clean_acc = clean_correct / clean_total\n", "print(\"\\n Clean test set accuracy: %.2f%% (previously %.2f%%)\" % (new_clean_acc * 100, clean_acc * 100))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Pruning\n", "\n", "Pruning is the process of zero-ing out neurons strongly associated with backdoor behavior until the backdoor is ineffective or 30% of all neurons have been pruned. Be careful as this can negatively affect the accuracy of your model. This works best for fully mitigating the effects of backdoor poisoning attacks." 
] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Generating backdoor for class 0: 100%|██████████| 10/10 [01:15<00:00, 7.57s/it]\n", "Generating backdoor for class 1: 100%|██████████| 10/10 [01:14<00:00, 7.49s/it]\n", "Generating backdoor for class 2: 100%|██████████| 10/10 [01:14<00:00, 7.49s/it]\n", "Generating backdoor for class 3: 100%|██████████| 10/10 [01:16<00:00, 7.61s/it]\n", "Generating backdoor for class 4: 100%|██████████| 10/10 [01:15<00:00, 7.51s/it]\n", "Generating backdoor for class 5: 100%|██████████| 10/10 [01:11<00:00, 7.17s/it]\n", "Generating backdoor for class 6: 100%|██████████| 10/10 [01:11<00:00, 7.19s/it]\n", "Generating backdoor for class 7: 100%|██████████| 10/10 [01:11<00:00, 7.18s/it]\n", "Generating backdoor for class 8: 100%|██████████| 10/10 [01:10<00:00, 7.06s/it]\n", "Generating backdoor for class 9: 100%|██████████| 10/10 [01:10<00:00, 7.02s/it]\n" ] } ], "source": [ "defence_cleanse = cleanse(classifier, steps=10, learning_rate=0.1)\n", "defence_cleanse.mitigate(clean_x_test, clean_y_test, mitigation_types=[\"pruning\"])" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", " Effectiveness of poison after pruning: 0.00% (previously 100.00%)\n", "\n", " Clean test set accuracy: 46.46% (previously 96.47%)\n" ] } ], "source": [ "poison_preds = np.argmax(classifier.predict(poison_x_test), axis=1)\n", "poison_correct = np.sum(poison_preds == np.argmax(poison_y_test, axis=1))\n", "poison_total = poison_y_test.shape[0]\n", "new_poison_acc = poison_correct / poison_total\n", "print(\"\\n Effectiveness of poison after pruning: %.2f%% (previously %.2f%%)\" % (new_poison_acc * 100, poison_acc * 100))\n", "clean_preds = np.argmax(classifier.predict(clean_x_test), axis=1)\n", "clean_correct = np.sum(clean_preds == np.argmax(clean_y_test, axis=1))\n", "clean_total = 
clean_y_test.shape[0]\n", "\n", "new_clean_acc = clean_correct / clean_total\n", "print(\"\\n Clean test set accuracy: %.2f%% (previously %.2f%%)\" % (new_clean_acc * 100, clean_acc * 100))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Combination\n", "\n", "Finally, you can also do a combination of any of the above mitigation methods to fit your needs. Just add those types to the `mitigation_types` list." ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Generating backdoor for class 0: 100%|██████████| 10/10 [01:12<00:00, 7.25s/it]\n", "Generating backdoor for class 1: 100%|██████████| 10/10 [01:13<00:00, 7.38s/it]\n", "Generating backdoor for class 2: 100%|██████████| 10/10 [01:10<00:00, 7.07s/it]\n", "Generating backdoor for class 3: 100%|██████████| 10/10 [01:10<00:00, 7.04s/it]\n", "Generating backdoor for class 4: 100%|██████████| 10/10 [01:13<00:00, 7.35s/it]\n", "Generating backdoor for class 5: 100%|██████████| 10/10 [01:10<00:00, 7.07s/it]\n", "Generating backdoor for class 6: 100%|██████████| 10/10 [01:10<00:00, 7.04s/it]\n", "Generating backdoor for class 7: 100%|██████████| 10/10 [01:12<00:00, 7.26s/it]\n", "Generating backdoor for class 8: 100%|██████████| 10/10 [01:12<00:00, 7.28s/it]\n", "Generating backdoor for class 9: 100%|██████████| 10/10 [01:10<00:00, 7.09s/it]\n" ] } ], "source": [ "defence_cleanse.mitigate(clean_x_test, clean_y_test, mitigation_types=[\"pruning\", \"filtering\"])" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.10" } }, "nbformat": 4, 
"nbformat_minor": 2 }