{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Lab: Retraining a pre-trained model\n", "\n", "We will retrain a pre-trained model to classify cats-and-dogs!\n", "\n", "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/elephantscale/cool-ML-demos/blob/main/transfer-learning/transfer3-training-a-pretrained-model.ipynb)\n", "\n", "### Runtime\n", "~ 30 minutes\n", "\n", "### Note\n", "Here we are dealing with real world images. Processing them will required a lot of compute power. \n", "If you have access to, switch to **GPU** as run time!\n", "\n", "### References\n", "- https://www.tensorflow.org/tutorials/images/transfer_learning\n", "- https://www.learnopencv.com/keras-tutorial-using-pre-trained-imagenet-models/" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "try:\n", " # %tensorflow_version only exists in Colab.\n", " %tensorflow_version 2.x\n", "except Exception:\n", " pass\n", "\n", "import tensorflow as tf\n", "from tensorflow import keras\n", "print ('tensorflow version :', tf.__version__)\n", "tf.config.experimental.list_physical_devices()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "## Loading our custom utils files\n", "import os\n", "import sys\n", "from pathlib import Path\n", "\n", "# Hack to download image utils when running on Colab ..etc\n", "import os\n", "import urllib.request\n", "\n", "file_url = 'https://raw.githubusercontent.com/elephantscale/es-public/master/deep-learning/transfer_learning_utils.py'\n", "file_location = \"transfer_learning_utils.py\"\n", "\n", "if not os.path.exists (file_location):\n", " file_location = os.path.basename(file_location)\n", " if not os.path.exists(file_location):\n", " print(\"Downloading : \", file_url)\n", " urllib.request.urlretrieve(file_url, file_location)\n", "# print('file_location:', file_location)" ] }, { 
"cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "## Loading our custom utils files\n", "import os\n", "import sys\n", "from pathlib import Path\n", "\n", "# Hack to download image utils when running on Colab ..etc\n", "import os\n", "import urllib.request\n", "\n", "file_url = 'https://raw.githubusercontent.com/elephantscale/es-public/master/deep-learning/image_utils.py'\n", "file_location = \"image_utils.py\"\n", "\n", "if not os.path.exists (file_location):\n", " file_location = os.path.basename(file_location)\n", " if not os.path.exists(file_location):\n", " print(\"Downloading : \", file_url)\n", " urllib.request.urlretrieve(file_url, file_location)\n", "# print('file_location:', file_location)" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "m6sLPjRC3vPG" }, "source": [ "## TF-GPU Config\n", "The following cell sets TF properties to run on GPU" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "## This block is to tweak TF running on GPU\n", "## You may comment this out, if you are not using GPU\n", "\n", "## ---- start Memory setting ----\n", "## Ask TF not to allocate all GPU memory at once.. 
allocate as needed\n", "## Without this the execution will fail with \"failed to initialize algorithm\" error\n", "\n", "from tensorflow.compat.v1.keras.backend import set_session\n", "config = tf.compat.v1.ConfigProto()\n", "config.gpu_options.allow_growth = True # dynamically grow the memory used on the GPU\n", "config.log_device_placement = True # to log device placement (on which device the operation ran)\n", "sess = tf.compat.v1.Session(config=config)\n", "set_session(sess)\n", "## ---- end Memory setting ----" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Step 1 - Download Data\n", "We will use cat-dog-redux dataset" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "## Common constants\n", "\n", "IMG_WIDTH=160\n", "IMG_HEIGHT=160\n", "NUM_CLASSES=2\n", "BATCH_SIZE=64\n", "APP_NAME = 'retrain'\n", "EPOCHS = 3" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import os\n", "\n", "data_location = 'https://elephantscale-public.s3.amazonaws.com/data/images/cat-dog-redux.zip'\n", "\n", "data_location_local = keras.utils.get_file(fname=os.path.basename(data_location),\n", " origin=data_location, extract=True)\n", "print ('local download file: ', data_location_local)\n", "data_dir = os.path.join(os.path.dirname(data_location_local), 'cat-dog-redux')\n", "print ('local data dir: ', data_dir)\n", "train_dir = os.path.join(data_dir, 'train')\n", "validation_dir = os.path.join(data_dir, 'val')\n", "print ('train dir:', train_dir)\n", "print ('validation dir:', validation_dir)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Step 2 - Setup Data Generators" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from tensorflow.keras.preprocessing.image import ImageDataGenerator\n", "\n", "# train_image_generator = ImageDataGenerator(rescale=1./255) # Generator for our training data\n", "\n", 
"train_image_generator = ImageDataGenerator(\n", " rescale=1./255,\n", " rotation_range=45,\n", " width_shift_range = 0.2,\n", " height_shift_range = 0.2,\n", " shear_range = 0.2,\n", " zoom_range = 0.2,\n", " horizontal_flip = True)\n", "\n", "\n", "validation_image_generator = ImageDataGenerator(rescale=1./255) # Generator for our validation data\n", "\n", "train_data_gen = train_image_generator.flow_from_directory(batch_size=BATCH_SIZE,\n", " directory=train_dir,\n", " shuffle=True,\n", " target_size=(IMG_HEIGHT, IMG_WIDTH),\n", " class_mode='categorical'\n", " )\n", "\n", "\n", "val_data_gen = validation_image_generator.flow_from_directory(batch_size=BATCH_SIZE,\n", " directory=validation_dir,\n", " target_size=(IMG_HEIGHT, IMG_WIDTH),\n", " class_mode='categorical'\n", " )" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Step 3 - Download Model\n", "Let's try the InceptionV3 model. We will only download the 'base' model, without the final classifying layers" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from transfer_learning_utils import print_model_summary_compact\n", "\n", "pre_trained_model = tf.keras.applications.InceptionV3(input_shape=(IMG_WIDTH,IMG_HEIGHT,3), \n", " include_top = False,\n", " weights = 'imagenet')\n", "print_model_summary_compact(pre_trained_model)\n", "print ()\n", "\n", "! 
du -skh ~/.keras/models/* | grep inception" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# pre_trained_model.summary() # large output" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Inspect the model and freeze the layers" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# last_layer = pre_trained_model.get_layer('mixed7')\n", "last_layer = pre_trained_model.layers[-1]\n", "\n", "print (\"last_layer.name:\", last_layer.name)\n", "print ('last_layer.output.shape:', last_layer.output_shape)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "## Freeze all layers\n", "# for layer in pre_trained_model.layers:\n", "# layer.trainable = False\n", "\n", "# or this works too\n", "pre_trained_model.trainable = False\n", "\n", "print_model_summary_compact (pre_trained_model)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Step 4 - Create our own model\n", "\n", "We are going to add a couple of layers\n", "- One fully connected layer\n", "- And a softmax layer\n", "\n", "On top of the model" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from tensorflow.keras.models import Sequential\n", "from tensorflow.keras.layers import Input, Dense, Softmax, Flatten, GlobalAveragePooling2D, Dropout\n", "from transfer_learning_utils import print_model_summary_compact\n", "\n", "model = Sequential ([\n", " Input (shape=(IMG_WIDTH, IMG_HEIGHT, 3)),\n", " pre_trained_model, \n", " Flatten(), \n", " #GlobalAveragePooling2D(),\n", " Dense(512, activation='relu'),\n", " Dropout(0.5),\n", " Dense(NUM_CLASSES, activation='softmax')\n", " ])\n", "\n", "model.compile (loss='categorical_crossentropy',\n", " optimizer= 'adam',\n", " metrics=['accuracy'])\n", "\n", "print_model_summary_compact(model)\n", "print()\n", "# my_model.summary() # large output\n", 
"tf.keras.utils.plot_model(model, to_file='model.png', show_shapes=True)" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "sU4Z4MYG3vPh" }, "source": [ "## Step 5 - Setup Tensorboard" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "## This is fairly boiler plate code\n", "\n", "import datetime\n", "import os\n", "import shutil\n", "\n", "app_name = APP_NAME\n", "\n", "\n", "# timestamp = datetime.datetime.now().strftime(\"%Y-%m-%d--%H-%M-%S\")\n", "\n", "tb_top_level_dir= '/tmp/tensorboard-logs'\n", "\n", "tb_app_dir = os.path.join (tb_top_level_dir, app_name)\n", "\n", "tb_logs_dir = os.path.join (tb_app_dir, datetime.datetime.now().strftime(\"%H-%M-%S\"))\n", "\n", "\n", "print (\"Saving TB logs to : \" , tb_logs_dir)\n", "\n", "#clear out old logs\n", "shutil.rmtree ( tb_app_dir, ignore_errors=True )\n", "\n", "tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=tb_logs_dir, write_graph=True, \n", " write_images=True, histogram_freq=1)\n", "\n", "## This will embed Tensorboard right here in jupyter!\n", "# ! 
killall tensorboard # kill previously running tensorboards\n", "%load_ext tensorboard\n", "%tensorboard --logdir $tb_logs_dir" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "cflnm-v23vPj" }, "source": [ "## Step 6 : Train" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "%%time \n", "\n", "steps_per_epoch = train_data_gen.n // train_data_gen.batch_size\n", "validation_steps = val_data_gen.n // val_data_gen.batch_size\n", "print ('steps_per_epoch:', steps_per_epoch)\n", "print ('validation_steps:', validation_steps)\n", "\n", "history = model.fit(\n", " train_data_gen,\n", " steps_per_epoch= steps_per_epoch,\n", " epochs=EPOCHS, # use the EPOCHS constant from the config cell (was hard-coded 10)\n", " validation_data=val_data_gen,\n", " validation_steps=validation_steps,\n", " callbacks = [tensorboard_callback]\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Save the model for reuse later\n", "As you can see training takes a long time. \n", "Let's save the resulting model, so we can use it quickly without going through training again." 
] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import os\n", "\n", "model_file = APP_NAME + '-model.h5'\n", "model.save(model_file)\n", "\n", "model_size_in_bytes = os.path.getsize(model_file)\n", "print (\"model saved as '{}', size = {:,f} bytes / {:,.1f} KB / {:,.1f} MB\".format(model_file, \n", " model_size_in_bytes, model_size_in_bytes / 1e3, \n", " model_size_in_bytes / 1e6 ))" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "a9kZ2ul33vPm" }, "source": [ "## Step 7 : See Training History" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "%matplotlib inline\n", "import matplotlib.pyplot as plt\n", "\n", "plt.plot(history.history['accuracy'], label='train_accuracy')\n", "plt.plot(history.history['val_accuracy'], label='val_accuracy')\n", "plt.legend()\n", "plt.show()" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "Kmzupp1d3vPu" }, "source": [ "## Step 8 : Evaluate the Model " ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "xF81OSSQ3vPv" }, "source": [ "### 8.1 - Metrics" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 69 }, "colab_type": "code", "id": "K8JoGQPJ3vPv", "outputId": "f8a19588-5045-4416-f0f3-9cdda95f376f" }, "outputs": [], "source": [ "from math import ceil\n", "\n", "\n", "metrics = model.evaluate(val_data_gen, batch_size=val_data_gen.batch_size, steps=ceil(val_data_gen.n // val_data_gen.batch_size) )\n", "\n", "metric_names = model.metrics_names\n", "print (\"model metrics : \" , metric_names)\n", "\n", "for idx, metric in enumerate(metric_names):\n", " print (\"Metric : {} = {:,.3f}\".format (metric_names[idx], metrics[idx]))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 8.2 - Predictions and Confusion Matrix" ] }, { "cell_type": "code", "execution_count": null, "metadata": 
{}, "outputs": [], "source": [ "import numpy as np\n", "from math import ceil\n", "\n", "\n", "print (\"predicting on {:,} test images\".format(val_data_gen.n))\n", "# we need a ceiling for steps\n", "predictions = model.predict(val_data_gen, batch_size=val_data_gen.batch_size, \n", " steps=ceil(val_data_gen.n / val_data_gen.batch_size) )\n", "print( 'predictions.shape: ', predictions.shape)\n", "\n", "\n", "if val_data_gen.class_mode == 'categorical':\n", " # converting softmax --> classes\n", " print (\"convering softmax --> classes\")\n", " predictions2 = [ np.argmax(p) for p in predictions]\n", "\n", "if val_data_gen.class_mode == 'binary':\n", " # converting sigmoid --> classes\n", " print (\"converting sigmod --> binary\")\n", " predictions2 = [0 if n < 0.5 else 1 for n in predictions]\n", "\n", "\n", "# ## Ensure all predictions match\n", "assert(len(predictions) == len(predictions2) == len(val_data_gen.classes) )" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "np.set_printoptions(formatter={'float': '{: 0.2f}'.format})\n", "\n", "print ('predictions : ' , predictions[:10])\n", "print ('prediction2: ' , predictions2[:10])" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from sklearn.metrics import confusion_matrix\n", "import seaborn as sns\n", "\n", "test_labels = val_data_gen.classes\n", "cm = confusion_matrix(test_labels, predictions2, labels = range(0, NUM_CLASSES))\n", "cm" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", "\n", "print (\"class index mapping : \", val_data_gen.class_indices)\n", "\n", "plt.figure(figsize = (8,6))\n", "\n", "# colormaps : cmap=\"YlGnBu\" , cmap=\"Greens\", cmap=\"Blues\", cmap=\"Reds\"\n", "sns.heatmap(cm, annot=True, cmap=\"Reds\", fmt='d').plot()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 
Cleanup \n", "Before running the next exercise, run the following cell to terminate processes and free up resources" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "## Kill any child processes (like tensorboard)\n", "\n", "import psutil\n", "import os, signal\n", "\n", "current_process = psutil.Process()\n", "children = current_process.children(recursive=True)\n", "for child in children:\n", " print('Killing Child pid {}'.format(child.pid))\n", " os.kill(child.pid, signal.SIGKILL)\n", " \n", "## This will kill actual kernel itself\n", "# os.kill(os.getpid(), signal.SIGKILL)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.11" } }, "nbformat": 4, "nbformat_minor": 4 }