{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Lab: Retraining a pre-trained model\n", "\n", "We will retrain a pre-trained model to classify cats-and-dogs!\n", "\n", "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/elephantscale/cool-ML-demos/blob/main/transfer-learning/transfer3-training-a-pretrained-model.ipynb)\n", "\n", "### Runtime\n", "~ 30 minutes\n", "\n", "### Note\n", "Here we are dealing with real world images. Processing them will required a lot of compute power. \n", "If you have access to, switch to **GPU** as run time!\n", "\n", "### References\n", "- https://www.tensorflow.org/tutorials/images/transfer_learning\n", "- https://www.learnopencv.com/keras-tutorial-using-pre-trained-imagenet-models/" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "try:\n", " # %tensorflow_version only exists in Colab.\n", " %tensorflow_version 2.x\n", "except Exception:\n", " pass\n", "\n", "import tensorflow as tf\n", "from tensorflow import keras\n", "print ('tensorflow version :', tf.__version__)\n", "tf.config.experimental.list_physical_devices()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "## Loading our custom utils files\n", "import os\n", "import sys\n", "from pathlib import Path\n", "\n", "# Hack to download image utils when running on Colab ..etc\n", "import os\n", "import urllib.request\n", "\n", "file_url = 'https://raw.githubusercontent.com/elephantscale/es-public/master/deep-learning/transfer_learning_utils.py'\n", "file_location = \"transfer_learning_utils.py\"\n", "\n", "if not os.path.exists (file_location):\n", " file_location = os.path.basename(file_location)\n", " if not os.path.exists(file_location):\n", " print(\"Downloading : \", file_url)\n", " urllib.request.urlretrieve(file_url, file_location)\n", "# print('file_location:', file_location)" ] }, { 
"cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "## Loading our custom utils files\n", "import os\n", "import sys\n", "from pathlib import Path\n", "\n", "# Hack to download image utils when running on Colab ..etc\n", "import os\n", "import urllib.request\n", "\n", "file_url = 'https://raw.githubusercontent.com/elephantscale/es-public/master/deep-learning/image_utils.py'\n", "file_location = \"image_utils.py\"\n", "\n", "if not os.path.exists (file_location):\n", " file_location = os.path.basename(file_location)\n", " if not os.path.exists(file_location):\n", " print(\"Downloading : \", file_url)\n", " urllib.request.urlretrieve(file_url, file_location)\n", "# print('file_location:', file_location)" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "m6sLPjRC3vPG" }, "source": [ "## TF-GPU Config\n", "The following cell sets TF properties to run on GPU" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "## This block is to tweak TF running on GPU\n", "## You may comment this out, if you are not using GPU\n", "\n", "## ---- start Memory setting ----\n", "## Ask TF not to allocate all GPU memory at once.. 
allocate as needed\n", "## Without this the execution will fail with \"failed to initialize algorithm\" error\n", "\n", "from tensorflow.compat.v1.keras.backend import set_session\n", "config = tf.compat.v1.ConfigProto()\n", "config.gpu_options.allow_growth = True # dynamically grow the memory used on the GPU\n", "config.log_device_placement = True # to log device placement (on which device the operation ran)\n", "sess = tf.compat.v1.Session(config=config)\n", "set_session(sess)\n", "## ---- end Memory setting ----" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Step 1 - Download Data\n", "We will use cat-dog-redux dataset" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "## Common constants\n", "\n", "IMG_WIDTH=160\n", "IMG_HEIGHT=160\n", "NUM_CLASSES=2\n", "BATCH_SIZE=64\n", "APP_NAME = 'retrain'\n", "EPOCHS = 3" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import os\n", "\n", "data_location = 'https://elephantscale-public.s3.amazonaws.com/data/images/cat-dog-redux.zip'\n", "\n", "data_location_local = keras.utils.get_file(fname=os.path.basename(data_location),\n", " origin=data_location, extract=True)\n", "print ('local download file: ', data_location_local)\n", "data_dir = os.path.join(os.path.dirname(data_location_local), 'cat-dog-redux')\n", "print ('local data dir: ', data_dir)\n", "train_dir = os.path.join(data_dir, 'train')\n", "validation_dir = os.path.join(data_dir, 'val')\n", "print ('train dir:', train_dir)\n", "print ('validation dir:', validation_dir)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Step 2 - Setup Data Generators" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from tensorflow.keras.preprocessing.image import ImageDataGenerator\n", "\n", "# train_image_generator = ImageDataGenerator(rescale=1./255) # Generator for our training data\n", "\n", 
"train_image_generator = ImageDataGenerator(\n", " rescale=1./255,\n", " rotation_range=45,\n", " width_shift_range = 0.2,\n", " height_shift_range = 0.2,\n", " shear_range = 0.2,\n", " zoom_range = 0.2,\n", " horizontal_flip = True)\n", "\n", "\n", "validation_image_generator = ImageDataGenerator(rescale=1./255) # Generator for our validation data\n", "\n", "train_data_gen = train_image_generator.flow_from_directory(batch_size=BATCH_SIZE,\n", " directory=train_dir,\n", " shuffle=True,\n", " target_size=(IMG_HEIGHT, IMG_WIDTH),\n", " class_mode='categorical'\n", " )\n", "\n", "\n", "val_data_gen = validation_image_generator.flow_from_directory(batch_size=BATCH_SIZE,\n", " directory=validation_dir,\n", " target_size=(IMG_HEIGHT, IMG_WIDTH),\n", " class_mode='categorical'\n", " )" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Step 3 - Download Model\n", "Let's try the InceptionV3 model. We will only download the 'base' model, without the final classifying layers" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from transfer_learning_utils import print_model_summary_compact\n", "\n", "pre_trained_model = tf.keras.applications.InceptionV3(input_shape=(IMG_WIDTH,IMG_HEIGHT,3), \n", " include_top = False,\n", " weights = 'imagenet')\n", "print_model_summary_compact(pre_trained_model)\n", "print ()\n", "\n", "! 
du -skh ~/.keras/models/* | grep inception" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# pre_trained_model.summary() # large output" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Inspect the model and freeze the layers" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# last_layer = pre_trained_model.get_layer('mixed7')\n", "last_layer = pre_trained_model.layers[-1]\n", "\n", "print (\"last_layer.name:\", last_layer.name)\n", "print ('last_layer.output.shape:', last_layer.output_shape)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "## Freeze all layers\n", "# for layer in pre_trained_model.layers:\n", "# layer.trainable = False\n", "\n", "# or this works too\n", "pre_trained_model.trainable = False\n", "\n", "print_model_summary_compact (pre_trained_model)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Step 4 - Create our own model\n", "\n", "We are going to add a couple of layers\n", "- One fully connected layer\n", "- And a softmax layer\n", "\n", "On top of the model" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from tensorflow.keras.models import Sequential\n", "from tensorflow.keras.layers import Input, Dense, Softmax, Flatten, GlobalAveragePooling2D, Dropout\n", "from transfer_learning_utils import print_model_summary_compact\n", "\n", "model = Sequential ([\n", " Input (shape=(IMG_WIDTH, IMG_HEIGHT, 3)),\n", " pre_trained_model, \n", " Flatten(), \n", " #GlobalAveragePooling2D(),\n", " Dense(512, activation='relu'),\n", " Dropout(0.5),\n", " Dense(NUM_CLASSES, activation='softmax')\n", " ])\n", "\n", "model.compile (loss='categorical_crossentropy',\n", " optimizer= 'adam',\n", " metrics=['accuracy'])\n", "\n", "print_model_summary_compact(model)\n", "print()\n", "# my_model.summary() # large output\n", 
"tf.keras.utils.plot_model(model, to_file='model.png', show_shapes=True)" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "sU4Z4MYG3vPh" }, "source": [ "## Step 5 - Setup Tensorboard" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "## This is fairly boiler plate code\n", "\n", "import datetime\n", "import os\n", "import shutil\n", "\n", "app_name = APP_NAME\n", "\n", "\n", "# timestamp = datetime.datetime.now().strftime(\"%Y-%m-%d--%H-%M-%S\")\n", "\n", "tb_top_level_dir= '/tmp/tensorboard-logs'\n", "\n", "tb_app_dir = os.path.join (tb_top_level_dir, app_name)\n", "\n", "tb_logs_dir = os.path.join (tb_app_dir, datetime.datetime.now().strftime(\"%H-%M-%S\"))\n", "\n", "\n", "print (\"Saving TB logs to : \" , tb_logs_dir)\n", "\n", "#clear out old logs\n", "shutil.rmtree ( tb_app_dir, ignore_errors=True )\n", "\n", "tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=tb_logs_dir, write_graph=True, \n", " write_images=True, histogram_freq=1)\n", "\n", "## This will embed Tensorboard right here in jupyter!\n", "# ! 
killall tensorboard # kill previously running tensorboards\n", "%load_ext tensorboard\n", "%tensorboard --logdir $tb_logs_dir" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "cflnm-v23vPj" }, "source": [ "## Step 6 : Train" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "%%time \n", "\n", "steps_per_epoch = train_data_gen.n // train_data_gen.batch_size\n", "validation_steps = val_data_gen.n // val_data_gen.batch_size\n", "print ('steps_per_epoch:', steps_per_epoch)\n", "print ('validation_steps:', validation_steps)\n", "\n", "history = model.fit(\n", " train_data_gen,\n", " steps_per_epoch= steps_per_epoch,\n", " epochs=EPOCHS, # use the EPOCHS constant from the config cell (was hard-coded 10)\n", " validation_data=val_data_gen,\n", " validation_steps=validation_steps,\n", " callbacks = [tensorboard_callback]\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Save the model for reuse later\n", "As you can see training takes a long time. \n", "Let's save the resulting model, so we can use it quickly without going through training again." 
] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import os\n", "\n", "model_file = APP_NAME + '-model.h5'\n", "model.save(model_file)\n", "\n", "model_size_in_bytes = os.path.getsize(model_file)\n", "print (\"model saved as '{}', size = {:,f} bytes / {:,.1f} KB / {:,.1f} MB\".format(model_file, \n", " model_size_in_bytes, model_size_in_bytes / 1e3, \n", " model_size_in_bytes / 1e6 ))" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "a9kZ2ul33vPm" }, "source": [ "## Step 7 : See Training History" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "%matplotlib inline\n", "import matplotlib.pyplot as plt\n", "\n", "plt.plot(history.history['accuracy'], label='train_accuracy')\n", "plt.plot(history.history['val_accuracy'], label='val_accuracy')\n", "plt.legend()\n", "plt.show()" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "Kmzupp1d3vPu" }, "source": [ "## Step 8 : Evaluate the Model " ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "xF81OSSQ3vPv" }, "source": [ "### 8.1 - Metrics" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 69 }, "colab_type": "code", "id": "K8JoGQPJ3vPv", "outputId": "f8a19588-5045-4416-f0f3-9cdda95f376f" }, "outputs": [], "source": [ "from math import ceil\n", "\n", "\n", "metrics = model.evaluate(val_data_gen, batch_size=val_data_gen.batch_size, steps=ceil(val_data_gen.n // val_data_gen.batch_size) )\n", "\n", "metric_names = model.metrics_names\n", "print (\"model metrics : \" , metric_names)\n", "\n", "for idx, metric in enumerate(metric_names):\n", " print (\"Metric : {} = {:,.3f}\".format (metric_names[idx], metrics[idx]))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 8.2 - Predictions and Confusion Matrix" ] }, { "cell_type": "code", "execution_count": null, "metadata": 
{}, "outputs": [], "source": [ "import numpy as np\n", "from math import ceil\n", "\n", "\n", "print (\"predicting on {:,} test images\".format(val_data_gen.n))\n", "# we need a ceiling for steps\n", "predictions = model.predict(val_data_gen, batch_size=val_data_gen.batch_size, \n", " steps=ceil(val_data_gen.n / val_data_gen.batch_size) )\n", "print( 'predictions.shape: ', predictions.shape)\n", "\n", "\n", "if val_data_gen.class_mode == 'categorical':\n", " # converting softmax --> classes\n", " print (\"convering softmax --> classes\")\n", " predictions2 = [ np.argmax(p) for p in predictions]\n", "\n", "if val_data_gen.class_mode == 'binary':\n", " # converting sigmoid --> classes\n", " print (\"converting sigmod --> binary\")\n", " predictions2 = [0 if n < 0.5 else 1 for n in predictions]\n", "\n", "\n", "# ## Ensure all predictions match\n", "assert(len(predictions) == len(predictions2) == len(val_data_gen.classes) )" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "np.set_printoptions(formatter={'float': '{: 0.2f}'.format})\n", "\n", "print ('predictions : ' , predictions[:10])\n", "print ('prediction2: ' , predictions2[:10])" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from sklearn.metrics import confusion_matrix\n", "import seaborn as sns\n", "\n", "test_labels = val_data_gen.classes\n", "cm = confusion_matrix(test_labels, predictions2, labels = range(0, NUM_CLASSES))\n", "cm" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", "\n", "print (\"class index mapping : \", val_data_gen.class_indices)\n", "\n", "plt.figure(figsize = (8,6))\n", "\n", "# colormaps : cmap=\"YlGnBu\" , cmap=\"Greens\", cmap=\"Blues\", cmap=\"Reds\"\n", "sns.heatmap(cm, annot=True, cmap=\"Reds\", fmt='d').plot()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 
Cleanup \n", "Before running the next exercise, run the following cell to terminate processes and free up resources" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "## Kill any child processes (like tensorboard)\n", "\n", "import psutil\n", "import os, signal\n", "\n", "current_process = psutil.Process()\n", "children = current_process.children(recursive=True)\n", "for child in children:\n", " print('Killing Child pid {}'.format(child.pid))\n", " os.kill(child.pid, signal.SIGKILL)\n", " \n", "## This will kill actual kernel itself\n", "# os.kill(os.getpid(), signal.SIGKILL)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.11" } }, "nbformat": 4, "nbformat_minor": 4 }