{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
 "# import all the relevant libraries\n",
 "\n",
 "import numpy as np\n",
 "import matplotlib.pyplot as plt\n",
 "\n",
 "import tensorflow as tf\n",
 "import h5py\n",
 "\n",
 "import keras\n",
 "from keras.datasets import cifar10\n",
 "from tensorflow.keras.models import Sequential\n",
 "from tensorflow.keras.layers import Dense, Conv2D, Flatten, MaxPooling2D, Input, InputLayer, Dropout\n",
 "import keras.layers.merge as merge\n",
 "from keras.layers.merge import Concatenate\n",
 "from tensorflow.keras.utils import to_categorical\n",
 "from tensorflow.keras.optimizers import SGD, Adam\n",
 "\n",
 "%matplotlib inline"
] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
 "# check for CPU and GPU for your session\n",
 "\n",
 "print(tf.config.list_physical_devices('GPU'))\n",
 "\n",
 "# check for CPU here\n"
] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
 "# load data and check the train-test split shape and size\n",
 "\n",
 "(x_train, y_train), (x_valid, y_valid) = cifar10.load_data()\n",
 "print('Train: X=%s, y=%s' % (x_train.shape, y_train.shape))\n",
 "print('Test: X=%s, y=%s' % (x_valid.shape, y_valid.shape))\n",
 "print('number of classes= %s' % len(set(y_train.flatten())))\n",
 "print(type(x_train))"
] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
 "# specify classes from the cifar10 dataset\n",
 "\n",
 "nb_classes = 10\n",
 "class_names = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']\n",
 "\n",
 "# the label list and the class count are used together below, so keep them in sync\n",
 "assert len(class_names) == nb_classes"
] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
 "# plotting some examples from training dataset\n",
 "\n",
 "n_rows, n_cols = 2, 7\n",
 "plt.figure(figsize=(8, 8))\n",
 "for i in range(n_rows * n_cols):\n",
 "    # define subplot\n",
 "    plt.subplot(n_rows, n_cols, i + 1)\n",
 "    plt.imshow(x_train[i])\n",
 "    # y_train has shape (N, 1) and already stores the integer class id,\n",
 "    # so no one-hot round trip is needed to look up the name\n",
 "    class_index = int(y_train[i][0])\n",
 "    plt.title(class_names[class_index], fontsize=9)"
] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
 "# convert data to HDF5 format\n",
 "\n",
 "# Shape and dtype of every dataset are inferred from the array itself, so this\n",
 "# cell keeps working when the number of samples changes.\n",
 "arrays = {'x_train': x_train, 'y_train': y_train, 'x_valid': x_valid, 'y_valid': y_valid}\n",
 "\n",
 "with h5py.File('dataset_cifar10.hdf5', 'w') as hf:\n",
 "    for name, array in arrays.items():\n",
 "        hf.create_dataset(name, data=array, compression='gzip', chunks=True)"
] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
 "# Define the model\n",
 "\n",
 "model = tf.keras.Sequential()\n",
 "model.add(InputLayer(input_shape=[32, 32, 3]))\n",
 "\n",
 "# four conv/pool stages that differ only in the number of filters\n",
 "for filters in (32, 64, 128, 256):\n",
 "    model.add(Conv2D(filters=filters, kernel_size=3, padding='same', activation='relu'))\n",
 "    model.add(MaxPooling2D(pool_size=[2, 2], strides=[2, 2], padding='same'))\n",
 "\n",
 "model.add(Flatten())\n",
 "\n",
 "# fully connected head, each dense layer followed by dropout\n",
 "for units in (256, 512):\n",
 "    model.add(Dense(units, activation='relu'))\n",
 "    model.add(Dropout(0.2))\n",
 "\n",
 "# one softmax output per class\n",
 "model.add(Dense(nb_classes, activation='softmax'))\n",
 "\n",
 "model.summary()"
] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs":
[], "source": [
 "# Define the data generator\n",
 "\n",
 "class DataGenerator(tf.keras.utils.Sequence):\n",
 "    \"\"\"Keras Sequence that serves image/label batches straight from an HDF5 file.\n",
 "\n",
 "    Images are scaled to float32 in [0, 1] and labels are one-hot encoded\n",
 "    with 10 classes.\n",
 "\n",
 "    Args:\n",
 "        filename: path of the HDF5 file; it must hold the datasets\n",
 "            'x_train'/'y_train' and 'x_valid'/'y_valid'.\n",
 "        batch_size: number of samples per batch (the last batch may be smaller).\n",
 "        test: if True the validation split is served, otherwise the training split.\n",
 "        shuffle: if True the samples are drawn in a new random order every epoch.\n",
 "            Random batches need scattered reads, which are slower than the\n",
 "            contiguous slices used when shuffle is False.\n",
 "    \"\"\"\n",
 "\n",
 "    def __init__(self, filename, batch_size, test=False, shuffle=True):\n",
 "        super().__init__()\n",
 "        self.PATH_TO_FILE = filename\n",
 "        self.hf = h5py.File(self.PATH_TO_FILE, 'r')\n",
 "        self.batch_size = batch_size\n",
 "        self.test = test\n",
 "        self.shuffle = shuffle\n",
 "        # names of the two datasets that belong to the requested split\n",
 "        split = 'valid' if test else 'train'\n",
 "        self.x_key = 'x_' + split\n",
 "        self.y_key = 'y_' + split\n",
 "        self.on_epoch_end()\n",
 "\n",
 "    def __del__(self):\n",
 "        # __init__ may have failed before the file was opened\n",
 "        hf = getattr(self, 'hf', None)\n",
 "        if hf is not None:\n",
 "            hf.close()\n",
 "\n",
 "    def __len__(self):\n",
 "        \"\"\"Number of batches per epoch.\"\"\"\n",
 "        return int(np.ceil(len(self.indices) / self.batch_size))\n",
 "\n",
 "    def __getitem__(self, idx):\n",
 "        \"\"\"Return batch number idx as (images, one-hot labels).\"\"\"\n",
 "        start = self.batch_size * idx\n",
 "        stop = min(self.batch_size * (idx + 1), len(self.indices))\n",
 "\n",
 "        if self.shuffle:\n",
 "            # Use the shuffled sample ids so the shuffle actually takes effect.\n",
 "            # h5py only accepts index lists in increasing order, hence the sort;\n",
 "            # the order inside a batch does not matter for training.\n",
 "            rows = np.sort(self.indices[start:stop]).tolist()\n",
 "        else:\n",
 "            # contiguous slice: cheapest possible read\n",
 "            rows = slice(start, stop)\n",
 "\n",
 "        batch_x = np.asarray(self.hf[self.x_key][rows]).astype('float32') / 255.0\n",
 "        batch_y = to_categorical(np.asarray(self.hf[self.y_key][rows]), 10)\n",
 "\n",
 "        return batch_x, batch_y\n",
 "\n",
 "    def on_epoch_end(self):\n",
 "        \"\"\"Rebuild (and optionally reshuffle) the sample order for the next epoch.\"\"\"\n",
 "        # .shape comes from the dataset metadata, so no image data is loaded\n",
 "        # just to count the samples\n",
 "        self.indices = np.arange(self.hf[self.x_key].shape[0])\n",
 "\n",
 "        if self.shuffle:\n",
 "            np.random.shuffle(self.indices)"
] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
 "# generate batches of data for training and validation dataset\n",
 "\n",
 "filename = \"dataset_cifar10.hdf5\"\n",
 "batchsize = 250\n",
 "data_train = DataGenerator(filename, batch_size=batchsize, test=False)\n",
 "data_valid = DataGenerator(filename, batch_size=batchsize, test=True, shuffle=False)"
] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
 "# defining optimizer for the model\n",
 "model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])"
] }, {
"cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
 "# first, let's train the model using CPU\n",
 "\n",
 "with tf.device('/device:CPU:0'):\n",
 "    history = model.fit(data_train, epochs=10,\n",
 "                        verbose=1, validation_data=data_valid)"
] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
 "# now, lets try with GPU to compare its performance with CPU\n",
 "\n",
 "from tensorflow.keras.models import clone_model\n",
 "# clone_model copies the architecture only: new_model starts from freshly initialised weights\n",
 "new_model = clone_model(model)\n",
 "# use the tf.keras Adam imported at the top so that optimizer and model come from the same Keras implementation\n",
 "opt = Adam(learning_rate=0.001)\n",
 "new_model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])\n",
 "\n",
 "\n",
 "## train the new_model using GPU here:\n"
] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
 "# plotting the losses and accuracy for training and validation set\n",
 "\n",
 "fig, axes = plt.subplots(1, 2, figsize=[16, 6])\n",
 "axes[0].plot(history.history['loss'], label='train_loss')\n",
 "axes[0].plot(history.history['val_loss'], label='val_loss')\n",
 "axes[0].set_title('Loss')\n",
 "axes[0].set_xlabel('epoch')\n",
 "axes[0].legend()\n",
 "axes[0].grid()\n",
 "axes[1].plot(history.history['accuracy'], label='train_acc')\n",
 "axes[1].plot(history.history['val_accuracy'], label='val_acc')\n",
 "axes[1].set_title('Accuracy')\n",
 "axes[1].set_xlabel('epoch')\n",
 "axes[1].legend()\n",
 "axes[1].grid()"
] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
 "# evaluate on the full validation set, preprocessed the same way as in DataGenerator\n",
 "# NOTE: new_model only gives meaningful scores once it has been trained in the GPU exercise above\n",
 "x = x_valid.astype('float32') / 255.0\n",
 "y = to_categorical(y_valid, 10)\n",
 "score = new_model.evaluate(x, y, verbose=0)\n",
 "print('Test cross-entropy loss: %0.5f' % score[0])\n",
 "print('Test accuracy: %0.2f' % score[1])"
] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
 "# Sequential.predict_classes is not available in TensorFlow >= 2.6;\n",
 "# the arg-max over the softmax outputs yields the same class ids\n",
 "y_pred = np.argmax(new_model.predict(x), axis=-1)"
] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
 "# show the first 24 validation images with their true (y) and predicted (p) class\n",
 "\n",
 "plt.figure(figsize=(8, 8))\n",
 "for i in range(24):\n",
 "    plt.subplot(4, 6, i + 1)\n",
 "    plt.imshow(x[i].reshape(32, 32, 3))\n",
 "    index1 = np.argmax(y[i])\n",
 "    plt.title(\"y: %s\\np: %s\" % (class_names[index1], class_names[y_pred[i]]), fontsize=9, loc='left')\n",
 "# spacing belongs to the whole figure, so it is set once after all subplots exist\n",
 "plt.subplots_adjust(wspace=0.5, hspace=0.4)"
] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "# Staging and File System Choice" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "##### Run this cell if you're not in your home directory" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "%cd ~" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "#### Challenge 1: Locate Lustre and NFS File System Scratch On MANA" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "#### Challenge 2: List Usage Information" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "#### Challenge 3: Simulating Read/Write Load" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "#### Challenge 4: Timing Read/Write Operations" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "##### Time Lustre Read" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "##### Time Lustre Write" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "##### Time NFS Read (from home directory)" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] },
{ "cell_type": "markdown", "metadata": {}, "source": [
"##### Time NFS Write (from home directory)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Challenge 5: Stage Training Files" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Challenge 6: Update Data Generator File Path " ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Challenge 7: Rerun The Model With The Updated Data Generator" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "tf2", "language": "python", "name": "tf-gpu" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.7" } }, "nbformat": 4, "nbformat_minor": 4 }