# Distributed Training for MNIST

In [1]:
!pip3 install cloudmesh-installer
!pip3 install cloudmesh-common

Collecting cloudmesh-installer
 Downloading https://files.pythonhosted.org/packages/fa/e8/77c2ea195c97469133aca9931f7600f3622a75274ad53f71a0bb681761d4/cloudmesh_installer-4.4.24-py2.py3-none-any.whl
Collecting python-hostlist
 Downloading https://files.pythonhosted.org/packages/2b/4f/f31dd4b4bf1a57a5c29599e1165d0df70dbdddcfa59a7c1d04ee2ff4ccbd/python-hostlist-1.21.tar.gz
Collecting ordered-set
 Downloading https://files.pythonhosted.org/packages/f5/ab/8252360bfe965bba31ec05112b3067bd129ce4800d89e0b85613bc6044f6/ordered-set-4.0.2.tar.gz
Collecting oyaml
 Downloading https://files.pythonhosted.org/packages/37/aa/111610d8bf5b1bb7a295a048fc648cec346347a8b0be5881defd2d1b4a52/oyaml-1.0-py2.py3-none-any.whl
Collecting flake8
[?25l Downloading https://files.pythonhosted.org/packages/d4/ca/3971802ee6251da1abead1a22831d7f4743781e2f743bd266bdd2f46c19b/flake8-3.8.4-py2.py3-none-any.whl (72kB)
[K |████████████████████████████████| 81kB 5.6MB/s 
[?25hCollecting bump2version==1.0.0
 Downloading ht

In [2]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, SimpleRNN, InputLayer, LSTM, Dropout
from tensorflow.keras.utils import to_categorical, plot_model
from tensorflow.keras.datasets import mnist
from cloudmesh.common.StopWatch import StopWatch

## Data Pre-Process

In [3]:
# Fetch the MNIST train/test split through the Keras dataset helper (timed).
StopWatch.start("data-load")
(x_train, y_train), (x_test, y_test) = mnist.load_data()
StopWatch.stop("data-load")


StopWatch.start("data-pre-process")
# Number of distinct classes, derived from the integer training labels.
num_labels = len(np.unique(y_train))


# One-hot encode the labels. num_classes is passed explicitly so that the train
# and test label matrices always have the same width as the final
# Dense(num_labels) layer, instead of each being inferred from its own maximum
# label value.
y_train = to_categorical(y_train, num_classes=num_labels)
y_test = to_categorical(y_test, num_classes=num_labels)


# The images are treated as square: the length of axis 1 is used for both
# spatial dimensions.
image_size = x_train.shape[1]
x_train = np.reshape(x_train, [-1, image_size, image_size])
x_test = np.reshape(x_test, [-1, image_size, image_size])
# Convert to float32 and divide by 255 (presumably 8-bit pixel values, which
# this maps into [0, 1]).
x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255
StopWatch.stop("data-pre-process")

# Hyperparameters shared by the model-definition and training cells.
# Each image is passed to the first LSTM layer with shape
# (image_size, image_size).
input_shape = (image_size, image_size)
batch_size = 128
units = 256      # width of every LSTM and hidden Dense layer
dropout = 0.2    # dropout rate used in the LSTM and Dropout layers

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


## Define Model

Here we use the TensorFlow distributed training components to train the model on multiple CPUs or GPUs. Colab instances do not support multiple GPUs, so on Colab the training must be run with the device type set to 'None' when selecting the runtime type from the Runtime menu. No code change is required to run on multiple GPUs. [Learn more about distributed training](https://www.tensorflow.org/guide/distributed_training).

In [10]:
# The "compile" timer covers building the network, printing/plotting it, and
# compiling it.
StopWatch.start("compile")
strategy = tf.distribute.MirroredStrategy()
# The model is created and compiled inside the strategy scope so that it is
# set up under the distribution strategy.
with strategy.scope():
    model = Sequential([
        # LSTM Layers: the first two return the full sequence so the next LSTM
        # can consume it; the last returns only its final output.
        LSTM(units=units, input_shape=input_shape, return_sequences=True),
        LSTM(units=units, dropout=dropout, return_sequences=True),
        LSTM(units=units, dropout=dropout, return_sequences=False),
        # MLP Layers
        Dense(units),
        Activation('relu'),
        Dropout(dropout),
        Dense(units),
        Activation('relu'),
        Dropout(dropout),
        # Softmax_layer: one output per class
        Dense(num_labels),
        Activation('softmax'),
    ])
    model.summary()
    plot_model(model, to_file='rnn-mnist.png', show_shapes=True)

    print("Number of devices: {}".format(strategy.num_replicas_in_sync))

    # categorical_crossentropy matches the one-hot encoded labels.
    model.compile(
        loss='categorical_crossentropy',
        optimizer='sgd',
        metrics=['accuracy'],
    )
StopWatch.stop("compile")

Model: "sequential_2"
_________________________________________________________________
Layer (type) Output Shape Param # 
lstm_6 (LSTM) (None, 28, 256) 291840 
_________________________________________________________________
lstm_7 (LSTM) (None, 28, 256) 525312 
_________________________________________________________________
lstm_8 (LSTM) (None, 256) 525312 
_________________________________________________________________
dense_6 (Dense) (None, 256) 65792 
_________________________________________________________________
activation_6 (Activation) (None, 256) 0 
_________________________________________________________________
dropout_4 (Dropout) (None, 256) 0 
_________________________________________________________________
dense_7 (Dense) (None, 256) 65792 
_________________________________________________________________
activation_7 (Activation) (None, 256) 0 
_________________________________________________________________
dropout_5 (Dropout) (None, 256) 0 
_________________

## Train

In [12]:
# Time the whole training run under the "train" StopWatch label.
StopWatch.start("train")
model.fit(
    x=x_train,
    y=y_train,
    batch_size=batch_size,
    epochs=30,
)
StopWatch.stop("train")

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


## Test

In [13]:
StopWatch.start("evaluate")
# The model was compiled with metrics=['accuracy'], so evaluate() yields the
# loss followed by the accuracy.
loss, acc = model.evaluate(
    x=x_test,
    y=y_test,
    batch_size=batch_size,
)
print("\nTest accuracy: %.1f%%" % (100.0 * acc))
StopWatch.stop("evaluate")

# Print the StopWatch benchmark report.
StopWatch.benchmark()


Test accuracy: 97.2%

+---------------------+------------------------------------------------------------------+
| Attribute | Value |
|---------------------+------------------------------------------------------------------|
| BUG_REPORT_URL | "https://bugs.launchpad.net/ubuntu/" |
| DISTRIB_CODENAME | bionic |
| DISTRIB_DESCRIPTION | "Ubuntu 18.04.5 LTS" |
| DISTRIB_ID | Ubuntu |
| DISTRIB_RELEASE | 18.04 |
| HOME_URL | "https://www.ubuntu.com/" |
| ID | ubuntu |
| ID_LIKE | debian |
| NAME | "Ubuntu" |
| PRETTY_NAME | "Ubuntu 18.04.5 LTS" |
| PRIVACY_POLICY_URL | "https://www.ubuntu.com/legal/terms-and-policies/privacy-policy" |
| SUPPORT_URL | "https://help.ubuntu.com/" |
| UBUNTU_CODENAME | bionic |
| VERSION | "18.04.5 LTS (Bionic Beaver)" |
| VERSION_CODENAME | bionic |
| VERSION_ID | "18.04" |
| cpu_count | 2 |
| mem.active | 2.4 GiB |
| mem.available | 10.3 GiB |
| mem.free | 4.5 GiB |
| mem.inactive | 5.4 GiB |
| mem.percent | 18.6 % |
| mem.total | 12.7 GiB |
| mem.used | 3

# References

1. [Advanced Deep Learning with Keras](https://github.com/PacktPublishing/Advanced-Deep-Learning-with-Keras)
2. [Distributed Training with TensorFlow](https://www.tensorflow.org/guide/distributed_training)
3. [Keras with Tensorflow Distributed Training](https://keras.io/guides/distributed_training/)