# How to simply use keras
* Reference
 + https://www.tensorflow.org/guide/keras

## Setup

In [1]:
from __future__ import absolute_import, division, print_function
import numpy as np
import tensorflow as tf

# Short alias so the rest of the notebook can write `keras.` instead of `tf.keras.`.
keras = tf.keras

# Show the TensorFlow version and the version of the Keras API bundled with it.
print(tf.__version__)
print(keras.__version__)

1.12.0
2.1.6-tf


## Build a simple model
https://www.tensorflow.org/guide/keras#build_a_simple_model
### Sequential model
In Keras, you assemble layers to build models. A model is (usually) a graph of layers. The most common type of model is a stack of layers: the `tf.keras.Sequential` model.

In [2]:
## A simple, fully-connected network (i.e. multi-layer perceptron)
# No input shape is specified here, so this is the delayed-build pattern:
# the model has no weights until its first call to a training/evaluation
# method, because it is not built yet.
# (Had an input shape been specified, the model would be built continuously
# as layers are added.)
model = keras.Sequential([
    keras.layers.Dense(units=64, activation='relu'),
    keras.layers.Dense(units=64, activation='relu'),
    keras.layers.Dense(units=10, activation='softmax'),
])

In [3]:
# Delayed-build check: no input shape was specified, so the model has not been
# built yet. Print the global-variable collection and then the default graph's
# operations to see what exists so far.
for listing in (tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES),
                tf.get_default_graph().get_operations()):
    print(listing)
del model

[]
[]


### Configure the layers
There are many tf.keras.layers available with some common constructor parameters:

* `activation`: Set the activation function for the layer. This parameter is specified by the name of a built-in function or as a callable object. By default, no activation is applied.
* `kernel_initializer` and `bias_initializer`: The initialization schemes that create the layer's weights (kernel and bias). This parameter is a name or a callable object. This defaults to the `"Glorot uniform"` initializer.
* `kernel_regularizer` and `bias_regularizer`: The regularization schemes that apply to the layer's weights (kernel and bias), such as L1 or L2 regularization. By default, no regularization is applied.
 
The following instantiates `tf.keras.layers.Dense` layers using constructor arguments:

In [4]:
# Begin with a clean Keras session and an empty default graph.
keras.backend.clear_session()
tf.reset_default_graph()

# Sigmoid layer, activation given by name ...
keras.layers.Dense(64, activation='sigmoid')
# ... or as a callable:
keras.layers.Dense(64, activation=tf.sigmoid)

# Linear layer whose kernel matrix gets L1 regularization with factor 0.01:
keras.layers.Dense(64, kernel_regularizer=keras.regularizers.l1(0.01))

# Linear layer whose bias vector gets L2 regularization with factor 0.01:
keras.layers.Dense(64, bias_regularizer=keras.regularizers.l2(0.01))

# Linear layer whose kernel is initialized to a random orthogonal matrix:
keras.layers.Dense(64, kernel_initializer='orthogonal')

# Linear layer whose bias vector is initialized to 2.0s
# (as the last expression, this layer object is what the cell displays):
keras.layers.Dense(64, bias_initializer=keras.initializers.constant(2.0))

<tensorflow.python.keras.layers.core.Dense at 0x7ff1e70874a8>

In [5]:
# Print the global-variable collection, then the default graph's operations,
# after the layers above were only constructed and never called.
for listing in (tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES),
                tf.get_default_graph().get_operations()):
    print(listing)

[]
[]


## Train and evaluate
https://www.tensorflow.org/guide/keras?hl=ko#train_and_evaluate

### Set up training
After the model is constructed, configure its learning process by calling the `compile` method. `tf.keras.Model.compile` (equivalently, `tf.keras.Sequential.compile`) takes three important arguments:
 
* `optimizer`: This object specifies the training procedure. Pass it optimizer instances from the `tf.train` module, such as `tf.train.AdamOptimizer`, `tf.train.RMSPropOptimizer`, or `tf.train.GradientDescentOptimizer`.
* `loss`: The function to minimize during optimization. Common choices include mean square error (`mse`), `categorical_crossentropy`, and `binary_crossentropy`. Loss functions are specified by name or by passing a callable object from the `tf.keras.losses` module.
* `metrics`: Used to monitor training. These are string names or callables from the `tf.keras.metrics` module.

In [6]:
# Begin with a clean Keras session and an empty default graph.
keras.backend.clear_session()
tf.reset_default_graph()

# The first layer declares `input_shape`, unlike the delayed-build example.
model = keras.Sequential([
    keras.layers.Dense(units=64, activation='relu', input_shape=(32,)),
    keras.layers.Dense(units=64, activation='relu'),
    keras.layers.Dense(units=10, activation='softmax'),
])

# Show the variables and graph operations that exist at this point.
print(tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES))
print(tf.get_default_graph().get_operations())

[<tf.Variable 'dense/kernel:0' shape=(32, 64) dtype=float32>, <tf.Variable 'dense/bias:0' shape=(64,) dtype=float32>, <tf.Variable 'dense_1/kernel:0' shape=(64, 64) dtype=float32>, <tf.Variable 'dense_1/bias:0' shape=(64,) dtype=float32>, <tf.Variable 'dense_2/kernel:0' shape=(64, 10) dtype=float32>, <tf.Variable 'dense_2/bias:0' shape=(10,) dtype=float32>]
[<tf.Operation 'dense_input' type=Placeholder>, <tf.Operation 'dense/kernel/Initializer/random_uniform/shape' type=Const>, <tf.Operation 'dense/kernel/Initializer/random_uniform/min' type=Const>, <tf.Operation 'dense/kernel/Initializer/random_uniform/max' type=Const>, <tf.Operation 'dense/kernel/Initializer/random_uniform/RandomUniform' type=RandomUniform>, <tf.Operation 'dense/kernel/Initializer/random_uniform/sub' type=Sub>, <tf.Operation 'dense/kernel/Initializer/random_uniform/mul' type=Mul>, <tf.Operation 'dense/kernel/Initializer/random_uniform' type=Add>, <tf.Operation 'dense/kernel' type=VarHandleOp>, <tf.Operation 'dense/ke

In [7]:
# Compile: Adam optimizer (learning rate 0.001), categorical cross-entropy
# loss, accuracy as the monitored metric.
adam = tf.train.AdamOptimizer(learning_rate=0.001)
model.compile(optimizer=adam,
              loss='categorical_crossentropy',
              metrics=['accuracy'])

The following shows a few examples of configuring a model for training:

In [8]:
# Regression setup: minimize mean squared error, monitor mean absolute error.
regression_optimizer = tf.train.AdamOptimizer(learning_rate=0.01)
model.compile(optimizer=regression_optimizer,
              loss='mse',        # mean squared error
              metrics=['mae'])   # mean absolute error

# Categorical classification setup: loss and metric passed as callables
# instead of string names.
classification_optimizer = tf.train.RMSPropOptimizer(learning_rate=0.01)
model.compile(optimizer=classification_optimizer,
              loss=keras.losses.categorical_crossentropy,
              metrics=[keras.metrics.categorical_accuracy])

del model

### Input NumPy data

In [9]:
# Reset the Keras session and the default graph before building the next model.
keras.backend.clear_session()
tf.reset_default_graph()

# Random NumPy datasets: float32 features with 32 columns and int32 class
# labels drawn from 0..9, for training (1000 rows), validation (100 rows)
# and test (100 rows).
tr_data = np.random.random(size=(1000, 32)).astype('float32')
tr_label = np.random.randint(0, 10, size=1000).astype('int32')

val_data = np.random.random(size=(100, 32)).astype('float32')
val_label = np.random.randint(0, 10, size=100).astype('int32')

tst_data = np.random.random(size=(100, 32)).astype('float32')
tst_label = np.random.randint(0, 10, size=100).astype('int32')

print(tr_data.dtype, tr_label.dtype)

float32 int32


In [10]:
# Create a model: two 64-unit ReLU layers followed by a 10-way softmax.
# No input shape is given.
model = keras.Sequential([
    keras.layers.Dense(units=64, activation='relu'),
    keras.layers.Dense(units=64, activation='relu'),
    keras.layers.Dense(units=10, activation='softmax'),
])

In [11]:
# Labels are integer class ids, hence the *sparse* categorical cross-entropy.
sgd = tf.train.GradientDescentOptimizer(learning_rate=.01)
model.compile(optimizer=sgd,
              loss=keras.losses.sparse_categorical_crossentropy,
              metrics=['accuracy'])

# Train on the NumPy arrays; the History object returned by `fit` is the
# cell's displayed value.
model.fit(tr_data, tr_label,
          batch_size=32,
          epochs=5,
          validation_data=(val_data, val_label))

Train on 1000 samples, validate on 100 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x7ff1e94e6128>

In [12]:
# Evaluate and predict on the held-out NumPy test set
print(model.metrics_names)

tst_scores = model.evaluate(x=tst_data, y=tst_label)
print(tst_scores)

tst_pred = model.predict(x=tst_data)
print(tst_pred.shape)

del model

['loss', 'acc']
[2.2931285667419434, 0.1]
(100, 10)


### Input tf.data datasets
Pass a `tf.data.Dataset` instance to the `fit`, `evaluate`, or `predict` methods.

* Known issue in TF 1.12 (resolved in tf-nightly):
  when a `tf.data.Dataset` instance is passed to the `fit` method of a model created with `tf.keras.Sequential`, `tf.keras.Model`,
  or a subclass of `tf.keras.Model`, setting the `metrics` argument to `'accuracy'` in `model.compile` raises a `TypeError`.

In [13]:
# Very important: reset the Keras session and the default graph first.
keras.backend.clear_session()
tf.reset_default_graph()

# Print the default graph's operations, then the global-variable collection.
for listing in (tf.get_default_graph().get_operations(),
                tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)):
    print(listing)

[]
[]


In [14]:
# tf.data.Dataset instances built from random NumPy arrays

# Training: 1000 samples, batches of 32, repeated.
tr_data = np.random.random(size=(1000, 32)).astype('float32')
tr_label = np.random.randint(0, 10, size=1000).astype('int32')
tr_dataset = (tf.data.Dataset.from_tensor_slices((tr_data, tr_label))
              .batch(batch_size=32)
              .repeat())

# Validation: 100 samples served as one batch of 100, repeated.
val_data = np.random.random(size=(100, 32)).astype('float32')
val_label = np.random.randint(0, 10, size=100).astype('int32')
val_dataset = tf.data.Dataset.from_tensor_slices((val_data, val_label))
val_dataset = val_dataset.batch(batch_size=100)
val_dataset = val_dataset.repeat()

# Test: 100 samples served as one batch of 100 (not repeated).
tst_data = np.random.random(size=(100, 32)).astype('float32')
tst_label = np.random.randint(0, 10, size=100).astype('int32')
tst_dataset = (tf.data.Dataset.from_tensor_slices((tst_data, tst_label))
               .batch(batch_size=100))

print(tr_dataset.output_types)

(tf.float32, tf.int32)


In [15]:
# Training on a tf.data.Dataset
model = keras.Sequential([
    keras.layers.Dense(units=64, activation='relu'),
    keras.layers.Dense(units=64, activation='relu'),
    keras.layers.Dense(units=10, activation='softmax'),
])
sgd = tf.train.GradientDescentOptimizer(learning_rate=.01)
model.compile(optimizer=sgd,
              loss=keras.losses.sparse_categorical_crossentropy)

# Step counts are passed explicitly: 1000 // 32 training steps per epoch
# and 1 validation step.
model.fit(tr_dataset,
          epochs=5,
          steps_per_epoch=1000 // 32,
          validation_data=val_dataset,
          validation_steps=1)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x7ff1e4109d30>

In [16]:
# Evaluate and predict on the test dataset (one step each)
print(model.metrics_names)

tst_loss = model.evaluate(tst_dataset, steps=1)
print(tst_loss)

tst_pred = model.predict(tst_dataset, steps=1)
print(tst_pred.shape)

del model

['loss']
2.3561947345733643
(32, 10)


## Build advanced models
https://www.tensorflow.org/guide/keras?hl=ko#build_advanced_models

### Functional API
The `tf.keras.Sequential` model is a simple stack of layers that cannot represent arbitrary models. Use the Keras functional API to build complex model topologies such as:

* Multi-input models,
* Multi-output models,
* Models with shared layers (the same layer called several times),
* Models with non-sequential data flows (e.g. residual connections). 

Building a model with the functional API works like this: 

1. A layer instance is callable and returns a tensor.
2. Input tensors and output tensors are used to define a `tf.keras.Model` instance.
3. This model is trained just like the `Sequential` model. 

The following example uses the functional API to build a simple, fully-connected network:

In [17]:
# Clear: reset the Keras session and the default graph before the functional-API example.
keras.backend.clear_session()
tf.reset_default_graph()

In [18]:
# Random training data served as a repeating pipeline of 32-sample batches.
data = np.random.random(size=(1000, 32)).astype('float32')
label = np.random.randint(0, 10, size=1000).astype('int32')
dataset = tf.data.Dataset.from_tensor_slices((data, label))
dataset = dataset.batch(batch_size=32)
dataset = dataset.repeat()
print(dataset.output_types)

inputs = keras.Input(shape=(32,))  # Returns a placeholder tensor
print(inputs, type(inputs))

# Calling a layer instance on a tensor returns a new tensor.
x = keras.layers.Dense(64, activation='relu')(inputs)
x = keras.layers.Dense(64, activation='relu')(x)
predictions = keras.layers.Dense(10, activation='softmax')(x)

# The model is defined by its input and output tensors.
model = keras.Model(inputs=inputs, outputs=predictions)

print(tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES))
print(tf.get_default_graph().get_operations())

# The compile step specifies the training configuration.
rmsprop = tf.train.RMSPropOptimizer(learning_rate=.001)
model.compile(optimizer=rmsprop,
              loss=keras.losses.sparse_categorical_crossentropy)

# Train for 5 epochs of 1000 // 32 steps each.
model.fit(dataset, epochs=5, steps_per_epoch=1000 // 32)

del model

(tf.float32, tf.int32)
Tensor("input_1:0", shape=(?, 32), dtype=float32) <class 'tensorflow.python.framework.ops.Tensor'>
[<tf.Variable 'dense/kernel:0' shape=(32, 64) dtype=float32>, <tf.Variable 'dense/bias:0' shape=(64,) dtype=float32>, <tf.Variable 'dense_1/kernel:0' shape=(64, 64) dtype=float32>, <tf.Variable 'dense_1/bias:0' shape=(64,) dtype=float32>, <tf.Variable 'dense_2/kernel:0' shape=(64, 10) dtype=float32>, <tf.Variable 'dense_2/bias:0' shape=(10,) dtype=float32>]
[<tf.Operation 'tensors/component_0' type=Const>, <tf.Operation 'tensors/component_1' type=Const>, <tf.Operation 'batch_size' type=Const>, <tf.Operation 'drop_remainder' type=Const>, <tf.Operation 'count' type=Const>, <tf.Operation 'input_1' type=Placeholder>, <tf.Operation 'dense/kernel/Initializer/random_uniform/shape' type=Const>, <tf.Operation 'dense/kernel/Initializer/random_uniform/min' type=Const>, <tf.Operation 'dense/kernel/Initializer/random_uniform/max' type=Const>, <tf.Operation 'dense/kernel/Initiali

### Model subclassing
Build a fully-customizable model by subclassing `tf.keras.Model` and defining your own forward pass. Create layers in the `__init__` method and set them as attributes of the class instance. Define the forward pass in the `call` method.

Model subclassing is particularly useful when eager execution is enabled since the forward pass can be written imperatively.

In [19]:
# Clear: reset the Keras session and the default graph.
keras.backend.clear_session()
tf.reset_default_graph()

# Print the default graph's operations, then the global-variable collection.
for listing in (tf.get_default_graph().get_operations(),
                tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)):
    print(listing)

[]
[]


In [20]:
# Subclassing tf.keras.Model
class MLP(keras.Model):
    """Perceptron with one ReLU hidden layer and a softmax output layer.

    Args:
        hidden_dim: number of units in the hidden Dense layer.
        num_classes: number of units in the softmax output Dense layer.
    """

    def __init__(self, hidden_dim, num_classes):
        super(MLP, self).__init__()
        # Layers are created once here and kept as attributes of the model.
        self.hidden_layer = keras.layers.Dense(hidden_dim, activation='relu')
        self.output_layer = keras.layers.Dense(num_classes, activation='softmax')

    def call(self, inputs):
        """Forward pass: apply the hidden layer, then the output layer."""
        return self.output_layer(self.hidden_layer(inputs))
 
# Instantiate the MLP class: 100 hidden units, 10 output classes
mlp = MLP(hidden_dim=100, num_classes=10)

# The compile step specifies the training configuration:
# RMSProp (learning rate 0.001) with sparse categorical cross-entropy.
rmsprop = tf.train.RMSPropOptimizer(learning_rate=.001)
mlp.compile(optimizer=rmsprop,
            loss=keras.losses.sparse_categorical_crossentropy)

In [21]:
# tf.data.Dataset instances built from random NumPy arrays

# Training: 1000 samples, batches of 32, repeated.
tr_data = np.random.random(size=(1000, 32)).astype('float32')
tr_label = np.random.randint(0, 10, size=1000).astype('int32')
tr_dataset = (tf.data.Dataset.from_tensor_slices((tr_data, tr_label))
              .batch(batch_size=32)
              .repeat())

# Validation: 100 samples served as one batch of 100, repeated.
val_data = np.random.random(size=(100, 32)).astype('float32')
val_label = np.random.randint(0, 10, size=100).astype('int32')
val_dataset = tf.data.Dataset.from_tensor_slices((val_data, val_label))
val_dataset = val_dataset.batch(batch_size=100)
val_dataset = val_dataset.repeat()

# Test: 100 samples served as one batch of 100 (not repeated).
tst_data = np.random.random(size=(100, 32)).astype('float32')
tst_label = np.random.randint(0, 10, size=100).astype('int32')
tst_dataset = (tf.data.Dataset.from_tensor_slices((tst_data, tst_label))
               .batch(batch_size=100))

print(tr_dataset.output_types)

(tf.float32, tf.int32)


In [22]:
# Train the subclassed model for 5 epochs from the tf.data pipelines
mlp.fit(tr_dataset,
        epochs=5,
        steps_per_epoch=1000 // 32,
        validation_data=val_dataset,
        validation_steps=1)

del mlp

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


### Custom layers
Reading https://www.tensorflow.org/guide/keras?hl=ko#custom_layers

## Callbacks
https://www.tensorflow.org/guide/keras?hl=ko#callbacks 

A callback is an object passed to a model to customize and extend its behavior during training. You can write your own custom callback, or use the built-in `tf.keras.callbacks` that include:

* `tf.keras.callbacks.ModelCheckpoint`: Save checkpoints of your model at regular intervals.
* `tf.keras.callbacks.LearningRateScheduler`: Dynamically change the learning rate.
* `tf.keras.callbacks.EarlyStopping`: Interrupt training when validation performance has stopped improving.
* `tf.keras.callbacks.TensorBoard`: Monitor the model's behavior using TensorBoard. 

To use a `tf.keras.callbacks.Callback`, pass it to the model's `fit` method:

In [23]:
# Clear: reset the Keras session and the default graph.
keras.backend.clear_session()
tf.reset_default_graph()

# Print the default graph's operations, then the global-variable collection.
for listing in (tf.get_default_graph().get_operations(),
                tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)):
    print(listing)

[]
[]


In [24]:
# tf.data.Dataset instances built from random NumPy arrays

# Training: 1000 samples, batches of 32, repeated.
tr_data = np.random.random(size=(1000, 32)).astype('float32')
tr_label = np.random.randint(0, 10, size=1000).astype('int32')
tr_dataset = (tf.data.Dataset.from_tensor_slices((tr_data, tr_label))
              .batch(batch_size=32)
              .repeat())

# Validation: 100 samples served as one batch of 100, repeated.
val_data = np.random.random(size=(100, 32)).astype('float32')
val_label = np.random.randint(0, 10, size=100).astype('int32')
val_dataset = tf.data.Dataset.from_tensor_slices((val_data, val_label))
val_dataset = val_dataset.batch(batch_size=100)
val_dataset = val_dataset.repeat()

# Test: 100 samples served as one batch of 100 (not repeated).
tst_data = np.random.random(size=(100, 32)).astype('float32')
tst_label = np.random.randint(0, 10, size=100).astype('int32')
tst_dataset = (tf.data.Dataset.from_tensor_slices((tst_data, tst_label))
               .batch(batch_size=100))

print(tr_dataset.output_types)

(tf.float32, tf.int32)


In [25]:
# Create the callback objects that will hook into the training loop
callbacks = [
    # Interrupt training if `val_loss` stops improving for over 2 epochs
    keras.callbacks.EarlyStopping(patience=2, monitor='val_loss'),
    # Write TensorBoard logs to `./logs` directory
    keras.callbacks.TensorBoard(log_dir='./logs')
]

# Build and compile the model. `compile` only configures the optimizer and
# loss; it is not the place to register callbacks.
model = keras.Sequential()
model.add(keras.layers.Dense(units=64, activation='relu'))
model.add(keras.layers.Dense(units=64, activation='relu'))
model.add(keras.layers.Dense(units=10, activation='softmax'))
model.compile(optimizer=tf.train.GradientDescentOptimizer(.01),
              loss=keras.losses.sparse_categorical_crossentropy)

# Training: callbacks must be passed to `fit` to take effect. Validation data
# is supplied so that the `val_loss` monitored by EarlyStopping exists.
model.fit(tr_dataset, epochs=5, steps_per_epoch=1000 // 32,
          validation_data=val_dataset, validation_steps=1,
          callbacks=callbacks)

del model

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


## Save and restore
https://www.tensorflow.org/guide/keras?hl=ko#save_and_restore

### Weights only
Save and load the weights of a model using `tf.keras.Model.save_weights`:

In [26]:
# Clear: reset the Keras session and the default graph.
keras.backend.clear_session()
tf.reset_default_graph()

# Print the default graph's operations, then the global-variable collection.
for listing in (tf.get_default_graph().get_operations(),
                tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)):
    print(listing)

[]
[]


In [27]:
# Subclassing tf.keras.Model
class MLP(keras.Model):
    """Perceptron with one ReLU hidden layer and a softmax output layer.

    Args:
        hidden_dim: number of units in the hidden Dense layer.
        num_classes: number of units in the softmax output Dense layer.
    """

    def __init__(self, hidden_dim, num_classes):
        super(MLP, self).__init__()
        # Layers are created once here and kept as attributes of the model.
        self.hidden_layer = keras.layers.Dense(hidden_dim, activation='relu')
        self.output_layer = keras.layers.Dense(num_classes, activation='softmax')

    def call(self, inputs):
        """Forward pass: apply the hidden layer, then the output layer."""
        return self.output_layer(self.hidden_layer(inputs))
 
# Instantiate the MLP class: 100 hidden units, 10 output classes
mlp = MLP(hidden_dim=100, num_classes=10)

# The compile step specifies the training configuration:
# plain gradient descent (learning rate 0.001) with sparse categorical cross-entropy.
sgd = tf.train.GradientDescentOptimizer(learning_rate=.001)
mlp.compile(optimizer=sgd,
            loss=keras.losses.sparse_categorical_crossentropy)

In [28]:
# tf.data.Dataset instances for the save/restore experiment

# Training: 1000 random samples, batches of 100, repeated.
tr_data = np.random.random(size=(1000, 32)).astype('float32')
tr_label = np.random.randint(0, 10, size=1000).astype('int32')
tr_dataset = (tf.data.Dataset.from_tensor_slices((tr_data, tr_label))
              .batch(batch_size=100)
              .repeat())

# Validation: 100 random samples served as one batch of 100, repeated.
val_data = np.random.random(size=(100, 32)).astype('float32')
val_label = np.random.randint(0, 10, size=100).astype('int32')
val_dataset = tf.data.Dataset.from_tensor_slices((val_data, val_label))
val_dataset = val_dataset.batch(batch_size=100)
val_dataset = val_dataset.repeat()

# Test: all-ones features and labels rather than random draws, so the exact
# same arrays can be rebuilt later with `np.ones`.
tst_data = np.ones(shape=(100, 32), dtype='float32')
tst_label = np.ones(shape=(100,), dtype='int32')
tst_dataset = (tf.data.Dataset.from_tensor_slices((tst_data, tst_label))
               .batch(batch_size=100))

print(tr_dataset.output_types)
print(tst_dataset.output_types)

(tf.float32, tf.int32)
(tf.float32, tf.int32)


In [29]:
# Train for 5 epochs directly from the NumPy arrays
mlp.fit(tr_data, tr_label,
        batch_size=100,
        epochs=5,
        validation_data=(val_data, val_label))
# Alternative (disabled): train from the tf.data pipelines instead
# mlp.fit(tr_dataset, epochs=5, steps_per_epoch=1000//100,
#         validation_data=val_dataset, validation_steps=1)

# Save the trained weights, then record the predicted classes and the test
# loss from the model that produced them.
mlp.save_weights('../graphs/lecture05/keras/mlp')
tst_probs = mlp.predict(x=tst_data)
y_before = np.argmax(tst_probs, axis=-1)
print(mlp.evaluate(x=tst_data, y=tst_label))
# with keras.backend.get_session() as sess:
#     before = sess.run(mlp.variables)
del mlp

Train on 1000 samples, validate on 100 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
1.9046856212615966


In [30]:
# Clear: reset the Keras session and the default graph.
keras.backend.clear_session()
tf.reset_default_graph()

# Print the default graph's operations, then the global-variable collection.
for listing in (tf.get_default_graph().get_operations(),
                tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)):
    print(listing)

[]
[]


In [31]:
# Restore
## Instantiate a fresh MLP and compile it with gradient descent
## (learning rate 0.001) and sparse categorical cross-entropy
tst_model = MLP(hidden_dim=100, num_classes=10)
sgd = tf.train.GradientDescentOptimizer(learning_rate=.001)
tst_model.compile(optimizer=sgd,
                  loss=keras.losses.sparse_categorical_crossentropy)

In [32]:
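# Optional (left disabled): build the model for inputs of shape (None, 32) before loading the weights.
# tst_model.build(input_shape=tf.TensorShape(([None,32])))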

In [33]:
# Load the saved weights into the fresh model; the status object returned by
# `load_weights` is shown as the cell's output.
load_status = tst_model.load_weights('../graphs/lecture05/keras/mlp')
load_status

<tensorflow.python.training.checkpointable.util.CheckpointLoadStatus at 0x7ff19c15fc18>

In [34]:
# Rebuild the all-ones test set (100 samples, 32 features, label 1) and
# serve it as one batch of 100.
tst_data = np.ones(shape=(100, 32), dtype='float32')
tst_label = np.ones(shape=(100,), dtype='int32')
tst_dataset = (tf.data.Dataset.from_tensor_slices((tst_data, tst_label))
               .batch(batch_size=100))

In [35]:
# Predicted classes and test loss from the restored model
tst_probs_restored = tst_model.predict(tst_dataset, steps=1)
y_after = np.argmax(tst_probs_restored, axis=-1)

tst_loss_restored = tst_model.evaluate(tst_dataset, steps=1)
print(tst_loss_restored)

1.904685616493225


In [36]:
# Fraction of test samples whose predicted class is identical before saving
# and after restoring (1.0 means every prediction matches)
(y_before == y_after).mean()

1.0

### Configuration only
Reading https://www.tensorflow.org/guide/keras?hl=ko#configuration_only

### Entire model
Reading https://www.tensorflow.org/guide/keras?hl=ko#entire_model

## Eager execution
Reading https://www.tensorflow.org/guide/keras?hl=ko#eager_execution

## Distribution
Reading https://www.tensorflow.org/guide/keras?hl=ko#distribution