{ "cells": [ { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "This is a companion notebook for the book [Deep Learning with Python, Second Edition](https://www.manning.com/books/deep-learning-with-python-second-edition?a_aid=keras&a_bid=76564dff). For readability, it only contains runnable code blocks and section titles, and omits everything else in the book: text paragraphs, figures, and pseudocode.\n\n**If you want to be able to follow what's going on, I recommend reading the notebook side by side with your copy of the book.**\n\nThis notebook was generated for TensorFlow 2.6." ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "# Working with Keras: A deep dive" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "## A spectrum of workflows" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "## Different ways to build Keras models" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "### The Sequential model" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "**The `Sequential` class**" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "from tensorflow import keras\n", "from tensorflow.keras import layers\n", "\n", "model = keras.Sequential([\n", " layers.Dense(64, activation=\"relu\"),\n", " layers.Dense(10, activation=\"softmax\")\n", "])" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "**Incrementally building a Sequential model**" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "model = keras.Sequential()\n", "model.add(layers.Dense(64, activation=\"relu\"))\n", "model.add(layers.Dense(10, activation=\"softmax\"))" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "**Calling a model for the first time to build it**" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "model.build(input_shape=(None, 3))\n", "model.weights" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "**The summary method**" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "model.summary()" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "**Naming models and layers with the `name` argument**" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "model = keras.Sequential(name=\"my_example_model\")\n", "model.add(layers.Dense(64, activation=\"relu\", name=\"my_first_layer\"))\n", "model.add(layers.Dense(10, activation=\"softmax\", name=\"my_last_layer\"))\n", "model.build((None, 3))\n", "model.summary()" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "**Specifying the input shape of your model in advance**" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "model = keras.Sequential()\n", "model.add(keras.Input(shape=(3,)))\n", "model.add(layers.Dense(64, activation=\"relu\"))" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "model.summary()" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "model.add(layers.Dense(10, activation=\"softmax\"))\n", "model.summary()" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "### The Functional API" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "#### A simple example" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "**A simple Functional model with two `Dense` layers**" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "inputs = keras.Input(shape=(3,), name=\"my_input\")\n", "features = layers.Dense(64, activation=\"relu\")(inputs)\n", "outputs = layers.Dense(10, activation=\"softmax\")(features)\n", "model = keras.Model(inputs=inputs, outputs=outputs)" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "inputs = keras.Input(shape=(3,), name=\"my_input\")" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "inputs.shape" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "inputs.dtype" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "features = layers.Dense(64, activation=\"relu\")(inputs)" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "features.shape" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "outputs = layers.Dense(10, activation=\"softmax\")(features)\n", "model = keras.Model(inputs=inputs, outputs=outputs)" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "model.summary()" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "#### Multi-input, multi-output models" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "**A multi-input, multi-output Functional model**" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "vocabulary_size = 10000\n", "num_tags = 100\n", "num_departments = 4\n", "\n", "title = keras.Input(shape=(vocabulary_size,), name=\"title\")\n", "text_body = keras.Input(shape=(vocabulary_size,), name=\"text_body\")\n", "tags = keras.Input(shape=(num_tags,), name=\"tags\")\n", "\n", "features = layers.Concatenate()([title, text_body, tags])\n", "features = layers.Dense(64, activation=\"relu\")(features)\n", "\n", "priority = layers.Dense(1, activation=\"sigmoid\", name=\"priority\")(features)\n", "department = layers.Dense(\n", " num_departments, activation=\"softmax\", name=\"department\")(features)\n", "\n", "model = keras.Model(inputs=[title, text_body, tags], outputs=[priority, department])" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "#### Training a multi-input, multi-output model" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "**Training a model by providing lists of input & target arrays**" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "import numpy as np\n", "\n", "num_samples = 1280\n", "\n", "title_data = np.random.randint(0, 2, size=(num_samples, vocabulary_size))\n", "text_body_data = np.random.randint(0, 2, size=(num_samples, vocabulary_size))\n", "tags_data = np.random.randint(0, 2, size=(num_samples, num_tags))\n", "\n", "priority_data = np.random.random(size=(num_samples, 1))\n", "department_data = np.random.randint(0, 2, size=(num_samples, num_departments))\n", "\n", "model.compile(optimizer=\"rmsprop\",\n", " loss=[\"mean_squared_error\", \"categorical_crossentropy\"],\n", " metrics=[[\"mean_absolute_error\"], [\"accuracy\"]])\n", "model.fit([title_data, text_body_data, tags_data],\n", " [priority_data, department_data],\n", " epochs=1)\n", "model.evaluate([title_data, text_body_data, tags_data],\n", " [priority_data, department_data])\n", "priority_preds, department_preds = model.predict([title_data, text_body_data, tags_data])" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "**Training a model by providing dicts of input & target arrays**" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "model.compile(optimizer=\"rmsprop\",\n", " loss={\"priority\": \"mean_squared_error\", \"department\": \"categorical_crossentropy\"},\n", " metrics={\"priority\": [\"mean_absolute_error\"], \"department\": [\"accuracy\"]})\n", "model.fit({\"title\": title_data, \"text_body\": text_body_data, \"tags\": tags_data},\n", " {\"priority\": priority_data, \"department\": department_data},\n", " epochs=1)\n", "model.evaluate({\"title\": title_data, \"text_body\": text_body_data, \"tags\": tags_data},\n", " {\"priority\": priority_data, \"department\": department_data})\n", "priority_preds, department_preds = model.predict(\n", " {\"title\": title_data, \"text_body\": text_body_data, \"tags\": tags_data})" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "#### The power of the Functional API: Access to layer connectivity" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "keras.utils.plot_model(model, \"ticket_classifier.png\")" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "keras.utils.plot_model(model, \"ticket_classifier_with_shape_info.png\", show_shapes=True)" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "**Retrieving the inputs or outputs of a layer in a Functional model**" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "model.layers" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "model.layers[3].input" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "model.layers[3].output" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "**Creating a new model by reusing intermediate layer outputs**" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "features = model.layers[4].output\n", "difficulty = layers.Dense(3, activation=\"softmax\", name=\"difficulty\")(features)\n", "\n", "new_model = keras.Model(\n", " inputs=[title, text_body, tags],\n", " outputs=[priority, department, difficulty])" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "keras.utils.plot_model(new_model, \"updated_ticket_classifier.png\", show_shapes=True)" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "### Subclassing the Model class" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "#### Rewriting our previous example as a subclassed model" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "**A simple subclassed model**" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "class CustomerTicketModel(keras.Model):\n", "\n", " def __init__(self, num_departments):\n", " super().__init__()\n", " self.concat_layer = layers.Concatenate()\n", " self.mixing_layer = layers.Dense(64, activation=\"relu\")\n", " self.priority_scorer = layers.Dense(1, activation=\"sigmoid\")\n", " self.department_classifier = layers.Dense(\n", " num_departments, activation=\"softmax\")\n", "\n", " def call(self, inputs):\n", " title = inputs[\"title\"]\n", " text_body = inputs[\"text_body\"]\n", " tags = inputs[\"tags\"]\n", "\n", " features = self.concat_layer([title, text_body, tags])\n", " features = self.mixing_layer(features)\n", " priority = self.priority_scorer(features)\n", " department = self.department_classifier(features)\n", " return priority, department" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "model = CustomerTicketModel(num_departments=4)\n", "\n", "priority, department = model(\n", " {\"title\": title_data, \"text_body\": text_body_data, \"tags\": tags_data})" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "model.compile(optimizer=\"rmsprop\",\n", " loss=[\"mean_squared_error\", \"categorical_crossentropy\"],\n", " metrics=[[\"mean_absolute_error\"], [\"accuracy\"]])\n", "model.fit({\"title\": title_data,\n", " \"text_body\": text_body_data,\n", " \"tags\": tags_data},\n", " [priority_data, department_data],\n", " epochs=1)\n", "model.evaluate({\"title\": title_data,\n", " \"text_body\": text_body_data,\n", " \"tags\": tags_data},\n", " [priority_data, department_data])\n", "priority_preds, department_preds = model.predict({\"title\": title_data,\n", " \"text_body\": text_body_data,\n", " \"tags\": tags_data})" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "#### Beware: What subclassed models don't support" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "### Mixing and matching different components" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "**Creating a Functional model that includes a subclassed model**" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "class Classifier(keras.Model):\n", "\n", " def __init__(self, num_classes=2):\n", " super().__init__()\n", " if num_classes == 2:\n", " num_units = 1\n", " activation = \"sigmoid\"\n", " else:\n", " num_units = num_classes\n", " activation = \"softmax\"\n", " self.dense = layers.Dense(num_units, activation=activation)\n", "\n", " def call(self, inputs):\n", " return self.dense(inputs)\n", "\n", "inputs = keras.Input(shape=(3,))\n", "features = layers.Dense(64, activation=\"relu\")(inputs)\n", "outputs = Classifier(num_classes=10)(features)\n", "model = keras.Model(inputs=inputs, outputs=outputs)" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "**Creating a subclassed model that includes a Functional model**" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "inputs = keras.Input(shape=(64,))\n", "outputs = layers.Dense(1, activation=\"sigmoid\")(inputs)\n", "binary_classifier = keras.Model(inputs=inputs, outputs=outputs)\n", "\n", "class MyModel(keras.Model):\n", "\n", " def __init__(self, num_classes=2):\n", " super().__init__()\n", " self.dense = layers.Dense(64, activation=\"relu\")\n", " self.classifier = binary_classifier\n", "\n", " def call(self, inputs):\n", " features = self.dense(inputs)\n", " return self.classifier(features)\n", "\n", "model = MyModel()" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "### Remember: Use the right tool for the job" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "## Using built-in training and evaluation loops" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "**The standard workflow: `compile()`, `fit()`, `evaluate()`, `predict()`**" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "from tensorflow.keras.datasets import mnist\n", "\n", "def get_mnist_model():\n", " inputs = keras.Input(shape=(28 * 28,))\n", " features = layers.Dense(512, activation=\"relu\")(inputs)\n", " features = layers.Dropout(0.5)(features)\n", " outputs = layers.Dense(10, activation=\"softmax\")(features)\n", " model = keras.Model(inputs, outputs)\n", " return model\n", "\n", "(images, labels), (test_images, test_labels) = mnist.load_data()\n", "images = images.reshape((60000, 28 * 28)).astype(\"float32\") / 255\n", "test_images = test_images.reshape((10000, 28 * 28)).astype(\"float32\") / 255\n", "train_images, val_images = images[10000:], images[:10000]\n", "train_labels, val_labels = labels[10000:], labels[:10000]\n", "\n", "model = get_mnist_model()\n", "model.compile(optimizer=\"rmsprop\",\n", " loss=\"sparse_categorical_crossentropy\",\n", " metrics=[\"accuracy\"])\n", "model.fit(train_images, train_labels,\n", " epochs=3,\n", " validation_data=(val_images, val_labels))\n", "test_metrics = model.evaluate(test_images, test_labels)\n", "predictions = model.predict(test_images)" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "### Writing your own metrics" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "**Implementing a custom metric by subclassing the `Metric` class**" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "import tensorflow as tf\n", "\n", "class RootMeanSquaredError(keras.metrics.Metric):\n", "\n", " def __init__(self, name=\"rmse\", **kwargs):\n", " super().__init__(name=name, **kwargs)\n", " self.mse_sum = self.add_weight(name=\"mse_sum\", initializer=\"zeros\")\n", " self.total_samples = self.add_weight(\n", " name=\"total_samples\", initializer=\"zeros\", dtype=\"int32\")\n", "\n", " def update_state(self, y_true, y_pred, sample_weight=None):\n", " y_true = tf.one_hot(y_true, depth=tf.shape(y_pred)[1])\n", " mse = tf.reduce_sum(tf.square(y_true - y_pred))\n", " self.mse_sum.assign_add(mse)\n", " num_samples = tf.shape(y_pred)[0]\n", " self.total_samples.assign_add(num_samples)\n", "\n", " def result(self):\n", " return tf.sqrt(self.mse_sum / tf.cast(self.total_samples, tf.float32))\n", "\n", " def reset_state(self):\n", " self.mse_sum.assign(0.)\n", " self.total_samples.assign(0)" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "model = get_mnist_model()\n", "model.compile(optimizer=\"rmsprop\",\n", " loss=\"sparse_categorical_crossentropy\",\n", " metrics=[\"accuracy\", RootMeanSquaredError()])\n", "model.fit(train_images, train_labels,\n", " epochs=3,\n", " validation_data=(val_images, val_labels))\n", "test_metrics = model.evaluate(test_images, test_labels)" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "### Using callbacks" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "#### The EarlyStopping and ModelCheckpoint callbacks" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "**Using the `callbacks` argument in the `fit()` method**" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "callbacks_list = [\n", " keras.callbacks.EarlyStopping(\n", " monitor=\"val_accuracy\",\n", " patience=2,\n", " ),\n", " keras.callbacks.ModelCheckpoint(\n", " filepath=\"checkpoint_path.keras\",\n", " monitor=\"val_loss\",\n", " save_best_only=True,\n", " )\n", "]\n", "model = get_mnist_model()\n", "model.compile(optimizer=\"rmsprop\",\n", " loss=\"sparse_categorical_crossentropy\",\n", " metrics=[\"accuracy\"])\n", "model.fit(train_images, train_labels,\n", " epochs=10,\n", " callbacks=callbacks_list,\n", " validation_data=(val_images, val_labels))" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "model = keras.models.load_model(\"checkpoint_path.keras\")" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "### Writing your own callbacks" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "**Creating a custom callback by subclassing the `Callback` class**" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "from matplotlib import pyplot as plt\n", "\n", "class LossHistory(keras.callbacks.Callback):\n", " def on_train_begin(self, logs):\n", " self.per_batch_losses = []\n", "\n", " def on_batch_end(self, batch, logs):\n", " self.per_batch_losses.append(logs.get(\"loss\"))\n", "\n", " def on_epoch_end(self, epoch, logs):\n", " plt.clf()\n", " plt.plot(range(len(self.per_batch_losses)), self.per_batch_losses,\n", " label=\"Training loss for each batch\")\n", " plt.xlabel(f\"Batch (epoch {epoch})\")\n", " plt.ylabel(\"Loss\")\n", " plt.legend()\n", " plt.savefig(f\"plot_at_epoch_{epoch}\")\n", " self.per_batch_losses = []" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "model = get_mnist_model()\n", "model.compile(optimizer=\"rmsprop\",\n", " loss=\"sparse_categorical_crossentropy\",\n", " metrics=[\"accuracy\"])\n", "model.fit(train_images, train_labels,\n", " epochs=10,\n", " callbacks=[LossHistory()],\n", " validation_data=(val_images, val_labels))" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "### Monitoring and visualization with TensorBoard" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "model = get_mnist_model()\n", "model.compile(optimizer=\"rmsprop\",\n", " loss=\"sparse_categorical_crossentropy\",\n", " metrics=[\"accuracy\"])\n", "\n", "tensorboard = keras.callbacks.TensorBoard(\n", " log_dir=\"/full_path_to_your_log_dir\",\n", ")\n", "model.fit(train_images, train_labels,\n", " epochs=10,\n", " validation_data=(val_images, val_labels),\n", " callbacks=[tensorboard])" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "%load_ext tensorboard\n", "%tensorboard --logdir /full_path_to_your_log_dir" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "## Writing your own training and evaluation loops" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "### Training versus inference" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "### Low-level usage of metrics" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "metric = keras.metrics.SparseCategoricalAccuracy()\n", "targets = [0, 1, 2]\n", "predictions = [[1, 0, 0], [0, 1, 0], [0, 0, 1]]\n", "metric.update_state(targets, predictions)\n", "current_result = metric.result()\n", "print(f\"result: {current_result:.2f}\")" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "values = [0, 1, 2, 3, 4]\n", "mean_tracker = keras.metrics.Mean()\n", "for value in values:\n", " mean_tracker.update_state(value)\n", "print(f\"Mean of values: {mean_tracker.result():.2f}\")" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "### A complete training and evaluation loop" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "**Writing a step-by-step training loop: the training step function**" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "model = get_mnist_model()\n", "\n", "loss_fn = keras.losses.SparseCategoricalCrossentropy()\n", "optimizer = keras.optimizers.RMSprop()\n", "metrics = [keras.metrics.SparseCategoricalAccuracy()]\n", "loss_tracking_metric = keras.metrics.Mean()\n", "\n", "def train_step(inputs, targets):\n", " with tf.GradientTape() as tape:\n", " predictions = model(inputs, training=True)\n", " loss = loss_fn(targets, predictions)\n", " gradients = tape.gradient(loss, model.trainable_weights)\n", " optimizer.apply_gradients(zip(gradients, model.trainable_weights))\n", "\n", " logs = {}\n", " for metric in metrics:\n", " metric.update_state(targets, predictions)\n", " logs[metric.name] = metric.result()\n", "\n", " loss_tracking_metric.update_state(loss)\n", " logs[\"loss\"] = loss_tracking_metric.result()\n", " return logs" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "**Writing a step-by-step training loop: resetting the metrics**" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "def reset_metrics():\n", " for metric in metrics:\n", " metric.reset_state()\n", " loss_tracking_metric.reset_state()" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "**Writing a step-by-step training loop: the loop itself**" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "training_dataset = tf.data.Dataset.from_tensor_slices((train_images, train_labels))\n", "training_dataset = training_dataset.batch(32)\n", "epochs = 3\n", "for epoch in range(epochs):\n", " reset_metrics()\n", " for inputs_batch, targets_batch in training_dataset:\n", " logs = train_step(inputs_batch, targets_batch)\n", " print(f\"Results at the end of epoch {epoch}\")\n", " for key, value in logs.items():\n", " print(f\"...{key}: {value:.4f}\")" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "**Writing a step-by-step evaluation loop**" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "def test_step(inputs, targets):\n", " predictions = model(inputs, training=False)\n", " loss = loss_fn(targets, predictions)\n", "\n", " logs = {}\n", " for metric in metrics:\n", " metric.update_state(targets, predictions)\n", " logs[\"val_\" + metric.name] = metric.result()\n", "\n", " loss_tracking_metric.update_state(loss)\n", " logs[\"val_loss\"] = loss_tracking_metric.result()\n", " return logs\n", "\n", "val_dataset = tf.data.Dataset.from_tensor_slices((val_images, val_labels))\n", "val_dataset = val_dataset.batch(32)\n", "reset_metrics()\n", "for inputs_batch, targets_batch in val_dataset:\n", " logs = test_step(inputs_batch, targets_batch)\n", "print(\"Evaluation results:\")\n", "for key, value in logs.items():\n", " print(f\"...{key}: {value:.4f}\")" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "### Make it fast with tf.function" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "**Adding a `tf.function` decorator to our evaluation-step function**" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "@tf.function\n", "def test_step(inputs, targets):\n", " predictions = model(inputs, training=False)\n", " loss = loss_fn(targets, predictions)\n", "\n", " logs = {}\n", " for metric in metrics:\n", " metric.update_state(targets, predictions)\n", " logs[\"val_\" + metric.name] = metric.result()\n", "\n", " loss_tracking_metric.update_state(loss)\n", " logs[\"val_loss\"] = loss_tracking_metric.result()\n", " return logs\n", "\n", "val_dataset = tf.data.Dataset.from_tensor_slices((val_images, val_labels))\n", "val_dataset = val_dataset.batch(32)\n", "reset_metrics()\n", "for inputs_batch, targets_batch in val_dataset:\n", " logs = test_step(inputs_batch, targets_batch)\n", "print(\"Evaluation results:\")\n", "for key, value in logs.items():\n", " print(f\"...{key}: {value:.4f}\")" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "### Leveraging fit() with a custom training loop" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "**Implementing a custom training step to use with `fit()`**" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "loss_fn = keras.losses.SparseCategoricalCrossentropy()\n", "loss_tracker = keras.metrics.Mean(name=\"loss\")\n", "\n", "class CustomModel(keras.Model):\n", " def train_step(self, data):\n", " inputs, targets = data\n", " with tf.GradientTape() as tape:\n", " predictions = self(inputs, training=True)\n", " loss = loss_fn(targets, predictions)\n", " gradients = tape.gradient(loss, self.trainable_weights)\n", " self.optimizer.apply_gradients(zip(gradients, self.trainable_weights))\n", "\n", " loss_tracker.update_state(loss)\n", " return {\"loss\": loss_tracker.result()}\n", "\n", " @property\n", " def metrics(self):\n", " return [loss_tracker]" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "inputs = keras.Input(shape=(28 * 28,))\n", "features = layers.Dense(512, activation=\"relu\")(inputs)\n", "features = layers.Dropout(0.5)(features)\n", "outputs = layers.Dense(10, activation=\"softmax\")(features)\n", "model = CustomModel(inputs, outputs)\n", "\n", "model.compile(optimizer=keras.optimizers.RMSprop())\n", "model.fit(train_images, train_labels, epochs=3)" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "class CustomModel(keras.Model):\n", " def train_step(self, data):\n", " inputs, targets = data\n", " with tf.GradientTape() as tape:\n", " predictions = self(inputs, training=True)\n", " loss = self.compiled_loss(targets, predictions)\n", " gradients = tape.gradient(loss, self.trainable_weights)\n", " self.optimizer.apply_gradients(zip(gradients, self.trainable_weights))\n", " self.compiled_metrics.update_state(targets, predictions)\n", " return {m.name: m.result() for m in self.metrics}" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "inputs = keras.Input(shape=(28 * 28,))\n", "features = layers.Dense(512, activation=\"relu\")(inputs)\n", "features = layers.Dropout(0.5)(features)\n", "outputs = layers.Dense(10, activation=\"softmax\")(features)\n", "model = CustomModel(inputs, outputs)\n", "\n", "model.compile(optimizer=keras.optimizers.RMSprop(),\n", " loss=keras.losses.SparseCategoricalCrossentropy(),\n", " metrics=[keras.metrics.SparseCategoricalAccuracy()])\n", "model.fit(train_images, train_labels, epochs=3)" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "## Summary" ] } ], "metadata": { "colab": { "collapsed_sections": [], "name": "chapter07_working-with-keras.i", "private_outputs": false, "provenance": [], "toc_visible": true }, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.0" } }, "nbformat": 4, "nbformat_minor": 0 }