{ "cells": [ { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "This is a companion notebook for the book [Deep Learning with Python, Third Edition](https://www.manning.com/books/deep-learning-with-python-third-edition). For readability, it only contains runnable code blocks and section titles, and omits everything else in the book: text paragraphs, figures, and pseudocode.\n\n**If you want to be able to follow what's going on, I recommend reading the notebook side by side with your copy of the book.**\n\nThe book's contents are available online at [deeplearningwithpython.io](https://deeplearningwithpython.io)." ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "!pip install keras keras-hub --upgrade -q" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "import os\n", "os.environ[\"KERAS_BACKEND\"] = \"tensorflow\"" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "cellView": "form", "colab_type": "code" }, "outputs": [], "source": [ "# @title\n", "import os\n", "from IPython.core.magic import register_cell_magic\n", "\n", "@register_cell_magic\n", "def backend(line, cell):\n", " current, required = os.environ.get(\"KERAS_BACKEND\", \"\"), line.split()[-1]\n", " if current == required:\n", " get_ipython().run_cell(cell)\n", " else:\n", " print(\n", " f\"This cell requires the {required} backend. To run it, change KERAS_BACKEND to \"\n", " f\"\\\"{required}\\\" at the top of the notebook, restart the runtime, and rerun the notebook.\"\n", " )" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "## The mathematical building blocks of neural networks" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "### A first look at a neural network" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "from keras.datasets import mnist\n", "\n", "(train_images, train_labels), (test_images, test_labels) = mnist.load_data()" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "train_images.shape" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "len(train_labels)" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "train_labels" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "test_images.shape" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "len(test_labels)" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "test_labels" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "import keras\n", "from keras import layers\n", "\n", "model = keras.Sequential(\n", " [\n", " layers.Dense(512, activation=\"relu\"),\n", " layers.Dense(10, activation=\"softmax\"),\n", " ]\n", ")" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "model.compile(\n", " optimizer=\"adam\",\n", " loss=\"sparse_categorical_crossentropy\",\n", " metrics=[\"accuracy\"],\n", ")" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "train_images = train_images.reshape((60000, 28 * 28))\n", "train_images = train_images.astype(\"float32\") / 255\n", "test_images = test_images.reshape((10000, 28 * 28))\n", "test_images = test_images.astype(\"float32\") / 255" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "model.fit(train_images, train_labels, epochs=5, batch_size=128)" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "test_digits = test_images[0:10]\n", "predictions = model.predict(test_digits)\n", "predictions[0]" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "predictions[0].argmax()" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "predictions[0][7]" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "test_labels[0]" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "test_loss, test_acc = model.evaluate(test_images, test_labels)\n", "print(f\"test_acc: {test_acc}\")" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "### Data representations for neural networks" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "#### Scalars (rank-0 tensors)" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "import numpy as np\n", "x = np.array(12)\n", "x" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "x.ndim" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "#### Vectors (rank-1 tensors)" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "x = np.array([12, 3, 6, 14, 7])\n", "x" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "x.ndim" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "#### Matrices (rank-2 tensors)" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "x = np.array([[5, 78, 2, 34, 0],\n", " [6, 79, 3, 35, 1],\n", " [7, 80, 4, 36, 2]])\n", "x.ndim" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "#### Rank-3 tensors and higher-rank tensors" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "x = np.array([[[5, 78, 2, 34, 0],\n", " [6, 79, 3, 35, 1],\n", " [7, 80, 4, 36, 2]],\n", " [[5, 78, 2, 34, 0],\n", " [6, 79, 3, 35, 1],\n", " [7, 80, 4, 36, 2]],\n", " [[5, 78, 2, 34, 0],\n", " [6, 79, 3, 35, 1],\n", " [7, 80, 4, 36, 2]]])\n", "x.ndim" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "#### Key attributes" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "from keras.datasets import mnist\n", "\n", "(train_images, train_labels), (test_images, test_labels) = mnist.load_data()" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "train_images.ndim" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "train_images.shape" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "train_images.dtype" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "import matplotlib.pyplot as plt\n", "\n", "digit = train_images[4]\n", "plt.imshow(digit, cmap=plt.cm.binary)\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "train_labels[4]" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "#### Manipulating tensors in NumPy" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "my_slice = train_images[10:100]\n", "my_slice.shape" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "my_slice = train_images[10:100, :, :]\n", "my_slice.shape" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "my_slice = train_images[10:100, 0:28, 0:28]\n", "my_slice.shape" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "my_slice = train_images[:, 14:, 14:]" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "my_slice = train_images[:, 7:-7, 7:-7]" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "#### The notion of data batches" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "batch = train_images[:128]" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "batch = train_images[128:256]" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "n = 3\n", "batch = train_images[128 * n : 128 * (n + 1)]" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "#### Real-world examples of data tensors" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "##### Vector data" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "##### Timeseries data or sequence data" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "##### Image data" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "##### Video data" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "### The gears of neural networks: Tensor operations" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "#### Element-wise operations" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "def naive_relu(x):\n", " assert len(x.shape) == 2\n", " x = x.copy()\n", " for i in range(x.shape[0]):\n", " for j in range(x.shape[1]):\n", " x[i, j] = max(x[i, j], 0)\n", " return x" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "def naive_add(x, y):\n", " assert len(x.shape) == 2\n", " assert x.shape == y.shape\n", " x = x.copy()\n", " for i in range(x.shape[0]):\n", " for j in range(x.shape[1]):\n", " x[i, j] += y[i, j]\n", " return x" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "import time\n", "\n", "x = np.random.random((20, 100))\n", "y = np.random.random((20, 100))\n", "\n", "t0 = time.time()\n", "for _ in range(1000):\n", " z = x + y\n", " z = np.maximum(z, 0.0)\n", "print(\"Took: {0:.2f} s\".format(time.time() - t0))" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "t0 = time.time()\n", "for _ in range(1000):\n", " z = naive_add(x, y)\n", " z = naive_relu(z)\n", "print(\"Took: {0:.2f} s\".format(time.time() - t0))" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "#### Broadcasting" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "import numpy as np\n", "\n", "X = np.random.random((32, 10))\n", "y = np.random.random((10,))" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "y = np.expand_dims(y, axis=0)" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "Y = np.tile(y, (32, 1))" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "def naive_add_matrix_and_vector(x, y):\n", " assert len(x.shape) == 2\n", " assert len(y.shape) == 1\n", " assert x.shape[1] == y.shape[0]\n", " x = x.copy()\n", " for i in range(x.shape[0]):\n", " for j in range(x.shape[1]):\n", " x[i, j] += y[j]\n", " return x" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "import numpy as np\n", "\n", "x = np.random.random((64, 3, 32, 10))\n", "y = np.random.random((32, 10))\n", "z = np.maximum(x, y)" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "#### Tensor product" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "x = np.random.random((32,))\n", "y = np.random.random((32,))\n", "\n", "z = np.matmul(x, y)\n", "z = x @ y" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "def naive_vector_product(x, y):\n", " assert len(x.shape) == 1\n", " assert len(y.shape) == 1\n", " assert x.shape[0] == y.shape[0]\n", " z = 0.0\n", " for i in range(x.shape[0]):\n", " z += x[i] * y[i]\n", " return z" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "def naive_matrix_vector_product(x, y):\n", " assert len(x.shape) == 2\n", " assert len(y.shape) == 1\n", " assert x.shape[1] == y.shape[0]\n", " z = np.zeros(x.shape[0])\n", " for i in range(x.shape[0]):\n", " for j in range(x.shape[1]):\n", " z[i] += x[i, j] * y[j]\n", " return z" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "def naive_matrix_vector_product(x, y):\n", " z = np.zeros(x.shape[0])\n", " for i in range(x.shape[0]):\n", " z[i] = naive_vector_product(x[i, :], y)\n", " return z" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "def naive_matrix_product(x, y):\n", " assert len(x.shape) == 2\n", " assert len(y.shape) == 2\n", " assert x.shape[1] == y.shape[0]\n", " z = np.zeros((x.shape[0], y.shape[1]))\n", " for i in range(x.shape[0]):\n", " for j in range(y.shape[1]):\n", " row_x = x[i, :]\n", " column_y = y[:, j]\n", " z[i, j] = naive_vector_product(row_x, column_y)\n", " return z" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "#### Tensor reshaping" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "train_images = train_images.reshape((60000, 28 * 28))" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "x = np.array([[0., 1.],\n", " [2., 3.],\n", " [4., 5.]])\n", "x.shape" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "x = x.reshape((6, 1))\n", "x" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "x = x.reshape((2, 3))\n", "x" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "x = np.zeros((300, 20))\n", "x = np.transpose(x)\n", "x.shape" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "#### Geometric interpretation of tensor operations" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "#### A geometric interpretation of deep learning" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "### The engine of neural networks: Gradient-based optimization" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "#### What's a derivative?" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "#### Derivative of a tensor operation: The gradient" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "#### Stochastic gradient descent" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "#### Chaining derivatives: The Backpropagation algorithm" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "##### The chain rule" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "##### Automatic differentiation with computation graphs" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "### Looking back at our first example" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "(train_images, train_labels), (test_images, test_labels) = mnist.load_data()\n", "train_images = train_images.reshape((60000, 28 * 28))\n", "train_images = train_images.astype(\"float32\") / 255\n", "test_images = test_images.reshape((10000, 28 * 28))\n", "test_images = test_images.astype(\"float32\") / 255" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "model = keras.Sequential(\n", " [\n", " layers.Dense(512, activation=\"relu\"),\n", " layers.Dense(10, activation=\"softmax\"),\n", " ]\n", ")" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "model.compile(\n", " optimizer=\"adam\",\n", " loss=\"sparse_categorical_crossentropy\",\n", " metrics=[\"accuracy\"],\n", ")" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "model.fit(\n", " train_images,\n", " train_labels,\n", " epochs=5,\n", " batch_size=128,\n", ")" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "#### Reimplementing our first example from scratch" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "##### A simple Dense class" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "import keras\n", "from keras import ops\n", "\n", "class NaiveDense:\n", " def __init__(self, input_size, output_size, activation=None):\n", " self.activation = activation\n", " self.W = keras.Variable(\n", " shape=(input_size, output_size), initializer=\"uniform\"\n", " )\n", " self.b = keras.Variable(shape=(output_size,), initializer=\"zeros\")\n", "\n", " def __call__(self, inputs):\n", " x = ops.matmul(inputs, self.W)\n", " x = x + self.b\n", " if self.activation is not None:\n", " x = self.activation(x)\n", " return x\n", "\n", " @property\n", " def weights(self):\n", " return [self.W, self.b]" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "##### A simple Sequential class" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "class NaiveSequential:\n", " def __init__(self, layers):\n", " self.layers = layers\n", "\n", " def __call__(self, inputs):\n", " x = inputs\n", " for layer in self.layers:\n", " x = layer(x)\n", " return x\n", "\n", " @property\n", " def weights(self):\n", " weights = []\n", " for layer in self.layers:\n", " weights += layer.weights\n", " return weights" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "model = NaiveSequential(\n", " [\n", " NaiveDense(input_size=28 * 28, output_size=512, activation=ops.relu),\n", " NaiveDense(input_size=512, output_size=10, activation=ops.softmax),\n", " ]\n", ")\n", "assert len(model.weights) == 4" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "##### A batch generator" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "import math\n", "\n", "class BatchGenerator:\n", " def __init__(self, images, labels, batch_size=128):\n", " assert len(images) == len(labels)\n", " self.index = 0\n", " self.images = images\n", " self.labels = labels\n", " self.batch_size = batch_size\n", " self.num_batches = math.ceil(len(images) / batch_size)\n", "\n", " def next(self):\n", " images = self.images[self.index : self.index + self.batch_size]\n", " labels = self.labels[self.index : self.index + self.batch_size]\n", " self.index += self.batch_size\n", " return images, labels" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "#### Running one training step" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "##### The weight update step" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "learning_rate = 1e-3\n", "\n", "def update_weights(gradients, weights):\n", " for g, w in zip(gradients, weights):\n", " w.assign(w - g * learning_rate)" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "from keras import optimizers\n", "\n", "optimizer = optimizers.SGD(learning_rate=1e-3)\n", "\n", "def update_weights(gradients, weights):\n", " optimizer.apply_gradients(zip(gradients, weights))" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "##### Gradient computation" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "%%backend tensorflow\n", "import tensorflow as tf\n", "\n", "x = tf.zeros(shape=())\n", "with tf.GradientTape() as tape:\n", " y = 2 * x + 3\n", "grad_of_y_wrt_x = tape.gradient(y, x)" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "%%backend tensorflow\n", "def one_training_step(model, images_batch, labels_batch):\n", " with tf.GradientTape() as tape:\n", " predictions = model(images_batch)\n", " loss = ops.sparse_categorical_crossentropy(labels_batch, predictions)\n", " average_loss = ops.mean(loss)\n", " gradients = tape.gradient(average_loss, model.weights)\n", " update_weights(gradients, model.weights)\n", " return average_loss" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "#### The full training loop" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "%%backend tensorflow\n", "def fit(model, images, labels, epochs, batch_size=128):\n", " for epoch_counter in range(epochs):\n", " print(f\"Epoch {epoch_counter}\")\n", " batch_generator = BatchGenerator(images, labels)\n", " for batch_counter in range(batch_generator.num_batches):\n", " images_batch, labels_batch = batch_generator.next()\n", " loss = one_training_step(model, images_batch, labels_batch)\n", " if batch_counter % 100 == 0:\n", " print(f\"loss at batch {batch_counter}: {loss:.2f}\")" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "%%backend tensorflow\n", "from keras.datasets import mnist\n", "\n", "(train_images, train_labels), (test_images, test_labels) = mnist.load_data()\n", "\n", "train_images = train_images.reshape((60000, 28 * 28))\n", "train_images = train_images.astype(\"float32\") / 255\n", "test_images = test_images.reshape((10000, 28 * 28))\n", "test_images = test_images.astype(\"float32\") / 255\n", "\n", "fit(model, train_images, train_labels, epochs=10, batch_size=128)" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "#### Evaluating the model" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "%%backend tensorflow\n", "predictions = model(test_images)\n", "predicted_labels = ops.argmax(predictions, axis=1)\n", "matches = predicted_labels == test_labels\n", "f\"accuracy: {ops.mean(matches):.2f}\"" ] } ], "metadata": { "accelerator": "GPU", "colab": { "collapsed_sections": [], "name": "chapter02_mathematical-building-blocks", "private_outputs": false, "provenance": [], "toc_visible": true }, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.0" } }, "nbformat": 4, "nbformat_minor": 0 }