{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Installing packages:\n", "\t.package(path: \"/home/ubuntu/fastai_docs/dev_swift/FastaiNotebook_02_fully_connected\")\n", "\t\tFastaiNotebook_02_fully_connected\n", "With SwiftPM flags: []\n", "Working in: /tmp/tmp7mqq1mmn\n", "Fetching https://github.com/mxcl/Path.swift\n", "Fetching https://github.com/JustHTTP/Just\n", "Completed resolution in 1.83s\n", "Cloning https://github.com/mxcl/Path.swift\n", "Resolving https://github.com/mxcl/Path.swift at 0.16.2\n", "Cloning https://github.com/JustHTTP/Just\n", "Resolving https://github.com/JustHTTP/Just at 0.7.1\n", "Compile Swift Module 'Path' (9 sources)\n", "Compile Swift Module 'Just' (1 sources)\n", "Compile Swift Module 'FastaiNotebook_02_fully_connected' (4 sources)\n", "Compile Swift Module 'jupyterInstalledPackages' (1 sources)\n", "Linking ./.build/x86_64-unknown-linux/debug/libjupyterInstalledPackages.so\n", "Initializing Swift...\n", "Loading library...\n", "Installation complete!\n" ] } ], "source": [ "%install '.package(path: \"$cwd/FastaiNotebook_02_fully_connected\")' FastaiNotebook_02_fully_connected" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import FastaiNotebook_02_fully_connected" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "// export\n", "import Foundation\n", "import TensorFlow\n", "import Path" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Does nn.Conv2d init work well?" 
] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "var (xTrain, yTrain, xValid, yValid) = loadMNIST(path: Path.home/\".fastai\"/\"data\"/\"mnist_tst\")\n", "// Normalize both splits with the statistics of the training inputs.\n", "let (trainMean, trainStd) = (xTrain.mean(), xTrain.standardDeviation())\n", "xTrain = normalize(xTrain, mean: trainMean, std: trainStd)\n", "xValid = normalize(xValid, mean: trainMean, std: trainStd)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "TensorShape(dimensions: [60000, 28, 28, 1]) TensorShape(dimensions: [10000, 28, 28, 1])\r\n" ] } ], "source": [ "// Reshape each split to [count, 28, 28, 1] so it can be fed to the convolution below.\n", "xTrain = xTrain.reshaped(to: [xTrain.shape[0], 28, 28, 1])\n", "xValid = xValid.reshaped(to: [xValid.shape[0], 28, 28, 1])\n", "print(xTrain.shape, xValid.shape)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "let images = xTrain.shape[0]\n", "// Class count comes from the y tensor (presumably the labels), not from the normalized inputs.\n", "let classes = yTrain.max() + 1\n", "let channels = 32" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "var layer1 = FAConv2D<Float>(filterShape: (5, 5, 1, channels)) //Conv2D(1, nh, 5)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "let x = xValid[0..<100]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "▿ TensorShape\n", " ▿ dimensions : 4 elements\n", " - 0 : 100\n", " - 1 : 28\n", " - 2 : 28\n", " - 3 : 1\n" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "x.shape" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "extension Tensor where Scalar: TensorFlowFloatingPoint {\n", "    /// Returns this tensor's `mean()` and `standardDeviation()` as a labeled tuple.\n", "    func stats() -> (mean: Tensor, std: Tensor) {\n", "        return (mean: self.mean(), std: self.standardDeviation())\n", "    }\n", "}" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ {
"data": { "text/plain": [ "▿ 2 elements\n", " ▿ filter : 2 elements\n", " - mean : 0.0002112739\n", " - std : [[[[0.049460452]]]]\n", " ▿ bias : 2 elements\n", " - mean : 0.0\n", " - std : [0.0]\n" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "(filter: layer1.filter.stats(), bias: layer1.bias.stats())" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "let result = layer1.applied(to: x)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "▿ 2 elements\n", " - mean : 0.00045435934\n", " - std : [[[[0.27018127]]]]\n" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "result.stats()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "// export\n", "extension Tensor where Scalar: TensorFlowFloatingPoint {\n", " init(kaimingNormal shape: TensorShape, negativeSlope: Double = 1.0) {\n", " // Assumes Leaky ReLU nonlinearity\n", " let gain = Scalar(sqrt(2.0 / (1.0 + pow(negativeSlope, 2))))\n", " let spatialDimCount = shape.count - 2\n", " let receptiveField = shape[0..(\n", " _ x: Tensor,\n", " negativeSlope: Double = 0.0\n", ") -> Tensor {\n", " return max(0, x) + T(negativeSlope) * min(0, x)\n", "}" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "▿ 2 elements\n", " - mean : 0.5224916\n", " - std : [[[[0.9566001]]]]\n" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "layer1.filter = Tensor(kaimingNormal: layer1.filter.shape, negativeSlope: 0.0)\n", "leakyRelu(layer1.applied(to: x)).stats()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "▿ 2 elements\n", " - mean : 0.086791426\n", " - std : [[[[0.1612546]]]]\n" ] }, "execution_count": null, "metadata": {}, 
"output_type": "execute_result" } ], "source": [ "var layer1 = FAConv2D(filterShape: (5, 5, 1, channels)) //Conv2D(1, nh, 5)\n", "leakyRelu(layer1.applied(to: x)).stats()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "▿ TensorShape\n", " ▿ dimensions : 4 elements\n", " - 0 : 5\n", " - 1 : 5\n", " - 2 : 1\n", " - 3 : 32\n" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "layer1.filter.shape" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "25\n" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "let spatialDimCount = layer1.filter.rank - 2\n", "let receptiveField = layer1.filter.shape[0.. Double {\n", " return sqrt(2.0 / (1.0 + pow(negativeSlope, 2.0)))\n", "}" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "▿ 5 elements\n", " - .0 : 1.0\n", " - .1 : 1.4142135623730951\n", " - .2 : 1.4141428569978354\n", " - .3 : 1.4071950894605838\n", " - .4 : 0.5773502691896257\n" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "(gain(1.0), gain(0.0), gain(0.01), gain(0.1), gain(sqrt(5.0)))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[0.5760468]\n" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "(2 * Tensor(randomUniform: [10000]) - 1).standardDeviation()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.5773502691896258\n" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "1.0 / sqrt(3.0)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "//export\n", "extension Tensor where Scalar: 
TensorFlowFloatingPoint {\n", " init(kaimingUniform shape: TensorShape, negativeSlope: Double = 1.0) {\n", " // Assumes Leaky ReLU nonlinearity\n", " let gain = Scalar(sqrt(2.0 / (1.0 + pow(negativeSlope, 2))))\n", " let spatialDimCount = shape.count - 2\n", " let receptiveField = shape[0..(\n", " filterShape: (5, 5, 1, 8), strides: (2, 2), padding: .same, activation: relu\n", " )\n", " public var conv2 = FAConv2D(\n", " filterShape: (3, 3, 8, 16), strides: (2, 2), padding: .same, activation: relu\n", " )\n", " public var conv3 = FAConv2D(\n", " filterShape: (3, 3, 16, 32), strides: (2, 2), padding: .same, activation: relu\n", " )\n", " public var conv4 = FAConv2D(\n", " filterShape: (3, 3, 32, 1), strides: (2, 2), padding: .valid\n", " )\n", " public var flatten = Flatten()\n", " @differentiable\n", " public func applied(to input: Tensor, in context: Context) -> Tensor {\n", " return input.sequenced(\n", " in: context,\n", " through: conv1, conv2, conv3, conv4, flatten\n", " )\n", " }\n", "}" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "let y = Tensor(yValid[0..<100])\n", "var model = Model()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "▿ 2 elements\n", " - mean : 0.16834545\n", " - std : [[0.06720749]]\n" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "let prediction = model.applied(to: x)\n", "prediction.stats()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "▿ 2 elements\n", " - mean : -0.2158769\n", " - std : [[[[0.36486608]]]]\n" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "let gradients = gradient(at: model) { model in\n", " meanSquaredError(predicted: model.applied(\n", " to: x,\n", " in: Context(learningPhase: .training)\n", " ), expected: y)\n", "}\n", "\n", 
"gradients.conv1.filter.stats()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "// Re-initialize every convolution filter through `Tensor(kaimingUniform:)`,\n", "// leaving `negativeSlope` at the initializer's default.\n", "for convPath in [\\Model.conv1, \\Model.conv2, \\Model.conv3, \\Model.conv4] {\n", "    let filterShape = model[keyPath: convPath].filter.shape\n", "    model[keyPath: convPath].filter = Tensor(kaimingUniform: filterShape)\n", "}" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "▿ 2 elements\n", " - mean : 0.06297659\n", " - std : [[0.17011806]]\n" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "// Statistics of the forward pass after the filters were re-initialized.\n", "let prediction = model.applied(to: x)\n", "prediction.stats()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "▿ 2 elements\n", " - mean : -0.26000252\n", " - std : [[[[0.26875323]]]]\n" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "// Gradient of the mean squared error with respect to the model, evaluated in training mode.\n", "let gradients = gradient(at: model) { network in\n", "    meanSquaredError(\n", "        predicted: network.applied(to: x, in: Context(learningPhase: .training)),\n", "        expected: y\n", "    )\n", "}\n", "\n", "gradients.conv1.filter.stats()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Export" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "notebookToScript(fname: (Path.cwd / \"02a_why_sqrt5.ipynb\").string)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Swift", "language": "swift", "name": "swift" } }, "nbformat": 4, "nbformat_minor": 1 }