{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Installing packages:\n", "\t.package(path: \"/home/jupyter/notebooks/swift/FastaiNotebook_02_fully_connected\")\n", "\t\tFastaiNotebook_02_fully_connected\n", "With SwiftPM flags: []\n", "Working in: /tmp/tmpf7bhzuzj/swift-install\n", "[1/5] Compiling FastaiNotebook_02_fully_connected 01_matmul.swift\n", "[2/5] Compiling FastaiNotebook_02_fully_connected 02_fully_connected.swift\n", "[3/5] Compiling FastaiNotebook_02_fully_connected 00_load_data.swift\n", "[4/5] Compiling FastaiNotebook_02_fully_connected 01a_fastai_layers.swift\n", "[5/6] Merging module FastaiNotebook_02_fully_connected\n", "[6/7] Compiling jupyterInstalledPackages jupyterInstalledPackages.swift\n", "[7/8] Merging module jupyterInstalledPackages\n", "[8/8] Linking libjupyterInstalledPackages.so\n", "Initializing Swift...\n", "Installation complete!\n" ] } ], "source": [ "%install-location $cwd/swift-install\n", "%install '.package(path: \"$cwd/FastaiNotebook_02_fully_connected\")' FastaiNotebook_02_fully_connected" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "//export\n", "import Foundation\n", "import TensorFlow\n", "import Path" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import FastaiNotebook_02_fully_connected" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Does nn.Conv2d init work well?" 
] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "var (xTrain, yTrain, xValid, yValid) = loadMNIST(path: Path.home/\".fastai\"/\"data\"/\"mnist_tst\")\n", "// Normalize both splits with the statistics of the training images.\n", "let (trainMean, trainStd) = (xTrain.mean(), xTrain.standardDeviation())\n", "xTrain = normalize(xTrain, mean: trainMean, std: trainStd)\n", "xValid = normalize(xValid, mean: trainMean, std: trainStd)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[60000, 28, 28, 1] [10000, 28, 28, 1]\r\n" ] } ], "source": [ "// Reshape to four dimensions with a trailing axis of size 1 before feeding the convolution.\n", "xTrain = xTrain.reshaped(to: [xTrain.shape[0], 28, 28, 1])\n", "xValid = xValid.reshaped(to: [xValid.shape[0], 28, 28, 1])\n", "print(xTrain.shape, xValid.shape)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "let images = xTrain.shape[0]\n", "// The class count comes from the labels (largest label + 1), not from the normalized pixel values.\n", "let classes = yTrain.max() + 1\n", "let channels = 32" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "var layer1 = FAConv2D(filterShape: (5, 5, 1, channels)) //Conv2D(1, nh, 5)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "// A small batch of validation images used for every activation measurement below.\n", "let x = xValid[0..<100]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "▿ [100, 28, 28, 1]\n", " ▿ dimensions : 4 elements\n", " - 0 : 100\n", " - 1 : 28\n", " - 2 : 28\n", " - 3 : 1\n" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "x.shape" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "extension Tensor where Scalar: TensorFlowFloatingPoint {\n", " /// Returns the mean and the standard deviation of the tensor as a labeled pair.\n", " func stats() -> (mean: Tensor, std: Tensor) {\n", " return (mean: mean(), std: standardDeviation())\n", " }\n", "}" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "▿ 2 elements\n", " ▿ filter : 2 
elements\n", " - mean : -0.0027464556\n", " - std : 0.19631124\n", " ▿ bias : 2 elements\n", " - mean : 0.0\n", " - std : 0.0\n" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "(filter: layer1.filter.stats(), bias: layer1.bias.stats())" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "withDevice(.cpu){\n", " let result = layer1(x)\n", "}" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "let result = layer1(x)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "▿ 2 elements\n", " - mean : 0.00048066635\n", " - std : 0.9185965\n" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "result.stats()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "This is in 1a now so this code is disabled from here:\n", "\n", "```swift\n", "var rng = PhiloxRandomNumberGenerator.global\n", "\n", "extension Tensor where Scalar: TensorFlowFloatingPoint {\n", " init(kaimingNormal shape: TensorShape, negativeSlope: Double = 1.0) {\n", " // Assumes Leaky ReLU nonlinearity\n", " let gain = Scalar(sqrt(2.0 / (1.0 + pow(negativeSlope, 2))))\n", " let spatialDimCount = shape.count - 2\n", " let receptiveField = shape[0..(\n", " _ x: Tensor,\n", " negativeSlope: Double = 0.0\n", ") -> Tensor {\n", " return max(0, x) + T(negativeSlope) * min(0, x)\n", "}" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "▿ 2 elements\n", " - mean : 0.40438622\n", " - std : 0.8042958\n" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "layer1.filter = Tensor(kaimingNormal: layer1.filter.shape, negativeSlope: 0.0)\n", "leakyRelu(layer1(x)).stats()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ 
"▿ 2 elements\n", " - mean : 0.3136924\n", " - std : 0.6081149\n" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "var layer1 = FAConv2D(filterShape: (5, 5, 1, channels)) //Conv2D(1, nh, 5)\n", "leakyRelu(layer1(x)).stats()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "▿ [5, 5, 1, 32]\n", " ▿ dimensions : 4 elements\n", " - 0 : 5\n", " - 1 : 5\n", " - 2 : 1\n", " - 3 : 32\n" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "layer1.filter.shape" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "25\n" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "let spatialDimCount = layer1.filter.rank - 2\n", "let receptiveField = layer1.filter.shape[0.. Double {\n", " return sqrt(2.0 / (1.0 + pow(negativeSlope, 2.0)))\n", "}" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "▿ 5 elements\n", " - .0 : 1.0\n", " - .1 : 1.4142135623730951\n", " - .2 : 1.4141428569978354\n", " - .3 : 1.4071950894605838\n", " - .4 : 0.5773502691896257\n" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "(gain(1.0), gain(0.0), gain(0.01), gain(0.1), gain(sqrt(5.0)))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.5790101\n" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "(2 * Tensor(randomUniform: [10000]) - 1).standardDeviation()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.5773502691896258\n" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "1.0 / sqrt(3.0)" ] }, { "cell_type": "code", "execution_count": 
null, "metadata": {}, "outputs": [], "source": [ "//export\n", "extension Tensor where Scalar: TensorFlowFloatingPoint {\n", " init(kaimingUniform shape: TensorShape, negativeSlope: Double = 1.0) {\n", " // Assumes Leaky ReLU nonlinearity\n", " let gain = Scalar.init(TensorFlow.sqrt(2.0 / (1.0 + TensorFlow.pow(negativeSlope, 2))))\n", " let spatialDimCount = shape.count - 2\n", " let receptiveField = shape[0..(\n", " filterShape: (5, 5, 1, 8), strides: (2, 2), padding: .same, activation: relu\n", " )\n", " public var conv2 = FAConv2D(\n", " filterShape: (3, 3, 8, 16), strides: (2, 2), padding: .same, activation: relu\n", " )\n", " public var conv3 = FAConv2D(\n", " filterShape: (3, 3, 16, 32), strides: (2, 2), padding: .same, activation: relu\n", " )\n", " public var conv4 = FAConv2D(\n", " filterShape: (3, 3, 32, 1), strides: (2, 2), padding: .valid\n", " )\n", " public var flatten = Flatten()\n", "\n", " @differentiable\n", " public func callAsFunction(_ input: Tensor) -> Tensor {\n", " return input.sequenced(through: conv1, conv2, conv3, conv4, flatten)\n", " }\n", "}" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "let y = Tensor(yValid[0..<100])\n", "var model = Model()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "▿ 2 elements\n", " - mean : 0.1729667\n", " - std : 0.12520388\n" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "let prediction = model(x)\n", "prediction.stats()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "▿ 2 elements\n", " - mean : -0.056964096\n", " - std : 0.2776651\n" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "let gradients = gradient(at: model) { model in\n", " meanSquaredError(predicted: model(x), expected: y)\n", "}\n", "\n", "gradients.conv1.filter.stats()" ] 
}, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "// Re-initialize every convolution filter with the Kaiming-uniform initializer.\n", "// No negativeSlope is passed, so the initializer's default (1.0) applies.\n", "// NOTE(review): conv1-conv3 use relu; consider negativeSlope: 0.0 if a ReLU gain is intended.\n", "for keyPath in [\\Model.conv1, \\Model.conv2, \\Model.conv3, \\Model.conv4] {\n", " model[keyPath: keyPath].filter = Tensor(kaimingUniform: model[keyPath: keyPath].filter.shape)\n", "}" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "▿ 2 elements\n", " - mean : -0.37689942\n", " - std : 0.32016334\n" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "// Distinct name so the pre-initialization `prediction` stays available for comparison.\n", "let reinitPrediction = model(x)\n", "reinitPrediction.stats()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "▿ 2 elements\n", " - mean : 0.100767136\n", " - std : 0.54216325\n" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "// Same loss as before, now differentiated through the re-initialized model.\n", "let reinitGradients = gradient(at: model) { model in\n", " meanSquaredError(predicted: model(x), expected: y)\n", "}\n", "\n", "reinitGradients.conv1.filter.stats()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Export" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "success\r\n" ] } ], "source": [ "import NotebookExport\n", "let exporter = NotebookExport(Path.cwd/\"02a_why_sqrt5.ipynb\")\n", "print(exporter.export(usingPrefix: \"FastaiNotebook_\"))" ] } ], "metadata": { "kernelspec": { "display_name": "Swift", "language": "swift", "name": "swift" } }, "nbformat": 4, "nbformat_minor": 1 }