{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Installing packages:\n",
      "\t.package(path: \"/home/ubuntu/fastai_docs/dev_swift/FastaiNotebook_02_fully_connected\")\n",
      "\t\tFastaiNotebook_02_fully_connected\n",
      "With SwiftPM flags: []\n",
      "Working in: /tmp/tmp7mqq1mmn\n",
      "Fetching https://github.com/mxcl/Path.swift\n",
      "Fetching https://github.com/JustHTTP/Just\n",
      "Completed resolution in 1.83s\n",
      "Cloning https://github.com/mxcl/Path.swift\n",
      "Resolving https://github.com/mxcl/Path.swift at 0.16.2\n",
      "Cloning https://github.com/JustHTTP/Just\n",
      "Resolving https://github.com/JustHTTP/Just at 0.7.1\n",
      "Compile Swift Module 'Path' (9 sources)\n",
      "Compile Swift Module 'Just' (1 sources)\n",
      "Compile Swift Module 'FastaiNotebook_02_fully_connected' (4 sources)\n",
      "Compile Swift Module 'jupyterInstalledPackages' (1 sources)\n",
      "Linking ./.build/x86_64-unknown-linux/debug/libjupyterInstalledPackages.so\n",
      "Initializing Swift...\n",
      "Loading library...\n",
      "Installation complete!\n"
     ]
    }
   ],
   "source": [
    "%install '.package(path: \"$cwd/FastaiNotebook_02_fully_connected\")' FastaiNotebook_02_fully_connected"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import FastaiNotebook_02_fully_connected"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "// export\n",
    "import Foundation\n",
    "import TensorFlow\n",
    "import Path"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Does nn.Conv2d init work well?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "var (xTrain, yTrain, xValid, yValid) = loadMNIST(path: Path.home/\".fastai\"/\"data\"/\"mnist_tst\")\n",
    "let (trainMean, trainStd) = (xTrain.mean(), xTrain.standardDeviation())\n",
    "xTrain = normalize(xTrain, mean: trainMean, std: trainStd)\n",
    "xValid = normalize(xValid, mean: trainMean, std: trainStd)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "TensorShape(dimensions: [60000, 28, 28, 1]) TensorShape(dimensions: [10000, 28, 28, 1])\r\n"
     ]
    }
   ],
   "source": [
    "xTrain = xTrain.reshaped(to: [xTrain.shape[0], 28, 28, 1])\n",
    "xValid = xValid.reshaped(to: [xValid.shape[0], 28, 28, 1])\n",
    "print(xTrain.shape, xValid.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "let images = xTrain.shape[0]\n",
    "let classes = xValid.max() + 1\n",
    "let channels = 32"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "var layer1 = FAConv2D<Float>(filterShape: (5, 5, 1, channels)) //Conv2D(1, nh, 5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "let x = xValid[0..<100]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "▿ TensorShape\n",
       "  ▿ dimensions : 4 elements\n",
       "    - 0 : 100\n",
       "    - 1 : 28\n",
       "    - 2 : 28\n",
       "    - 3 : 1\n"
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "extension Tensor where Scalar: TensorFlowFloatingPoint {\n",
    "    func stats() -> (mean: Tensor, std: Tensor) {\n",
    "        return (mean: self.mean(), std: self.standardDeviation())\n",
    "    }\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "▿ 2 elements\n",
       "  ▿ filter : 2 elements\n",
       "    - mean : 0.0002112739\n",
       "    - std : [[[[0.049460452]]]]\n",
       "  ▿ bias : 2 elements\n",
       "    - mean : 0.0\n",
       "    - std : [0.0]\n"
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "(filter: layer1.filter.stats(), bias: layer1.bias.stats())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "let result = layer1.applied(to: x)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "▿ 2 elements\n",
       "  - mean : 0.00045435934\n",
       "  - std : [[[[0.27018127]]]]\n"
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "result.stats()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "// export\n",
    "extension Tensor where Scalar: TensorFlowFloatingPoint {\n",
    "    init(kaimingNormal shape: TensorShape, negativeSlope: Double = 1.0) {\n",
    "        // Assumes Leaky ReLU nonlinearity\n",
    "        let gain = Scalar(sqrt(2.0 / (1.0 + pow(negativeSlope, 2))))\n",
    "        let spatialDimCount = shape.count - 2\n",
    "        let receptiveField = shape[0..<spatialDimCount].contiguousSize\n",
    "        let fanIn = shape[shape.count - 2] * receptiveField\n",
    "        self.init(\n",
    "            randomNormal: shape,\n",
    "            stddev: gain / sqrt(Scalar(fanIn)),\n",
    "            generator: &PhiloxRandomNumberGenerator.global\n",
    "        )\n",
    "    }\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "▿ 2 elements\n",
       "  - mean : -0.005352263\n",
       "  - std : [[[[1.0782409]]]]\n"
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "layer1.filter = Tensor(kaimingNormal: layer1.filter.shape, negativeSlope: 1.0)\n",
    "layer1.applied(to: x).stats()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "// export\n",
    "func leakyRelu<T: TensorFlowFloatingPoint>(\n",
    "    _ x: Tensor<T>,\n",
    "    negativeSlope: Double = 0.0\n",
    ") -> Tensor<T> {\n",
    "    return max(0, x) + T(negativeSlope) * min(0, x)\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "▿ 2 elements\n",
       "  - mean : 0.5224916\n",
       "  - std : [[[[0.9566001]]]]\n"
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "layer1.filter = Tensor(kaimingNormal: layer1.filter.shape, negativeSlope: 0.0)\n",
    "leakyRelu(layer1.applied(to: x)).stats()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "▿ 2 elements\n",
       "  - mean : 0.086791426\n",
       "  - std : [[[[0.1612546]]]]\n"
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "var layer1 = FAConv2D<Float>(filterShape: (5, 5, 1, channels)) //Conv2D(1, nh, 5)\n",
    "leakyRelu(layer1.applied(to: x)).stats()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "▿ TensorShape\n",
       "  ▿ dimensions : 4 elements\n",
       "    - 0 : 5\n",
       "    - 1 : 5\n",
       "    - 2 : 1\n",
       "    - 3 : 32\n"
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "layer1.filter.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "25\n"
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "let spatialDimCount = layer1.filter.rank - 2\n",
    "let receptiveField = layer1.filter.shape[0..<spatialDimCount].contiguousSize\n",
    "receptiveField"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1 32\r\n"
     ]
    }
   ],
   "source": [
    "let filtersIn = layer1.filter.shape[2]\n",
    "let filtersOut = layer1.filter.shape[3]\n",
    "print(filtersIn, filtersOut)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "25 800\r\n"
     ]
    }
   ],
   "source": [
    "let fanIn = filtersIn * receptiveField\n",
    "let fanOut = filtersOut * receptiveField\n",
    "print(fanIn, fanOut)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "func gain(_ negativeSlope: Double) -> Double {\n",
    "    return sqrt(2.0 / (1.0 + pow(negativeSlope, 2.0)))\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "▿ 5 elements\n",
       "  - .0 : 1.0\n",
       "  - .1 : 1.4142135623730951\n",
       "  - .2 : 1.4141428569978354\n",
       "  - .3 : 1.4071950894605838\n",
       "  - .4 : 0.5773502691896257\n"
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "(gain(1.0), gain(0.0), gain(0.01), gain(0.1), gain(sqrt(5.0)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[0.5760468]\n"
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "(2 * Tensor<Float>(randomUniform: [10000]) - 1).standardDeviation()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.5773502691896258\n"
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "1.0 / sqrt(3.0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "//export\n",
    "extension Tensor where Scalar: TensorFlowFloatingPoint {\n",
    "    init(kaimingUniform shape: TensorShape, negativeSlope: Double = 1.0) {\n",
    "        // Assumes Leaky ReLU nonlinearity\n",
    "        let gain = Scalar(sqrt(2.0 / (1.0 + pow(negativeSlope, 2))))\n",
    "        let spatialDimCount = shape.count - 2\n",
    "        let receptiveField = shape[0..<spatialDimCount].contiguousSize\n",
    "        let fanIn = shape[shape.count - 2] * receptiveField\n",
    "        let bound = sqrt(Scalar(3.0)) * gain / sqrt(Scalar(fanIn))\n",
    "        self = bound * (2 * Tensor(\n",
    "            randomUniform: shape,\n",
    "            generator: &PhiloxRandomNumberGenerator.global\n",
    "        ) - 1)\n",
    "    }\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "▿ 2 elements\n",
       "  - mean : 0.46286932\n",
       "  - std : [[[[0.89172804]]]]\n"
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "layer1.filter = Tensor(kaimingUniform: layer1.filter.shape, negativeSlope: 0.0)\n",
    "leakyRelu(layer1.applied(to: x)).stats()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "▿ 2 elements\n",
       "  - mean : 0.196594\n",
       "  - std : [[[[0.38863888]]]]\n"
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "layer1.filter = Tensor(kaimingUniform: layer1.filter.shape, negativeSlope: sqrt(5.0))\n",
    "leakyRelu(layer1.applied(to: x)).stats()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "public struct Model: Layer {\n",
    "    public var conv1 = FAConv2D<Float>(\n",
    "        filterShape: (5, 5, 1, 8),   strides: (2, 2), padding: .same, activation: relu\n",
    "    )\n",
    "    public var conv2 = FAConv2D<Float>(\n",
    "        filterShape: (3, 3, 8, 16),  strides: (2, 2), padding: .same, activation: relu\n",
    "    )\n",
    "    public var conv3 = FAConv2D<Float>(\n",
    "        filterShape: (3, 3, 16, 32), strides: (2, 2), padding: .same, activation: relu\n",
    "    )\n",
    "    public var conv4 = FAConv2D<Float>(\n",
    "        filterShape: (3, 3, 32, 1),  strides: (2, 2), padding: .valid\n",
    "    )\n",
    "    public var flatten = Flatten<Float>()\n",
    "    @differentiable\n",
    "    public func applied(to input: Tensor<Float>, in context: Context) -> Tensor<Float> {\n",
    "        return input.sequenced(\n",
    "            in: context,\n",
    "            through: conv1, conv2, conv3, conv4, flatten\n",
    "        )\n",
    "    }\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "let y = Tensor<Float>(yValid[0..<100])\n",
    "var model = Model()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "▿ 2 elements\n",
       "  - mean : 0.16834545\n",
       "  - std : [[0.06720749]]\n"
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "let prediction = model.applied(to: x)\n",
    "prediction.stats()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "▿ 2 elements\n",
       "  - mean : -0.2158769\n",
       "  - std : [[[[0.36486608]]]]\n"
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "let gradients = gradient(at: model) { model in\n",
    "    meanSquaredError(predicted: model.applied(\n",
    "        to: x,\n",
    "        in: Context(learningPhase: .training)\n",
    "    ), expected: y)\n",
    "}\n",
    "\n",
    "gradients.conv1.filter.stats()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "for keyPath in [\\Model.conv1, \\Model.conv2, \\Model.conv3, \\Model.conv4] {\n",
    "    model[keyPath: keyPath].filter = Tensor(kaimingUniform: model[keyPath: keyPath].filter.shape)\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "▿ 2 elements\n",
       "  - mean : 0.06297659\n",
       "  - std : [[0.17011806]]\n"
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "let prediction = model.applied(to: x)\n",
    "prediction.stats()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "▿ 2 elements\n",
       "  - mean : -0.26000252\n",
       "  - std : [[[[0.26875323]]]]\n"
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "let gradients = gradient(at: model) { model in\n",
    "    meanSquaredError(predicted: model.applied(\n",
    "        to: x,\n",
    "        in: Context(learningPhase: .training)\n",
    "    ), expected: y)\n",
    "}\n",
    "\n",
    "gradients.conv1.filter.stats()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Export"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "notebookToScript(fname: (Path.cwd / \"02a_why_sqrt5.ipynb\").string)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Swift",
   "language": "swift",
   "name": "swift"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}