{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Installing packages:\n", "\t.package(path: \"/home/ubuntu/dev_swift/FastaiNotebook_09_optimizer\")\n", "\t\tFastaiNotebook_09_optimizer\n", "With SwiftPM flags: []\n", "Working in: /tmp/tmpe2ebj_t2/swift-install\n", "Updating https://github.com/latenitesoft/NotebookExport\n", "Updating https://github.com/mxcl/Path.swift\n", "Updating https://github.com/JustHTTP/Just\n", "Completed resolution in 3.98s\n", "Compile Swift Module 'FastaiNotebook_09_optimizer' (14 sources)\n", "Compile Swift Module 'jupyterInstalledPackages' (1 sources)\n", "Linking ./.build/x86_64-unknown-linux/debug/libjupyterInstalledPackages.so\n", "Initializing Swift...\n", "Installation complete!\n" ] } ], "source": [ "%install-location $cwd/swift-install\n", "%install '.package(path: \"$cwd/FastaiNotebook_09_optimizer\")' FastaiNotebook_09_optimizer" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "// export\n", "import Path\n", "import TensorFlow" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import FastaiNotebook_09_optimizer" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "('inline', 'module://ipykernel.pylab.backend_inline')\n" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "%include \"EnableIPythonDisplay.swift\"\n", "IPythonDisplay.shell.enable_matplotlib(\"inline\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Load data" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "//TODO: switch to imagenette when possible to train" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "let data = mnistDataBunch(flat: true)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "let (n,m) = (60000,784)\n", "let c = 10\n", "let nHid = 50" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "func modelInit() -> BasicModel {return BasicModel(nIn: m, nHid: nHid, nOut: c)}" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "let learner = Learner(data: data, lossFunc: softmaxCrossEntropy, optFunc: sgdOpt(lr: 0.1), modelInit: modelInit)\n", "let recorder = learner.makeDefaultDelegates(metrics: [accuracy])\n", "learner.delegates.append(learner.makeNormalize(mean: mnistStats.mean, std: mnistStats.std))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Epoch 0: [0.34678498, 0.9046] \n", " \r" ] } ], "source": [ "learner.fit(1)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Mixup" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "extension RandomDistribution {\n", " // Returns a batch of samples.\n", " func next(\n", " _ count: Int, using generator: inout G\n", " ) -> [Sample] {\n", " var result: [Sample] = []\n", " for _ in 0.. [Sample] {\n", " return next(count, using: &ThreefryRandomNumberGenerator.global)\n", " }\n", "}" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Mixup requires one-hot encoded targets since we don't have a loss function with no reduction." 
] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "extension Learner{\n", " public class MixupDelegate: Delegate {\n", " private var distribution: BetaDistribution\n", " \n", " public init(alpha: Float = 0.4){\n", " distribution = BetaDistribution(alpha: alpha, beta: alpha)\n", " }\n", " \n", " override public func batchWillStart(learner: Learner) {\n", " if let xb = learner.currentInput {\n", " if let yb = learner.currentTarget as? Tensor{\n", " var lambda = Tensor(distribution.next(Int(yb.shape[0])))\n", " lambda = max(lambda, 1-lambda)\n", " let shuffle = Raw.randomShuffle(value: Tensor(0.. MixupDelegate {\n", " return MixupDelegate(alpha: alpha)\n", " }\n", "}" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "let (n,m) = (60000,784)\n", "let c = 10\n", "let nHid = 50" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We need to one-hot encode the targets" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "var train1 = data.train.innerDs.map { DataBatch(xb: $0.xb, \n", " yb: Raw.oneHot(indices: $0.yb, depth: TI(10), onValue: TF(1), offValue: TF(0))) }" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "var valid1 = data.valid.innerDs.map { DataBatch(xb: $0.xb, \n", " yb: Raw.oneHot(indices: $0.yb, depth: TI(10), onValue: TF(1), offValue: TF(0))) }" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "let data1 = DataBunch(train: train1, valid: valid1, trainLen: data.train.dsCount, \n", " validLen: data.valid.dsCount, bs: data.train.bs)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "func modelInit() -> BasicModel {return BasicModel(nIn: m, nHid: nHid, nOut: c)}" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "func accuracyFloat(_ out: TF, _ targ: TF) -> TF {\n", " return TF(out.argmax(squeezingAxis: 1) .== targ.argmax(squeezingAxis: 1)).mean()\n", "}" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "let learner = Learner(data: data1, lossFunc: softmaxCrossEntropy, optFunc: sgdOpt(lr: 0.1), modelInit: modelInit)\n", "let recorder = learner.makeRecorder()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "learner.delegates = [learner.makeTrainEvalDelegate(), learner.makeShowProgress(), \n", " learner.makeAvgMetric(metrics: [accuracyFloat]), recorder,\n", " learner.makeMixupDelegate(alpha: 0.2)]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Epoch 0: [0.53149545, 0.9138] \n", "Epoch 1: [0.50207776, 0.9218] \n", " \r" ] } ], "source": [ "learner.fit(2)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Labelsmoothing" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "//export\n", "@differentiable(wrt: out)\n", "public func labelSmoothingCrossEntropy(_ out: TF, _ targ: TI, ε: Float = 0.1) -> TF {\n", " let c = out.shape[1]\n", " let loss = softmaxCrossEntropy(logits: out, labels: targ)\n", " let logPreds = logSoftmax(out)\n", " return (1-ε) * loss - (ε / Float(c)) * logPreds.mean()\n", "}" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "@differentiable(wrt: out)\n", 
"func lossFunc(_ out: TF, _ targ: TI) -> TF { return labelSmoothingCrossEntropy(out, targ, ε: 0.1) }" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "let learner = Learner(data: data, lossFunc: lossFunc, optFunc: sgdOpt(lr: 0.1), modelInit: modelInit)\n", "let recorder = learner.makeDefaultDelegates(metrics: [accuracy])\n", "learner.delegates.append(learner.makeNormalize(mean: mnistStats.mean, std: mnistStats.std))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Epoch 0: [0.29444185, 0.9296] \n", "Epoch 1: [0.26258504, 0.9429] \n", " \r" ] } ], "source": [ "learner.fit(2)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Export" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "success\r\n" ] } ], "source": [ "import NotebookExport\n", "let exporter = NotebookExport(Path.cwd/\"10_mixup_ls.ipynb\")\n", "print(exporter.export(usingPrefix: \"FastaiNotebook_\"))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Swift", "language": "swift", "name": "swift" } }, "nbformat": 4, "nbformat_minor": 2 }