{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Installing packages:\n", "\t.package(path: \"/home/ubuntu/dev_swift/FastaiNotebook_09_optimizer\")\n", "\t\tFastaiNotebook_09_optimizer\n", "With SwiftPM flags: []\n", "Working in: /tmp/tmpe2ebj_t2/swift-install\n", "Updating https://github.com/latenitesoft/NotebookExport\n", "Updating https://github.com/mxcl/Path.swift\n", "Updating https://github.com/JustHTTP/Just\n", "Completed resolution in 3.98s\n", "Compile Swift Module 'FastaiNotebook_09_optimizer' (14 sources)\n", "Compile Swift Module 'jupyterInstalledPackages' (1 sources)\n", "Linking ./.build/x86_64-unknown-linux/debug/libjupyterInstalledPackages.so\n", "Initializing Swift...\n", "Installation complete!\n" ] } ], "source": [ "%install-location $cwd/swift-install\n", "%install '.package(path: \"$cwd/FastaiNotebook_09_optimizer\")' FastaiNotebook_09_optimizer" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "// export\n", "import Path\n", "import TensorFlow" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import FastaiNotebook_09_optimizer" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "('inline', 'module://ipykernel.pylab.backend_inline')\n" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "%include \"EnableIPythonDisplay.swift\"\n", "IPythonDisplay.shell.enable_matplotlib(\"inline\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Load data" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "//TODO: switch to imagenette when possible to train" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "let data = mnistDataBunch(flat: true)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "let (n,m) = (60000,784)\n", "let c = 10\n", "let nHid = 50" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "func modelInit() -> BasicModel {return BasicModel(nIn: m, nHid: nHid, nOut: c)}" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "let learner = Learner(data: data, lossFunc: softmaxCrossEntropy, optFunc: sgdOpt(lr: 0.1), modelInit: modelInit)\n", "let recorder = learner.makeDefaultDelegates(metrics: [accuracy])\n", "learner.delegates.append(learner.makeNormalize(mean: mnistStats.mean, std: mnistStats.std))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Epoch 0: [0.34678498, 0.9046] \n", " \r" ] } ], "source": [ "learner.fit(1)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Mixup" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "extension RandomDistribution {\n", " // Returns a batch of samples.\n", " func next(\n", " _ count: Int, using generator: inout G\n", " ) -> [Sample] {\n", " var result: [Sample] = []\n", " for _ in 0.. [Sample] {\n", " return next(count, using: &ThreefryRandomNumberGenerator.global)\n", " }\n", "}" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Mixup requires one-hot encoded targets since we don't have a loss function with no reduction." 
] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "extension Learner{\n", " public class MixupDelegate: Delegate {\n", " private var distribution: BetaDistribution\n", " \n", " public init(alpha: Float = 0.4){\n", " distribution = BetaDistribution(alpha: alpha, beta: alpha)\n", " }\n", " \n", " override public func batchWillStart(learner: Learner) {\n", " if let xb = learner.currentInput {\n", " if let yb = learner.currentTarget as? Tensor{\n", " var lambda = Tensor(distribution.next(Int(yb.shape[0])))\n", " lambda = max(lambda, 1-lambda)\n", " let shuffle = Raw.randomShuffle(value: Tensor(0.. MixupDelegate {\n", " return MixupDelegate(alpha: alpha)\n", " }\n", "}" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "let (n,m) = (60000,784)\n", "let c = 10\n", "let nHid = 50" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We need to one-hot encode the targets" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "var train1 = data.train.innerDs.map { DataBatch(xb: $0.xb, \n", " yb: Raw.oneHot(indices: $0.yb, depth: TI(10), onValue: TF(1), offValue: TF(0))) }" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "var valid1 = data.valid.innerDs.map { DataBatch(xb: $0.xb, \n", " yb: Raw.oneHot(indices: $0.yb, depth: TI(10), onValue: TF(1), offValue: TF(0))) }" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "let data1 = DataBunch(train: train1, valid: valid1, trainLen: data.train.dsCount, \n", " validLen: data.valid.dsCount, bs: data.train.bs)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "func modelInit() -> BasicModel {return BasicModel(nIn: m, nHid: nHid, nOut: c)}" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "func accuracyFloat(_ out: TF, _ targ: TF) -> TF {\n", " return TF(out.argmax(squeezingAxis: 1) .== targ.argmax(squeezingAxis: 1)).mean()\n", "}" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "let learner = Learner(data: data1, lossFunc: softmaxCrossEntropy, optFunc: sgdOpt(lr: 0.1), modelInit: modelInit)\n", "let recorder = learner.makeRecorder()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "learner.delegates = [learner.makeTrainEvalDelegate(), learner.makeShowProgress(), \n", " learner.makeAvgMetric(metrics: [accuracyFloat]), recorder,\n", " learner.makeMixupDelegate(alpha: 0.2)]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Epoch 0: [0.53149545, 0.9138] \n", "Epoch 1: [0.50207776, 0.9218] \n", " \r" ] } ], "source": [ "learner.fit(2)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Labelsmoothing" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "//export\n", "@differentiable(wrt: out)\n", "public func labelSmoothingCrossEntropy(_ out: TF, _ targ: TI, ε: Float = 0.1) -> TF {\n", " let c = out.shape[1]\n", " let loss = softmaxCrossEntropy(logits: out, labels: targ)\n", " let logPreds = logSoftmax(out)\n", " return (1-ε) * loss - (ε / Float(c)) * logPreds.mean()\n", "}" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "@differentiable(wrt: out)\n", 
"func lossFunc(_ out: TF, _ targ: TI) -> TF { return labelSmoothingCrossEntropy(out, targ, ε: 0.1) }" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "let learner = Learner(data: data, lossFunc: lossFunc, optFunc: sgdOpt(lr: 0.1), modelInit: modelInit)\n", "let recorder = learner.makeDefaultDelegates(metrics: [accuracy])\n", "learner.delegates.append(learner.makeNormalize(mean: mnistStats.mean, std: mnistStats.std))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Epoch 0: [0.29444185, 0.9296] \n", "Epoch 1: [0.26258504, 0.9429] \n", " \r" ] } ], "source": [ "learner.fit(2)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Export" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "success\r\n" ] } ], "source": [ "import NotebookExport\n", "let exporter = NotebookExport(Path.cwd/\"10_mixup_ls.ipynb\")\n", "print(exporter.export(usingPrefix: \"FastaiNotebook_\"))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Swift", "language": "swift", "name": "swift" } }, "nbformat": 4, "nbformat_minor": 2 }