{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Installing packages:\n", "\t.package(path: \"/home/ubuntu/fastai_docs/dev_swift/FastaiNotebook_08a_heterogeneous_dictionary\")\n", "\t\tFastaiNotebook_08a_heterogeneous_dictionary\n", "With SwiftPM flags: []\n", "Working in: /tmp/tmperd7354d/swift-install\n", "Fetching https://github.com/mxcl/Path.swift\n", "Fetching https://github.com/JustHTTP/Just\n", "Completed resolution in 4.11s\n", "Cloning https://github.com/mxcl/Path.swift\n", "Resolving https://github.com/mxcl/Path.swift at 0.16.2\n", "Cloning https://github.com/JustHTTP/Just\n", "Resolving https://github.com/JustHTTP/Just at 0.7.1\n", "Compile Swift Module 'Just' (1 sources)\n", "Compile Swift Module 'Path' (9 sources)\n", "Compile Swift Module 'FastaiNotebook_08a_heterogeneous_dictionary' (14 sources)\n", "Compile Swift Module 'jupyterInstalledPackages' (1 sources)\n", "Linking ./.build/x86_64-unknown-linux/debug/libjupyterInstalledPackages.so\n", "Initializing Swift...\n", "Installation complete!\n" ] } ], "source": [ "%install '.package(path: \"$cwd/FastaiNotebook_08a_heterogeneous_dictionary\")' FastaiNotebook_08a_heterogeneous_dictionary" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Load data" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "('inline', 'module://ipykernel.pylab.backend_inline')\n" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import FastaiNotebook_08a_heterogeneous_dictionary\n", "%include \"EnableIPythonDisplay.swift\"\n", "IPythonDisplay.shell.enable_matplotlib(\"inline\")" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "// export\n", "import Path\n", "import TensorFlow" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "let path = 
downloadImagette()" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "let il = ItemList(fromFolder: path, extensions: [\"jpeg\", \"jpg\"])\n", "let sd = SplitData(il, fromFunc: {grandParentSplitter(fName: $0, valid: \"val\")})\n", "var (procItem,procLabel) = (NoopProcessor(),CategoryProcessor())\n", "let sld = SplitLabeledData(sd, fromFunc: parentLabeler, procItem: &procItem, procLabel: &procLabel)\n", "var rawData = sld.toDataBunch(itemToTensor: pathsToTensor, labelToTensor: intsToTensor)\n", "let data = transformData(rawData, tfmItem: { openAndResize(fname: $0, size: 128) })" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "let data = mnistDataBunch(flat: true)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "let (n,m) = (60000,784)\n", "let c = 10\n", "let nHid = 50" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "func modelInit() -> BasicModel {return BasicModel(nIn: m, nHid: nHid, nOut: c)}" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Stateful optimizer" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "//export\n", "/// Base class for a statistic the optimizer tracks for every parameter tensor.\n", "/// The defaults are an empty name, an empty config and a no-op `update`;\n", "/// subclasses override what they need.\n", "open class StatDelegate {\n", " /// Key under which the optimizer stores this statistic in its state.\n", " open var name: String { return \"\" }\n", " /// Hyperparameter defaults the optimizer merges into each of its configs.\n", " /// `open` (like `name`) so subclasses outside the exported module can override it.\n", " open var defaultConfig: HeterogeneousDictionary { return HeterogeneousDictionary() }\n", " /// Updates `state` in place for `param`, given its gradient `direction`.\n", " /// May also write derived hyperparameters into `config`. No-op in the base class.\n", " open func update(\n", " state: inout [String: Tensor],\n", " for param: Tensor,\n", " along direction: Tensor,\n", " config: inout HeterogeneousDictionary\n", " ) { }\n", "}\n", "\n", "//export\n", "/// Base class for one stage of the parameter update.\n", "/// The defaults are an empty config and a no-op `update`.\n", "open class StepDelegate {\n", " /// Hyperparameter defaults the optimizer merges into each of its configs.\n", " /// `open` so subclasses outside the exported module can override it.\n", " open var defaultConfig: HeterogeneousDictionary { return HeterogeneousDictionary() }\n", " /// Modifies `param` and/or `direction` in place, reading the tracked `state`\n", " /// and the hyperparameters in `config`. No-op in the base class.\n", " open func update(\n", " param: inout Tensor,\n", " along direction: inout Tensor,\n", " state: [String: Tensor],\n", " config: inout HeterogeneousDictionary\n", " ) { }\n", "}" ] }, { "cell_type": "code", "execution_count": 62, "metadata": {}, 
"outputs": [], "source": [ "//export\n", "class StatefulOptimizer: Optimizer\n", " where Model.AllDifferentiableVariables == Model.CotangentVector{\n", " var configs: [HeterogeneousDictionary]\n", " var learningRate: Float {\n", " get { return configs.last![LearningRate()] } \n", " set { \n", " for i in configs.indices {self.configs[i][LearningRate()] = newValue }\n", " }\n", " }\n", " var learningRates: [Float] {\n", " get {\n", " var res: [Float] = []\n", " for config in configs {res.append(config[LearningRate()])}\n", " return res\n", " }\n", " set { \n", " for i in configs.indices {self.configs[i][LearningRate()] = newValue[i] } \n", " }\n", " }\n", " var splits: (Int) -> Int\n", " var states: [String: Model.AllDifferentiableVariables]\n", " var statDelegates: [StatDelegate]\n", " var stepDelegates: [StepDelegate]\n", " init(\n", " stepDelegates: [StepDelegate],\n", " statDelegates: [StatDelegate],\n", " configs: [HeterogeneousDictionary],\n", " splits: @escaping (Int) -> Int\n", " ) {\n", " self.configs = Array(repeating: HeterogeneousDictionary(), count: configs.count)\n", " states = [:]\n", " for stepDelegate in stepDelegates {\n", " for i in self.configs.indices { self.configs[i].merge(stepDelegate.defaultConfig) { (_, new) in new } }\n", " }\n", " for statDelegate in statDelegates {\n", " for i in self.configs.indices { self.configs[i].merge(statDelegate.defaultConfig) { (_, new) in new } }\n", " states[statDelegate.name] = Model.AllDifferentiableVariables.zero\n", " }\n", " for i in 0...self).enumerated() {\n", " var grad = direction[keyPath: kp]\n", " var state = states.mapValues(){$0[keyPath: kp]}\n", " var config = configs[splits(i)]\n", " for statDelegate in statDelegates {\n", " statDelegate.update(\n", " state: &state,\n", " for: model[keyPath: kp],\n", " along: grad,\n", " config: &config\n", " )\n", " }\n", " for n in states.keys { states[n]![keyPath: kp] = state[n]! 
}\n", " for stepDelegate in stepDelegates {\n", " stepDelegate.update(\n", " param: &model[keyPath: kp],\n", " along: &grad,\n", " state: state,\n", " config: &config\n", " )\n", " }\n", " }\n", " }\n", "}" ] }, { "cell_type": "code", "execution_count": 48, "metadata": {}, "outputs": [], "source": [ "//export\n", "/// Plain gradient-descent step: subtracts `direction * config[LearningRate()]` from `param`.\n", "class SGDStep: StepDelegate {\n", " override func update(\n", " param: inout Tensor,\n", " along direction: inout Tensor,\n", " state: [String: Tensor],\n", " config: inout HeterogeneousDictionary\n", " ) {\n", " param -= direction * config[LearningRate()]\n", " }\n", "}" ] }, { "cell_type": "code", "execution_count": 49, "metadata": {}, "outputs": [], "source": [ "//export\n", "/// Config key for the weight-decay coefficient; defaults to 0.0.\n", "public struct WeightDecayKey: HetDictKey, Equatable {\n", " public static var defaultValue: Float = 0.0\n", "}\n", "\n", "/// Weight decay applied straight to the parameter: multiplies `param` by\n", "/// `1 - learningRate * weightDecay` and leaves `direction` untouched.\n", "class WeightDecay: StepDelegate {\n", " override func update(\n", " param: inout Tensor,\n", " along direction: inout Tensor,\n", " state: [String: Tensor],\n", " config: inout HeterogeneousDictionary\n", " ) {\n", " param *= 1 - config[LearningRate()] * config[WeightDecayKey()]\n", " }\n", "}" ] }, { "cell_type": "code", "execution_count": 50, "metadata": {}, "outputs": [], "source": [ "//export\n", "\n", "/// L2 regularization: adds `weightDecay * param` to the gradient `direction`\n", "/// and leaves `param` untouched.\n", "class L2Regularization: StepDelegate {\n", " override func update(\n", " param: inout Tensor,\n", " along direction: inout Tensor,\n", " state: [String: Tensor],\n", " config: inout HeterogeneousDictionary\n", " ) {\n", " direction += config[WeightDecayKey()] * param\n", " }\n", "}" ] }, { "cell_type": "code", "execution_count": 51, "metadata": {}, "outputs": [], "source": [ "//export\n", "\n", "/// Config key for the decay coefficient of the running gradient average; defaults to 0.9.\n", "public struct Momentum: HetDictKey, Equatable {\n", " public static var defaultValue: Float = 0.9\n", "}\n", "\n", "/// Config key for the weight given to each new gradient in the running average.\n", "/// When `AverageGrad` is in use, its `update` overwrites this value on every call.\n", "public struct MomentumDampening: HetDictKey, Equatable {\n", " public static var defaultValue: Float = 0.9\n", "}\n", "\n", "/// Tracks a running average of the gradients under the state key \"averageGrad\".\n", "/// With `dampened` set, each new gradient is weighted by `1 - momentum`; otherwise by 1.\n", "class AverageGrad: StatDelegate {\n", " let dampened: Bool\n", " init(dampened: Bool = false) { self.dampened = dampened }\n", " 
override var name: String { return \"averageGrad\" }\n", " override func update(\n", " state: inout [String: Tensor],\n", " for param: Tensor,\n", " along direction: Tensor,\n", " config: inout HeterogeneousDictionary\n", " ) {\n", " // Decay the stored average by the momentum coefficient.\n", " state[\"averageGrad\"]! *= config[Momentum()]\n", " // Weight for the new gradient: 1 - momentum when dampened, otherwise 1.\n", " // It is written into `config` so step delegates (e.g. AdamStep) can read the value used.\n", " config[MomentumDampening()] = 1.0 - (dampened ? config[Momentum()] : 0.0)\n", " state[\"averageGrad\"]! += config[MomentumDampening()] * direction\n", " }\n", "}" ] }, { "cell_type": "code", "execution_count": 52, "metadata": {}, "outputs": [], "source": [ "// Maps a parameter index to a config index: indices 0 and 1 use config 0, all others config 1.\n", "func split_func(_ a: Int) -> Int { return a < 2 ? 0 : 1 }" ] }, { "cell_type": "code", "execution_count": 53, "metadata": {}, "outputs": [], "source": [ "// Two parameter groups: learning rate 0.0 for group 0 and 0.01 for group 1.\n", "var configs = [HeterogeneousDictionary(LearningRate(), 0.0), HeterogeneousDictionary(LearningRate(), 0.01)]\n", "let opt = StatefulOptimizer(stepDelegates: [SGDStep()], statDelegates: [], \n", " configs: configs, splits: split_func)" ] }, { "cell_type": "code", "execution_count": 54, "metadata": {}, "outputs": [], "source": [ "let learner = Learner(data: data, lossFunction: softmaxCrossEntropy, optimizer: opt, initializingWith: modelInit)\n", "let recorder = learner.makeDefaultDelegates(metrics: [accuracy])\n", "learner.delegates.append(learner.makeNormalize(mean: mnistStats.mean, std: mnistStats.std))" ] }, { "cell_type": "code", "execution_count": 55, "metadata": {}, "outputs": [], "source": [ "let params = learner.model.allDifferentiableVariables" ] }, { "cell_type": "code", "execution_count": 56, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[ 0.0148044545, -0.018168775, 0.08359044, 0.032670785, -0.059204474, 0.0033588395,\r\n", " -0.040620767, -0.04872285, 0.038860243, -0.076237716, 0.032958053, -0.025008192,\r\n", " -0.045404717, -0.055635635, 0.061349068, 0.023430856, 0.06070709, -0.04115163,\r\n", " -0.07197424, -0.061004944, 0.05786184, 0.07555689, -0.03056003, -0.0058024437,\r\n", " 0.074971415, -0.011336141, -0.017656116, 
-0.034722082, -0.04728878, 0.07238687,\r\n", " -0.07602549, -0.057238247, 0.030227048, -0.0347825, -0.038982436, -0.055281255,\r\n", " 0.07062517, -0.038831223, 0.025091609, -0.0579995, -0.044969853, 0.06816071,\r\n", " -0.06059001, 0.002419782, -0.046240397, 0.0035123578, 0.036387447, -0.07582915,\r\n", " -0.04411659, 0.05419134]\r\n", "0.0\r\n", "[ -0.25084066, 0.21469887, -0.13831557, 0.31209216, -0.045463495, -0.2620387,\r\n", " -0.1689401, 0.118945606, 0.17115703, -0.07897408]\r\n", "0.0\r\n" ] } ], "source": [ "for kp in params.recursivelyAllWritableKeyPaths(to: TF.self) { \n", " print(params[keyPath: kp][0]) \n", "}" ] }, { "cell_type": "code", "execution_count": 57, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Epoch 0: [1.2309564, 0.6487] \n", "Epoch 1: [0.99529856, 0.7125] \n", " \r" ] } ], "source": [ "learner.fit(2)" ] }, { "cell_type": "code", "execution_count": 59, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[ 0.0148044545, -0.018168775, 0.08359044, 0.032670785, -0.059204474, 0.0033588395,\r\n", " -0.040620767, -0.04872285, 0.038860243, -0.076237716, 0.032958053, -0.025008192,\r\n", " -0.045404717, -0.055635635, 0.061349068, 0.023430856, 0.06070709, -0.04115163,\r\n", " -0.07197424, -0.061004944, 0.05786184, 0.07555689, -0.03056003, -0.0058024437,\r\n", " 0.074971415, -0.011336141, -0.017656116, -0.034722082, -0.04728878, 0.07238687,\r\n", " -0.07602549, -0.057238247, 0.030227048, -0.0347825, -0.038982436, -0.055281255,\r\n", " 0.07062517, -0.038831223, 0.025091609, -0.0579995, -0.044969853, 0.06816071,\r\n", " -0.06059001, 0.002419782, -0.046240397, 0.0035123578, 0.036387447, -0.07582915,\r\n", " -0.04411659, 0.05419134]\r\n", "0.0\r\n", "[ 0.05842939, 0.19405742, 0.22415991, 0.613552, -0.52648324, -0.042704947,\r\n", " -0.020534433, -0.24653265, 0.10602849, -0.4876513]\r\n", "-0.048730064\r\n" ] } ], "source": [ "let params = learner.model.allDifferentiableVariables\n", 
"for kp in params.recursivelyAllWritableKeyPaths(to: TF.self) { \n", " print(params[keyPath: kp][0]) \n", "}" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "//export\n", "/// Momentum SGD step: subtracts `learningRate * state[\"averageGrad\"]` from `param`.\n", "/// Requires an `AverageGrad` stat delegate, since the state entry is force-unwrapped.\n", "class MomentumStep: StepDelegate {\n", " override func update(\n", " param: inout Tensor,\n", " along direction: inout Tensor,\n", " state: [String: Tensor],\n", " config: inout HeterogeneousDictionary\n", " ) {\n", " param -= config[LearningRate()] * state[\"averageGrad\"]!\n", " }\n", "}" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "// A single config shared by all parameters: `splits` sends every index to group 0.\n", "let opt = StatefulOptimizer(stepDelegates: [MomentumStep()], statDelegates: [AverageGrad()], \n", " configs: [HeterogeneousDictionary(LearningRate(), 0.01)], splits: { _ in 0 })" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "let learner = Learner(data: data, lossFunction: softmaxCrossEntropy, optimizer: opt, initializingWith: modelInit)\n", "let recorder = learner.makeDefaultDelegates(metrics: [accuracy])\n", "learner.delegates.append(learner.makeNormalize(mean: mnistStats.mean, std: mnistStats.std))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Epoch 0: [0.17444672, 0.9463] \n", "Epoch 1: [0.13051678, 0.9596] \n", " \r" ] } ], "source": [ "learner.fit(2)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "//export\n", "\n", "/// Config key for the decay coefficient of the running squared-gradient average; defaults to 0.99.\n", "public struct SquareMomentum: HetDictKey, Equatable {\n", " public static var defaultValue: Float = 0.99\n", "}\n", "\n", "/// Config key for the weight given to each new squared gradient.\n", "/// When `AverageSquaredGrad` is in use, its `update` overwrites this value on every call.\n", "public struct SquareMomentumDampening: HetDictKey, Equatable {\n", " public static var defaultValue: Float = 0.99\n", "}\n", "\n", "\n", "/// Tracks a running average of squared gradients under the state key \"averageSquaredGrad\".\n", "/// With `dampened` set, each new squared gradient is weighted by `1 - squareMomentum`; otherwise by 1.\n", "class AverageSquaredGrad: StatDelegate {\n", " let dampened: Bool\n", " init(dampened: Bool = false) { self.dampened = dampened }\n", " override var name: String { return \"averageSquaredGrad\" }\n", " override func 
update(\n", " state: inout [String: Tensor],\n", " for param: Tensor,\n", " along direction: Tensor,\n", " config: inout HeterogeneousDictionary\n", " ) {\n", " // Decay the stored average by the square-momentum coefficient.\n", " state[\"averageSquaredGrad\"]! *= config[SquareMomentum()]\n", " // Weight for the new squared gradient: 1 - squareMomentum when dampened, otherwise 1.\n", " config[SquareMomentumDampening()] = 1.0 - (dampened ? config[SquareMomentum()] : 0.0)\n", " state[\"averageSquaredGrad\"]! += config[SquareMomentumDampening()] * direction.squared()\n", " }\n", "}" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "//export\n", "/// Counts optimizer updates: adds 1.0 to the \"step\" state entry on every call.\n", "class StepCount: StatDelegate {\n", " override var name: String { return \"step\" }\n", " override func update(\n", " state: inout [String: Tensor],\n", " for param: Tensor,\n", " along direction: Tensor,\n", " config: inout HeterogeneousDictionary\n", " ) {\n", " state[\"step\"]! += 1.0\n", " }\n", "}" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "//export\n", "/// Returns `dampening * (1 - momentum^step) / (1 - momentum)`.\n", "/// `AdamStep` and `LambStep` divide by this value to debias the running averages.\n", "/// Note: `1 - momentum` is the divisor, so `momentum == 1` is not supported.\n", "func debias(\n", " momentum: Scalar,\n", " dampening: Scalar,\n", " step: Tensor \n", ") -> Tensor {\n", " return dampening * (1 - pow(momentum, step)) / (1 - momentum)\n", "}" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "//export\n", "/// Config key for the small constant added to the denominator of adaptive steps; defaults to 1e-5.\n", "public struct Epsilon: HetDictKey, Equatable {\n", " public static var defaultValue: Float = 1e-5\n", "}\n", "\n", "/// Adam step: moves `param` along `averageGrad / (sqrt(averageSquaredGrad) + epsilon)`,\n", "/// with both running averages debiased through `debias`.\n", "/// Requires the \"averageGrad\", \"averageSquaredGrad\" and \"step\" state entries (force-unwrapped).\n", "class AdamStep: StepDelegate {\n", " override func update(\n", " param: inout Tensor,\n", " along direction: inout Tensor,\n", " state: [String: Tensor],\n", " config: inout HeterogeneousDictionary\n", " ) {\n", " // Dividing the learning rate by the debias factor is equivalent to debiasing averageGrad.\n", " let debiasedLearningRate = config[LearningRate()] / debias(\n", " momentum: config[Momentum()],\n", " dampening: config[MomentumDampening()],\n", " step: state[\"step\"]!\n", " )\n", " let debiasedRMSGrad = sqrt(state[\"averageSquaredGrad\"]! 
/ debias(\n", " momentum: config[SquareMomentum()],\n", " dampening: config[SquareMomentumDampening()],\n", " step: state[\"step\"]!\n", " )) + config[Epsilon()]\n", " // Step along the running gradient average, scaled element-wise by the debiased RMS gradient.\n", " param -= debiasedLearningRate * state[\"averageGrad\"]! / debiasedRMSGrad\n", " }\n", "}" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "// A single config shared by all parameters: `splits` sends every index to group 0.\n", "let opt = StatefulOptimizer(\n", " stepDelegates: [AdamStep()], \n", " statDelegates: [AverageGrad(), AverageSquaredGrad(), StepCount()], \n", " configs: [HeterogeneousDictionary(LearningRate(), 0.01)],\n", " splits: { _ in 0 })" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "let learner = Learner(data: data, lossFunction: softmaxCrossEntropy, optimizer: opt, initializingWith: modelInit)\n", "let recorder = learner.makeDefaultDelegates(metrics: [accuracy])\n", "learner.delegates.append(learner.makeNormalize(mean: mnistStats.mean, std: mnistStats.std))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Epoch 0: [0.22817639, 0.9364] \n", "Epoch 1: [0.20807356, 0.9413] \n", " \r" ] } ], "source": [ "learner.fit(2)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "/// Step delegate that rescales an Adam-style update (plus a weight-decay term) by the ratio\n", "/// of the parameter RMS to the update RMS, capped at 10.\n", "/// Requires the \"averageGrad\", \"averageSquaredGrad\" and \"step\" state entries (force-unwrapped).\n", "class LambStep: StepDelegate {\n", " /// Overrides the defaults for `Epsilon` (1e-6) and `WeightDecayKey` (0.0).\n", " override var defaultConfig: HeterogeneousDictionary {\n", " return HeterogeneousDictionary(Epsilon(), 1e-6, WeightDecayKey(), 0.0)\n", " }\n", " override func update(\n", " param: inout Tensor,\n", " along direction: inout Tensor,\n", " state: [String: Tensor],\n", " config: inout HeterogeneousDictionary\n", " ) {\n", " let debiasedAverageGrad = state[\"averageGrad\"]! / debias(\n", " momentum: config[Momentum()],\n", " dampening: config[MomentumDampening()],\n", " step: state[\"step\"]!\n", " )\n", " let debiasedRMSGrad = sqrt(state[\"averageSquaredGrad\"]! 
/ debias(\n", " momentum: config[SquareMomentum()],\n", " dampening: config[SquareMomentumDampening()],\n", " step: state[\"step\"]!\n", " )) + config[Epsilon()]\n", " // Epsilon is added outside the square root, as in AdamStep, so it keeps its configured scale.\n", " // Update direction: debiased gradient average over debiased RMS gradient, plus weight decay.\n", " let step = debiasedAverageGrad / debiasedRMSGrad + config[WeightDecayKey()] * param\n", " // Ratio of the parameter RMS to the update RMS, capped at 10.\n", " let r1 = sqrt((param * param).mean())\n", " let r2 = sqrt((step * step).mean())\n", " let factor = min(r1 / r2, Float(10.0))\n", " param -= config[LearningRate()] * factor * step\n", " }\n", "}" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Export" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "notebookToScript(fname: (Path.cwd / \"09_optimizer.ipynb\").string)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Swift", "language": "swift", "name": "swift" }, "language_info": { "file_extension": ".swift", "mimetype": "text/x-swift", "name": "swift", "version": "" } }, "nbformat": 4, "nbformat_minor": 1 }