{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Callbacks" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Installing packages:\n", "\t.package(path: \"/home/ubuntu/fastai_docs/dev_swift/FastaiNotebook_03_minibatch_training\")\n", "\t\tFastaiNotebook_03_minibatch_training\n", "With SwiftPM flags: []\n", "Working in: /tmp/tmpgb29yv6i/swift-install\n", "Fetching https://github.com/mxcl/Path.swift\n", "Fetching https://github.com/JustHTTP/Just\n", "Completed resolution in 2.55s\n", "Cloning https://github.com/mxcl/Path.swift\n", "Resolving https://github.com/mxcl/Path.swift at 0.16.2\n", "Cloning https://github.com/JustHTTP/Just\n", "Resolving https://github.com/JustHTTP/Just at 0.7.1\n", "Compile Swift Module 'Path' (9 sources)\n", "Compile Swift Module 'Just' (1 sources)\n", "Compile Swift Module 'FastaiNotebook_03_minibatch_training' (7 sources)\n", "Compile Swift Module 'jupyterInstalledPackages' (1 sources)\n", "Linking ./.build/x86_64-unknown-linux/debug/libjupyterInstalledPackages.so\n", "Initializing Swift...\n", "Installation complete!\n" ] } ], "source": [ "%install '.package(path: \"$cwd/FastaiNotebook_03_minibatch_training\")' FastaiNotebook_03_minibatch_training" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Load data" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "import FastaiNotebook_03_minibatch_training" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "// export\n", "import Path\n", "import TensorFlow" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "var (xTrain,yTrain,xValid,yValid) = loadMNIST(path: mnistPath, flat: true)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "60000 784 10\r\n" ] } ], 
"source": [ "let (n,m) = (xTrain.shape[0],xTrain.shape[1])\n", "let c = yTrain.max().scalarized()+1\n", "print(n,m,c)\n", "let nHid = 50" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "// export\n", "public struct BasicModel: Layer {\n", " public var layer1: Dense\n", " public var layer2: Dense\n", " \n", " public init(nIn: Int, nHid: Int, nOut: Int){\n", " layer1 = Dense(inputSize: nIn, outputSize: nHid, activation: relu)\n", " layer2 = Dense(inputSize: nHid, outputSize: nOut)\n", " }\n", " \n", " @differentiable\n", " public func applied(to input: Tensor) -> Tensor {\n", " return input.sequenced(through: layer1, layer2)\n", " }\n", "}" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "var model = BasicModel(nIn: m, nHid: nHid, nOut: Int(c))" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "//export \n", "public struct FADataset where Element: TensorGroup{\n", " public var innerDs: Dataset\n", " public var shuffle: Bool = false\n", " public var bs: Int = 64 \n", " public var dsCount: Int\n", " \n", " public var count: Int {\n", " return dsCount%bs == 0 ? 
dsCount/bs : dsCount/bs+1\n", " }\n", " \n", " public var ds: Dataset { \n", " if !shuffle { return innerDs.batched(Int64(bs))}\n", " let seed = Int64.random(in: Int64.min.., len: Int, shuffle: Bool = false, bs: Int = 64){\n", " (self.innerDs,self.dsCount,self.shuffle,self.bs) = (ds, len, shuffle, bs)\n", " }\n", "}" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "// export\n", "public struct DataBunch where Element: TensorGroup{\n", " public var train: FADataset\n", " public var valid: FADataset \n", " \n", " public init(train: Dataset, valid: Dataset, trainLen: Int, validLen: Int, bs: Int = 64) {\n", " self.train = FADataset(train, len: trainLen, shuffle: true, bs: bs)\n", " self.valid = FADataset(valid, len: validLen, shuffle: false, bs: 2*bs)\n", " }\n", "}" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "//export\n", "public func mnistDataBunch(path: Path = mnistPath, flat: Bool = false, bs: Int = 64\n", " ) -> DataBunch>{\n", " let (xTrain,yTrain,xValid,yValid) = loadMNIST(path: path, flat: flat)\n", " return DataBunch(train: Dataset(elements:DataBatch(xb:xTrain, yb:yTrain)), \n", " valid: Dataset(elements:DataBatch(xb:xValid, yb:yValid)),\n", " trainLen: xTrain.shape[0],\n", " validLen: xValid.shape[0],\n", " bs: bs)\n", "}" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "let data = mnistDataBunch(flat: true)" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "938\n" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.train.count" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Shuffle test" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "232.35490609999997 ms\r\n" ] } ], "source": [ "time(repeating: 10) {\n", " var 
tst = data.train.ds\n", " var firstBatch: DataBatch? = nil\n", " for batch in tst{\n", " firstBatch = batch\n", " break\n", " }\n", " firstBatch!.yb\n", "}" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[0, 2, 0, 9, 5, 1, 9, 2, 3, 9, 3, 1, 6, 6, 9, 5, 5, 5, 7, 9, 7, 0, 4, 1, 5, 9, 9, 2, 7, 6, 3, 7, 2, 1, 0, 2, 1, 5, 9, 3, 7, 4, 4, 3, 8, 7, 6, 3, 1, 1, 1, 2, 1, 0, 1, 9, 7, 1, 8, 9, 2, 0, 3, 4]\n" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "var tst = data.train.ds\n", "var firstBatch: DataBatch? = nil\n", "for batch in tst{\n", " firstBatch = batch\n", " break\n", "}\n", "firstBatch!.yb" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Learner" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [], "source": [ "// export\n", "public enum LearnerAction: Error {\n", " case skipEpoch\n", " case skipBatch\n", " case stop\n", "}" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Basic class" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "// export\n", "/// A model learner, responsible for initializing and training a model on a given dataset.\n", "public final class Learner\n", " where Opt.Scalar: Differentiable,\n", " // Constrain model input to Tensor, to work around\n", " // https://forums.fast.ai/t/fix-ad-crash-in-learner/42970.\n", " Opt.Model.Input == Tensor\n", "{\n", " // Common type aliases.\n", " public typealias Input = Model.Input\n", " public typealias Data = DataBunch>\n", " public typealias Loss = Tensor\n", " public typealias Optimizer = Opt\n", " public typealias Model = Optimizer.Model\n", " public typealias Variables = Model.AllDifferentiableVariables\n", " public typealias EventHandler = (Learner) throws -> Void\n", " \n", " /// A wrapper class to hold the loss function, to work around\n", " // 
https://forums.fast.ai/t/fix-ad-crash-in-learner/42970.\n", " public final class LossFunction {\n", " public typealias F = @differentiable (Model.Output, @nondiff Label) -> Loss\n", " public var f: F\n", " init(_ f: @escaping F) { self.f = f }\n", " }\n", " \n", " /// The dataset on which the model will be trained.\n", " public var data: Data\n", " /// The optimizer used for updating model parameters along gradient vectors.\n", " public var optimizer: Optimizer\n", " /// The function that computes a loss value when given a prediction and a label.\n", " public var lossFunction: LossFunction\n", " /// The model being trained.\n", " public var model: Model\n", " \n", " //Is there a better way to initialize those to not make them Optionals?\n", " public var currentInput: Input? = nil\n", " public var currentTarget: Label? = nil\n", " public var currentOutput: Model.Output? = nil\n", " \n", " /// The number of total epochs.\n", " public private(set) var epochCount: Int = .zero\n", " /// The current epoch.\n", " public private(set) var currentEpoch: Int = .zero\n", " /// The current gradient.\n", " public private(set) var currentGradient: Model.CotangentVector = .zero\n", " /// The current loss.\n", " public private(set) var currentLoss: Loss = .zero\n", " /// In training mode or not\n", " public private(set) var inTrain: Bool = false\n", " /// The current epoch + iteration, float between 0.0 and epochCount\n", " public private(set) var pctEpochs: Float = 0.0\n", " /// The current iteration\n", " public private(set) var currentIter: Int = 0\n", " /// The number of iterations in the current dataset\n", " public private(set) var iterCount: Int = 0\n", " \n", " open class Delegate {\n", " open var order: Int { return 0 }\n", " public init () {}\n", " \n", " open func trainingWillStart(learner: Learner) throws {}\n", " /// The completion of model training.\n", " open func trainingDidFinish(learner: Learner) throws {}\n", " /// A closure which will be called upon the start of 
an epoch.\n", " open func epochWillStart(learner: Learner) throws {}\n", " /// A closure which will be called upon the completion of an epoch.\n", " open func epochDidFinish(learner: Learner) throws {}\n", " /// A closure which will be called upon the start of model validation.\n", " open func validationWillStart(learner: Learner) throws {}\n", " /// A closure which will be called upon the start of training on a batch.\n", " open func batchWillStart(learner: Learner) throws {}\n", " /// A closure which will be called upon the completion of training on a batch.\n", " open func batchDidFinish(learner: Learner) throws {}\n", " /// A closure which will be called when a new gradient has been computed.\n", " open func didProduceNewGradient(learner: Learner) throws {}\n", " /// A closure which will be called upon the completion of an optimizer update.\n", " open func optimizerDidUpdate(learner: Learner) throws {}\n", " ///\n", " /// TODO: learnerDidProduceNewOutput and learnerDidProduceNewLoss need to\n", " /// be differentiable once we can have the loss function inside the Learner\n", " }\n", " \n", " public var delegates: [Delegate] = [] {\n", " didSet { delegates.sort { $0.order < $1.order } }\n", " }\n", " \n", " /// Creates a learner.\n", " ///\n", " /// - Parameters:\n", " /// - data: The databunch used for training and validation.\n", " /// - lossFunction: The loss function.\n", " /// - optimizer: The optimizer used for updating model parameters.\n", " /// - modelInitializer: The closure that produces the model to be trained.\n", " public init(data: Data,\n", " lossFunction: @escaping LossFunction.F,\n", " optimizer: Optimizer,\n", " initializingWith modelInitializer: () -> Model) {\n", " self.data = data\n", " self.optimizer = optimizer\n", " self.lossFunction = LossFunction(lossFunction)\n", " self.model = modelInitializer()\n", " }\n", "}" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Then let's write the parts of the training loop:" ] }, { 
"cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [], "source": [ "// export\n", "extension Learner {\n", " /// Trains the model on the given batch.\n", " ///\n", " /// - Parameter batch: The batch of input data and labels to be trained on.\n", " ///\n", " private func evaluate(onBatch batch: DataBatch) throws {\n", " currentOutput = model.applied(to: currentInput!)\n", " currentLoss = lossFunction.f(currentOutput!, currentTarget!)\n", " }\n", " \n", " private func train(onBatch batch: DataBatch) throws {\n", " let (xb,yb) = (currentInput!,currentTarget!)\n", " (currentLoss, currentGradient) = model.valueWithGradient { model -> Loss in \n", " let y = model.applied(to: xb) \n", " currentOutput = y\n", " return lossFunction.f(y, yb)\n", " }\n", " try delegates.forEach { try $0.didProduceNewGradient(learner: self) }\n", " optimizer.update(&model.allDifferentiableVariables, along: self.currentGradient)\n", " }\n", " \n", " /// Performs a training epoch on a Dataset.\n", " private func train(onDataset ds: FADataset>) throws {\n", " iterCount = ds.count\n", " for batch in ds.ds {\n", " (currentInput, currentTarget) = (batch.xb, batch.yb)\n", " try delegates.forEach { try $0.batchWillStart(learner: self) }\n", " do { if inTrain { try train(onBatch: batch) } else { try evaluate(onBatch: batch) }}\n", " catch LearnerAction.skipBatch {}\n", " try delegates.forEach { try $0.batchDidFinish(learner: self) }\n", " }\n", " }\n", "}" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "And the whole fit function." 
] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [], "source": [ "// export\n", "extension Learner {\n", " /// Starts fitting.\n", " /// - Parameter epochCount: The number of epochs that will be run.\n", " public func fit(_ epochCount: Int) throws {\n", " self.epochCount = epochCount\n", " do {\n", " try delegates.forEach { try $0.trainingWillStart(learner: self) }\n", " for i in 0..(learningRate: 1e-2)" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [], "source": [ "func modelInit() -> BasicModel {return BasicModel(nIn: m, nHid: nHid, nOut: Int(c))}" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [], "source": [ "let learner = Learner(data: data, lossFunction: softmaxCrossEntropy, optimizer: opt, initializingWith: modelInit)" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [], "source": [ "learner.fit(2)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Let's add Callbacks!" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Extension with convenience methods to add delegates:" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [], "source": [ "// export\n", "public extension Learner {\n", " func addDelegate(_ delegate: Learner.Delegate) {\n", " delegates.append(delegate)\n", " }\n", " \n", " func addDelegates(_ delegates: [Learner.Delegate]) {\n", " self.delegates += delegates\n", " }\n", "}" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Train/eval" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Callback classes are defined as extensions of the Learner." 
] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [], "source": [
 "// export\n",
 "extension Learner {\n",
 "    /// Delegate that maintains the learner's progress counters and switches the learning\n",
 "    /// phase between training and inference.\n",
 "    public class TrainEvalDelegate: Delegate {\n",
 "        /// Resets the fractional epoch counter before the first epoch.\n",
 "        public override func trainingWillStart(learner: Learner) {\n",
 "            learner.pctEpochs = 0.0\n",
 "        }\n",
 "\n",
 "        /// Sets the learning phase to `.training` and resets the per-epoch counters.\n",
 "        public override func epochWillStart(learner: Learner) {\n",
 "            Context.local.learningPhase = .training\n",
 "            learner.pctEpochs = Float(learner.currentEpoch)\n",
 "            learner.inTrain = true\n",
 "            learner.currentIter = 0\n",
 "        }\n",
 "\n",
 "        /// Advances the iteration counter and, while training, advances `pctEpochs` by one\n",
 "        /// batch's share of an epoch.\n",
 "        public override func batchDidFinish(learner: Learner) {\n",
 "            learner.currentIter += 1\n",
 "            if learner.inTrain{ learner.pctEpochs += 1.0 / Float(learner.iterCount) }\n",
 "        }\n",
 "\n",
 "        /// Sets the learning phase to `.inference` and restarts the iteration counter.\n",
 "        public override func validationWillStart(learner: Learner) {\n",
 "            Context.local.learningPhase = .inference\n",
 "            learner.inTrain = false\n",
 "            learner.currentIter = 0\n",
 "        }\n",
 "    }\n",
 "\n",
 "    /// Creates a `TrainEvalDelegate` for this learner type.\n",
 "    public func makeTrainEvalDelegate() -> TrainEvalDelegate { return TrainEvalDelegate() }\n",
 "}" ] },
 { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [], "source": [ "let learner = Learner(data: data, lossFunction: softmaxCrossEntropy, optimizer: opt, initializingWith: modelInit)" ] },
 { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [], "source": [ "learner.delegates = [learner.makeTrainEvalDelegate()]" ] },
 { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [], "source": [ "learner.fit(2)" ] },
 { "cell_type": "markdown", "metadata": {}, "source": [ "### AverageMetric" ] },
 { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [], "source": [
 "// export\n",
 "// TODO: make metrics more generic (probably for after the course)\n",
 "extension Learner {\n",
 "    /// Delegate that averages the loss and a list of metrics over the validation batches of\n",
 "    /// each epoch and prints them when the epoch finishes.\n",
 "    public class AvgMetric: Delegate {\n",
 "        /// Metric functions, each called with a batch's model output and target.\n",
 "        public let metrics: [(TF,TI) -> TF]\n",
 "        /// Number of samples accumulated so far in the current epoch.\n",
 "        var total: Int = 0\n",
 "        /// Running sums weighted by batch size: index 0 is the loss, index `i + 1` is `metrics[i]`.\n",
 "        var partials: [TF] = []\n",
 "\n",
 "        /// - Parameter metrics: The metric functions to average; may be empty, in which case\n",
 "        ///   only the loss is tracked.\n",
 "        public init(metrics: [(TF, TI) -> TF]){ self.metrics = metrics}\n",
 "\n",
 "        /// Clears the sample count and the running sums at the start of each epoch.\n",
 "        public override func epochWillStart(learner: Learner) {\n",
 "            total = 0\n",
 "            partials = Array(repeating: Tensor(0), count: metrics.count + 1)\n",
 "        }\n",
 "\n",
 "        /// Adds the finished batch's loss and metrics, weighted by batch size, to the running sums.\n",
 "        ///\n",
 "        /// Nothing is accumulated while training, or when the current target is not a `TI`.\n",
 "        public override func batchDidFinish(learner: Learner) {\n",
 "            guard !learner.inTrain, let target = learner.currentTarget as? TI else { return }\n",
 "            let bs = target.shape[0]\n",
 "            total += bs\n",
 "            partials[0] += Float(bs) * learner.currentLoss\n",
 "            // Enumerate rather than iterate `1...metrics.count`: that range traps at runtime\n",
 "            // when `metrics` is empty.\n",
 "            for (i, metric) in metrics.enumerated() {\n",
 "                partials[i + 1] += Float(bs) * metric((learner.currentOutput as! TF), target)\n",
 "            }\n",
 "        }\n",
 "\n",
 "        /// Divides the running sums by the number of samples seen and prints the averages.\n",
 "        public override func epochDidFinish(learner: Learner) {\n",
 "            // Mapping over `partials` avoids indexing past its end if it was never sized.\n",
 "            partials = partials.map { $0 / Float(total) }\n",
 "            print(\"Epoch \\(learner.currentEpoch): \\(partials)\")\n",
 "        }\n",
 "    }\n",
 "\n",
 "    /// Creates an `AvgMetric` delegate for this learner type.\n",
 "    ///\n",
 "    /// - Parameter metrics: The metric functions to average over each validation pass.\n",
 "    public func makeAvgMetric(metrics: [(TF,TI) -> TF]) -> AvgMetric{\n",
 "        return AvgMetric(metrics: metrics)\n",
 "    }\n",
 "}" ] },
 { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [], "source": [ "let learner = Learner(data: data, lossFunction: softmaxCrossEntropy, optimizer: opt, initializingWith: modelInit)" ] },
 { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [], "source": [ "learner.delegates = [learner.makeTrainEvalDelegate(), learner.makeAvgMetric(metrics: [accuracy])]" ] },
 { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Epoch 0: [0.49971578, 0.8738]\n", "Epoch 1: [0.38159794, 0.8953]\n" ] } ], "source": [ "learner.fit(2)" ] },
 { "cell_type": "markdown", "metadata": {}, "source": [ "### Normalization" ] },
 { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [], "source": [
 "// export\n",
 "extension Learner {\n",
 "    /// Delegate that normalizes every input batch with a fixed mean and standard deviation.\n",
 "    public class Normalize: Delegate {\n",
 "        /// Statistics subtracted from and divided into each input batch.\n",
 "        public let mean, std: TF\n",
 "\n",
 "        /// - Parameters:\n",
 "        ///   - mean: The value subtracted from the input.\n",
 "        ///   - std: The value the centered input is divided by.\n",
 "        public init(mean: TF, std: TF){\n",
 "            (self.mean,self.std) = (mean,std)\n",
 "        }\n",
 "\n",
 "        /// Replaces `currentInput` with `(currentInput - mean) / std` before the batch is processed.\n",
 "        public override func batchWillStart(learner: Learner) {\n",
 "            learner.currentInput = (learner.currentInput! - mean) / std\n",
 "        }\n",
 "    }\n",
 "\n",
 "    /// Creates a `Normalize` delegate for this learner type.\n",
 "    public func makeNormalize(mean: TF, std: TF) -> Normalize{\n",
 "        return Normalize(mean: mean, std: std)\n",
 "    }\n",
 "}" ] },
 { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "▿ 2 elements\n", " - .0 : 0.13066047\n", " - .1 : 0.3081079\n" ] }, "execution_count": 33, "metadata": {}, "output_type": "execute_result" } ], "source": [ "(xTrain.mean(), xTrain.standardDeviation())" ] },
 { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [], "source": [ "// export\n", "public let mnistStats = (mean: TF(0.13066047), std: TF(0.3081079))" ] },
 { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [], "source": [ "let learner = Learner(data: data, lossFunction: softmaxCrossEntropy, optimizer: opt, initializingWith: modelInit)" ] },
 { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [], "source": [ "learner.delegates = [learner.makeTrainEvalDelegate(), learner.makeAvgMetric(metrics: [accuracy]),\n", "                     learner.makeNormalize(mean: mnistStats.mean, std: mnistStats.std)]" ] },
 { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Epoch 0: [0.2994869, 0.9128]\n", "Epoch 1: [0.2483886, 0.9272]\n" ] } ], "source": [ "learner.fit(2)" ] },
 { "cell_type": "markdown", "metadata": {}, "source": [ "## Export" ] },
 { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [], "source": [ "notebookToScript(fname: (Path.cwd / \"04_callbacks.ipynb\").string)" ] },
 { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Swift", "language": "swift", "name": "swift" }, "language_info": { "file_extension": ".swift", "mimetype": "text/x-swift", "name": "swift", "version": "" } }, "nbformat": 4, "nbformat_minor": 1 }