{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Callbacks" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Installing packages:\n", "\t.package(path: \"/home/ubuntu/fastai_docs/dev_swift/FastaiNotebook_03_minibatch_training\")\n", "\t\tFastaiNotebook_03_minibatch_training\n", "With SwiftPM flags: []\n", "Working in: /tmp/tmpaz6m2tdz\n", "Fetching https://github.com/mxcl/Path.swift\n", "Fetching https://github.com/JustHTTP/Just\n", "Completed resolution in 2.39s\n", "Cloning https://github.com/JustHTTP/Just\n", "Resolving https://github.com/JustHTTP/Just at 0.7.1\n", "Cloning https://github.com/mxcl/Path.swift\n", "Resolving https://github.com/mxcl/Path.swift at 0.16.2\n", "Compile Swift Module 'Just' (1 sources)\n", "Compile Swift Module 'Path' (9 sources)\n", "Compile Swift Module 'FastaiNotebook_03_minibatch_training' (6 sources)\n", "Compile Swift Module 'jupyterInstalledPackages' (1 sources)\n", "Linking ./.build/x86_64-unknown-linux/debug/libjupyterInstalledPackages.so\n", "Initializing Swift...\n", "Loading library...\n", "Installation complete!\n" ] } ], "source": [ "%install '.package(path: \"$cwd/FastaiNotebook_03_minibatch_training\")' FastaiNotebook_03_minibatch_training" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Load data" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import FastaiNotebook_03_minibatch_training" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "// export\n", "import Path\n", "import TensorFlow" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "var (xTrain,yTrain,xValid,yValid) = loadMNIST(path: mnistPath, flat: true)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "60000 
784 10\r\n" ] } ], "source": [ "let (n,m) = (Int(xTrain.shape[0]),Int(xTrain.shape[1]))\n", "let c = yTrain.max()+1\n", "print(n,m,c)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Those can't be used to define a model cause they're not Ints though..." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "let (n,m) = (60000,784)\n", "let c = 10\n", "let nHid = 50" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "// export\n", "public struct BasicModel: Layer {\n", " public var layer1: Dense\n", " public var layer2: Dense\n", " \n", " public init(nIn: Int, nHid: Int, nOut: Int){\n", " layer1 = Dense(inputSize: nIn, outputSize: nHid, activation: relu)\n", " layer2 = Dense(inputSize: nHid, outputSize: nOut)\n", " }\n", " \n", " @differentiable\n", " public func applied(to input: Tensor, in context: Context) -> Tensor {\n", " return input.sequenced(in: context, through: layer1, layer2)\n", " }\n", "}" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "var model = BasicModel(nIn: m, nHid: nHid, nOut: c)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "// export\n", "/// Pairs a training and a validation dataset and serves them in batches of `batchSize`.\n", "public struct DataBunch where Element: TensorGroup{\n", " private var _train: Dataset\n", " private var _valid: Dataset\n", " public var shuffleTrain: Bool = true\n", " public var shuffleValid: Bool = false\n", " public var batchSize: Int = 64 \n", " public var train: Dataset { return processDs(_train, shuffleTrain) }\n", " public var valid: Dataset { return processDs(_valid, shuffleValid) }\n", " \n", " /// Batches `ds`; when `shuffle` is true the elements are shuffled first.\n", " private func processDs(_ ds: Dataset, _ shuffle: Bool) -> Dataset{\n", " if !shuffle { return ds.batched(Int64(batchSize))}\n", " let count = Int64(ds.count(where: {_ in true}))\n", " // Shuffle before batching: shuffling an already batched dataset only reorders\n", " // whole batches, so the same samples would stay grouped together every epoch.\n", " return ds.shuffled(sampleCount: count, randomSeed: Int64(random())).batched(Int64(batchSize))\n", " }\n", " \n", " public 
init(train: Dataset, valid: Dataset, batchSize: Int = 64) {\n", " (self._train, self._valid, self.batchSize) = (train, valid, batchSize)\n", " }\n", "}" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "//export\n", "public func mnistDataBunch(path: Path = mnistPath, flat: Bool = false, bs: Int = 64\n", " ) -> DataBunch, Tensor>>{\n", " let (xTrain,yTrain,xValid,yValid) = loadMNIST(path: path, flat: flat)\n", " return DataBunch(train: Dataset(elements:DataBatch(xb:xTrain, yb:yTrain)), \n", " valid: Dataset(elements:DataBatch(xb:xValid, yb:yValid)),\n", " batchSize: bs)\n", "}" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "let data = mnistDataBunch(flat: true)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Shuffle test" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[8, 8, 1, 7, 9, 8, 8, 1, 2, 0, 4, 0, 9, 6, 9, 7, 0, 7, 4, 5, 4, 0, 3, 6, 9, 1, 7, 1, 9, 2, 3, 5, 5, 0, 9, 7, 0, 0, 4, 4, 1, 4, 5, 1, 0, 8, 3, 6, 4, 0, 8, 0, 1, 0, 7, 3, 2, 7, 6, 1, 7, 6, 7, 4]\n" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "var tst = data.train\n", "var firstBatch: DataBatch, Tensor>? = nil\n", "for batch in tst{\n", " firstBatch = batch\n", " break\n", "}\n", "firstBatch!.yb" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[8, 0, 9, 7, 2, 5, 2, 8, 8, 8, 7, 3, 3, 7, 3, 8, 8, 9, 8, 2, 6, 5, 2, 3, 8, 1, 9, 7, 9, 3, 1, 9, 6, 1, 9, 9, 5, 9, 6, 6, 7, 0, 4, 9, 4, 4, 6, 3, 0, 7, 4, 0, 0, 4, 0, 1, 6, 4, 5, 3, 3, 5, 3, 4]\n" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "var tst = data.train\n", "var firstBatch: DataBatch, Tensor>? 
= nil\n", "for batch in tst{\n", " firstBatch = batch\n", " break\n", "}\n", "firstBatch!.yb" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Learner" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "// export\n", "public enum LearnerAction: Error {\n", " case skipEpoch\n", " case skipBatch\n", " case stop\n", "}" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Basic class" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "// export\n", "/// A model learner, responsible for initializing and training a model on a given dataset.\n", "public final class Learner\n", " where Opt.Scalar: Differentiable,\n", " // Constrain model input to Tensor, to work around\n", " // https://forums.fast.ai/t/fix-ad-crash-in-learner/42970.\n", " Opt.Model.Input == Tensor\n", "{\n", " // Common type aliases.\n", " public typealias Input = Model.Input\n", " public typealias Data = DataBunch>\n", " public typealias Loss = Tensor\n", " public typealias Optimizer = Opt\n", " public typealias Model = Optimizer.Model\n", " public typealias Variables = Model.AllDifferentiableVariables\n", " public typealias EventHandler = (Learner) throws -> Void\n", " \n", " /// A wrapper class to hold the loss function, to work around\n", " // https://forums.fast.ai/t/fix-ad-crash-in-learner/42970.\n", " public final class LossFunction {\n", " public typealias F = @differentiable (Model.Output, @nondiff Label) -> Loss\n", " public var f: F\n", " init(_ f: @escaping F) { self.f = f }\n", " }\n", " \n", " /// The dataset on which the model will be trained.\n", " public var data: Data\n", " /// The optimizer used for updating model parameters along gradient vectors.\n", " public var optimizer: Optimizer\n", " /// The function that computes a loss value when given a prediction and a label.\n", " public var lossFunction: LossFunction\n", " /// The model being trained.\n", " public var model: 
Model\n", " \n", " //Is there a better way to initialize those to not make them Optionals?\n", " public var currentInput: Input? = nil\n", " public var currentTarget: Label? = nil\n", " public var currentOutput: Model.Output? = nil\n", " \n", " /// The number of total epochs.\n", " public private(set) var epochCount: Int = .zero\n", " /// The current epoch.\n", " public private(set) var currentEpoch: Int = .zero\n", " /// The current gradient.\n", " public private(set) var currentGradient: Model.CotangentVector = .zero\n", " /// The current loss.\n", " public private(set) var currentLoss: Loss = .zero\n", " /// In training mode or not\n", " public private(set) var inTrain: Bool = false\n", " /// The current epoch + iteration, float between 0.0 and epochCount\n", " public private(set) var pctEpochs: Float = 0.0\n", " /// The current iteration\n", " public private(set) var currentIter: Int = 0\n", " /// The number of iterations in the current dataset\n", " public private(set) var iterCount: Int = 0\n", " \n", " open class Delegate {\n", " open var order: Int { return 0 }\n", " public init () {}\n", " \n", " open func trainingWillStart(learner: Learner) throws {}\n", " /// The completion of model training.\n", " open func trainingDidFinish(learner: Learner) throws {}\n", " /// A closure which will be called upon the start of an epoch.\n", " open func epochWillStart(learner: Learner) throws {}\n", " /// A closure which will be called upon the completion of an epoch.\n", " open func epochDidFinish(learner: Learner) throws {}\n", " /// A closure which will be called upon the start of model validation.\n", " open func validationWillStart(learner: Learner) throws {}\n", " /// A closure which will be called upon the start of training on a batch.\n", " open func batchWillStart(learner: Learner) throws {}\n", " /// A closure which will be called upon the completion of training on a batch.\n", " open func batchDidFinish(learner: Learner) throws {}\n", " /// A closure which 
will be called when a new gradient has been computed.\n", " open func didProduceNewGradient(learner: Learner) throws {}\n", " /// A closure which will be called upon the completion of an optimizer update.\n", " open func optimizerDidUpdate(learner: Learner) throws {}\n", " ///\n", " /// TODO: learnerDidProduceNewOutput and learnerDidProduceNewLoss need to\n", " /// be differentiable once we can have the loss function inside the Learner\n", " }\n", " \n", " /// The registered delegates, re-sorted by ascending `order` whenever the property changes.\n", " public var delegates: [Delegate] = [] {\n", " didSet { delegates.sort { $0.order < $1.order } }\n", " }\n", " \n", " /// The context used for layer applications.\n", " public private(set) var context = Context(learningPhase: .training)\n", "\n", " /// Creates a learner.\n", " ///\n", " /// - Parameters:\n", " /// - data: The data to train on; stored as-is in the `data` property.\n", " /// - lossFunction: The loss function; stored wrapped in a `LossFunction`.\n", " /// - optimizer: The optimizer used for updating model parameters along\n", " /// gradient vectors.\n", " /// - modelInitializer: The closure that produces the model to be trained;\n", " /// it is called once, during initialization.\n", " ///\n", " public init(data: Data,\n", " lossFunction: @escaping LossFunction.F,\n", " optimizer: Optimizer,\n", " initializingWith modelInitializer: () -> Model) {\n", " self.data = data\n", " self.optimizer = optimizer\n", " self.lossFunction = LossFunction(lossFunction)\n", " self.model = modelInitializer()\n", " }\n", "}" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Then let's write the parts of the training loop:" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "// export\n", "extension Learner {\n", " /// Runs the model on `currentInput` and stores the result in `currentOutput` and the\n", " /// loss against `currentTarget` in `currentLoss`. No gradient is computed and the\n", " /// optimizer is not invoked.\n", " ///\n", " /// - Parameter batch: Not read by this method; input and target come from\n", " /// `currentInput` and `currentTarget`, which are force-unwrapped.\n", " ///\n", " private func evaluate(onBatch batch: DataBatch) throws {\n", " currentOutput = model.applied(to: currentInput!, in: context)\n", " currentLoss = lossFunction.f(currentOutput!, currentTarget!)\n", " }\n", " \n", " 
private func train(onBatch batch: DataBatch) throws {\n", " let (xb,yb) = (currentInput!,currentTarget!)\n", " (currentLoss, currentGradient) = model.valueWithGradient { model -> Loss in \n", " let y = model.applied(to: xb, in: context) \n", " currentOutput = y\n", " return lossFunction.f(y, yb)\n", " }\n", " try delegates.forEach { try $0.didProduceNewGradient(learner: self) }\n", " optimizer.update(&model.allDifferentiableVariables, along: self.currentGradient)\n", " }\n", " \n", " /// Performs a training epoch on a Dataset.\n", " private func train(onDataset ds: Dataset>) throws {\n", " iterCount = ds.count(where: {_ in true})\n", " for batch in ds {\n", " (currentInput, currentTarget) = (batch.xb, batch.yb)\n", " try delegates.forEach { try $0.batchWillStart(learner: self) }\n", " do { if inTrain { try train(onBatch: batch) } else { try evaluate(onBatch: batch) }}\n", " catch LearnerAction.skipBatch {}\n", " try delegates.forEach { try $0.batchDidFinish(learner: self) }\n", " }\n", " }\n", "}" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "And the whole fit function." 
] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "// export\n", "extension Learner {\n", " /// Starts fitting.\n", " /// - Parameter epochCount: The number of epochs that will be run.\n", " public func fit(_ epochCount: Int) throws {\n", " self.epochCount = epochCount\n", " do {\n", " try delegates.forEach { try $0.trainingWillStart(learner: self) }\n", " for i in 0..(learningRate: 1e-2)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "func modelInit() -> BasicModel {return BasicModel(nIn: m, nHid: nHid, nOut: c)}" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "let learner = Learner(data: data, lossFunction: softmaxCrossEntropy, optimizer: opt, initializingWith: modelInit)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "learner.fit(2)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Let's add Callbacks!" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Train/eval" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Callback classes are defined as extensions of the Learner." 
] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "// export\n", "extension Learner {\n", " public class TrainEvalDelegate: Delegate {\n", " public override func trainingWillStart(learner: Learner) {\n", " learner.pctEpochs = 0.0\n", " }\n", "\n", " public override func epochWillStart(learner: Learner) {\n", " learner.pctEpochs = Float(learner.currentEpoch)\n", " learner.context = Context(learningPhase: .training)\n", " learner.inTrain = true\n", " learner.currentIter = 0\n", " }\n", " \n", " public override func batchDidFinish(learner: Learner) {\n", " learner.currentIter += 1\n", " if learner.inTrain{\n", " learner.pctEpochs += 1.0 / Float(learner.iterCount)\n", " }\n", " }\n", " \n", " public override func validationWillStart(learner: Learner) {\n", " learner.context = Context(learningPhase: .inference)\n", " learner.inTrain = false\n", " learner.currentIter = 0\n", " }\n", " }\n", " \n", " public func makeTrainEvalDelegate() -> TrainEvalDelegate { return TrainEvalDelegate() }\n", "}" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "let learner = Learner(data: data, lossFunction: softmaxCrossEntropy, optimizer: opt, initializingWith: modelInit)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "learner.delegates = [learner.makeTrainEvalDelegate()]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "learner.fit(2)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### AverageMetric" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "// export\n", "// TODO: make metrics more generic (probably for after the course)\n", "extension Learner {\n", " public class AvgMetric: Delegate {\n", " public let metrics: [(Tensor, Tensor) -> Tensor]\n", " var total: Int = 0\n", " var partials: [Tensor] = []\n", " \n", " public init(metrics: 
[(Tensor, Tensor) -> Tensor]){ self.metrics = metrics}\n", " \n", " /// Resets the sample counter and the accumulators (slot 0 is the loss, slot i+1 is `metrics[i]`).\n", " public override func epochWillStart(learner: Learner) {\n", " total = 0\n", " partials = Array(repeating: Tensor(0), count: metrics.count + 1)\n", " }\n", " \n", " /// Outside of training mode, adds the batch-size-weighted loss and metric values\n", " /// of the batch that just finished to the accumulators.\n", " public override func batchDidFinish(learner: Learner) {\n", " if !learner.inTrain{\n", " if let target = learner.currentTarget as? Tensor{\n", " let bs = target.shape[0]\n", " total += Int(bs)\n", " partials[0] += Float(bs) * learner.currentLoss\n", " // Iterate with `enumerated()` rather than `1...metrics.count`: that closed\n", " // range traps at runtime when `metrics` is empty (loss-only tracking).\n", " for (i, metric) in metrics.enumerated() {\n", " partials[i+1] += Float(bs) * metric((learner.currentOutput as! Tensor), target)\n", " }\n", " }\n", " }\n", " }\n", " \n", " /// Divides every accumulator by the number of samples seen and prints the averages.\n", " public override func epochDidFinish(learner: Learner) {\n", " for i in 0...metrics.count {partials[i] = partials[i] / Float(total)}\n", " print(\"Epoch \\(learner.currentEpoch): \\(partials)\")\n", " }\n", " }\n", " \n", " public func makeAvgMetric(metrics: [(Tensor, Tensor) -> Tensor]) -> AvgMetric{\n", " return AvgMetric(metrics: metrics)\n", " }\n", "}" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "let learner = Learner(data: data, lossFunction: softmaxCrossEntropy, optimizer: opt, initializingWith: modelInit)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "learner.delegates = [learner.makeTrainEvalDelegate(), learner.makeAvgMetric(metrics: [accuracy])]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Epoch 0: [0.48076352, 0.8779]\n", "Epoch 1: [0.36723757, 0.8996]\n" ] } ], "source": [ "learner.fit(2)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Normalization" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "// export\n", "// TODO: make metrics more generic (probably for after the course)\n", "extension Learner {\n", " public class Normalize: Delegate {\n", " public let 
mean, std: Tensor\n", " public init(mean: Tensor, std: Tensor){ \n", " (self.mean,self.std) = (mean,std)\n", " }\n", " \n", " public override func batchWillStart(learner: Learner) {\n", " learner.currentInput = (learner.currentInput! - mean) / std\n", " }\n", " }\n", " \n", " public func makeNormalize(mean: Tensor, std: Tensor) -> Normalize{\n", " return Normalize(mean: mean, std: std)\n", " }\n", "}" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "▿ 2 elements\n", " - .0 : 0.13066047\n", " - .1 : [[0.3081079]]\n" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "(xTrain.mean(), xTrain.standardDeviation())" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "// export\n", "public let mnistStats = (mean: Tensor(0.13066047), std: Tensor(0.3081079))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "let learner = Learner(data: data, lossFunction: softmaxCrossEntropy, optimizer: opt, initializingWith: modelInit)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "learner.delegates = [learner.makeTrainEvalDelegate(), learner.makeAvgMetric(metrics: [accuracy]),\n", " learner.makeNormalize(mean: mnistStats.mean, std: mnistStats.std)]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Epoch 0: [0.30538177, 0.9112]\n", "Epoch 1: [0.2491324, 0.9276]\n" ] } ], "source": [ "learner.fit(2)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Export" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "notebookToScript(fname: (Path.cwd / \"04_callbacks.ipynb\").string)" ] }, { "cell_type": "code", 
"execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Swift", "language": "swift", "name": "swift" } }, "nbformat": 4, "nbformat_minor": 1 }