{ "metadata" : { "kernelspec" : { "display_name" : "Swift", "name" : "swift", "language" : "swift" } }, "cells" : [ { "metadata" : { }, "source" : [ "# Annealing" ], "cell_type" : "markdown" }, { "execution_count" : null, "outputs" : [ ], "source" : [ "%install-location $cwd\/swift-install\n", "%install '.package(path: \"$cwd\/FastaiNotebook_04_callbacks\")' FastaiNotebook_04_callbacks" ], "metadata" : { }, "cell_type" : "code" }, { "execution_count" : null, "source" : [ "\/\/ export\n", "import Path\n", "import TensorFlow" ], "outputs" : [ ], "metadata" : { }, "cell_type" : "code" }, { "execution_count" : null, "source" : [ "import FastaiNotebook_04_callbacks" ], "outputs" : [ ], "metadata" : { }, "cell_type" : "code" }, { "execution_count" : null, "outputs" : [ ], "source" : [ "%include \"EnableIPythonDisplay.swift\"\n", "IPythonDisplay.shell.enable_matplotlib(\"inline\")" ], "metadata" : { }, "cell_type" : "code" }, { "metadata" : { }, "source" : [ "## Load data" ], "cell_type" : "markdown" }, { "execution_count" : null, "outputs" : [ ], "source" : [ "let data = mnistDataBunch(flat: true)" ], "metadata" : { }, "cell_type" : "code" }, { "execution_count" : null, "outputs" : [ ], "source" : [ "let (n,m) = (60000,784)\n", "let c = 10\n", "let nHid = 50" ], "metadata" : { }, "cell_type" : "code" }, { "execution_count" : null, "source" : [ "func optFunc(_ model: BasicModel) -> SGD {return SGD(for: model, learningRate: 1e-2)}" ], "outputs" : [ ], "metadata" : { }, "cell_type" : "code" }, { "execution_count" : null, "outputs" : [ ], "source" : [ "func modelInit() -> BasicModel {return BasicModel(nIn: m, nHid: nHid, nOut: c)}" ], "metadata" : { }, "cell_type" : "code" }, { "execution_count" : null, "source" : [ "let learner = Learner(data: data, lossFunc: softmaxCrossEntropy, optFunc: optFunc, modelInit: modelInit)" ], "outputs" : [ ], "metadata" : { }, "cell_type" : "code" }, { "execution_count" : null, "source" : [ "learner.delegates = 
[learner.makeTrainEvalDelegate(), learner.makeAvgMetric(metrics: [accuracy]),\n", " learner.makeNormalize(mean: mnistStats.mean, std: mnistStats.std)]" ], "outputs" : [ ], "metadata" : { }, "cell_type" : "code" }, { "execution_count" : null, "outputs" : [ ], "source" : [ "\/\/Crashes! See: SR-10436\n", "\/\/learner.delegates = [type(of: learner).TrainEvalDelegate(), type(of: learner).AvgMetric(metrics: [accuracy])]" ], "metadata" : { }, "cell_type" : "code" }, { "execution_count" : null, "outputs" : [ ], "source" : [ "learner.fit(2)" ], "metadata" : { }, "cell_type" : "code" }, { "cell_type" : "markdown", "metadata" : { }, "source" : [ "## Recorder" ] }, { "cell_type" : "markdown", "source" : [ "The Recorder's role is to keep track of the loss and our scheduled learning rate. " ], "metadata" : { } }, { "execution_count" : null, "source" : [ "\/\/ export\n", "import Python\n", "public let np = Python.import(\"numpy\")\n", "public let plt = Python.import(\"matplotlib.pyplot\")" ], "outputs" : [ ], "metadata" : { }, "cell_type" : "code" }, { "execution_count" : null, "outputs" : [ ], "source" : [ "\/\/ export\n", "public func plot<S1, S2>(_ arr1: [S1], _ arr2: [S2], logScale:Bool = false, xLabel: String=\"\", yLabel: String = \"\") \n", " where S1:PythonConvertible, S2:PythonConvertible{\n", " plt.figure(figsize: [6,4])\n", " let (npArr1, npArr2) = (np.array(arr1), np.array(arr2))\n", " if logScale {plt.xscale(\"log\")} \n", " if !xLabel.isEmpty {plt.xlabel(xLabel)}\n", " if !yLabel.isEmpty {plt.ylabel(yLabel)} \n", " let fig = plt.plot(npArr1, npArr2)\n", " plt.show(fig)\n", "}" ], "metadata" : { }, "cell_type" : "code" }, { "execution_count" : null, "source" : [ "\/\/ export\n", "extension Learner where Opt.Scalar: PythonConvertible{\n", " public class Recorder: Delegate {\n", " public var losses: [Loss] = []\n", " public var lrs: [Opt.Scalar] = []\n", " \n", " public override func batchDidFinish(learner: Learner) {\n", " if learner.inTrain {\n", " 
losses.append(learner.currentLoss)\n", " lrs.append(learner.opt.learningRate)\n", " }\n", " }\n", " \n", " public func plotLosses(){\n", " plot(Array(0..<losses.count), losses)\n", " }\n", " \n", " public func plotLRs(){\n", " plot(Array(0..<lrs.count), lrs)\n", " }\n", " }\n", " \n", " public func makeRecorder() -> Recorder {\n", " return Recorder()\n", " }\n", "}" ], "outputs" : [ ], "metadata" : { }, "cell_type" : "code" }, { "execution_count" : null, "source" : [ "let learner = Learner(data: data, lossFunc: softmaxCrossEntropy, optFunc: optFunc, modelInit: modelInit)" ], "outputs" : [ ], "metadata" : { }, "cell_type" : "code" }, { "metadata" : { }, "cell_type" : "markdown", "source" : [ "Utility optional property to get back our `Recorder` if it was created by a utility function. This doesn't always work properly for unknown reasons" ] }, { "execution_count" : null, "outputs" : [ ], "source" : [ "\/\/TODO: Fix\n", "extension Learner where Opt.Scalar: PythonConvertible{\n", " public var recorder: Learner.Recorder? {\n", " for callback in learner.delegates {\n", " if let recorder = callback as? Learner.Recorder { return recorder }\n", " }\n", " return nil\n", " }\n", "}" ], "metadata" : { }, "cell_type" : "code" }, { "execution_count" : null, "outputs" : [ ], "source" : [ "learner.delegates = [learner.makeTrainEvalDelegate(), learner.makeAvgMetric(metrics: [accuracy]), \n", " learner.makeNormalize(mean: mnistStats.mean, std: mnistStats.std), learner.makeRecorder()]" ], "metadata" : { }, "cell_type" : "code" }, { "execution_count" : null, "outputs" : [ ], "source" : [ "learner.fit(2)" ], "metadata" : { }, "cell_type" : "code" }, { "execution_count" : null, "source" : [ "learner.recorder!.plotLosses()" ], "outputs" : [ ], "metadata" : { }, "cell_type" : "code" }, { "cell_type" : "markdown", "source" : [ "### Progress bar" ], "metadata" : { } }, { "metadata" : { }, "cell_type" : "markdown", "source" : [ "It's nice to keep track of where we're at in the training with a progress bar."
] }, { "execution_count" : null, "outputs" : [ ], "source" : [ "\/\/ export\n", "import Foundation" ], "metadata" : { }, "cell_type" : "code" }, { "execution_count" : null, "source" : [ "\/\/ export\n", "func formatTime(_ t: Float) -> String {\n", " let t = Int(t)\n", " let (h,m,s) = (t\/3600, (t\/60)%60, t%60)\n", " return h != 0 ? String(format: \"%02d:%02d:%02d\", h, m, s) : String(format: \"%02d:%02d\", m, s)\n", "}" ], "outputs" : [ ], "metadata" : { }, "cell_type" : "code" }, { "execution_count" : null, "source" : [ "formatTime(78.23)" ], "outputs" : [ ], "metadata" : { }, "cell_type" : "code" }, { "execution_count" : null, "source" : [ "\/\/ export\n", "public struct ProgressBar{\n", " let total: Int\n", " let length: Int = 50\n", " let showEvery: Float = 0.2\n", " let fillChar: Character = \"X\"\n", " public var comment: String = \"\"\n", " private var waitFor: Int = 0\n", " private var startTime: UInt64 = 0\n", " private var lastPrint: UInt64 = 0\n", " private var lastShow: UInt64 = 0\n", " private var estimatedTotal: Float = 0.0\n", " private var bar: String = \"\"\n", " \n", " public init(_ c: Int) { total = c }\n", " \n", " public mutating func update(_ val: Int){\n", " lastShow = DispatchTime.now().uptimeNanoseconds\n", " if val == 0 { startTime = lastShow } \n", " else {\n", " let averageTime = Float(lastShow - startTime) \/ (1e9 * Float(val))\n", " estimatedTotal = Float(total) * averageTime\n", " }\n", " if val == 0 || lastShow - lastPrint >= Int(1e9 * showEvery) { update_bar(val) }\n", " }\n", " \n", " public mutating func update_bar(_ val: Int){\n", " lastPrint = lastShow\n", " let prevLength = bar.count\n", " bar = String(repeating: fillChar, count: (val * length) \/ total)\n", " bar += String(repeating: \"-\", count: length - (val * length) \/ total)\n", " let pct = String(format: \"%.2f\", 100.0 * Float(val)\/Float(total))\n", " let elapsedTime = Float(lastShow - startTime) \/ 1e9\n", " let remaingTime = estimatedTotal - elapsedTime\n", " bar 
+= \" \\(pct)% [\\(val)\/\\(total) \\(formatTime(elapsedTime))<\\(formatTime(remaingTime))\"\n", " bar += comment.isEmpty ? \"]\" : \" \\(comment)]\"\n", " if bar.count < prevLength { bar += String(repeating: \" \", count: prevLength-bar.count) }\n", " print(bar, terminator:\"\\r\")\n", " fflush(stdout)\n", " }\n", " \n", " public func remove(){\n", " print(String(repeating: \" \", count: bar.count), terminator:\"\\r\")\n", " fflush(stdout)\n", " }\n", "}" ], "outputs" : [ ], "metadata" : { }, "cell_type" : "code" }, { "execution_count" : null, "outputs" : [ ], "source" : [ "var tst = ProgressBar(100)\n", "for i in 0...100{\n", " tst.update(i)\n", " usleep(50000)\n", "}\n", "tst.remove()" ], "metadata" : { }, "cell_type" : "code" }, { "execution_count" : null, "source" : [ "\/\/ export\n", "extension Learner {\n", " public class ShowProgress: Delegate {\n", " var pbar: ProgressBar? = nil\n", " var iter: Int = 0\n", " \n", " public override func epochWillStart(learner: Learner) {\n", " pbar = ProgressBar(learner.data.train.count)\n", " }\n", " \n", " public override func validationWillStart(learner: Learner) {\n", " if pbar != nil { pbar!.remove() }\n", " pbar = ProgressBar(learner.data.valid.count)\n", " }\n", " \n", " public override func epochDidFinish(learner: Learner) {\n", " if pbar != nil { pbar!.remove() }\n", " }\n", " \n", " public override func batchWillStart(learner: Learner) {\n", " if learner.currentIter == 0 {pbar!.update(0)}\n", " }\n", " \n", " public override func batchDidFinish(learner: Learner) {\n", " pbar!.update(learner.currentIter)\n", " }\n", " \n", " public override func trainingDidFinish(learner: Learner) {\n", " if pbar != nil { pbar!.remove() }\n", " }\n", " }\n", " \n", " public func makeShowProgress() -> ShowProgress { return ShowProgress() }\n", "}" ], "outputs" : [ ], "metadata" : { }, "cell_type" : "code" }, { "execution_count" : null, "outputs" : [ ], "source" : [ "let learner = Learner(data: data, lossFunc: softmaxCrossEntropy, 
optFunc: optFunc, modelInit: modelInit)" ], "metadata" : { }, "cell_type" : "code" }, { "execution_count" : null, "source" : [ "learner.delegates = [learner.makeTrainEvalDelegate(), learner.makeShowProgress(), \n", " learner.makeAvgMetric(metrics: [accuracy]), learner.makeRecorder(),\n", " learner.makeNormalize(mean: mnistStats.mean, std: mnistStats.std)]" ], "outputs" : [ ], "metadata" : { }, "cell_type" : "code" }, { "execution_count" : null, "source" : [ "learner.fit(2)" ], "outputs" : [ ], "metadata" : { }, "cell_type" : "code" }, { "cell_type" : "markdown", "source" : [ "### Annealing" ], "metadata" : { } }, { "execution_count" : null, "source" : [ "\/\/ export\n", "\/\/\/ A non-generalized learning rate scheduler\n", "extension Learner where Opt.Scalar: BinaryFloatingPoint {\n", " public class LRScheduler: Delegate {\n", " public override var order: Int { return 1 }\n", " public typealias ScheduleFunc = (Float) -> Float\n", "\n", " \/\/ A learning rate schedule from step to float.\n", " public var scheduler: ScheduleFunc\n", " \n", " public init(scheduler: @escaping (Float) -> Float) {\n", " self.scheduler = scheduler\n", " }\n", " \n", " override public func batchWillStart(learner: Learner) {\n", " learner.opt.learningRate = Opt.Scalar(scheduler(learner.pctEpochs\/Float(learner.epochCount)))\n", " }\n", " }\n", " \n", " public func makeLRScheduler(scheduler: @escaping (Float) -> Float) -> LRScheduler {\n", " return LRScheduler(scheduler: scheduler)\n", " }\n", "}" ], "outputs" : [ ], "metadata" : { }, "cell_type" : "code" }, { "execution_count" : null, "source" : [ "\/\/ export\n", "public func linearSchedule(start: Float, end: Float, pct: Float) -> Float {\n", " return start + pct * (end - start)\n", "}\n", "\n", "public func makeAnnealer(start: Float, end: Float, schedule: @escaping (Float, Float, Float) -> Float) -> (Float) -> Float { \n", " return { pct in return schedule(start, end, pct) }\n", "}" ], "outputs" : [ ], "metadata" : { }, "cell_type" : 
"code" }, { "execution_count" : null, "outputs" : [ ], "source" : [ "let annealer = makeAnnealer(start: 1e-2, end: 0.1, schedule: linearSchedule)\n", "annealer(0.3)" ], "metadata" : { }, "cell_type" : "code" }, { "execution_count" : null, "outputs" : [ ], "source" : [ "let learner = Learner(data: data, lossFunc: softmaxCrossEntropy, optFunc: optFunc, modelInit: modelInit)\n", "let recorder = learner.makeRecorder()" ], "metadata" : { }, "cell_type" : "code" }, { "execution_count" : null, "source" : [ "learner.delegates = [learner.makeTrainEvalDelegate(), learner.makeShowProgress(), \n", " learner.makeAvgMetric(metrics: [accuracy]), recorder,\n", " learner.makeNormalize(mean: mnistStats.mean, std: mnistStats.std),\n", " learner.makeLRScheduler(scheduler: annealer)]" ], "outputs" : [ ], "metadata" : { }, "cell_type" : "code" }, { "execution_count" : null, "outputs" : [ ], "source" : [ "learner.fit(2)" ], "metadata" : { }, "cell_type" : "code" }, { "execution_count" : null, "outputs" : [ ], "source" : [ "recorder.plotLRs()" ], "metadata" : { }, "cell_type" : "code" }, { "cell_type" : "markdown", "metadata" : { }, "source" : [ "More annealing functions" ] }, { "execution_count" : null, "source" : [ "\/\/ export\n", "public func constantSchedule(start: Float, end: Float, pct: Float) -> Float {\n", " return start\n", "}\n", "\n", "public func cosineSchedule(start: Float, end: Float, pct: Float) -> Float {\n", " return start + (1 + cos(Float.pi*(1-pct))) * (end-start) \/ 2\n", "}\n", "\n", "public func expSchedule(start: Float, end: Float, pct: Float) -> Float {\n", " return start * pow(end \/ start, pct)\n", "}" ], "outputs" : [ ], "metadata" : { }, "cell_type" : "code" }, { "execution_count" : null, "outputs" : [ ], "source" : [ "\/\/ export\n", "public func combineSchedules(pcts: [Float], schedules: [(Float) -> Float]) -> ((Float) -> Float){\n", " var cumPcts: [Float] = [0]\n", " for pct in pcts {cumPcts.append(cumPcts.last! 
+ pct)}\n", " func inner(pct: Float) -> Float{\n", " if (pct == 0.0) { return schedules[0](0.0) }\n", " if (pct > 1.0) { return schedules.last!(1.0) }\n", " let i = cumPcts.firstIndex(where: {$0 >= pct})! - 1\n", " let actualPos = (pct-cumPcts[i]) \/ (cumPcts[i+1]-cumPcts[i])\n", " return schedules[i](actualPos)\n", " }\n", " return inner\n", "}" ], "metadata" : { }, "cell_type" : "code" }, { "execution_count" : null, "outputs" : [ ], "source" : [ "let mySchedule = combineSchedules(pcts: [0.3, 0.7], \n", " schedules: [makeAnnealer(start: 0.3, end: 0.6, schedule: cosineSchedule),\n", " makeAnnealer(start: 0.6, end: 0.2, schedule: cosineSchedule)])" ], "metadata" : { }, "cell_type" : "code" }, { "execution_count" : null, "source" : [ "let learner = Learner(data: data, lossFunc: softmaxCrossEntropy, optFunc: optFunc, modelInit: modelInit)\n", "let recorder = learner.makeRecorder()" ], "outputs" : [ ], "metadata" : { }, "cell_type" : "code" }, { "execution_count" : null, "outputs" : [ ], "source" : [ "learner.delegates = [learner.makeTrainEvalDelegate(), learner.makeShowProgress(), \n", " learner.makeAvgMetric(metrics: [accuracy]), recorder,\n", " learner.makeNormalize(mean: mnistStats.mean, std: mnistStats.std),\n", " learner.makeLRScheduler(scheduler: mySchedule)]" ], "metadata" : { }, "cell_type" : "code" }, { "execution_count" : null, "source" : [ "learner.fit(2)" ], "outputs" : [ ], "metadata" : { }, "cell_type" : "code" }, { "execution_count" : null, "source" : [ "recorder.plotLRs()" ], "outputs" : [ ], "metadata" : { }, "cell_type" : "code" }, { "execution_count" : null, "outputs" : [ ], "source" : [ "\/\/Needs fixing \n", "\/\/learner.recorder!.plotLRs()" ], "metadata" : { }, "cell_type" : "code" }, { "cell_type" : "markdown", "source" : [ "## Export" ], "metadata" : { } }, { "execution_count" : null, "outputs" : [ ], "source" : [ "import NotebookExport\n", "let exporter = NotebookExport(Path.cwd\/\"05_anneal.ipynb\")\n", "print(exporter.export(usingPrefix: 
\"FastaiNotebook_\"))" ], "metadata" : { }, "cell_type" : "code" }, { "execution_count" : null, "source" : [ ], "outputs" : [ ], "metadata" : { }, "cell_type" : "code" } ], "nbformat" : 4, "nbformat_minor" : 1 }