{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Installing packages:\n",
      "\t.package(url: \"https://github.com/mxcl/Path.swift\", from: \"0.16.1\")\n",
      "\t\tPath\n",
      "\t.package(url: \"https://github.com/JustHTTP/Just\", from: \"0.7.1\")\n",
      "\t\tJust\n",
      "With SwiftPM flags: []\n",
      "Working in: /tmp/tmpinjts7mc\n",
      "Fetching https://github.com/mxcl/Path.swift\n",
      "Fetching https://github.com/JustHTTP/Just\n",
      "Completed resolution in 0.99s\n",
      "Cloning https://github.com/JustHTTP/Just\n",
      "Resolving https://github.com/JustHTTP/Just at 0.7.1\n",
      "Cloning https://github.com/mxcl/Path.swift\n",
      "Resolving https://github.com/mxcl/Path.swift at 0.16.2\n",
      "Compile Swift Module 'Just' (1 sources)\n",
      "Compile Swift Module 'Path' (9 sources)\n",
      "Compile Swift Module 'jupyterInstalledPackages' (1 sources)\n",
      "Linking ./.build/x86_64-unknown-linux/debug/libjupyterInstalledPackages.so\n",
      "Initializing Swift...\n",
      "Loading library...\n",
      "Installation complete!\n"
     ]
    }
   ],
   "source": [
    "%install '.package(url: \"https://github.com/mxcl/Path.swift\", from: \"0.16.1\")' Path\n",
    "%install '.package(url: \"https://github.com/JustHTTP/Just\", from: \"0.7.1\")' Just"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Getting the MNIST dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "// export\n",
    "import Foundation\n",
    "import Just\n",
    "import Path"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "//export\n",
    "/// Runs the executable at `launchPath` with `arguments` and returns what it wrote to\n",
    "/// standard output, decoded as UTF-8.\n",
    "///\n",
    "/// - Parameters:\n",
    "///   - launchPath: Path of the executable to launch.\n",
    "///   - arguments: Arguments handed to the executable.\n",
    "/// - Returns: The captured standard output, or `nil` if the process could not be started\n",
    "///   or its output is not valid UTF-8.\n",
    "public func shellCommand(_ launchPath: String, _ arguments: [String]) -> String?\n",
    "{\n",
    "    let task = Process()\n",
    "    task.executableURL = URL(fileURLWithPath: launchPath)\n",
    "    task.arguments = arguments\n",
    "\n",
    "    let pipe = Pipe()\n",
    "    task.standardOutput = pipe\n",
    "    do {\n",
    "        try task.run()\n",
    "    } catch {\n",
    "        print(\"Unexpected error: \\(error).\")\n",
    "        // Nothing was started, so there is no output to collect. Falling through would read\n",
    "        // the pipe to end-of-file, which can block because nothing ever closes its write end.\n",
    "        return nil\n",
    "    }\n",
    "\n",
    "    let data = pipe.fileHandleForReading.readDataToEndOfFile()\n",
    "    return String(data: data, encoding: .utf8)\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "total 11M\r\n",
      "-rw-rw-r-- 1 ubuntu ubuntu 11K Apr 17 11:08 00_load_data.ipynb\r\n",
      "-rw-rw-r-- 1 ubuntu ubuntu 25K Apr 17 11:08 01_matmul.ipynb\r\n",
      "-rw-rw-r-- 1 ubuntu ubuntu 12K Apr 17 11:08 01a_fastai_layers.ipynb\r\n",
      "-rw-rw-r-- 1 ubuntu ubuntu 32K Apr 16 13:02 01b_sequential_layer.ipynb\r\n",
      "-rw-rw-r-- 1 ubuntu ubuntu 10K Apr 17 11:08 01c_array_differentiable.ipynb\r\n",
      "-rw-rw-r-- 1 ubuntu ubuntu 26K Apr 17 11:08 02_fully_connected.ipynb\r\n",
      "-rw-rw-r-- 1 ubuntu ubuntu 18K Apr 15 18:31 02a_why_sqrt5.ipynb\r\n",
      "-rw-rw-r-- 1 ubuntu ubuntu 16K Apr 15 18:36 02b_initializing.ipynb\r\n",
      "-rw-rw-r-- 1 ubuntu ubuntu 23K Apr 15 21:38 03_minibatch_training.ipynb\r\n",
      "-rw-rw-r-- 1 ubuntu ubuntu 24K Apr 17 11:08 04_callbacks.ipynb\r\n",
      "-rw-rw-r-- 1 ubuntu ubuntu 75K Apr 17 11:08 05_anneal.ipynb\r\n",
      "-rw-rw-r-- 1 ubuntu ubuntu 59K Apr 15 20:04 05b_early_stopping.ipynb\r\n",
      "-rw-rw-r-- 1 ubuntu ubuntu 143K Apr 16 13:20 06_cuda.ipynb\r\n",
      "-rw-rw-r-- 1 ubuntu ubuntu 31K Apr 16 13:23 07_batchnorm.ipynb\r\n",
      "-rw-rw-r-- 1 ubuntu ubuntu 586K Apr 17 11:08 08_data_block.ipynb\r\n",
      "-rw-rw-r-- 1 ubuntu ubuntu 6.9K Apr 16 17:35 08a_heterogeneous_dictionary.ipynb\r\n",
      "-rw-rw-r-- 1 ubuntu ubuntu 21K Apr 17 11:08 09_optimizer.ipynb\r\n",
      "drwxrwxr-x 3 ubuntu ubuntu 4.0K Apr 10 03:35 FastaiNotebook_00_load_data\r\n",
      "drwxrwxr-x 3 ubuntu ubuntu 4.0K Apr 12 17:38 FastaiNotebook_01_matmul\r\n",
      "drwxrwxr-x 3 ubuntu ubuntu 4.0K Apr 12 17:38 FastaiNotebook_01a_fastai_layers\r\n",
      "drwxrwxr-x 3 ubuntu ubuntu 4.0K Apr 17 11:08 FastaiNotebook_01c_array_differentiable\r\n",
      "drwxrwxr-x 3 ubuntu ubuntu 4.0K Apr 17 11:08 FastaiNotebook_02_fully_connected\r\n",
      "drwxrwxr-x 3 ubuntu ubuntu 4.0K Apr 17 11:08 FastaiNotebook_02a_why_sqrt5\r\n",
      "drwxrwxr-x 3 ubuntu ubuntu 4.0K Apr 17 11:08 FastaiNotebook_03_minibatch_training\r\n",
      "drwxrwxr-x 3 ubuntu ubuntu 4.0K Apr 17 11:08 FastaiNotebook_04_callbacks\r\n",
      "drwxrwxr-x 3 ubuntu ubuntu 4.0K Apr 17 11:08 FastaiNotebook_05_anneal\r\n",
      "drwxrwxr-x 3 ubuntu ubuntu 4.0K Apr 17 11:08 FastaiNotebook_05b_early_stopping\r\n",
      "drwxrwxr-x 3 ubuntu ubuntu 4.0K Apr 17 11:08 FastaiNotebook_06_cuda\r\n",
      "drwxrwxr-x 3 ubuntu ubuntu 4.0K Apr 17 11:08 FastaiNotebook_07_batchnorm\r\n",
      "drwxrwxr-x 3 ubuntu ubuntu 4.0K Apr 16 15:38 FastaiNotebook_08_data_block\r\n",
      "drwxrwxr-x 3 ubuntu ubuntu 4.0K Apr 16 17:09 FastaiNotebook_08a_heterogeneous_dictionary\r\n",
      "drwxrwxr-x 3 ubuntu ubuntu 4.0K Apr 10 03:35 FastaiNotebooks\r\n",
      "-rw-rw-r-- 1 ubuntu ubuntu 3.0K Apr 10 03:35 create_packages.ipynb\r\n",
      "-rw-rw-r-- 1 ubuntu ubuntu 1.2K Apr 10 13:11 create_packages.py\r\n",
      "-rw-rw-r-- 1 ubuntu ubuntu 52K Apr 11 13:05 image_dataset_ops.ipynb\r\n",
      "-rw-rw-r-- 1 ubuntu ubuntu 13K Apr 10 03:35 learner.ipynb\r\n",
      "-rw-rw-r-- 1 ubuntu ubuntu 4.9K Apr 10 03:35 swift_nn.ipynb\r\n",
      "-rw-rw-r-- 1 ubuntu ubuntu 4.4K Apr 17 11:08 test_array_differentiable.ipynb\r\n",
      "-rw-rw-r-- 1 ubuntu ubuntu 9.5M Apr 17 11:10 train-images-idx3-ubyte.gz\r\n",
      "\r\n"
     ]
    }
   ],
   "source": [
    "if let res = shellCommand(\"/bin/ls\", [\"-lh\"]){print(res)}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "//export\n",
    "/// Downloads `url` to a local file unless that file is already there.\n",
    "///\n",
    "/// - Parameters:\n",
    "///   - url: Address of the resource to fetch.\n",
    "///   - dest: File to write; defaults to the last `/`-separated component of `url`\n",
    "///     inside the current working directory.\n",
    "///   - force: When `true`, download even if the destination file already exists.\n",
    "public func downloadFile(_ url: String, dest: String?=nil, force: Bool=false){\n",
    "    let dest_name = dest ?? (Path.cwd/url.split(separator: \"/\").last!).string\n",
    "    // Ask FileManager instead of force-unwrapping `Path(dest_name)`: Path's failable\n",
    "    // initializer rejects relative paths, so a relative `dest` used to trap here.\n",
    "    if !force && FileManager.default.fileExists(atPath: dest_name) { return }\n",
    "    let url_dest = URL(fileURLWithPath: dest_name)\n",
    "    print(\"Downloading \\(url)...\")\n",
    "    guard let cts = Just.get(url).content else {\n",
    "        print(\"Can't reach \\(url)\")\n",
    "        return\n",
    "    }\n",
    "    do { try cts.write(to: url_dest) }\n",
    "    catch { print(\"Can't write to \\(url_dest).\\n\\(error)\") }\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "downloadFile(\"https://storage.googleapis.com/cvdf-datasets/mnist/train-images-idx3-ubyte.gz\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "//export\n",
    "import TensorFlow"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "//export\n",
    "/// A type that can be created from a single `UInt8`.\n",
    "protocol ConvertableFromByte {\n",
    "    init(_ d:UInt8)\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "//export\n",
    "extension Float : ConvertableFromByte{}\n",
    "extension Int : ConvertableFromByte{}\n",
    "extension Int32 : ConvertableFromByte{}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "/// Reads the file `fn`, drops its first `skip` bytes and converts every remaining byte\n",
    "/// into one scalar of the returned tensor.\n",
    "func readData<T: ConvertableFromByte & TensorFlowScalar>(_ fn:String, _ skip:Int) -> Tensor<T> {\n",
    "    let data = try! Data(contentsOf: URL(fileURLWithPath: fn)).dropFirst(skip)\n",
    "    return Tensor(data.map(T.init))\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "//export\n",
    "/// Loads one MNIST file as a tensor, downloading and unpacking it into `path` first\n",
    "/// when it is not there yet.\n",
    "///\n",
    "/// - Parameters:\n",
    "///   - training: `true` selects the `train` files (60000 items), `false` the `t10k` files (10000 items).\n",
    "///   - labels: `true` selects the label file, `false` the image file.\n",
    "///   - path: Directory that holds, or will receive, the data files.\n",
    "///   - flat: For images, `true` gives shape `[batch, 784]`, `false` gives `[batch, 28, 28]`.\n",
    "/// - Returns: One scalar per byte of the file after its leading bytes have been dropped.\n",
    "func loadMNIST<T: ConvertableFromByte & TensorFlowScalar>(training: Bool, labels: Bool, path: Path, flat: Bool) -> Tensor<T> {\n",
    "    let split = training ? \"train\" : \"t10k\"\n",
    "    let kind = labels ? \"labels\" : \"images\"\n",
    "    let batch = training ? 60000 : 10000\n",
    "    let shape: TensorShape = labels ? [batch] : (flat ? [batch, 784] : [batch, 28, 28])\n",
    "    // Number of leading bytes dropped before the data is converted.\n",
    "    let dropK = labels ? 8 : 16\n",
    "    let baseUrl = \"https://storage.googleapis.com/cvdf-datasets/mnist/\"\n",
    "    let fname = split + \"-\" + kind + \"-idx\\(labels ? 1 : 3)-ubyte\"\n",
    "    let file = path/fname\n",
    "    if !file.exists {\n",
    "        let archive = (path/\"\\(fname).gz\").string\n",
    "        downloadFile(\"\\(baseUrl)\\(fname).gz\", dest: archive)\n",
    "        _ = shellCommand(\"/bin/gunzip\", [\"-fq\", archive])\n",
    "    }\n",
    "    let data = try! Data(contentsOf: URL(fileURLWithPath: file.string)).dropFirst(dropK)\n",
    "    if labels { return Tensor(data.map(T.init)) }\n",
    "    else      { return Tensor(data.map(T.init)).reshaped(to: shape)}\n",
    "}\n",
    "\n",
    "/// Loads all four MNIST tensors from `path`, creating the directory if needed.\n",
    "///\n",
    "/// - Returns: `(training images, training labels, test images, test labels)`;\n",
    "///   both image tensors are divided by 255.\n",
    "public func loadMNIST(path:Path, flat:Bool = false) -> (Tensor<Float>, Tensor<Int32>, Tensor<Float>, Tensor<Int32>) {\n",
    "    try! path.mkdir(.p)\n",
    "    return (\n",
    "        loadMNIST(training: true,  labels: false, path: path, flat: flat) / 255.0,\n",
    "        loadMNIST(training: true,  labels: true,  path: path, flat: flat),\n",
    "        loadMNIST(training: false, labels: false, path: path, flat: flat) / 255.0,\n",
    "        loadMNIST(training: false, labels: true,  path: path, flat: flat)\n",
    "    )\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "//export\n",
    "public let mnistPath = Path.home/\".fastai\"/\"data\"/\"mnist_tst\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "let (xTrain, yTrain, xValid, yValid) = loadMNIST(path: mnistPath)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "▿ TensorShape\n",
       " ▿ dimensions : 3 elements\n",
       " - 0 : 60000\n",
       " - 1 : 28\n",
       " - 2 : 28\n"
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "xTrain.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "▿ TensorShape\n",
       " ▿ dimensions : 2 elements\n",
       " - 0 : 60000\n",
       " - 1 : 784\n"
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "let (xTrain, yTrain, xValid, yValid) = loadMNIST(path: mnistPath, flat: true)\n",
    "xTrain.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Timing"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "//export \n",
    "import Dispatch\n",
    "/// Runs `function` once and prints how long it took, in milliseconds.\n",
    "public func time(_ function: () -> ()) {\n",
    "    let start = DispatchTime.now()\n",
    "    function()\n",
    "    let end = DispatchTime.now()\n",
    "    let nanoseconds = Double(end.uptimeNanoseconds - start.uptimeNanoseconds)\n",
    "    let milliseconds = nanoseconds / 1e6\n",
    "    print(\"\\(milliseconds) ms\")\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "372.649852 ms\r\n"
     ]
    }
   ],
   "source": [
    "time {var valImgs: Tensor<Float> = loadMNIST(training:false, labels: false, path: mnistPath, flat:false)}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "//export \n",
    "/// Calls `function` once untimed, then `repeating` more times, and prints the mean\n",
    "/// duration of the timed calls in milliseconds.\n",
    "///\n",
    "/// - Parameters:\n",
    "///   - repeating: Number of timed calls; must be at least 1.\n",
    "///   - function: The work to measure.\n",
    "public func time(repeating: Int, _ function: () -> ()) {\n",
    "    // `1...repeating` traps for a count below 1, so reject it up front.\n",
    "    guard repeating > 0 else {\n",
    "        print(\"time(repeating:) needs a positive count, got \\(repeating)\")\n",
    "        return\n",
    "    }\n",
    "    function()\n",
    "    var times:[Double] = []\n",
    "    for _ in 1...repeating{\n",
    "        let start = DispatchTime.now()\n",
    "        function()\n",
    "        let end = DispatchTime.now()\n",
    "        let nanoseconds = Double(end.uptimeNanoseconds - start.uptimeNanoseconds)\n",
    "        let milliseconds = nanoseconds / 1e6\n",
    "        times.append(milliseconds)\n",
    "    }\n",
    "    print(\"\\(times.reduce(0.0, +)/Double(times.count)) ms\")\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "274.26179950000005 ms\r\n"
     ]
    }
   ],
   "source": [
    "time(repeating:10) {var valImgs: Tensor<Float> = loadMNIST(training:false, labels: false, path: mnistPath, flat:false)}"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Export"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "// export\n",
    "/// Collects every cell of the notebook `fname` whose first source line is an\n",
    "/// `// export` marker and writes their remaining lines to\n",
    "/// `FastaiNotebooks/Sources/FastaiNotebooks/<notebook name>.swift` next to the notebook.\n",
    "///\n",
    "/// Problems reading, parsing or writing are printed; nothing is thrown.\n",
    "public func notebookToScript(fname: String){\n",
    "    let url_fname = URL(fileURLWithPath: fname)\n",
    "    let last = url_fname.lastPathComponent\n",
    "    let out_fname = url_fname.deletingLastPathComponent()\n",
    "        .appendingPathComponent(\"FastaiNotebooks\", isDirectory: true)\n",
    "        .appendingPathComponent(\"Sources\", isDirectory: true)\n",
    "        .appendingPathComponent(\"FastaiNotebooks\", isDirectory: true)\n",
    "        .appendingPathComponent(last)\n",
    "        .deletingPathExtension().appendingPathExtension(\"swift\")\n",
    "    do{\n",
    "        let data = try Data(contentsOf: url_fname)\n",
    "        // `try` / `as?` rather than `try!` / `as!`: a malformed notebook is reported\n",
    "        // through the same message as an unreadable one instead of crashing.\n",
    "        guard let jsonData = try JSONSerialization.jsonObject(with: data, options: .allowFragments) as? [String: Any],\n",
    "              let cells = jsonData[\"cells\"] as? [[String:Any]] else {\n",
    "            print(\"Can't read the content of \\(fname)\")\n",
    "            return\n",
    "        }\n",
    "        var module = \"\"\"\n",
    "/*\n",
    "THIS FILE WAS AUTOGENERATED! DO NOT EDIT!\n",
    "file to edit: \\(last)\n",
    "\n",
    "*/\n",
    " \n",
    "\"\"\"\n",
    "        for cell in cells{\n",
    "            guard let source = cell[\"source\"] as? [String], !source.isEmpty else {continue}\n",
    "            if source[0].range(of: #\"^\\s*//\\s*export\\s*$\"#, options: .regularExpression) != nil{\n",
    "                module.append(\"\\n\" + source[1...].joined() + \"\\n\")\n",
    "            }\n",
    "        }\n",
    "        // Report a failed write instead of discarding the error.\n",
    "        do { try module.write(to: out_fname, atomically: false, encoding: .utf8) }\n",
    "        catch { print(\"Can't write to \\(out_fname.path).\\n\\(error)\") }\n",
    "    } catch {print(\"Can't read the content of \\(fname)\")}\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "// export\n",
    "/// Runs `notebookToScript` on every file directly inside `path` whose name matches\n",
    "/// `^\\d*_.*ipynb$`.\n",
    "public func exportNotebooks(_ path: Path){\n",
    "    for entry in try! path.ls(){\n",
    "        guard entry.kind == Entry.Kind.file else {continue}\n",
    "        if entry.path.basename().range(of: #\"^\\d*_.*ipynb$\"#, options: .regularExpression) != nil {\n",
    "            print(\"Converting \\(entry.path.basename())\")\n",
    "            // Pass the full path: a bare file name would be resolved against the current\n",
    "            // directory, which is only correct when `path` is the working directory.\n",
    "            notebookToScript(fname: entry.path.string)\n",
    "        }\n",
    "    }\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "notebookToScript(fname: (Path.cwd / \"00_load_data.ipynb\").string)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Swift",
   "language": "swift",
   "name": "swift"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}