{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Installing packages:\n",
      "\t.package(url: \"https://github.com/mxcl/Path.swift\", from: \"0.16.1\")\n",
      "\t\tPath\n",
      "\t.package(url: \"https://github.com/JustHTTP/Just\", from: \"0.7.1\")\n",
      "\t\tJust\n",
      "With SwiftPM flags: []\n",
      "Working in: /tmp/tmpaxpvuhom\n",
      "Fetching https://github.com/mxcl/Path.swift\n",
      "Fetching https://github.com/JustHTTP/Just\n",
      "Completed resolution in 1.06s\n",
      "Cloning https://github.com/mxcl/Path.swift\n",
      "Resolving https://github.com/mxcl/Path.swift at 0.16.2\n",
      "Cloning https://github.com/JustHTTP/Just\n",
      "Resolving https://github.com/JustHTTP/Just at 0.7.1\n",
      "Compile Swift Module 'Just' (1 sources)\n",
      "Compile Swift Module 'Path' (9 sources)\n",
      "Compile Swift Module 'jupyterInstalledPackages' (1 sources)\n",
      "Linking ./.build/x86_64-unknown-linux/debug/libjupyterInstalledPackages.so\n",
      "Initializing Swift...\n",
      "Loading library...\n",
      "Installation complete!\n"
     ]
    }
   ],
   "source": [
    "%install '.package(url: \"https://github.com/mxcl/Path.swift\", from: \"0.16.1\")' Path\n",
    "%install '.package(url: \"https://github.com/JustHTTP/Just\", from: \"0.7.1\")' Just"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Getting the MNIST dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "// export\n",
    "import Foundation\n",
    "import Just\n",
    "import Path"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "//export\n",
    "/// Runs the executable at `launchPath` with `arguments` and captures its standard output.\n",
    "///\n",
    "/// - Returns: The captured output decoded as UTF-8, or `nil` if the process could not\n",
    "///   be launched or its output is not valid UTF-8.\n",
    "public func shellCommand(_ launchPath: String, _ arguments: [String]) -> String?\n",
    "{\n",
    "    let task = Process()\n",
    "    task.executableURL = URL.init(fileURLWithPath:launchPath)\n",
    "    task.arguments = arguments\n",
    "\n",
    "    let pipe = Pipe()\n",
    "    task.standardOutput = pipe\n",
    "    do {try task.run()}\n",
    "    catch {\n",
    "        // Without a running child nothing will write to the pipe, so reading it\n",
    "        // to EOF below could block indefinitely: report the error and give up.\n",
    "        print(\"Unexpected error: \\(error).\")\n",
    "        return nil\n",
    "    }\n",
    "\n",
    "    let data = pipe.fileHandleForReading.readDataToEndOfFile()\n",
    "    let output = String(data: data, encoding: String.Encoding.utf8)\n",
    "\n",
    "    return output\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "total 1.1M\r\n",
      "-rw-rw-r-- 1 ubuntu ubuntu 11K Apr 9 13:30 00_load_data.ipynb\r\n",
      "-rw-rw-r-- 1 ubuntu ubuntu 14K Apr 9 13:30 01_matmul.ipynb\r\n",
      "-rw-rw-r-- 1 ubuntu ubuntu 17K Apr 9 13:30 02_fully_connected.ipynb\r\n",
      "-rw-rw-r-- 1 ubuntu ubuntu 12K Apr 9 13:30 02a_why_sqrt5.ipynb\r\n",
      "-rw-rw-r-- 1 ubuntu ubuntu 19K Apr 9 13:30 03_minibatch_training.ipynb\r\n",
      "-rw-rw-r-- 1 ubuntu ubuntu 20K Apr 9 13:30 04_callbacks.ipynb\r\n",
      "-rw-rw-r-- 1 ubuntu ubuntu 15K Apr 9 13:30 04_callbacks_alt.ipynb\r\n",
      "-rw-rw-r-- 1 ubuntu ubuntu 18K Apr 9 13:30 04_callbacks_bug.ipynb\r\n",
      "-rw-rw-r-- 1 ubuntu ubuntu 15K Apr 9 13:30 05_anneal.ipynb\r\n",
      "-rw-rw-r-- 1 ubuntu ubuntu 4.3K Apr 5 13:57 05b_early_stopping.ipynb\r\n",
      "drwxrwxr-x 3 ubuntu ubuntu 4.0K Apr 9 13:30 FastaiNotebook_00_load_data\r\n",
      "drwxrwxr-x 3 ubuntu ubuntu 4.0K Apr 9 13:30 FastaiNotebook_01_matmul\r\n",
      "drwxrwxr-x 3 ubuntu ubuntu 4.0K Apr 9 13:30 FastaiNotebook_02_fully_connected\r\n",
      "drwxrwxr-x 3 ubuntu ubuntu 4.0K Apr 9 13:30 FastaiNotebook_02a_why_sqrt5\r\n",
      "drwxrwxr-x 3 ubuntu ubuntu 4.0K Apr 9 13:30 FastaiNotebook_03_minibatch_training\r\n",
      "drwxrwxr-x 3 ubuntu ubuntu 4.0K Apr 9 13:30 FastaiNotebook_04_callbacks\r\n",
      "drwxrwxr-x 3 ubuntu ubuntu 4.0K Mar 21 20:30 FastaiNotebooks\r\n",
      "-rw-rw-r-- 1 ubuntu ubuntu 3.0K Apr 9 13:30 create_packages.ipynb\r\n",
      "-rw-rw-r-- 1 ubuntu ubuntu 1.2K Apr 9 13:30 create_packages.py\r\n",
      "-rw-rw-r-- 1 ubuntu ubuntu 891K Apr 9 13:30 image_dataset_ops.ipynb\r\n",
      "-rw-rw-r-- 1 ubuntu ubuntu 13K Apr 9 13:30 learner.ipynb\r\n",
      "-rw-rw-r-- 1 ubuntu ubuntu 4.9K Apr 9 13:30 swift_nn.ipynb\r\n",
      "\r\n"
     ]
    }
   ],
   "source": [
    "if let res = shellCommand(\"/bin/ls\", [\"-lh\"]){print(res)}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "//export\n",
    "/// Downloads `url` into the file `dest`.\n",
    "///\n",
    "/// - Parameters:\n",
    "///   - url: Address to fetch.\n",
    "///   - dest: Destination file; defaults to the last component of `url` inside the\n",
    "///     current working directory.\n",
    "///   - force: When `true`, download even if the destination already exists.\n",
    "///\n",
    "/// Failures are reported with `print`; nothing is thrown or returned.\n",
    "public func downloadFile(_ url: String, dest: String?=nil, force: Bool=false){\n",
    "    let dest_name = (dest ?? (Path.cwd/url.split(separator: \"/\").last!).string)\n",
    "    let url_dest = URL.init(fileURLWithPath: dest_name)\n",
    "    // Checked through FileManager rather than `Path(dest_name)!.exists`: `Path(String)` is a\n",
    "    // failable initializer, so the force-unwrap crashed whenever it returned nil\n",
    "    // (e.g. for a relative `dest`).\n",
    "    if (force || !FileManager.default.fileExists(atPath: dest_name)){\n",
    "        print(\"Downloading \\(url)...\")\n",
    "        if let cts = Just.get(url).content{\n",
    "            do {try cts.write(to: url_dest)}\n",
    "            catch {print(\"Can't write to \\(url_dest).\\n\\(error)\")}\n",
    "        } else {print(\"Can't reach \\(url)\")}\n",
    "    }\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "//export\n",
    "import TensorFlow"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "//export\n",
    "/// A scalar type that can be built from a single raw byte.\n",
    "protocol ConvertableFromByte {\n",
    "    init(_ d:UInt8)\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "//export\n",
    "extension Float : ConvertableFromByte{}\n",
    "extension Int32 : ConvertableFromByte{}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "/// Reads the file `fn`, drops its first `skip` bytes and converts every remaining byte\n",
    "/// to `T`, returning the values as a rank-1 tensor. Crashes (`try!`) if the file is unreadable.\n",
    "func readData<T: ConvertableFromByte & TensorFlowScalar>(_ fn:String, _ skip:Int) -> Tensor<T> {\n",
    "    let data = try! Data.init(contentsOf: URL.init(fileURLWithPath: fn)).dropFirst(skip)\n",
    "    return Tensor(data.map(T.init))\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "//export\n",
    "/// Loads one of the four MNIST files from `path`, downloading and unzipping it first\n",
    "/// when it is not there yet.\n",
    "///\n",
    "/// - Parameters:\n",
    "///   - training: `true` for the 60000-sample training split, `false` for the 10000-sample test split.\n",
    "///   - labels: `true` for the labels file, `false` for the images file.\n",
    "///   - path: Directory holding (or receiving) the data files.\n",
    "///   - flat: For images only, return shape `[batch, 784]` instead of `[batch, 28, 28]`.\n",
    "/// - Returns: The file's bytes converted to `T`; labels are rank-1, images are reshaped.\n",
    "func loadMNIST<T: ConvertableFromByte & TensorFlowScalar>(training: Bool, labels: Bool, path: Path, flat: Bool) -> Tensor<T> {\n",
    "    let split = training ? \"train\" : \"t10k\"\n",
    "    let kind = labels ? \"labels\" : \"images\"\n",
    "    let batch = training ? Int32(60000) : Int32(10000)\n",
    "    let shape: TensorShape = labels ? [batch] : (flat ? [batch, 784] : [batch, 28, 28])\n",
    "    // Number of leading header bytes to skip before the payload starts.\n",
    "    let dropK = labels ? 8 : 16\n",
    "    let baseUrl = \"https://storage.googleapis.com/cvdf-datasets/mnist/\"\n",
    "    let fname = split + \"-\" + kind + \"-idx\\(labels ? 1 : 3)-ubyte\"\n",
    "    let file = path/fname\n",
    "    if !file.exists {\n",
    "        downloadFile(\"\\(baseUrl)\\(fname).gz\", dest:(path/\"\\(fname).gz\").string)\n",
    "        _ = shellCommand(\"/bin/gunzip\", [\"-fq\", (path/\"\\(fname).gz\").string])\n",
    "    }\n",
    "    let data = try! Data.init(contentsOf: URL.init(fileURLWithPath: file.string)).dropFirst(dropK)\n",
    "    if labels { return Tensor(data.map(T.init)) }\n",
    "    else      { return Tensor(data.map(T.init)).reshaped(to: shape)}\n",
    "}\n",
    "\n",
    "/// Loads the whole MNIST dataset from `path` (created if needed).\n",
    "///\n",
    "/// - Returns: `(training images, training labels, test images, test labels)`;\n",
    "///   image values are divided by 255.\n",
    "public func loadMNIST(path:Path, flat:Bool = false) -> (Tensor<Float>, Tensor<Int32>, Tensor<Float>, Tensor<Int32>) {\n",
    "    try! path.mkdir(.p)\n",
    "    return (\n",
    "        loadMNIST(training: true,  labels: false, path: path, flat: flat) / 255.0,\n",
    "        loadMNIST(training: true,  labels: true,  path: path, flat: flat),\n",
    "        loadMNIST(training: false, labels: false, path: path, flat: flat) / 255.0,\n",
    "        loadMNIST(training: false, labels: true,  path: path, flat: flat)\n",
    "    )\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "//export\n",
    "public let mnistPath = Path.home/\".fastai\"/\"data\"/\"mnist_tst\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "let (xTrain, yTrain, xValid, yValid) = loadMNIST(path: mnistPath)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "xTrain.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "let (xTrain, yTrain, xValid, yValid) = loadMNIST(path: mnistPath, flat: true)\n",
    "xTrain.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Timing"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "//export \n",
    "import Dispatch\n",
    "/// Runs `function` once and prints how long it took, in milliseconds.\n",
    "public func time(_ function: () -> ()) {\n",
    "    let start = DispatchTime.now()\n",
    "    function()\n",
    "    let end = DispatchTime.now()\n",
    "    let nanoseconds = Double(end.uptimeNanoseconds - start.uptimeNanoseconds)\n",
    "    let milliseconds = nanoseconds / 1e6\n",
    "    print(\"\\(milliseconds) ms\")\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "time {var valImgs: Tensor<Float> = loadMNIST(training:false, labels: false, path: mnistPath, flat:false)}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "//export \n",
    "/// Runs `function` once untimed, then `repeating` more times, and prints the mean\n",
    "/// duration of the timed runs in milliseconds. Prints nothing when `repeating` is below 1.\n",
    "public func time(repeating: Int, _ function: () -> ()) {\n",
    "    function()\n",
    "    // `1...repeating` traps for an empty range, so bail out early instead.\n",
    "    guard repeating > 0 else { return }\n",
    "    var times:[Double] = []\n",
    "    for _ in 1...repeating{\n",
    "        let start = DispatchTime.now()\n",
    "        function()\n",
    "        let end = DispatchTime.now()\n",
    "        let nanoseconds = Double(end.uptimeNanoseconds - start.uptimeNanoseconds)\n",
    "        let milliseconds = nanoseconds / 1e6\n",
    "        times.append(milliseconds)\n",
    "    }\n",
    "    print(\"\\(times.reduce(0.0, +)/Double(times.count)) ms\")\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "time(repeating:10) {var valImgs: Tensor<Float> = loadMNIST(training:false, labels: false, path: mnistPath, flat:false)}"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Export"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "// export\n",
    "/// Collects the code of every cell of the notebook `fname` whose first line is an\n",
    "/// `//export` marker and writes it to\n",
    "/// `FastaiNotebooks/Sources/FastaiNotebooks/<notebook name>.swift` next to the notebook.\n",
    "///\n",
    "/// Problems (unreadable file, malformed JSON, failed write) are reported with `print`.\n",
    "public func notebookToScript(fname: String){\n",
    "    let url_fname = URL.init(fileURLWithPath: fname)\n",
    "    let last = url_fname.lastPathComponent\n",
    "    let out_fname = (url_fname.deletingLastPathComponent().appendingPathComponent(\"FastaiNotebooks\", isDirectory: true)\n",
    "        .appendingPathComponent(\"Sources\", isDirectory: true)\n",
    "        .appendingPathComponent(\"FastaiNotebooks\", isDirectory: true).appendingPathComponent(last)\n",
    "        .deletingPathExtension().appendingPathExtension(\"swift\"))\n",
    "    do{\n",
    "        let data = try Data.init(contentsOf: url_fname)\n",
    "        // `try`/`as?` rather than `try!`/`as!`: a malformed notebook is reported, not a crash.\n",
    "        guard let jsonData = try JSONSerialization.jsonObject(with: data, options: .allowFragments) as? [String: Any],\n",
    "              let cells = jsonData[\"cells\"] as? [[String:Any]] else {\n",
    "            print(\"Can't find any cells in \\(fname)\")\n",
    "            return\n",
    "        }\n",
    "        var module = \"\"\"\n",
    "/*\n",
    "THIS FILE WAS AUTOGENERATED! DO NOT EDIT!\n",
    "file to edit: \\(last)\n",
    "\n",
    "*/\n",
    " \n",
    "\"\"\"\n",
    "        for cell in cells{\n",
    "            if let source = cell[\"source\"] as? [String]{\n",
    "                if source.isEmpty {continue}\n",
    "                // Only cells whose first line is the export marker are kept; the marker itself is dropped.\n",
    "                if source[0].range(of: #\"^\\s*//\\s*export\\s*$\"#, options: .regularExpression) != nil{\n",
    "                    module.append(\"\\n\" + source[1...].joined() + \"\\n\")\n",
    "                }\n",
    "            }\n",
    "        }\n",
    "        do {try module.write(to: out_fname, atomically: false, encoding: .utf8)}\n",
    "        catch {print(\"Can't write to \\(out_fname).\\n\\(error)\")}\n",
    "    } catch {print(\"Can't read the content of \\(fname)\")}\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "// export\n",
    "/// Converts every file in `path` whose name matches `<digits>_<anything>ipynb`\n",
    "/// with `notebookToScript`.\n",
    "public func exportNotebooks(_ path: Path){\n",
    "    for entry in try! path.ls(){\n",
    "        if entry.kind == Entry.Kind.file{\n",
    "            if entry.path.basename().range(of: #\"^\\d*_.*ipynb$\"#, options: .regularExpression) != nil {\n",
    "                print(\"Converting \\(entry.path.basename())\")\n",
    "                // Pass the full path: the bare file name only resolves when `path` is the\n",
    "                // current working directory.\n",
    "                notebookToScript(fname: entry.path.string)\n",
    "            }\n",
    "        }\n",
    "    }\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "notebookToScript(fname: (Path.cwd / \"00_load_data.ipynb\").string)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Swift",
   "language": "swift",
   "name": "swift"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}