{
 "metadata": {
  "name": "",
  "signature": "sha256:d26fd98f44af3cfad61a4510bd1b24f38310d0d2ab3d252c12cf3013bd7a8054"
 },
 "nbformat": 3,
 "nbformat_minor": 0,
 "worksheets": [
  {
   "cells": [
    {
     "cell_type": "markdown",
     "metadata": {
      "internals": {
       "slide_type": "subslide"
      },
      "slideshow": {
       "slide_type": "slide"
      }
     },
     "source": [
      "Convert Input Files\n",
      "===================="
     ]
    },
    {
     "cell_type": "markdown",
     "metadata": {
      "internals": {},
      "slideshow": {
       "slide_type": "-"
      }
     },
     "source": [
      "It's much faster to load the training and test data from NumPy native files than from CSV. Load up the CSV files and dump them back out as a single compressed `.npz` file."
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "from __future__ import print_function\n",
      "import numpy as np"
     ],
     "language": "python",
     "metadata": {
      "internals": {},
      "slideshow": {
       "slide_type": "-"
      }
     },
     "outputs": [],
     "prompt_number": 1
    },
    {
     "cell_type": "markdown",
     "metadata": {
      "internals": {},
      "slideshow": {
       "slide_type": "-"
      }
     },
     "source": [
      "Load raw CSVs\n",
      "------------------"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# (array name, CSV file) pairs. The names become the keys inside data_files.npz.\n",
      "# Kept as a list so the arrays are always processed in this order.\n",
      "CSV_SOURCES = [\n",
      "    (\"X_train\", \"X_train_public.csv\"),\n",
      "    (\"Y_train\", \"Y_train_public.csv\"),\n",
      "    (\"X_test\", \"X_test_public.csv\"),\n",
      "    (\"Y_test\", \"Y_test_public.csv\"),\n",
      "    (\"X_comp\", \"X_test_private.csv\"),\n",
      "]\n",
      "ARRAY_NAMES = [name for name, path in CSV_SOURCES]\n",
      "\n",
      "# genfromtxt parses fields as floats by default; missing fields come back as NaN.\n",
      "raw_arrays = {name: np.genfromtxt(path, delimiter=',') for name, path in CSV_SOURCES}"
     ],
     "language": "python",
     "metadata": {
      "internals": {
       "slide_helper": "subslide_end"
      },
      "slide_helper": "slide_end",
      "slideshow": {
       "slide_type": "-"
      }
     },
     "outputs": [],
     "prompt_number": 2
    },
    {
     "cell_type": "markdown",
     "metadata": {
      "internals": {
       "slide_type": "subslide"
      },
      "slideshow": {
       "slide_type": "slide"
      }
     },
     "source": [
      "Check Input Ranges\n",
      "--------"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "for name in ARRAY_NAMES:\n",
      "    arr = raw_arrays[name]\n",
      "    print(arr.min(), arr.max())"
     ],
     "language": "python",
     "metadata": {
      "internals": {
       "slide_helper": "subslide_end"
      },
      "slide_helper": "slide_end",
      "slideshow": {
       "slide_type": "-"
      }
     },
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "-32768.0 32767.0\n",
        "0.0 1.0\n",
        "-32768.0"
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        " 32767.0\n",
        "0.0 1.0\n",
        "-32768.0"
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        " 32767.0\n"
       ]
      }
     ],
     "prompt_number": 3
    },
    {
     "cell_type": "markdown",
     "metadata": {
      "internals": {
       "slide_type": "subslide"
      },
      "slideshow": {
       "slide_type": "slide"
      }
     },
     "source": [
      "Convert to int for Efficiency\n",
      "-----------\n",
      "\n",
      "The ranges above all fit in `int16`. The cast is verified, so NaNs, fractional values, or out-of-range values raise an error instead of being silently corrupted."
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "def to_int16(arr, name):\n",
      "    \"\"\"Return `arr` cast to int16; raise ValueError if the cast would change any value.\n",
      "\n",
      "    `name` is only used to label the error message.\n",
      "    \"\"\"\n",
      "    if not np.all(np.isfinite(arr)):\n",
      "        raise ValueError(\"%s contains NaN or infinite values\" % name)\n",
      "    converted = arr.astype(np.int16)\n",
      "    # A float -> int16 cast silently drops fractions and mangles out-of-range\n",
      "    # values, so require that the result compares equal to the input.\n",
      "    if not np.array_equal(converted, arr):\n",
      "        raise ValueError(\"%s has values not exactly representable as int16\" % name)\n",
      "    return converted\n",
      "\n",
      "int16_arrays = {name: to_int16(raw_arrays[name], name) for name in ARRAY_NAMES}"
     ],
     "language": "python",
     "metadata": {
      "internals": {},
      "slideshow": {
       "slide_type": "-"
      }
     },
     "outputs": [],
     "prompt_number": 4
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "for name in ARRAY_NAMES:\n",
      "    arr = int16_arrays[name]\n",
      "    print(arr.min(), arr.max())"
     ],
     "language": "python",
     "metadata": {
      "internals": {
       "slide_helper": "subslide_end"
      },
      "slide_helper": "slide_end",
      "slideshow": {
       "slide_type": "-"
      }
     },
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "-32768 32767\n",
        "0 1\n",
        "-32768"
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        " 32767\n",
        "0 1\n",
        "-32768"
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        " 32767\n"
       ]
      }
     ],
     "prompt_number": 5
    },
    {
     "cell_type": "markdown",
     "metadata": {
      "internals": {
       "slide_type": "subslide"
      },
      "slideshow": {
       "slide_type": "slide"
      }
     },
     "source": [
      "Save to Binary Compressed File\n",
      "---------"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Each dict key (X_train, Y_train, X_test, Y_test, X_comp) becomes an array name in the archive.\n",
      "np.savez_compressed(\"data_files.npz\", **int16_arrays)"
     ],
     "language": "python",
     "metadata": {
      "internals": {
       "slide_helper": "subslide_end"
      },
      "slide_helper": "slide_end",
      "slideshow": {
       "slide_type": "-"
      }
     },
     "outputs": [],
     "prompt_number": 6
    }
   ],
   "metadata": {}
  }
 ]
}