{ "cells": [ { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "# Advanced NumPy" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "np.random.seed(12345)\n", "import matplotlib.pyplot as plt\n", "plt.rc('figure', figsize=(10, 6))\n", "PREVIOUS_MAX_ROWS = pd.options.display.max_rows\n", "pd.options.display.max_rows = 20\n", "np.set_printoptions(precision=4, suppress=True)" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "## ndarray Object Internals" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "np.ones((10, 5)).shape" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "np.ones((3, 4, 5), dtype=np.float64).strides" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "### NumPy dtype Hierarchy" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "ints = np.ones(10, dtype=np.uint16)\n", "floats = np.ones(10, dtype=np.float32)\n", "np.issubdtype(ints.dtype, np.integer)\n", "np.issubdtype(floats.dtype, np.floating)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "np.float64.mro()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "np.issubdtype(ints.dtype, np.number)" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "## Advanced 
Array Manipulation" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "### Reshaping Arrays" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "arr = np.arange(8)\n", "arr\n", "arr.reshape((4, 2))" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "arr.reshape((4, 2)).reshape((2, 4))" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "arr = np.arange(15)\n", "arr.reshape((5, -1))" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "other_arr = np.ones((3, 5))\n", "other_arr.shape\n", "arr.reshape(other_arr.shape)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "arr = np.arange(15).reshape((5, 3))\n", "arr\n", "arr.ravel()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "arr.flatten()" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "### C Versus Fortran Order" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "arr = np.arange(12).reshape((3, 4))\n", "arr\n", "arr.ravel()\n", "arr.ravel('F')" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "### Concatenating and Splitting Arrays" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, 
"outputs": [], "source": [ "arr1 = np.array([[1, 2, 3], [4, 5, 6]])\n", "arr2 = np.array([[7, 8, 9], [10, 11, 12]])\n", "np.concatenate([arr1, arr2], axis=0)\n", "np.concatenate([arr1, arr2], axis=1)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "np.vstack((arr1, arr2))\n", "np.hstack((arr1, arr2))" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "arr = np.random.randn(5, 2)\n", "arr\n", "first, second, third = np.split(arr, [1, 3])\n", "first\n", "second\n", "third" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "#### Stacking helpers: r_ and c_" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "arr = np.arange(6)\n", "arr1 = arr.reshape((3, 2))\n", "arr2 = np.random.randn(3, 2)\n", "np.r_[arr1, arr2]\n", "np.c_[np.r_[arr1, arr2], arr]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "np.c_[1:6, -10:-5]" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "### Repeating Elements: tile and repeat" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "arr = np.arange(3)\n", "arr\n", "arr.repeat(3)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "arr.repeat([2, 3, 4])" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "arr = np.random.randn(2, 2)\n", 
"arr\n", "arr.repeat(2, axis=0)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "arr.repeat([2, 3], axis=0)\n", "arr.repeat([2, 3], axis=1)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "arr\n", "np.tile(arr, 2)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "arr\n", "np.tile(arr, (2, 1))\n", "np.tile(arr, (3, 2))" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "### Fancy Indexing Equivalents: take and put" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "arr = np.arange(10) * 100\n", "inds = [7, 1, 2, 6]\n", "arr[inds]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "arr.take(inds)\n", "arr.put(inds, 42)\n", "arr\n", "arr.put(inds, [40, 41, 42, 43])\n", "arr" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "inds = [2, 0, 2, 1]\n", "arr = np.random.randn(2, 4)\n", "arr\n", "arr.take(inds, axis=1)" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "## Broadcasting" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "arr = np.arange(5)\n", "arr\n", "arr * 4" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "arr = np.random.randn(4, 3)\n", 
"arr.mean(0)\n", "demeaned = arr - arr.mean(0)\n", "demeaned\n", "demeaned.mean(0)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "arr\n", "row_means = arr.mean(1)\n", "row_means.shape\n", "row_means.reshape((4, 1))\n", "demeaned = arr - row_means.reshape((4, 1))\n", "demeaned.mean(1)" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "### Broadcasting Over Other Axes" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "arr - arr.mean(1)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "arr - arr.mean(1).reshape((4, 1))" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "arr = np.zeros((4, 4))\n", "arr_3d = arr[:, np.newaxis, :]\n", "arr_3d.shape\n", "arr_1d = np.random.normal(size=3)\n", "arr_1d[:, np.newaxis]\n", "arr_1d[np.newaxis, :]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "arr = np.random.randn(3, 4, 5)\n", "depth_means = arr.mean(2)\n", "depth_means\n", "depth_means.shape\n", "demeaned = arr - depth_means[:, :, np.newaxis]\n", "demeaned.mean(2)" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "```python\n", "def demean_axis(arr, axis=0):\n", " means = arr.mean(axis)\n", "\n", " # This generalizes things like [:, :, np.newaxis] to N dimensions\n", " indexer = [slice(None)] * arr.ndim\n", " indexer[axis] = np.newaxis\n", " return arr - means[indexer]\n", "```" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, 
"source": [ "### Setting Array Values by Broadcasting" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "arr = np.zeros((4, 3))\n", "arr[:] = 5\n", "arr" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "col = np.array([1.28, -0.42, 0.44, 1.6])\n", "arr[:] = col[:, np.newaxis]\n", "arr\n", "arr[:2] = [[-1.37], [0.509]]\n", "arr" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "## Advanced ufunc Usage" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "### ufunc Instance Methods" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "arr = np.arange(10)\n", "np.add.reduce(arr)\n", "arr.sum()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "np.random.seed(12346) # for reproducibility\n", "arr = np.random.randn(5, 5)\n", "arr[::2].sort(1) # sort a few rows\n", "arr[:, :-1] < arr[:, 1:]\n", "np.logical_and.reduce(arr[:, :-1] < arr[:, 1:], axis=1)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "arr = np.arange(15).reshape((3, 5))\n", "np.add.accumulate(arr, axis=1)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "arr = np.arange(3).repeat([1, 2, 2])\n", "arr\n", "np.multiply.outer(arr, np.arange(5))" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "x, y = 
np.random.randn(3, 4), np.random.randn(5)\n", "result = np.subtract.outer(x, y)\n", "result.shape" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "arr = np.arange(10)\n", "np.add.reduceat(arr, [0, 5, 8])" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "arr = np.multiply.outer(np.arange(4), np.arange(5))\n", "arr\n", "np.add.reduceat(arr, [0, 2, 4], axis=1)" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "### Writing New ufuncs in Python" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "def add_elements(x, y):\n", " return x + y\n", "add_them = np.frompyfunc(add_elements, 2, 1)\n", "add_them(np.arange(8), np.arange(8))" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "add_them = np.vectorize(add_elements, otypes=[np.float64])\n", "add_them(np.arange(8), np.arange(8))" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "arr = np.random.randn(10000)\n", "%timeit add_them(arr, arr)\n", "%timeit np.add(arr, arr)" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "## Structured and Record Arrays" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "dtype = [('x', np.float64), ('y', np.int32)]\n", "sarr = np.array([(1.5, 6), (np.pi, -2)], dtype=dtype)\n", "sarr" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": 
true, "editable": true }, "outputs": [], "source": [ "sarr[0]\n", "sarr[0]['y']" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "sarr['x']" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "### Nested dtypes and Multidimensional Fields" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "dtype = [('x', np.int64, 3), ('y', np.int32)]\n", "arr = np.zeros(4, dtype=dtype)\n", "arr" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "arr[0]['x']" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "arr['x']" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "dtype = [('x', [('a', 'f8'), ('b', 'f4')]), ('y', np.int32)]\n", "data = np.array([((1, 2), 5), ((3, 4), 6)], dtype=dtype)\n", "data['x']\n", "data['y']\n", "data['x']['a']" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "### Why Use Structured Arrays?" 
] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "## More About Sorting" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "arr = np.random.randn(6)\n", "arr.sort()\n", "arr" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "arr = np.random.randn(3, 5)\n", "arr\n", "arr[:, 0].sort() # Sort first column values in-place\n", "arr" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "arr = np.random.randn(5)\n", "arr\n", "np.sort(arr)\n", "arr" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "arr = np.random.randn(3, 5)\n", "arr\n", "arr.sort(axis=1)\n", "arr" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "arr[:, ::-1]" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "### Indirect Sorts: argsort and lexsort" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "values = np.array([5, 0, 1, 3, 2])\n", "indexer = values.argsort()\n", "indexer\n", "values[indexer]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "arr = np.random.randn(3, 5)\n", "arr[0] = values\n", "arr\n", "arr[:, arr[0].argsort()]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "first_name = 
np.array(['Bob', 'Jane', 'Steve', 'Bill', 'Barbara'])\n", "last_name = np.array(['Jones', 'Arnold', 'Arnold', 'Jones', 'Walters'])\n", "sorter = np.lexsort((first_name, last_name))\n", "sorter\n", "list(zip(last_name[sorter], first_name[sorter]))" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "### Alternative Sort Algorithms" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "values = np.array(['2:first', '2:second', '1:first', '1:second',\n", " '1:third'])\n", "key = np.array([2, 2, 1, 1, 1])\n", "indexer = key.argsort(kind='mergesort')\n", "indexer\n", "values.take(indexer)" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "### Partially Sorting Arrays" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "np.random.seed(12345)\n", "arr = np.random.randn(20)\n", "arr\n", "np.partition(arr, 3)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "indices = np.argpartition(arr, 3)\n", "indices\n", "arr.take(indices)" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "### numpy.searchsorted: Finding Elements in a Sorted Array" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "arr = np.array([0, 1, 7, 12, 15])\n", "arr.searchsorted(9)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "arr.searchsorted([0, 8, 11, 16])" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, 
"editable": true }, "outputs": [], "source": [ "arr = np.array([0, 0, 0, 1, 1, 1, 1])\n", "arr.searchsorted([0, 1])\n", "arr.searchsorted([0, 1], side='right')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "data = np.floor(np.random.uniform(0, 10000, size=50))\n", "bins = np.array([0, 100, 1000, 5000, 10000])\n", "data" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "labels = bins.searchsorted(data)\n", "labels" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "pd.Series(data).groupby(labels).mean()" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "## Writing Fast NumPy Functions with Numba" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "import numpy as np\n", "\n", "def mean_distance(x, y):\n", " nx = len(x)\n", " result = 0.0\n", " count = 0\n", " for i in range(nx):\n", " result += x[i] - y[i]\n", " count += 1\n", " return result / count" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "```python\n", "In [209]: x = np.random.randn(10000000)\n", "\n", "In [210]: y = np.random.randn(10000000)\n", "\n", "In [211]: %timeit mean_distance(x, y)\n", "1 loop, best of 3: 2 s per loop\n", "\n", "In [212]: %timeit (x - y).mean()\n", "100 loops, best of 3: 14.7 ms per loop\n", "```" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "```python\n", "In [213]: import numba as nb\n", "\n", "In [214]: numba_mean_distance = nb.jit(mean_distance)\n", "```" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, 
"editable": true }, "source": [ "```python\n", "@nb.jit\n", "def mean_distance(x, y):\n", " nx = len(x)\n", " result = 0.0\n", " count = 0\n", " for i in range(nx):\n", " result += x[i] - y[i]\n", " count += 1\n", " return result / count\n", "```" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "```python\n", "In [215]: %timeit numba_mean_distance(x, y)\n", "100 loops, best of 3: 10.3 ms per loop\n", "```" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "```python\n", "from numba import float64, njit\n", "\n", "@njit(float64(float64[:], float64[:]))\n", "def mean_distance(x, y):\n", " return (x - y).mean()\n", "```" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "### Creating Custom numpy.ufunc Objects with Numba" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "```python\n", "from numba import vectorize\n", "\n", "@vectorize\n", "def nb_add(x, y):\n", " return x + y\n", "```" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "```python\n", "In [13]: x = np.arange(10)\n", "\n", "In [14]: nb_add(x, x)\n", "Out[14]: array([ 0., 2., 4., 6., 8., 10., 12., 14., 16., 18.])\n", "\n", "In [15]: nb_add.accumulate(x, 0)\n", "Out[15]: array([ 0., 1., 3., 6., 10., 15., 21., 28., 36., 45.])\n", "```" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "## Advanced Array Input and Output" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "### Memory-Mapped Files" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "mmap = np.memmap('mymmap', dtype='float64', mode='w+',\n", " shape=(10000, 10000))\n", "mmap" ] }, { "cell_type": "code", 
"execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "section = mmap[:5]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "section[:] = np.random.randn(5, 10000)\n", "mmap.flush()\n", "mmap\n", "del mmap" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "mmap = np.memmap('mymmap', dtype='float64', shape=(10000, 10000))\n", "mmap" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "%xdel mmap\n", "!rm mymmap" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "### HDF5 and Other Array Storage Options" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "## Performance Tips" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "### The Importance of Contiguous Memory" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "arr_c = np.ones((1000, 1000), order='C')\n", "arr_f = np.ones((1000, 1000), order='F')\n", "arr_c.flags\n", "arr_f.flags\n", "arr_f.flags.f_contiguous" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "%timeit arr_c.sum(1)\n", "%timeit arr_f.sum(1)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "arr_f.copy('C').flags" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": 
true }, "outputs": [], "source": [ "arr_c[:50].flags.contiguous\n", "arr_c[:, :50].flags" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "%xdel arr_c\n", "%xdel arr_f" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "pd.options.display.max_rows = PREVIOUS_MAX_ROWS" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.1" } }, "nbformat": 4, "nbformat_minor": 0 }