{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Advanced NumPy"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "from __future__ import division\n",
    "from numpy.random import randn\n",
    "from pandas import Series\n",
    "import numpy as np\n",
    "np.set_printoptions(precision=4)\n",
    "import sys"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## ndarray object internals"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### NumPy dtype hierarchy"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "ints = np.ones(10, dtype=np.uint16)\n",
    "floats = np.ones(10, dtype=np.float32)\n",
    "np.issubdtype(ints.dtype, np.integer)\n",
    "np.issubdtype(floats.dtype, np.floating)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "np.float64.mro()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Advanced array manipulation"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Reshaping arrays"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "arr = np.arange(8)\n",
    "arr\n",
    "arr.reshape((4, 2))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "arr.reshape((4, 2)).reshape((2, 4))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "arr = np.arange(15)\n",
    "arr.reshape((5, -1))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "other_arr = np.ones((3, 5))\n",
    "other_arr.shape\n",
    "arr.reshape(other_arr.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "arr = np.arange(15).reshape((5, 3))\n",
    "arr\n",
    "arr.ravel()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "arr.flatten()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### C vs. Fortran order"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "arr = np.arange(12).reshape((3, 4))\n",
    "arr\n",
    "arr.ravel()\n",
    "arr.ravel('F')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Concatenating and splitting arrays"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "arr1 = np.array([[1, 2, 3], [4, 5, 6]])\n",
    "arr2 = np.array([[7, 8, 9], [10, 11, 12]])\n",
    "np.concatenate([arr1, arr2], axis=0)\n",
    "np.concatenate([arr1, arr2], axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "np.vstack((arr1, arr2))\n",
    "np.hstack((arr1, arr2))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "from numpy.random import randn\n",
    "arr = randn(5, 2)\n",
    "arr\n",
    "first, second, third = np.split(arr, [1, 3])\n",
    "first\n",
    "second\n",
    "third"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Stacking helpers: "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "arr = np.arange(6)\n",
    "arr1 = arr.reshape((3, 2))\n",
    "arr2 = randn(3, 2)\n",
    "np.r_[arr1, arr2]\n",
    "np.c_[np.r_[arr1, arr2], arr]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "np.c_[1:6, -10:-5]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Repeating elements: tile and repeat"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "arr = np.arange(3)\n",
    "arr.repeat(3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "arr.repeat([2, 3, 4])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "arr = randn(2, 2)\n",
    "arr\n",
    "arr.repeat(2, axis=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "arr.repeat([2, 3], axis=0)\n",
    "arr.repeat([2, 3], axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "arr\n",
    "np.tile(arr, 2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "arr\n",
    "np.tile(arr, (2, 1))\n",
    "np.tile(arr, (3, 2))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Fancy indexing equivalents: take and put"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "arr = np.arange(10) * 100\n",
    "inds = [7, 1, 2, 6]\n",
    "arr[inds]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "arr.take(inds)\n",
    "arr.put(inds, 42)\n",
    "arr\n",
    "arr.put(inds, [40, 41, 42, 43])\n",
    "arr"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "inds = [2, 0, 2, 1]\n",
    "arr = randn(2, 4)\n",
    "arr\n",
    "arr.take(inds, axis=1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Broadcasting"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "arr = np.arange(5)\n",
    "arr\n",
    "arr * 4"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "arr = randn(4, 3)\n",
    "arr.mean(0)\n",
    "demeaned = arr - arr.mean(0)\n",
    "demeaned\n",
    "demeaned.mean(0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "arr\n",
    "row_means = arr.mean(1)\n",
    "row_means.reshape((4, 1))\n",
    "demeaned = arr - row_means.reshape((4, 1))\n",
    "demeaned.mean(1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Broadcasting over other axes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "arr - arr.mean(1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "arr - arr.mean(1).reshape((4, 1))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "arr = np.zeros((4, 4))\n",
    "arr_3d = arr[:, np.newaxis, :]\n",
    "arr_3d.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "arr_1d = np.random.normal(size=3)\n",
    "arr_1d[:, np.newaxis]\n",
    "arr_1d[np.newaxis, :]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "arr = randn(3, 4, 5)\n",
    "depth_means = arr.mean(2)\n",
    "depth_means\n",
    "demeaned = arr - depth_means[:, :, np.newaxis]\n",
    "demeaned.mean(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "def demean_axis(arr, axis=0):\n",
    "    means = arr.mean(axis)\n",
    "\n",
    "    # This generalized things like [:, :, np.newaxis] to N dimensions\n",
    "    indexer = [slice(None)] * arr.ndim\n",
    "    indexer[axis] = np.newaxis\n",
    "    return arr - means[indexer]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Setting array values by broadcasting"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "arr = np.zeros((4, 3))\n",
    "arr[:] = 5\n",
    "arr"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "col = np.array([1.28, -0.42, 0.44, 1.6])\n",
    "arr[:] = col[:, np.newaxis]\n",
    "arr\n",
    "arr[:2] = [[-1.37], [0.509]]\n",
    "arr"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Advanced ufunc usage"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Ufunc instance methods"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "arr = np.arange(10)\n",
    "np.add.reduce(arr)\n",
    "arr.sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "np.random.seed(12346)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "arr = randn(5, 5)\n",
    "arr[::2].sort(1) # sort a few rows\n",
    "arr[:, :-1] < arr[:, 1:]\n",
    "np.logical_and.reduce(arr[:, :-1] < arr[:, 1:], axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "arr = np.arange(15).reshape((3, 5))\n",
    "np.add.accumulate(arr, axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "arr = np.arange(3).repeat([1, 2, 2])\n",
    "arr\n",
    "np.multiply.outer(arr, np.arange(5))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "result = np.subtract.outer(randn(3, 4), randn(5))\n",
    "result.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "arr = np.arange(10)\n",
    "np.add.reduceat(arr, [0, 5, 8])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "arr = np.multiply.outer(np.arange(4), np.arange(5))\n",
    "arr\n",
    "np.add.reduceat(arr, [0, 2, 4], axis=1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Custom ufuncs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "def add_elements(x, y):\n",
    "    return x + y\n",
    "add_them = np.frompyfunc(add_elements, 2, 1)\n",
    "add_them(np.arange(8), np.arange(8))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "add_them = np.vectorize(add_elements, otypes=[np.float64])\n",
    "add_them(np.arange(8), np.arange(8))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "arr = randn(10000)\n",
    "%timeit add_them(arr, arr)\n",
    "%timeit np.add(arr, arr)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Structured and record arrays"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "dtype = [('x', np.float64), ('y', np.int32)]\n",
    "sarr = np.array([(1.5, 6), (np.pi, -2)], dtype=dtype)\n",
    "sarr"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "sarr[0]\n",
    "sarr[0]['y']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "sarr['x']"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Nested dtypes and multidimensional fields"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "dtype = [('x', np.int64, 3), ('y', np.int32)]\n",
    "arr = np.zeros(4, dtype=dtype)\n",
    "arr"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "arr[0]['x']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "arr['x']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "dtype = [('x', [('a', 'f8'), ('b', 'f4')]), ('y', np.int32)]\n",
    "data = np.array([((1, 2), 5), ((3, 4), 6)], dtype=dtype)\n",
    "data['x']\n",
    "data['y']\n",
    "data['x']['a']"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Why use structured arrays?"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Structured array manipulations: numpy.lib.recfunctions"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## More about sorting"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "arr = randn(6)\n",
    "arr.sort()\n",
    "arr"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "arr = randn(3, 5)\n",
    "arr\n",
    "arr[:, 0].sort()  # Sort first column values in-place\n",
    "arr"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "arr = randn(5)\n",
    "arr\n",
    "np.sort(arr)\n",
    "arr"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "arr = randn(3, 5)\n",
    "arr\n",
    "arr.sort(axis=1)\n",
    "arr"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "arr[:, ::-1]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Indirect sorts: argsort and lexsort"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "values = np.array([5, 0, 1, 3, 2])\n",
    "indexer = values.argsort()\n",
    "indexer\n",
    "values[indexer]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "arr = randn(3, 5)\n",
    "arr[0] = values\n",
    "arr\n",
    "arr[:, arr[0].argsort()]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "first_name = np.array(['Bob', 'Jane', 'Steve', 'Bill', 'Barbara'])\n",
    "last_name = np.array(['Jones', 'Arnold', 'Arnold', 'Jones', 'Walters'])\n",
    "sorter = np.lexsort((first_name, last_name))\n",
    "zip(last_name[sorter], first_name[sorter])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Alternate sort algorithms"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "values = np.array(['2:first', '2:second', '1:first', '1:second', '1:third'])\n",
    "key = np.array([2, 2, 1, 1, 1])\n",
    "indexer = key.argsort(kind='mergesort')\n",
    "indexer\n",
    "values.take(indexer)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### numpy.searchsorted: Finding elements in a sorted array"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "arr = np.array([0, 1, 7, 12, 15])\n",
    "arr.searchsorted(9)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "arr.searchsorted([0, 8, 11, 16])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "arr = np.array([0, 0, 0, 1, 1, 1, 1])\n",
    "arr.searchsorted([0, 1])\n",
    "arr.searchsorted([0, 1], side='right')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "data = np.floor(np.random.uniform(0, 10000, size=50))\n",
    "bins = np.array([0, 100, 1000, 5000, 10000])\n",
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "labels = bins.searchsorted(data)\n",
    "labels"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "Series(data).groupby(labels).mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "np.digitize(data, bins)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## NumPy matrix class"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "X =  np.array([[ 8.82768214,  3.82222409, -1.14276475,  2.04411587],\n",
    "               [ 3.82222409,  6.75272284,  0.83909108,  2.08293758],\n",
    "               [-1.14276475,  0.83909108,  5.01690521,  0.79573241],\n",
    "               [ 2.04411587,  2.08293758,  0.79573241,  6.24095859]])\n",
    "X[:, 0]  # one-dimensional\n",
    "y = X[:, :1]  # two-dimensional by slicing\n",
    "X\n",
    "y"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "np.dot(y.T, np.dot(X, y))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "Xm = np.matrix(X)\n",
    "ym = Xm[:, 0]\n",
    "Xm\n",
    "ym\n",
    "ym.T * Xm * ym"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "Xm.I * X"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Advanced array input and output"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Memory-mapped files"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "mmap = np.memmap('mymmap', dtype='float64', mode='w+', shape=(10000, 10000))\n",
    "mmap"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "section = mmap[:5]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "section[:] = np.random.randn(5, 10000)\n",
    "mmap.flush()\n",
    "mmap\n",
    "del mmap"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "mmap = np.memmap('mymmap', dtype='float64', shape=(10000, 10000))\n",
    "mmap"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "%xdel mmap\n",
    "!rm mymmap"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### HDF5 and other array storage options"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Performance tips"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### The importance of contiguous memory"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "arr_c = np.ones((1000, 1000), order='C')\n",
    "arr_f = np.ones((1000, 1000), order='F')\n",
    "arr_c.flags\n",
    "arr_f.flags\n",
    "arr_f.flags.f_contiguous"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "%timeit arr_c.sum(1)\n",
    "%timeit arr_f.sum(1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "arr_f.copy('C').flags"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "arr_c[:50].flags.contiguous\n",
    "arr_c[:, :50].flags"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "%xdel arr_c\n",
    "%xdel arr_f\n",
    "%cd .."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Other speed options: Cython, f2py, C"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "```cython\n",
    "from numpy cimport ndarray, float64_t\n",
    "\n",
    "def sum_elements(ndarray[float64_t] arr):\n",
    "    cdef Py_ssize_t i, n = len(arr)\n",
    "    cdef float64_t result = 0\n",
    "\n",
    "    for i in range(n):\n",
    "        result += arr[i]\n",
    "\n",
    "    return result\n",
    "```"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}