{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Profiling\n",
    "\n",
    "```\n",
    "conda install line_profiler\n",
    "```\n",
    "\n",
    "or\n",
    "\n",
    "```\n",
    "pip install line_profiler\n",
    "```\n",
    "\n",
    "- Annotate the function you want to profile with `@profile`\n",
    "- Save your Python code to a text file, say `mycode.py`\n",
    "- Run `kernprof -l mycode.py`\n",
    "- View the output: `python -m line_profiler mycode.py.lprof`"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Breakout: Testing and Profiling"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Examine the `alloc_norm_mul` function below.  Use profiling to speed it up, one step at a time."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "def _norm_rows(X):\n",
    "    \"\"\"Return a copy of `X` with every row divided by its own sum.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    X : 2-D ndarray\n",
    "        Input matrix.  It is not modified.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    ndarray\n",
    "        Array with the same shape as `X`.  Integer and boolean input is\n",
    "        converted to float64 first; floating-point input keeps its dtype.\n",
    "\n",
    "    Notes\n",
    "    -----\n",
    "    The element-by-element Python loops are kept on purpose: this\n",
    "    function is the starting point of a profiling exercise.  A row whose\n",
    "    sum is zero is not special-cased.\n",
    "    \"\"\"\n",
    "    if np.issubdtype(X.dtype, np.inexact):\n",
    "        X = X.copy()\n",
    "    else:\n",
    "        # Writing a float quotient back into an integer array would\n",
    "        # silently truncate it (e.g. 1/4 -> 0), so work on a float64 copy.\n",
    "        X = X.astype(np.float64)\n",
    "    m, n = X.shape\n",
    "\n",
    "    for i in range(m):\n",
    "        row_sum = 0\n",
    "\n",
    "        for j in range(n):\n",
    "            row_sum += X[i, j]\n",
    "\n",
    "        for j in range(n):\n",
    "            X[i, j] /= row_sum\n",
    "\n",
    "    return X\n",
    "\n",
    "\n",
    "def _polynomial(X):\n",
    "    \"\"\"Apply the transformation ``(X + 3 * X) ** 2`` to `X`.\n",
    "\n",
    "    No in-place operators are used, so `X` is left untouched and a new\n",
    "    object is returned.\n",
    "    \"\"\"\n",
    "    tripled = 3 * X\n",
    "    total = X + tripled\n",
    "    return total ** 2\n",
    "\n",
    "\n",
    "\n",
    "def _matmul(A, B, out):\n",
    "    \"\"\"Write the matrix product of `A` and `B` into `out`, in place.\n",
    "\n",
    "    `A` and `B` must be 2-D and `out` must accept ``out[i, j] = value``\n",
    "    for every row index of `A` and column index of `B`.  The length of\n",
    "    the inner sum is taken from the column count of `A`; the row count of\n",
    "    `B` is not checked here.  Nothing is returned.\n",
    "    \"\"\"\n",
    "    n_rows, n_inner = A.shape\n",
    "    _, n_cols = B.shape\n",
    "\n",
    "    for r in range(n_rows):          # each row of A ...\n",
    "        for c in range(n_cols):      # ... against each column of B\n",
    "            acc = 0\n",
    "            for t in range(n_inner):\n",
    "                acc += A[r, t] * B[t, c]\n",
    "            out[r, c] = acc\n",
    "\n",
    "\n",
    "def alloc_norm_mul(A, B):\n",
    "    \"\"\"Take two matrices, A and B,\n",
    "\n",
    "    - normalize their rows by dividing with their sums\n",
    "    - do a polynomial transformation on each\n",
    "    - multiply them and return the result.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    A : 2-D ndarray, shape (m, n)\n",
    "    B : 2-D ndarray, shape (n, q)\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    out : ndarray of float64, shape (m, q)\n",
    "\n",
    "    Raises\n",
    "    ------\n",
    "    ValueError\n",
    "        If the number of columns of `A` differs from the number of rows\n",
    "        of `B`.\n",
    "\n",
    "    Notes\n",
    "    -----\n",
    "    The implementation is intentionally slow: it is the starting point of\n",
    "    a profiling exercise.\n",
    "    \"\"\"\n",
    "    m, n = A.shape\n",
    "    p, q = B.shape\n",
    "\n",
    "    if not (n == p):\n",
    "        raise ValueError('Matrix dimensions are incompatible')\n",
    "\n",
    "    # Output shape\n",
    "    M, N = m, q\n",
    "\n",
    "    # Step 1: allocate output memory\n",
    "    out = []\n",
    "    for i in range(M):\n",
    "        row = [[0]] * N\n",
    "        out.append(row)\n",
    "\n",
    "    # The nested list has shape (M, N, 1).  Reshape explicitly instead of\n",
    "    # calling squeeze(): squeeze() would also drop the M or N axis whenever\n",
    "    # it has length 1, and _matmul indexes `out` with two indices.\n",
    "    out = np.array(out, dtype=np.float64).reshape(M, N)\n",
    "\n",
    "    # Step 2: normalize arrays by dividing each row by its sum\n",
    "    A = _norm_rows(A)\n",
    "    B = _norm_rows(B)\n",
    "\n",
    "    # Step 3: polynomial transformation of both operands\n",
    "    A = _polynomial(A)\n",
    "    B = _polynomial(B)\n",
    "\n",
    "    # Step 4: matrix product, written into `out`\n",
    "    _matmul(A, B, out)\n",
    "\n",
    "    return out"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1 loops, best of 3: 1.63 s per loop\n"
     ]
    }
   ],
   "source": [
    "# Fixed seed, so A and B are the same on every run\n",
    "rng = np.random.RandomState(0)  # Sorry, Josh, -1 doesn't work\n",
    "\n",
    "# A is 500 x 100 and B is 100 x 60, so the product is 500 x 60\n",
    "A = rng.uniform(size=((500, 100))) * 100\n",
    "B = rng.uniform(size=((100, 60))) * 500\n",
    "\n",
    "# Baseline timing of the implementation to be optimized\n",
    "%timeit alloc_norm_mul(A, B)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      " "
     ]
    }
   ],
   "source": [
    "# Run one call under the profiler to see which functions take the time\n",
    "%prun alloc_norm_mul(A, B)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Solution"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Ellipsis"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Only execute below if you want to see the solution.  Don't do that before you've tried.\n",
    "\n",
    "# No, seriously, you want to try it first.\n",
    "\n",
    "...\n",
    "...\n",
    "...\n",
    "...\n",
    "...\n",
    "...\n",
    "...\n",
    "\n",
    "# OK, OK, here we go."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# NOTE: %load only inserts the file's code into this cell; run the cell\n",
    "# again afterwards so that the loaded code (presumably the definition of\n",
    "# alloc_norm_mul_opt used below) is actually executed.\n",
    "%load alloc_norm_mul_opt.py"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Both implementations must give the same result on A and B, within tolerance\n",
    "np.testing.assert_allclose(alloc_norm_mul(A, B), alloc_norm_mul_opt(A, B), atol=1e-6)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1000 loops, best of 3: 671 µs per loop\n"
     ]
    }
   ],
   "source": [
    "# Time alloc_norm_mul_opt on the same inputs as the baseline\n",
    "%timeit alloc_norm_mul_opt(A, B)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2429.2101341281664"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Speed-up factor, using the timings recorded above:\n",
    "# 1.63 s for alloc_norm_mul divided by 671 microseconds for alloc_norm_mul_opt\n",
    "1.63 / 671e-6"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.4.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}