{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Profiling\n",
    "\n",
    "```\n",
    "conda install line_profiler\n",
    "```\n",
    "\n",
    "or\n",
    "\n",
    "```\n",
    "pip install line_profiler\n",
    "```\n",
    "\n",
    "- Annotate the function you want to profile with `@profile`\n",
    "- Save your Python code to a text file, say `mycode.py`\n",
    "- Run `kernprof -l mycode.py`\n",
    "- View the output: `python -m line_profiler mycode.py.lprof`"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Breakout: Testing and Profiling"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Examine the `alloc_norm_mul` function below. Use profiling to speed it up, one step at a time."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "def _norm_rows(X):\n",
    "    \"\"\"Return a float64 copy of `X` with every row divided by its own sum.\n",
    "\n",
    "    The input array is left untouched. The sums and the divisions are\n",
    "    done with explicit Python loops, one element at a time. There is no\n",
    "    guard against rows that sum to zero.\n",
    "    \"\"\"\n",
    "    # Work on a float64 copy: dividing in place inside an integer array\n",
    "    # would silently truncate every quotient.\n",
    "    X = X.astype(np.float64)\n",
    "    m, n = X.shape\n",
    "\n",
    "    for i in range(m):\n",
    "        row_sum = 0\n",
    "\n",
    "        for j in range(n):\n",
    "            row_sum += X[i, j]\n",
    "\n",
    "        for j in range(n):\n",
    "            X[i, j] /= row_sum\n",
    "\n",
    "    return X\n",
    "\n",
    "\n",
    "def _polynomial(X):\n",
    "    \"\"\"Return `(X + 3 * X)**2`, evaluated element-wise on the array `X`.\"\"\"\n",
    "    return (X + (3 * X))**2\n",
    "\n",
    "\n",
    "def _matmul(A, B, out):\n",
    "    \"\"\"Write the matrix product of `A` and `B` into `out`, in place.\n",
    "\n",
    "    `out` must be indexable as `out[i, j]` for every row `i` of `A` and\n",
    "    every column `j` of `B`. Nothing is returned.\n",
    "    \"\"\"\n",
    "    rows_A, cols_A = A.shape\n",
    "    rows_B, cols_B = B.shape\n",
    "\n",
    "    # Take each row in A\n",
    "    for i in range(rows_A):\n",
    "\n",
    "        # And multiply by every column in B\n",
    "        for j in range(cols_B):\n",
    "            s = 0\n",
    "            for k in range(cols_A):\n",
    "                s = s + A[i, k] * B[k, j]\n",
    "\n",
    "            out[i, j] = s\n",
    "\n",
    "\n",
    "def alloc_norm_mul(A, B):\n",
    "    \"\"\"Take two matrices, A and B,\n",
    "\n",
    "    - normalize their rows by dividing with their sums\n",
    "    - do a polynomial transformation on each\n",
    "    - multiply them and return the result.\n",
    "\n",
    "    A must have as many columns as B has rows; otherwise a ValueError\n",
    "    is raised. The result is a new float64 array with one row per row\n",
    "    of A and one column per column of B.\n",
    "    \"\"\"\n",
    "    m, n = A.shape\n",
    "    p, q = B.shape\n",
    "\n",
    "    if not (n == p):\n",
    "        raise ValueError('Matrix dimensions are incompatible')\n",
    "\n",
    "    # Output shape\n",
    "    M, N = m, q\n",
    "\n",
    "    # Step 1: allocate output memory\n",
    "    out = []\n",
    "    for i in range(M):\n",
    "        row = [[0]] * N\n",
    "        out.append(row)\n",
    "\n",
    "    # The nested lists form an (M, N, 1) block. Reshape to exactly (M, N):\n",
    "    # a bare squeeze() would also drop M or N whenever either equals 1,\n",
    "    # and _matmul could then no longer index out[i, j].\n",
    "    out = np.array(out, dtype=np.float64).reshape(M, N)\n",
    "\n",
    "    # Step 2: normalize arrays by dividing each row by its sum\n",
    "    A = _norm_rows(A)\n",
    "    B = _norm_rows(B)\n",
    "\n",
    "    # Step 3: apply the polynomial transformation to each\n",
    "    A = _polynomial(A)\n",
    "    B = _polynomial(B)\n",
    "\n",
    "    # Step 4: multiply, writing the product into `out`\n",
    "    _matmul(A, B, out)\n",
    "\n",
    "    return out"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1 loops, best of 3: 1.63 s per loop\n"
     ]
    }
   ],
   "source": [
    "rng = np.random.RandomState(0)  # Sorry, Josh, -1 doesn't work\n",
    "\n",
    "A = rng.uniform(size=((500, 100))) * 100\n",
    "B = rng.uniform(size=((100, 60))) * 500\n",
    "\n",
    "%timeit alloc_norm_mul(A, B)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      " "
     ]
    }
   ],
   "source": [
    "%prun alloc_norm_mul(A, B)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Solution"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Ellipsis"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Only execute below if you want to see the solution. Don't do that before you've tried.\n",
    "\n",
    "# No, seriously, you want to try it first.\n",
    "\n",
    "...\n",
    "...\n",
    "...\n",
    "...\n",
    "...\n",
    "...\n",
    "...\n",
    "\n",
    "# OK, OK, here we go."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "%load alloc_norm_mul_opt.py"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "np.testing.assert_allclose(alloc_norm_mul(A, B), alloc_norm_mul_opt(A, B), atol=1e-6)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1000 loops, best of 3: 671 µs per loop\n"
     ]
    }
   ],
   "source": [
    "%timeit alloc_norm_mul_opt(A, B)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2429.2101341281664"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Speed-up factor: original runtime (1.63 s) / optimized runtime (671 µs)\n",
    "1.63 / 671e-6"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.4.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}