{ "metadata": { "name": "memview_bench_2" }, "nbformat": 3, "nbformat_minor": 0, "worksheets": [ { "cells": [ { "cell_type": "heading", "level": 1, "metadata": {}, "source": [ "Memoryview Benchmarks: Round 2" ] }, { "cell_type": "code", "collapsed": false, "input": [ "%load_ext cythonmagic" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 1 }, { "cell_type": "heading", "level": 2, "metadata": {}, "source": [ "Inlined Memoryview" ] }, { "cell_type": "code", "collapsed": false, "input": [ "%%cython\n", "\n", "import numpy as np\n", "cimport numpy as np\n", "cimport cython\n", "\n", "@cython.boundscheck(False)\n", "@cython.wraparound(False)\n", "cdef inline double inner_func(double[:, ::1] X):\n", " return X[0, 0]\n", "\n", "def loop_1(int N, switch=True):\n", " cdef double[:, ::1] X = np.zeros((100, 100))\n", " cdef int i\n", " for i in range(N):\n", " # this should be inlined by the compiler\n", " inner_func(X)" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "missing cimport: /home/vanderplas/.config/ipython/cython/_cython_magic_aad7a6b46ed24a8baf2373c6b8784ac3.pyx\n", "cython\n" ] } ], "prompt_number": 2 }, { "cell_type": "code", "collapsed": false, "input": [ "timeit loop_1(1E6)" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "100000 loops, best of 3: 10.1 us per loop\n" ] } ], "prompt_number": 3 }, { "cell_type": "heading", "level": 2, "metadata": {}, "source": [ "Non-inlined Memoryview" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Now we'll repeat the benchmark, but call the inner function through a\n", "function pointer chosen at run time, so that it cannot be inlined." ] }, { "cell_type": "code", "collapsed": false, "input": [ "%%cython\n", "\n", "import numpy as np\n", "cimport numpy as np\n", "cimport cython\n", "\n", "ctypedef double (*inner_func_ptr)(double[:, ::1])\n", "\n", "@cython.boundscheck(False)\n", 
"@cython.wraparound(False)\n", "cdef double inner_func_1(double[:, ::1] X):\n", " return X[0, 0]\n", "\n", "@cython.boundscheck(False)\n", "@cython.wraparound(False)\n", "cdef double inner_func_2(double[:, ::1] X):\n", " return X[0, 0]\n", "\n", "def loop_2(int N, switch=True):\n", " # use a switch to ensure that inlining can't happen: compilers\n", " # are usually smart enough to figure it out otherwise.\n", " cdef inner_func_ptr func\n", " if switch:\n", " func = inner_func_1\n", " else:\n", " func = inner_func_2\n", " \n", " cdef double[:, ::1] X = np.zeros((100, 100))\n", " cdef int i\n", " for i in range(N):\n", " func(X)" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "missing cimport: /home/vanderplas/.config/ipython/cython/_cython_magic_ac4b8923a5786ceee8b7f92131a08998.pyx\n", "cython\n" ] } ], "prompt_number": 4 }, { "cell_type": "code", "collapsed": false, "input": [ "%timeit loop_2(1E6)" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "10 loops, best of 3: 22.9 ms per loop\n" ] } ], "prompt_number": 5 }, { "cell_type": "markdown", "metadata": {}, "source": [ "In this case, inlining improves the computation speed by a factor of 2000!" 
] }, { "cell_type": "heading", "level": 2, "metadata": {}, "source": [ "Attempting to inline ndarrays" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Here we'll replicate the fast method above, but with a typed numpy array\n", "rather than a typed memoryview:" ] }, { "cell_type": "code", "collapsed": false, "input": [ "%%cython\n", "\n", "import numpy as np\n", "cimport numpy as np\n", "cimport cython\n", "\n", "@cython.boundscheck(False)\n", "@cython.wraparound(False)\n", "cdef inline double inner_func(np.ndarray[double, ndim=2, mode='c'] X):\n", " return X[0, 0]\n", "\n", "def loop_3(int N, switch=True):\n", " cdef np.ndarray[double, ndim=2, mode='c'] X = np.zeros((100, 100))\n", " cdef int i\n", " for i in range(N):\n", " inner_func(X)\n", " " ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "missing cimport: /home/vanderplas/.config/ipython/cython/_cython_magic_0523a248c0a6af0427879e879eda0095.pyx\n", "cython\n" ] }, { "output_type": "stream", "stream": "stderr", "text": [ "warning: /home/vanderplas/.config/ipython/cython/_cython_magic_0523a248c0a6af0427879e879eda0095.pyx:8:30: Buffer unpacking not optimized away.\n", "warning: /home/vanderplas/.config/ipython/cython/_cython_magic_0523a248c0a6af0427879e879eda0095.pyx:8:30: Buffer unpacking not optimized away.\n" ] } ], "prompt_number": 6 }, { "cell_type": "markdown", "metadata": {}, "source": [ "These warnings indicate a problem: Cython could not optimize away\n", "the buffer unpacking for the `np.ndarray` argument of `inner_func`.\n", "Let's see how this compares to the other implementations:" ] }, { "cell_type": "code", "collapsed": false, "input": [ "print \"inlined memview:\"\n", "%timeit loop_1(1E6)\n", "print \"non-inlined memview:\"\n", "%timeit loop_2(1E6)\n", "print \"inlined ndarray:\"\n", "%timeit loop_3(1E6)" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "inlined memview:\n", "100000 loops, best 
of 3: 10.1 us per loop" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "non-inlined memview:\n", "10 loops, best of 3: 22.9 ms per loop" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "inlined ndarray:\n", "1 loops, best of 3: 617 ms per loop" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n" ] } ], "prompt_number": 7 }, { "cell_type": "markdown", "metadata": {}, "source": [ "The result for the ndarray is about 27 times slower than the non-inlined\n", "memoryview example (617 ms versus 22.9 ms), and slower still relative to\n", "the inlined memoryview example above!" ] } ], "metadata": {} } ] }