{
 "metadata": {
  "name": ""
 },
 "nbformat": 3,
 "nbformat_minor": 0,
 "worksheets": [
  {
   "cells": [
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "# GPU FFT and matrix-product demos with `scikits.cuda`\n",
      "\n",
      "This notebook runs two demos on the GPU through PyCUDA and `scikits.cuda`, checks each result against NumPy, and times a large 1D FFT on the GPU and on the CPU.\n",
      "\n",
      "The demo code was originally pulled into the notebook with `%load` from:\n",
      "\n",
      "- https://raw.github.com/lebedov/scikits.cuda/master/demos/fft_demo.py\n",
      "- https://github.com/lebedov/scikits.cuda/raw/master/demos/dot_demo.py\n",
      "\n",
      "The code is kept inline (instead of re-running `%load`) so that *Restart Kernel -> Run All* does not depend on the network or on upstream changes.\n",
      "\n",
      "**Requirements:** a CUDA-capable GPU, `pycuda` and `scikits.cuda`. With `N = 4096*16000` the complex64 test vector alone takes 500 MiB on the host and the same amount on the device; lower `N` in the configuration cell if memory is tight."
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "import string  # carried over from the dot demo cell; not used in this notebook\n",
      "\n",
      "import numpy as np\n",
      "import pycuda.autoinit\n",
      "import pycuda.driver as drv\n",
      "import pycuda.gpuarray as gpuarray\n",
      "\n",
      "import scikits.cuda.fft as cu_fft\n",
      "import scikits.cuda.linalg as culinalg\n",
      "import scikits.cuda.misc as cumisc"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Fixed seed so that a full re-run draws the same test data.\n",
      "SEED = 0\n",
      "np.random.seed(SEED)\n",
      "\n",
      "# Number of samples in the FFT test vectors (65,536,000).\n",
      "N = 4096*16000\n",
      "\n",
      "culinalg.init()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "def random_array(shape, dtype):\n",
      "    \"\"\"Return an array of ``np.random.rand`` samples cast to ``dtype``.\n",
      "\n",
      "    shape -- tuple of dimensions, e.g. ``(N,)`` or ``(10, 5)``.\n",
      "    dtype -- NumPy scalar type; for complex types the imaginary part is\n",
      "             filled from a second ``np.random.rand`` draw.\n",
      "    \"\"\"\n",
      "    values = np.random.rand(*shape)\n",
      "    if np.iscomplexobj(dtype()):\n",
      "        values = values + 1j*np.random.rand(*shape)\n",
      "    return np.asarray(values, dtype)\n",
      "\n",
      "\n",
      "def gpu_fft_inplace(data_gpu, fft_plan):\n",
      "    \"\"\"Run one in-place forward FFT on ``data_gpu`` and wait for the GPU.\"\"\"\n",
      "    cu_fft.fft(data_gpu, data_gpu, fft_plan)\n",
      "    # GPU work can still be in flight when the call returns; wait for it so\n",
      "    # that a surrounding %time measures the transform itself.\n",
      "    pycuda.autoinit.context.synchronize()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "## FFT round trip, real input\n",
      "\n",
      "Forward real-to-complex transform followed by the inverse transform, on the GPU (CUFFT through `scikits.cuda.fft`) and on the CPU (`np.fft`). The two reconstructed signals must agree."
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "%%time\n",
      "print('Testing fft/ifft..')\n",
      "\n",
      "x = random_array((N,), np.float32)\n",
      "xf = np.fft.fft(x)\n",
      "y = np.real(np.fft.ifft(xf))\n",
      "\n",
      "x_gpu = gpuarray.to_gpu(x)\n",
      "# A real-to-complex transform of N samples produces N//2 + 1 complex values.\n",
      "# Floor division keeps the size an integer on Python 2 and Python 3 alike.\n",
      "xf_gpu = gpuarray.empty(N//2 + 1, np.complex64)\n",
      "plan_forward = cu_fft.Plan(x_gpu.shape, np.float32, np.complex64)\n",
      "cu_fft.fft(x_gpu, xf_gpu, plan_forward)\n",
      "\n",
      "y_gpu = gpuarray.empty_like(x_gpu)\n",
      "plan_inverse = cu_fft.Plan(x_gpu.shape, np.complex64, np.float32)\n",
      "# Fourth argument: presumably the `scale` flag of scikits.cuda.fft.ifft,\n",
      "# needed for the inverse to undo the forward transform -- confirm in its docs.\n",
      "cu_fft.ifft(xf_gpu, y_gpu, plan_inverse, True)\n",
      "\n",
      "print('Success status:  %s' % np.allclose(y, y_gpu.get(), atol=1e-6))"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "## FFT round trip, complex input, in place\n",
      "\n",
      "Same check with a complex vector, using a single device buffer as both input and output. This cell rebinds `x`, `x_gpu` and `plan`; the timing section below relies on them."
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "%%time\n",
      "print('Testing in-place fft..')\n",
      "\n",
      "x = random_array((N,), np.complex64)\n",
      "x_gpu = gpuarray.to_gpu(x)\n",
      "\n",
      "plan = cu_fft.Plan(x_gpu.shape, np.complex64, np.complex64)\n",
      "cu_fft.fft(x_gpu, x_gpu, plan)\n",
      "cu_fft.ifft(x_gpu, x_gpu, plan, True)\n",
      "\n",
      "print('Success status:  %s' % np.allclose(x, x_gpu.get(), atol=1e-6))"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "## Timing: GPU vs. NumPy\n",
      "\n",
      "Both cells below transform the same complex64 vector `x`.\n",
      "\n",
      "- The GPU transform is in place and unnormalized, so the timing cell first restores `x_gpu` from `x`. Without that step every re-run transforms the previous result again and the values grow until they overflow; this is the likely cause of the `(nan+nan*j)` value that an earlier run of this notebook saved for `x_gpu.get()[1234]`.\n",
      "- Only the transform is timed. The upload happens before `%time`, and the result is not copied back inside the timed statement.\n",
      "- `np.fft.fft` may compute in double precision even for complex64 input (older NumPy releases always do), so the CPU figure is indicative rather than a like-for-like comparison.\n",
      "\n",
      "Figures saved by an earlier run of this notebook (wall time, one machine, not re-measured here):\n",
      "\n",
      "| Step | Wall time |\n",
      "|---|---|\n",
      "| Both FFT round-trip tests in one cell, including imports (CPU times: user 14.9 s, sys 3.21 s, total 18.2 s) | 18.4 s |\n",
      "| In-place GPU FFT followed by `x_gpu.get()[1234]` (includes copying the whole array back) | 280 ms |\n",
      "| `np.fft.fft(x)` | 4.19 s |\n",
      "| Matrix/vector product demo (all checks printed `True`) | 3.7 ms |"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Restore the input so that this cell gives the same result on every run.\n",
      "x_gpu.set(x)\n",
      "pycuda.autoinit.context.synchronize()\n",
      "\n",
      "%time gpu_fft_inplace(x_gpu, plan)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Spot-check one output bin; it must be finite. Note that get() copies the\n",
      "# whole array back to the host, which is why it is kept out of the timed cell.\n",
      "x_gpu.get()[1234]"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "%%time\n",
      "xfft = np.fft.fft(x)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Length of the test vector in units of 2**20 elements (rounded down).\n",
      "x.shape[0] // 1024**2"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "## Matrix and vector products\n",
      "\n",
      "For every supported dtype, compute `(a . b) . c` and a vector dot product with `scikits.cuda.linalg.dot` and compare against `np.dot`."
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "%%time\n",
      "# Double precision is only supported by devices with compute\n",
      "# capability >= 1.3:\n",
      "demo_types = [np.float32, np.complex64]\n",
      "if cumisc.get_compute_capability(pycuda.autoinit.device) >= 1.3:\n",
      "    demo_types.extend([np.float64, np.complex128])\n",
      "\n",
      "for t in demo_types:\n",
      "    print('Testing matrix multiplication for type ' + str(np.dtype(t)))\n",
      "    a = random_array((10, 5), t)\n",
      "    b = random_array((5, 5), t)\n",
      "    c = random_array((5, 5), t)\n",
      "\n",
      "    a_gpu = gpuarray.to_gpu(a)\n",
      "    b_gpu = gpuarray.to_gpu(b)\n",
      "    c_gpu = gpuarray.to_gpu(c)\n",
      "\n",
      "    temp_gpu = culinalg.dot(a_gpu, b_gpu)\n",
      "    d_gpu = culinalg.dot(temp_gpu, c_gpu)\n",
      "    # Release the intermediate product's device memory right away.\n",
      "    temp_gpu.gpudata.free()\n",
      "    del temp_gpu\n",
      "    print('Success status:  %s' % np.allclose(np.dot(np.dot(a, b), c), d_gpu.get()))\n",
      "\n",
      "    print('Testing vector multiplication for type ' + str(np.dtype(t)))\n",
      "    d = random_array((5,), t)\n",
      "    e = random_array((5,), t)\n",
      "\n",
      "    d_gpu = gpuarray.to_gpu(d)\n",
      "    e_gpu = gpuarray.to_gpu(e)\n",
      "\n",
      "    temp = culinalg.dot(d_gpu, e_gpu)\n",
      "    print('Success status:  %s' % np.allclose(np.dot(d, e), temp))"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    }
   ],
   "metadata": {}
  }
 ]
}