{ "metadata": { "name": "", "signature": "sha256:d283db7c1491bf6f71f21106d9ab4ec634e65438c7b3fbb940eb10f0e0b4e0e1" }, "nbformat": 3, "nbformat_minor": 0, "worksheets": [ { "cells": [
{ "cell_type": "markdown", "metadata": {}, "source": [ "# Parallel - PyCUDA\n", "\n", "Lecturer:\n", "*\u00c1lvaro Leitao*[2](http://www.cwi.nl/people/2687) - [A.Leitao_at_cwi.nl](mailto:A.Leitao_at_cwi.nl)" ] },
{ "cell_type": "code", "collapsed": false, "input": [ "cd ./PyCUDA/" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "/home/jpsilva/Lisbon1214/notebooks/PyCUDA\n" ] } ], "prompt_number": 2 },
{ "cell_type": "code", "collapsed": false, "input": [ "import os\n", "os.listdir('.')" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 3, "text": [ "['pycuda_montecarlo_pi_GPUArrays.py',\n", " 'pycuda_montecarlo_pi.py',\n", " 'pycuda_sumarrays.py']" ] } ], "prompt_number": 3 },
{ "cell_type": "code", "collapsed": false, "input": [ "%load pycuda_montecarlo_pi.py" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 4 },
{ "cell_type": "code", "collapsed": false, "input": [
"from __future__ import print_function\n",
"\n",
"import pycuda.driver as cuda\n",
"import pycuda.autoinit\n",
"from pycuda.compiler import SourceModule\n",
"import numpy\n",
"import time\n",
"\n",
"# Monte Carlo estimate of pi: draw n points uniformly in the unit square and\n",
"# count how many satisfy x^2 + y^2 <= 1; that fraction approximates pi/4.\n",
"\n",
"# CUDA kernel: thread i writes 1 to counter[i] when sample i passes the test, else 0.\n",
"C_cuda_code = SourceModule(\"\"\"\n",
"__global__ void MC_Pi(const float *U1, const float *U2, float *counter, const int n)\n",
"{\n",
"\tconst int i = blockIdx.x*blockDim.x + threadIdx.x;\n",
"\t// The grid is rounded up to whole blocks, so surplus threads must not touch memory.\n",
"\tif( i >= n )\n",
"\t\treturn;\n",
"\t// Squaring by multiplication is cheaper than calling pow().\n",
"\tcounter[i] = ( (U1[i]*U1[i] + U2[i]*U2[i]) <= 1.0f ) ? 1.0f : 0.0f;\n",
"}\n",
"\"\"\")\n",
"\n",
"n = 16*1024*1024\n",
"U1 = numpy.random.rand(n).astype('f')\n",
"U2 = numpy.random.rand(n).astype('f')\n",
"counter = numpy.zeros(n).astype('f')\n",
"\n",
"start_time = time.time()\n",
"\n",
"func = C_cuda_code.get_function(\"MC_Pi\")\n",
"\n",
"size_block = 1024\n",
"# Ceiling division: enough blocks to cover all n samples.\n",
"size_grid = (n + size_block - 1) // size_block\n",
"# cuda.In / cuda.Out copy the host arrays to and from the device around the launch.\n",
"# PyCUDA needs scalar kernel arguments as sized numpy types, hence numpy.int32.\n",
"func(cuda.In(U1), cuda.In(U2), cuda.Out(counter), numpy.int32(n), block=(size_block,1,1), grid=(size_grid,1))\n",
"\n",
"# Accumulate in float64 so the hit count stays exact for large n.\n",
"hits_gpu = numpy.sum(counter, dtype=numpy.float64)\n",
"\n",
"print(\"PI_gpu = \", 4.0*hits_gpu/n)\n",
"print(\"Time elapsed GPU: \", time.time() - start_time, \"s\")\n",
"\n",
"# Same estimate on the host, using the same samples, for comparison.\n",
"start_time = time.time()\n",
"\n",
"counter_cpu = numpy.sum( (numpy.power(U1,2) + numpy.power(U2,2)) <= 1.0 )\n",
"\n",
"print(\"PI_cpu = \", 4.0*counter_cpu/n)\n",
"print(\"Time elapsed CPU: \", time.time() - start_time, \"s\")\n"
], "language": "python", "metadata": {}, "outputs": [] },
{ "cell_type": "code", "collapsed": false, "input": [ "%load pycuda_montecarlo_pi_GPUArrays.py" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 5 },
{ "cell_type": "code", "collapsed": false, "input": [
"from __future__ import print_function\n",
"\n",
"import pycuda.driver as cuda\n",
"import pycuda.autoinit\n",
"import numpy\n",
"import time\n",
"\n",
"import pycuda.gpuarray as gpuarray\n",
"import pycuda.curandom as curandom\n",
"\n",
"# Same Monte Carlo estimate of pi, written with GPUArray expressions\n",
"# instead of a hand-written kernel.\n",
"n = 16*1024*1024\n",
"U1 = curandom.rand(n)\n",
"U2 = curandom.rand(n)\n",
"\n",
"start_time = time.time()\n",
"\n",
"# .get() copies the reduced value back to the host, so the elapsed time\n",
"# printed below covers the finished computation.\n",
"hits_gpu = float(gpuarray.sum( (U1*U1 + U2*U2) <= 1.0 ).get())\n",
"\n",
"print(\"PI_gpu = \", 4.0*hits_gpu/n)\n",
"print(\"Time elapsed GPUArrays: \", time.time() - start_time, \"s\")\n",
"\n",
"# Sequential part\n",
"\n",
"# Fresh host-side samples; rebinding U1/U2 also drops the references to the device arrays.\n",
"U1 = numpy.random.rand(n).astype('f')\n",
"U2 = numpy.random.rand(n).astype('f')\n",
"\n",
"start_time = time.time()\n",
"\n",
"counter_cpu = numpy.sum( (numpy.power(U1,2) + numpy.power(U2,2)) <= 1.0 )\n",
"\n",
"print(\"PI_cpu = \", 4.0*counter_cpu/n)\n",
"print(\"Time elapsed CPU: \", time.time() - start_time, \"s\")"
], "language": "python", "metadata": {}, "outputs": [] },
{ "cell_type": "code", "collapsed": false, "input": [ "%load pycuda_sumarrays.py" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 6 },
{ "cell_type": "code", "collapsed": false, "input": [
"from __future__ import print_function\n",
"\n",
"import pycuda.driver as cuda\n",
"import pycuda.autoinit\n",
"from pycuda.compiler import SourceModule\n",
"import numpy\n",
"import time\n",
"\n",
"# CUDA kernel: element-wise c = a + b, one thread per element.\n",
"C_cuda_code = SourceModule(\"\"\"\n",
"__global__ void sum_arrays(const float *a, const float *b, float *c, const int n)\n",
"{\n",
"\tconst int i = blockIdx.x*blockDim.x + threadIdx.x;\n",
"\t// Surplus threads in the last block must not access memory past the arrays.\n",
"\tif( i < n )\n",
"\t\tc[i] = a[i] + b[i];\n",
"}\n",
"\"\"\")\n",
"\n",
"n = 16*1024*1024\n",
"a = numpy.random.randn(n).astype('f')\n",
"b = numpy.random.randn(n).astype('f')\n",
"c = numpy.zeros(n).astype('f')\n",
"\n",
"# Device buffers are allocated before the timer starts; the timed region covers the\n",
"# host-to-device copies, the kernel launch and the device-to-host copy.\n",
"a_gpu = cuda.mem_alloc(a.nbytes)\n",
"b_gpu = cuda.mem_alloc(b.nbytes)\n",
"c_gpu = cuda.mem_alloc(c.nbytes)\n",
"\n",
"start_time = time.time()\n",
"\n",
"cuda.memcpy_htod(a_gpu, a)\n",
"cuda.memcpy_htod(b_gpu, b)\n",
"\n",
"func = C_cuda_code.get_function(\"sum_arrays\")\n",
"\n",
"size_block = 1024\n",
"# Ceiling division: enough blocks to cover all n elements.\n",
"size_grid = (n + size_block - 1) // size_block\n",
"# PyCUDA needs scalar kernel arguments as sized numpy types, hence numpy.int32.\n",
"func(a_gpu, b_gpu, c_gpu, numpy.int32(n), block=(size_block,1,1), grid=(size_grid,1))\n",
"\n",
"cuda.memcpy_dtoh(c, c_gpu)\n",
"\n",
"print(\"Time elapsed GPU: \", time.time() - start_time, \"s\")\n",
"\n",
"start_time = time.time()\n",
"c_cpu = a + b\n",
"print(\"Time elapsed CPU: \", time.time() - start_time, \"s\")\n",
"\n",
"# Check that the GPU and CPU results agree element-wise.\n",
"# numpy.all replaces numpy.alltrue, which newer NumPy releases no longer provide.\n",
"print(numpy.all(numpy.abs(c - c_cpu) < 1e-5))"
], "language": "python", "metadata": {}, "outputs": [] },
{ "cell_type": "code", "collapsed": false, "input": [ "%load_ext version_information\n", "%version_information numpy, pycuda" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
SoftwareVersion
Python2.7.9 64bit [GCC 4.4.7 20120313 (Red Hat 4.4.7-1)]
IPython2.3.1
OSLinux 3.16.0 28 generic x86_64 with debian jessie sid
numpy1.9.1
pycudapycuda
Wed Dec 17 13:18:01 2014 CET
" ], "json": [ "{\"Software versions\": [{\"version\": \"2.7.9 64bit [GCC 4.4.7 20120313 (Red Hat 4.4.7-1)]\", \"module\": \"Python\"}, {\"version\": \"2.3.1\", \"module\": \"IPython\"}, {\"version\": \"Linux 3.16.0 28 generic x86_64 with debian jessie sid\", \"module\": \"OS\"}, {\"version\": \"1.9.1\", \"module\": \"numpy\"}, {\"version\": \"pycuda\", \"module\": \"pycuda\"}]}" ], "latex": [ "\\begin{tabular}{|l|l|}\\hline\n", "{\\bf Software} & {\\bf Version} \\\\ \\hline\\hline\n", "Python & 2.7.9 64bit [GCC 4.4.7 20120313 (Red Hat 4.4.7-1)] \\\\ \\hline\n", "IPython & 2.3.1 \\\\ \\hline\n", "OS & Linux 3.16.0 28 generic x86\\_64 with debian jessie sid \\\\ \\hline\n", "numpy & 1.9.1 \\\\ \\hline\n", "pycuda & pycuda \\\\ \\hline\n", "\\hline \\multicolumn{2}{|l|}{Wed Dec 17 13:18:01 2014 CET} \\\\ \\hline\n", "\\end{tabular}\n" ], "metadata": {}, "output_type": "pyout", "prompt_number": 7, "text": [ "Software versions\n", "Python 2.7.9 64bit [GCC 4.4.7 20120313 (Red Hat 4.4.7-1)]\n", "IPython 2.3.1\n", "OS Linux 3.16.0 28 generic x86_64 with debian jessie sid\n", "numpy 1.9.1\n", "pycuda pycuda\n", "Wed Dec 17 13:18:01 2014 CET" ] } ], "prompt_number": 7 },
{ "cell_type": "code", "collapsed": false, "input": [
"from IPython.core.display import HTML\n",
"\n",
"def css_styling():\n",
"    \"\"\"Return the contents of ./styles/custom.css wrapped in an IPython HTML display object.\"\"\"\n",
"    # The path is resolved against the kernel's current working directory.\n",
"    # The with-block closes the file handle as soon as the stylesheet has been read.\n",
"    with open(\"./styles/custom.css\", \"r\") as css_file:\n",
"        styles = css_file.read()\n",
"    return HTML(styles)\n",
"\n",
"css_styling()"
], "language": "python", "metadata": {}, "outputs": [ { "html": [ "\n", "\n", "\n", "\n", "\n" ], "metadata": {}, "output_type": "pyout", "prompt_number": 1, "text": [ "" ] } ], "prompt_number": 1 } ], "metadata": {} } ] }