"""Kinetica UDF example: run a few cuBLAS routines and compare with NumPy.

Each ``cublas_*`` helper uploads its NumPy inputs to the GPU, calls one
single-precision cuBLAS routine through scikit-cuda and returns the result as
a NumPy array (or a plain index).  ``example`` feeds them with columns read
from the first input table of the proc and prints the cuBLAS result next to
the equivalent NumPy computation.
"""
import sys
import os
import os.path

# NOTE: third-party import order is kept as in the original file, since
# importing pycuda.autoinit has the side effect of creating a CUDA context.
from kinetica_proc import ProcData
from skcuda import cublas
import numpy as np
import pycuda.autoinit
import pycuda.gpuarray as gpuarray

# Update PATH with location of nvcc compiler
CUDA_BIN = '/usr/local/cuda/bin'
os.environ['PATH'] = os.environ['PATH'] + os.pathsep + os.path.join(os.sep, os.sep.join(CUDA_BIN.split('/')))

# Set XDG_CACHE_HOME directory for PyCUDA to use for temp files;
# will use HOME directory for temp space if not set, which for gpudb_proc
# is /home/gpudb, where it doesn't have write access
os.environ['XDG_CACHE_HOME'] = '/tmp'


def _as_float32(a):
    """Return ``a`` as a C-contiguous float32 array.

    The helpers below call single-precision ("S") cuBLAS routines on the raw
    device buffer, so any other dtype or memory layout would be misread.
    No copy is made when ``a`` already has the required dtype and layout.
    """
    return np.ascontiguousarray(a, dtype=np.float32)


def cublas_max_element_index(h, x):
    """Return the index of the element of ``x`` with the largest magnitude.

    h -- cuBLAS handle (from ``cublas.cublasCreate()``)
    x -- vector of values; converted to float32 before upload

    The value is whatever ``cublas.cublasIsamax`` returns; scikit-cuda
    documents it as a 0-based index of the maximum *absolute* value.
    """
    x = _as_float32(x)
    x_gpu = gpuarray.to_gpu(x)
    return cublas.cublasIsamax(h, x.size, x_gpu.gpudata, 1)


def cublas_swap_vectors(h, x, y):
    """Swap the contents of ``x`` and ``y`` on the GPU.

    h    -- cuBLAS handle
    x, y -- vectors; converted to float32 before upload

    Returns a tuple ``(new_x, new_y)`` of host arrays; the inputs themselves
    are not modified.  ``x.size`` elements are exchanged.

    Raises ValueError if ``y`` holds fewer elements than ``x``, because
    cuBLAS would otherwise access memory past the end of ``y``.
    """
    x = _as_float32(x)
    y = _as_float32(y)
    if y.size < x.size:
        raise ValueError("y must hold at least as many elements as x")
    x_gpu = gpuarray.to_gpu(x)
    y_gpu = gpuarray.to_gpu(y)
    cublas.cublasSswap(h, x.size, x_gpu.gpudata, 1, y_gpu.gpudata, 1)
    return x_gpu.get(), y_gpu.get()


def cublas_add_vectors(h, x, y):
    """Return ``x + y`` computed with cuBLAS ``Saxpy`` (alpha = 1).

    h    -- cuBLAS handle
    x, y -- vectors; converted to float32 before upload

    The result has the shape of ``y``; the inputs are not modified.

    Raises ValueError if ``y`` holds fewer elements than ``x``, because
    cuBLAS would otherwise access memory past the end of ``y``.
    """
    x = _as_float32(x)
    y = _as_float32(y)
    if y.size < x.size:
        raise ValueError("y must hold at least as many elements as x")
    x_gpu = gpuarray.to_gpu(x)
    y_gpu = gpuarray.to_gpu(y)
    cublas.cublasSaxpy(h, x.size, 1.0, x_gpu.gpudata, 1, y_gpu.gpudata, 1)
    return y_gpu.get()


def cublas_matrix_vector_product(h, M, x):
    """Return ``M.T * x`` computed with cuBLAS ``Sgemv``.

    h -- cuBLAS handle
    M -- 2-D matrix of shape (rows, cols); converted to float32
    x -- vector with at least ``rows`` elements; converted to float32

    Returns a float32 array of shape (cols, 1).

    Raises ValueError if ``x`` holds fewer than ``rows`` elements.
    """
    M = _as_float32(M)
    x = _as_float32(x)
    rows, cols = M.shape[0], M.shape[1]
    if x.size < rows:
        raise ValueError("x must hold at least M.shape[0] elements")
    M_gpu = gpuarray.to_gpu(M)
    x_gpu = gpuarray.to_gpu(x)
    y1_gpu = gpuarray.empty((cols, 1), np.float32)
    # cuBLAS is column-major: the row-major (rows x cols) buffer of M is seen
    # as a (cols x rows) matrix, i.e. M transposed, so the 'n' (no transpose)
    # operation yields M.T * x.
    cublas.cublasSgemv(h, 'n', cols, rows, np.float32(1.0),
                       M_gpu.gpudata, cols, x_gpu.gpudata, 1,
                       np.float32(0.0), y1_gpu.gpudata, 1)
    return y1_gpu.get()


def cublas_vector_transpose_product(h, x):
    """Return the rank-1 product ``x * x.T`` computed with cuBLAS ``Ssyr``.

    h -- cuBLAS handle
    x -- vector with ``x.shape[0]`` elements; converted to float32

    Returns a float32 array of shape (n, n) with n = ``x.shape[0]``.

    NOTE: ``Ssyr`` is a symmetric update that writes only one triangle of the
    output; the other triangle keeps the zeros it was initialised with.  With
    fill mode 'u' on cuBLAS' column-major view this should correspond to the
    lower triangle (diagonal included) of the returned row-major array.
    """
    x = _as_float32(x)
    n = x.shape[0]
    x_gpu = gpuarray.to_gpu(x)
    A_gpu = gpuarray.zeros((n, n), np.float32)
    cublas.cublasSsyr(h, 'u', n, 1.0, x_gpu.gpudata, 1, A_gpu.gpudata, n)
    return A_gpu.get()


def cublas_matrix_transpose_product(h, A):
    """Return ``A * A.T`` computed with cuBLAS ``Ssyrk``.

    h -- cuBLAS handle
    A -- 2-D matrix of shape (rows, cols); converted to float32

    Returns a float32 array of shape (rows, rows).

    NOTE: ``Ssyrk`` is a symmetric update that writes only one triangle of the
    output; the other triangle keeps the zeros it was initialised with.  With
    fill mode 'u' on cuBLAS' column-major view this should correspond to the
    lower triangle (diagonal included) of the returned row-major array.
    """
    A = _as_float32(A)
    rows, cols = A.shape[0], A.shape[1]
    A_gpu = gpuarray.to_gpu(A)
    C_gpu = gpuarray.zeros((rows, rows), np.float32)
    # The row-major buffer of A is a column-major (cols x rows) matrix, so
    # the 't' operation computes (A.T).T * A.T = A * A.T.
    cublas.cublasSsyrk(h, 'u', 't', rows, cols, 1.0,
                       A_gpu.gpudata, cols, 0.0, C_gpu.gpudata, rows)
    return C_gpu.get()


def example(pd):
    """Print cuBLAS results next to their NumPy equivalents.

    pd -- proc data object; ``pd.input_data[0]`` is used as the input table
          and its 'x', 'y' and 'z' columns are read row by row.

    Builds column vectors x and y and the three-column matrix M = [x y z]
    (all float32), then runs every ``cublas_*`` helper on them.  Nothing is
    returned; all output goes to stdout.
    """
    np.set_printoptions(linewidth=200)

    in_table = pd.input_data[0]
    n_rows = in_table.size

    # Allocate float32 directly; every element is overwritten below.
    x = np.empty((n_rows, 1), dtype=np.float32)
    y = np.empty((n_rows, 1), dtype=np.float32)
    M = np.empty((n_rows, 3), dtype=np.float32)

    # Initialize vectors & matrix with database values
    col_x = in_table['x']
    col_y = in_table['y']
    col_z = in_table['z']
    for i in range(n_rows):
        x[i, 0] = col_x[i]
        y[i, 0] = col_y[i]
        M[i, 0] = col_x[i]
        M[i, 1] = col_y[i]
        M[i, 2] = col_z[i]

    h = cublas.cublasCreate()
    try:
        # Single-argument print(...) produces the same output under the
        # Python 2 print statement and the Python 3 print function.
        print("x = \n%s" % x)
        print("y = \n%s" % y)
        print("M = \n%s" % M)

        print("")
        print("Swap vectors x & y (cuBLAS)")
        x_swap, y_swap = cublas_swap_vectors(h, x, y)
        print("x = \n%s" % x_swap)
        print("y = \n%s" % y_swap)

        print("")
        print("Swap vectors x & y (NumPy)")
        x_swap, y_swap = y.copy(), x.copy()
        print("x = \n%s" % x_swap)
        print("y = \n%s" % y_swap)

        print("")
        print("Max element index (cuBLAS)")
        print(cublas_max_element_index(h, x))

        print("")
        print("Max element index (NumPy)")
        # Isamax picks the largest magnitude, so compare absolute values.
        print(np.argmax(np.abs(x)))

        print("")
        print("x + y (cuBLAS)")
        print(cublas_add_vectors(h, x, y))

        print("")
        print("x + y (NumPy)")
        print(x + y)

        print("")
        print("M T * x (cuBLAS)")
        print(cublas_matrix_vector_product(h, M, x))

        print("")
        print("M T * x (NumPy)")
        print(M.T.dot(x))

        print("")
        print("x * x T (cuBLAS)")
        print(cublas_vector_transpose_product(h, x))

        print("")
        print("x * x T (NumPy)")
        print(x * x.T)

        print("")
        print("M * M T (cuBLAS)")
        print(cublas_matrix_transpose_product(h, M))

        print("")
        print("M * M T (NumPy)")
        print(M.dot(M.T))
    finally:
        # Release the cuBLAS handle even if one of the steps above fails.
        cublas.cublasDestroy(h)


if __name__ == "__main__":
    proc_data = ProcData()

    # Run the demo only in the instance handling the last data segment.
    if int(proc_data.request_info["data_segment_number"]) + 1 == int(proc_data.request_info["data_segment_count"]):
        example(proc_data)

    # NOTE(review): assumed to sit outside the segment check so that every
    # instance reports completion -- confirm against the UDF API contract.
    proc_data.complete()