// NOTE(review): this text looks like an extraction of the original header in which
// the source line numbers are embedded and some lines (braces, #else/#endif, ...)
// are not visible. The comments below describe only what can be seen here.
//
// Visible contents of the next two (collapsed) lines:
//  - the include guard TESSERACT_OPENCL_OPENCLWRAPPER_H_ and the "allheaders.h" include;
//  - IF_WINDOWS / NOT_WINDOWS, selected by platform-detection macros
//    (Windows/Cygwin/MinGW, __linux__, __APPLE__);
//  - PERF_COUNT_VERBOSE (defined as 1) and PERF_COUNT_REPORT_STR, the printf format
//    used by the timing macros;
//  - two variants of PERF_COUNT_START / PERF_COUNT_END / PERF_COUNT_SUB: one built on
//    QueryPerformanceFrequency/QueryPerformanceCounter and one built on
//    clock_gettime(CLOCK_MONOTONIC). The timing bodies are guarded by
//    PERF_COUNT_VERBOSE >= 2 (START/END) and >= 3 (SUB); empty definitions of the same
//    macros are also visible. PERF_COUNT_START declares locals (funct_name,
//    elapsed_time_sec, time_* variables) that PERF_COUNT_END / PERF_COUNT_SUB use.
//  - `#define strcasecmp strcmp`: NOTE(review) this maps the case-insensitive name to
//    a case-sensitive comparison; the guarding condition is not visible - confirm intent.
//  - size constants (MAX_KERNEL_STRING_LEN, MAX_CLFILE_NUM, MAX_CLKERNEL_NUM,
//    MAX_KERNEL_NAME_LEN), CL_QUEUE_THREAD_HANDLE_AMD (0x403E) and the GROUPSIZE_*
//    values (presumably OpenCL work-group dimensions - confirm against the kernels);
//  - the start of `typedef struct _KernelEnv`.
11 #ifndef TESSERACT_OPENCL_OPENCLWRAPPER_H_ 12 #define TESSERACT_OPENCL_OPENCLWRAPPER_H_ 15 #include "allheaders.h" 22 #if defined(WIN32) || defined(__WIN32__) || defined(_WIN32) || \ 23 defined(__CYGWIN__) || defined(__MINGW32__) 28 #define IF_WINDOWS(X) X 32 #define NOT_WINDOWS(X) 33 #elif defined( __linux__ ) 42 #define NOT_WINDOWS(X) X 43 #elif defined( __APPLE__ ) 52 #define NOT_WINDOWS(X) X 62 #define NOT_WINDOWS(X) X 77 #define PERF_COUNT_VERBOSE 1 78 #define PERF_COUNT_REPORT_STR "[%36s], %24s, %11.6f\n" 83 #if PERF_COUNT_VERBOSE >= 2 84 #define PERF_COUNT_START(FUNCT_NAME) \ 85 char *funct_name = FUNCT_NAME; \ 86 double elapsed_time_sec; \ 87 LARGE_INTEGER freq, time_funct_start, time_funct_end, time_sub_start, time_sub_end; \ 88 QueryPerformanceFrequency(&freq); \ 89 QueryPerformanceCounter(&time_funct_start); \ 90 time_sub_start = time_funct_start; \ 91 time_sub_end = time_funct_start; 93 #define PERF_COUNT_END \ 94 QueryPerformanceCounter(&time_funct_end); \ 95 elapsed_time_sec = (time_funct_end.QuadPart - time_funct_start.QuadPart) / \ 96 (double)(freq.QuadPart); \ 97 printf(PERF_COUNT_REPORT_STR, funct_name, "total", elapsed_time_sec); 99 #define PERF_COUNT_START(FUNCT_NAME) 100 #define PERF_COUNT_END 103 #if PERF_COUNT_VERBOSE >= 3 104 #define PERF_COUNT_SUB(SUB) \ 105 QueryPerformanceCounter(&time_sub_end); \ 106 elapsed_time_sec = (time_sub_end.QuadPart - time_sub_start.QuadPart) / \ 107 (double)(freq.QuadPart); \ 108 printf(PERF_COUNT_REPORT_STR, funct_name, SUB, elapsed_time_sec); \ 109 time_sub_start = time_sub_end; 111 #define PERF_COUNT_SUB(SUB) 118 #if PERF_COUNT_VERBOSE >= 2 119 #define PERF_COUNT_START(FUNCT_NAME) \ 120 char *funct_name = FUNCT_NAME; \ 121 double elapsed_time_sec; \ 122 timespec time_funct_start, time_funct_end, time_sub_start, time_sub_end; \ 123 clock_gettime( CLOCK_MONOTONIC, &time_funct_start ); \ 124 time_sub_start = time_funct_start; \ 125 time_sub_end = time_funct_start; 127 #define PERF_COUNT_END \ 128 
clock_gettime(CLOCK_MONOTONIC, &time_funct_end); \ 130 (time_funct_end.tv_sec - time_funct_start.tv_sec) * 1.0 + \ 131 (time_funct_end.tv_nsec - time_funct_start.tv_nsec) / 1000000000.0; \ 132 printf(PERF_COUNT_REPORT_STR, funct_name, "total", elapsed_time_sec); 134 #define PERF_COUNT_START(FUNCT_NAME) 135 #define PERF_COUNT_END 138 #if PERF_COUNT_VERBOSE >= 3 139 #define PERF_COUNT_SUB(SUB) \ 140 clock_gettime(CLOCK_MONOTONIC, &time_sub_end); \ 142 (time_sub_end.tv_sec - time_sub_start.tv_sec) * 1.0 + \ 143 (time_sub_end.tv_nsec - time_sub_start.tv_nsec) / 1000000000.0; \ 144 printf(PERF_COUNT_REPORT_STR, funct_name, SUB, elapsed_time_sec); \ 145 time_sub_start = time_sub_end; 147 #define PERF_COUNT_SUB(SUB) 159 #define strcasecmp strcmp 162 #define MAX_KERNEL_STRING_LEN 64 163 #define MAX_CLFILE_NUM 50 164 #define MAX_CLKERNEL_NUM 200 165 #define MAX_KERNEL_NAME_LEN 64 166 #define CL_QUEUE_THREAD_HANDLE_AMD 0x403E 167 #define GROUPSIZE_X 16 168 #define GROUPSIZE_Y 16 169 #define GROUPSIZE_HMORX 256 170 #define GROUPSIZE_HMORY 1 172 typedef struct _KernelEnv
// Fields of _KernelEnv visible here: an OpenCL context, command queue and program
// handle, followed by a 150-char buffer for a kernel name. The struct's braces and
// closing typedef name are not visible in this view.
174 cl_context mpkContext;
175 cl_command_queue mpkCmdQueue;
176 cl_program mpkProgram;
178 char mckKernelName[150];
// _OpenCLEnv groups four OpenCL handles: platform id, context, device id and
// command queue. The struct's braces and closing typedef name are not visible here.
181 typedef struct _OpenCLEnv
183 cl_platform_id mpOclPlatformID;
184 cl_context mpOclContext;
185 cl_device_id mpOclDevsID;
186 cl_command_queue mpOclCmdQueue;
// cl_kernel_function: pointer to a function that takes an opaque `void **userdata`
// and a `KernelEnv *` and returns int.
// NOTE(review): the meaning of the int return value is not shown here - confirm
// against the implementations that are stored in GPUEnv::mpArryKnelFuncs.
188 typedef int ( *cl_kernel_function )(
void **userdata, KernelEnv *kenv );
// CHECK_OPENCL(status, name): when `status` is not CL_SUCCESS, prints the error code
// and `name` with printf. Only part of the macro body is visible in this view, so
// any further action it takes (e.g. returning) cannot be confirmed from here.
// The same collapsed line also starts `typedef struct _GPUEnv`.
190 #define CHECK_OPENCL(status,name) \ 191 if( status != CL_SUCCESS ) \ 193 printf ("OpenCL error code is %d at when %s .\n", status, name); \ 197 typedef struct _GPUEnv
// Platform, device type, context, device list / selected device and command queue.
200 cl_platform_id mpPlatformID;
201 cl_device_type mDevType;
202 cl_context mpContext;
203 cl_device_id *mpArryDevsID;
204 cl_device_id mpDevID;
205 cl_command_queue mpCmdQueue;
// Kernel and program handle tables, both sized by MAX_CLFILE_NUM.
206 cl_kernel mpArryKernels[MAX_CLFILE_NUM];
207 cl_program mpArryPrograms[MAX_CLFILE_NUM];
// Fixed-size string tables: up to MAX_CLFILE_NUM source-file names of 256 chars, and
// up to MAX_CLKERNEL_NUM kernel names of MAX_KERNEL_STRING_LEN + 1 chars.
208 char mArryKnelSrcFile[MAX_CLFILE_NUM][256],
209 mArrykernelNames[MAX_CLKERNEL_NUM][MAX_KERNEL_STRING_LEN + 1];
// Table of cl_kernel_function pointers, sized by MAX_CLKERNEL_NUM.
210 cl_kernel_function mpArryKnelFuncs[MAX_CLKERNEL_NUM];
// Counters; this declaration continues past what is visible here (trailing comma),
// as does the end of the struct.
211 int mnKernelCount, mnFileCount,
// Static GPUEnv instance.
// NOTE(review): the enclosing scope (class or file) is not visible in this view.
223 static GPUEnv gpuEnv;
// Initialisation entry points; each returns int.
// NOTE(review): return-value conventions are not visible in this header - confirm
// against the implementation.
227 static int InitEnv();
// Takes a single `int argc` as declared here.
228 static int InitOpenclRunEnv(
int argc );
// Same visible signature as InitOpenclRunEnv; presumably the variant used together
// with device selection (see getDeviceSelection) - confirm.
229 static int InitOpenclRunEnv_DeviceSelection(
int argc );
// Kernel registration and teardown declarations; each returns int.
230 static int RegistOpenclKernel();
231 static int ReleaseOpenclRunEnv();
// Operates on the GPUEnv passed by pointer.
232 static int ReleaseOpenclEnv( GPUEnv *gpuInfo );
// CompileKernelFile: takes the GPUEnv to work on and a build-option string;
// returns int.
// NOTE(review): presumably `buildOption` is forwarded to the OpenCL program build -
// confirm against the implementation.
233 static int CompileKernelFile( GPUEnv *gpuInfo,
const char *buildOption );
// Declarations dealing with kernel program binaries; each returns int.
// NOTE(review): the descriptions below are based on names and parameter types only;
// confirm semantics against the implementation.

// Takes a read-only GPUEnv and a .cl file name (name suggests a cache lookup).
234 static int CachedOfKernerPrg(
const GPUEnv *gpuEnvCached,
const char * clFileName );
// Takes a cl_program and a .cl file name.
235 static int GeneratBinFromKernelSource( cl_program program,
const char * clFileName );
// Takes a file name, a byte buffer and its length in bytes.
236 static int WriteBinaryToFile(
const char* fileName,
const char* birary,
size_t numBytes );
// Takes a .cl file name and an out-parameter FILE handle.
237 static int BinaryGenerated(
const char * clFileName, FILE ** fhandle );
// TIFF-related declarations.
// pixReadFromTiffKernel: takes a l_uint32 buffer `tiffdata`, width `w`, height `h`,
// words-per-line `wpl` and a l_uint32 buffer `line`; returns a l_uint32 pointer.
// NOTE(review): which buffer the returned pointer refers to is not visible here.
239 static l_uint32* pixReadFromTiffKernel(l_uint32 *tiffdata,l_int32 w,l_int32 h,l_int32 wpl, l_uint32 *line);
// composeRGBPixelCl: takes int buffers `tiffdata` and `line` plus height and width
// (note the h-before-w parameter order); returns int.
240 static int composeRGBPixelCl(
int *tiffdata,
int *line,
int h,
int w);
// Morphology-related declarations.
// initMorphCLAllocations: takes words-per-line, height and a source Pix; returns int.
245 static int initMorphCLAllocations(l_int32 wpl, l_int32 h, Pix* pixs);
// releaseMorphCLBuffers: no parameters, no return value.
246 static void releaseMorphCLBuffers();
// pixGetLinesCL: takes destination/source Pix pointers, three Pix** out-parameters
// (pix_vline, pix_hline, pixClosed), a flag `getpixClosed`, and several l_int32 size
// parameters (close/open/line, h/v).
// NOTE(review): the end of this parameter list is not visible in this view (the last
// visible parameter is followed by a comma), so the full signature cannot be stated.
248 static void pixGetLinesCL(Pix *pixd, Pix *pixs, Pix **pix_vline,
249 Pix **pix_hline, Pix **pixClosed,
250 bool getpixClosed, l_int32 close_hsize,
251 l_int32 close_vsize, l_int32 open_hsize,
252 l_int32 open_vsize, l_int32 line_hsize,
// SetKernelEnv: takes a KernelEnv to work on; returns int.
// NOTE(review): presumably fills `envInfo` from the shared GPU environment - confirm.
257 static int SetKernelEnv( KernelEnv *envInfo );
// LoadOpencl / FreeOpenclDll: no parameters; names suggest loading and unloading the
// OpenCL runtime library - confirm against the implementation and any platform guards
// not visible here.
266 static int LoadOpencl();
269 static void FreeOpenclDll();
// AddKernelConfig: takes an int `kCount` and a kernel name; returns int.
272 inline static int AddKernelConfig(
int kCount,
const char *kName );
// HistogramRectOCL: visible parameters are the image byte buffer, bytes per pixel,
// bytes per line, the `left`/`top` coordinates and an int output buffer
// `histogramAllChannels`; returns int.
// NOTE(review): part of the original parameter list may be missing from this view
// (the embedded line numbers skip from 276 to 278), so treat this signature as
// incomplete until checked against the full header.
275 static int HistogramRectOCL(
unsigned char *imagedata,
int bytes_per_pixel,
276 int bytes_per_line,
int left,
int top,
278 int *histogramAllChannels);
// ThresholdRectToPixOCL: takes the image byte buffer with its bytes-per-pixel and
// bytes-per-line, int arrays `thresholds` and `hi_values`, a Pix** out-parameter
// `pix`, and the rectangle given as height, width, top, left (in that order);
// returns int.
// NOTE(review): the expected lengths of `thresholds` / `hi_values` and the ownership
// of `*pix` are not visible here - confirm against the implementation.
280 static int ThresholdRectToPixOCL(
unsigned char *imagedata,
281 int bytes_per_pixel,
int bytes_per_line,
282 int *thresholds,
int *hi_values, Pix **pix,
283 int rect_height,
int rect_width,
284 int rect_top,
int rect_left);
// Device-selection state and accessors.
// getDeviceSelection returns a ds_device by value; `selectedDevice` and
// `deviceIsSelected` are static variables of type ds_device and bool.
// NOTE(review): presumably getDeviceSelection caches its result in these two
// variables - confirm against the implementation. ds_device is declared elsewhere.
286 static ds_device getDeviceSelection();
287 static ds_device selectedDevice;
288 static bool deviceIsSelected;
// Returns bool; no parameters.
289 static bool selectedDeviceIsOpenCL();
293 #endif // TESSERACT_OPENCL_OPENCLWRAPPER_H_