41 #ifndef PCL_GPU_PEOPLE__NCV_HPP_
42 #define PCL_GPU_PEOPLE__NCV_HPP_
44 #if (defined _WIN32 || defined WINCE) && defined CVAPI_EXPORTS
45 #define NCV_EXPORTS __declspec(dllexport)
51 #define WIN32_LEAN_AND_MEAN
54 #include <cuda_runtime.h>
57 #include <pcl/console/print.h>
// Token-pasting helpers. NCV_CT_PREP_PASTE forwards to NCV_CT_PREP_PASTE_AUX so that
// its arguments (e.g. __LINE__) are macro-expanded before `##` concatenates them.
80 #define NCV_CT_PREP_PASTE_AUX(a,b) a##b
81 #define NCV_CT_PREP_PASTE(a,b) NCV_CT_PREP_PASTE_AUX(a, b)
// Compile-time assertion on the constant expression X.
// Expands to a typedef whose name is `__ct_assert_typedef_` followed by the current
// line number and whose type is NcvCTprep::assertTest<sizeof(NcvCTprep::CT_ASSERT_FAILURE<(bool)(X)>)>.
// NOTE(review): presumably CT_ASSERT_FAILURE<false> is left incomplete so that the sizeof
// fails to compile when X is false - confirm against the NcvCTprep definitions.
// The macro does not supply the terminating semicolon; the use site must.
86 #define NCV_CT_ASSERT(X) \
87 typedef NcvCTprep::assertTest<sizeof(NcvCTprep::CT_ASSERT_FAILURE< (bool)(X) >)> \
88 NCV_CT_PREP_PASTE(__ct_assert_typedef_, __LINE__)
96 #if !defined(__align__) && !defined(__CUDACC__)
97 #if defined(_WIN32) || defined(_WIN64)
98 #define __align__(n) __declspec(align(n))
99 #elif defined(__unix__)
100 #define __align__(n) __attribute__((__aligned__(n)))
// Boolean alias; a compile-time check in this file requires sizeof(NcvBool) <= 4.
110 using NcvBool = bool;
// Signed 64-bit integer alias; its size is pinned to 8 bytes by an NCV_CT_ASSERT in this file.
111 using Ncv64s =
long long;
113 #if defined(__APPLE__) && !defined(__CUDACC__)
114 using Ncv64u = std::uint64_t;
116 using Ncv64u =
unsigned long long;
// Basic scalar aliases. The digits in each name give the width in bits; the
// NCV_CT_ASSERT checks in this file enforce the matching sizeof for every alias.
// Unsigned 32-bit integer (sizeof == 4).
120 using Ncv32u =
unsigned int;
// Signed 16-bit integer (sizeof == 2).
121 using Ncv16s = short;
// Unsigned 16-bit integer (sizeof == 2).
122 using Ncv16u =
unsigned short;
// Unsigned 8-bit integer (sizeof == 1).
124 using Ncv8u =
unsigned char;
// 32-bit floating point (sizeof == 4).
125 using Ncv32f = float;
// 64-bit floating point (sizeof == 8).
126 using Ncv64f = double;
// Compile-time layout checks: the build fails if any basic alias does not have the
// byte size implied by its name (NcvBool only has to fit in 4 bytes).
// The struct checks at the end additionally require that NcvRect8u occupies exactly
// one Ncv32u, NcvRect32s / NcvRect32u exactly four 32-bit values, and NcvSize32u
// exactly two Ncv32u, i.e. that these structs contain no padding beyond their fields.
191 NCV_CT_ASSERT(
sizeof(NcvBool) <= 4);
192 NCV_CT_ASSERT(
sizeof(Ncv64s) == 8);
193 NCV_CT_ASSERT(
sizeof(Ncv64u) == 8);
194 NCV_CT_ASSERT(
sizeof(Ncv32s) == 4);
195 NCV_CT_ASSERT(
sizeof(Ncv32u) == 4);
196 NCV_CT_ASSERT(
sizeof(Ncv16s) == 2);
197 NCV_CT_ASSERT(
sizeof(Ncv16u) == 2);
198 NCV_CT_ASSERT(
sizeof(Ncv8s) == 1);
199 NCV_CT_ASSERT(
sizeof(Ncv8u) == 1);
200 NCV_CT_ASSERT(
sizeof(Ncv32f) == 4);
201 NCV_CT_ASSERT(
sizeof(Ncv64f) == 8);
202 NCV_CT_ASSERT(
sizeof(
NcvRect8u) ==
sizeof(Ncv32u));
203 NCV_CT_ASSERT(
sizeof(
NcvRect32s) == 4 *
sizeof(Ncv32s));
204 NCV_CT_ASSERT(
sizeof(
NcvRect32u) == 4 *
sizeof(Ncv32u));
205 NCV_CT_ASSERT(
sizeof(
NcvSize32u) == 2 *
sizeof(Ncv32u));
// Warp size constant and its base-2 logarithm (32 == 1 << 5).
// The two values are defined independently and must be kept in sync by hand.
215 const Ncv32u K_WARP_SIZE = 32;
216 const Ncv32u K_LOG2_WARP_SIZE = 5;
// Emits a diagnostic message. The assertion macros in this file build their
// text in a std::ostringstream and pass the resulting string to this function.
224 NCV_EXPORTS
void ncvDebugOutput(
const std::string &msg);
// Function type of a debug-output sink: takes the message by const reference, returns nothing.
226 using NCVDebugOutputHandler = void (
const std::string &);
// Registers `func` as the debug-output handler.
// NOTE(review): presumably ncvDebugOutput forwards to the registered handler - confirm in the
// implementation; behaviour for a null `func` is not visible from this header.
228 NCV_EXPORTS
void ncvSetDebugOutputHandler(NCVDebugOutputHandler* func);
230 #define ncvAssertPrintCheck(pred, msg) \
235 std::ostringstream oss; \
236 oss << "NCV Assertion Failed: " << msg << ", file=" << __FILE__ << ", line=" << __LINE__ << std::endl; \
237 ncvDebugOutput(oss.str()); \
241 #define ncvAssertPrintReturn(pred, msg, err) \
244 ncvAssertPrintCheck(pred, msg); \
245 if (!(pred)) return err; \
248 #define ncvAssertReturn(pred, err) \
249 ncvAssertPrintReturn(pred, "retcode=" << (int)err, err)
251 #define ncvAssertReturnNcvStat(ncvOp) \
254 NCVStatus _ncvStat = ncvOp; \
255 ncvAssertPrintReturn(NCV_SUCCESS==_ncvStat, "NcvStat=" << (int)_ncvStat, _ncvStat); \
258 #define ncvAssertCUDAReturn(cudacall, errCode) \
261 cudaError_t res = cudacall; \
262 ncvAssertPrintReturn(cudaSuccess==res, "cudaError_t=" << res, errCode); \
265 #define ncvAssertCUDALastErrorReturn(errCode) \
268 cudaError_t res = cudaGetLastError(); \
269 ncvAssertPrintReturn(cudaSuccess==res, "cudaError_t=" << res, errCode); \
286 NCV_INCONSISTENT_INPUT,
287 NCV_TEXTURE_BIND_ERROR,
288 NCV_DIMENSIONS_INVALID,
294 NCV_ALLOCATOR_NOT_INITIALIZED,
295 NCV_ALLOCATOR_BAD_ALLOC,
296 NCV_ALLOCATOR_BAD_DEALLOC,
297 NCV_ALLOCATOR_INSUFFICIENT_CAPACITY,
298 NCV_ALLOCATOR_DEALLOC_ORDER,
299 NCV_ALLOCATOR_BAD_REUSE,
302 NCV_MEM_RESIDENCE_ERROR,
303 NCV_MEM_INSUFFICIENT_CAPACITY,
305 NCV_HAAR_INVALID_PIXEL_STEP,
306 NCV_HAAR_TOO_MANY_FEATURES_IN_CLASSIFIER,
307 NCV_HAAR_TOO_MANY_FEATURES_IN_CASCADE,
308 NCV_HAAR_TOO_LARGE_FEATURES,
309 NCV_HAAR_XML_LOADING_EXCEPTION,
311 NCV_NOIMPL_HAAR_TILTED_FEATURES,
314 NCV_WARNING_HAAR_DETECTIONS_VECTOR_OVERFLOW,
317 NPPST_SUCCESS = NCV_SUCCESS,
319 NPPST_CUDA_KERNEL_EXECUTION_ERROR,
320 NPPST_NULL_POINTER_ERROR,
321 NPPST_TEXTURE_BIND_ERROR,
330 NPPST_MEM_INSUFFICIENT_BUFFER,
331 NPPST_MEM_RESIDENCE_ERROR,
332 NPPST_MEM_INTERNAL_ERROR,
// Status/return-code type used by the NCV API. Values are carried as a plain Ncv32u;
// the assertion macros in this file compare them against NCV_SUCCESS.
337 using NCVStatus = Ncv32u;
// Declares a local `bool __ncv_skip_cond` initialised from `x` in the enclosing scope.
// NCV_SKIP_COND_BEGIN later tests this variable (`if (!__ncv_skip_cond) {`), so this
// macro has to appear earlier in the same scope as the BEGIN/END pair.
// The expansion has no trailing semicolon; the use site must supply it.
// NOTE(review): identifiers containing a double underscore are reserved in C++.
339 #define NCV_SET_SKIP_COND(x) \
340 bool __ncv_skip_cond = x
342 #define NCV_RESET_SKIP_COND(x) \
345 #define NCV_SKIP_COND_BEGIN \
346 if (!__ncv_skip_cond) {
348 #define NCV_SKIP_COND_END \
// Alignment helper; the matrix classes in this file use it to turn a row width in
// bytes into a pitch: `pitchBytes = alignUp(widthBytes, alignment)`.
// NOTE(review): presumably returns `what` rounded up to the next multiple of `alignment`;
// whether `alignment` must be a power of two is not visible here - confirm in the implementation.
362 NCV_EXPORTS Ncv32u alignUp(Ncv32u what, Ncv32u alignment);
371 NCVMemoryTypeHostPageable,
372 NCVMemoryTypeHostPinned,
// Pure-virtual allocator interface members.
// alloc: requests `size` bytes (callers in this file pass `length * sizeof(T)`) and
// describes the result in `seg`; returns an NCVStatus that callers compare to NCV_SUCCESS.
407 virtual NCVStatus alloc(
NCVMemSegment &seg, std::size_t size) = 0;
// Read-only state queries.
// NOTE(review): isCounting presumably reports a size-counting (dry-run) mode - confirm.
410 virtual NcvBool isInitialized()
const = 0;
411 virtual NcvBool isCounting()
const = 0;
// Memory type served by this allocator.
413 virtual NCVMemoryType memType()
const = 0;
// Alignment value; the matrix allocation code passes it to alignUp() to compute the row pitch.
414 virtual Ncv32u alignment()
const = 0;
// NOTE(review): presumably the peak or maximum allocation size in bytes - confirm in implementations.
415 virtual std::size_t maxSize()
const = 0;
// Constructs a stack allocator for memory of type `memT` with the given `capacity`
// and `alignment`. `reusePtr` is optional and defaults to nullptr.
// NOTE(review): presumably a non-null `reusePtr` makes the allocator operate on a
// caller-provided buffer instead of allocating its own (cf. the bReusesMemory member) - confirm.
432 NCVMemStackAllocator(NCVMemoryType memT, std::size_t capacity, Ncv32u alignment,
void *reusePtr=
nullptr);
441 virtual NCVMemoryType
memType()
const;
443 virtual std::size_t
maxSize()
const;
447 NCVMemoryType _memType;
452 std::size_t currentSize;
453 std::size_t _maxSize;
454 NcvBool bReusesMemory;
474 virtual NCVMemoryType
memType()
const;
476 virtual std::size_t
maxSize()
const;
483 NCVMemoryType _memType;
485 std::size_t currentSize;
486 std::size_t _maxSize;
// Linear copy helper: `sz` with source/destination pointers and their memory types,
// issued with respect to `cuStream`. Returns an NCVStatus.
// NOTE(review): presumably `sz` is a byte count and the copy direction is chosen from
// (dstType, srcType) - confirm in the implementation. Whether the call returns before
// the copy has completed is not visible from this header.
493 NCV_EXPORTS NCVStatus memSegCopyHelper(
void *dst, NCVMemoryType dstType,
494 const void *src, NCVMemoryType srcType,
495 std::size_t sz, cudaStream_t cuStream);
// Pitched (2D) copy helper: copies `height` rows of `widthbytes` each between buffers
// whose row strides are `dstPitch` and `srcPitch`. Returns an NCVStatus.
// NOTE(review): pitches are presumably in bytes, matching `widthbytes` - confirm.
498 NCV_EXPORTS NCVStatus memSegCopyHelper2D(
void *dst, Ncv32u dstPitch, NCVMemoryType dstType,
499 const void *src, Ncv32u srcPitch, NCVMemoryType srcType,
500 Ncv32u widthbytes, Ncv32u height, cudaStream_t cuStream);
531 ncvAssertReturn(dst.
_length == this->_length, NCV_MEM_COPY_ERROR);
532 howMuch = this->
_length *
sizeof(T);
536 ncvAssertReturn(dst.
_length *
sizeof(T) >= howMuch &&
537 this->_length *
sizeof(T) >= howMuch &&
538 howMuch > 0, NCV_MEM_COPY_ERROR);
540 ncvAssertReturn((this->
_ptr !=
nullptr || this->
_memtype == NCVMemoryTypeNone) &&
541 (dst.
_ptr !=
nullptr || dst.
_memtype == NCVMemoryTypeNone), NCV_NULL_PTR);
543 NCVStatus ncvStat = NCV_SUCCESS;
544 if (this->
_memtype != NCVMemoryTypeNone)
547 this->_ptr, this->_memtype,
585 this->allocatedMem.
clear();
587 ncvStat = allocator.
alloc(this->allocatedMem,
length *
sizeof(T));
588 ncvAssertPrintReturn(ncvStat == NCV_SUCCESS,
"NCVVectorAlloc ctor:: alloc failed", );
592 this->_memtype = this->allocatedMem.begin.memtype;
599 ncvStat = allocator.
dealloc(this->allocatedMem);
600 ncvAssertPrintCheck(ncvStat == NCV_SUCCESS,
"NCVVectorAlloc dtor:: dealloc failed");
639 this->bReused =
false;
646 this->bReused =
true;
651 this->bReused =
false;
654 ncvAssertPrintReturn(
length *
sizeof(T) <= memSegment.
size, \
655 "NCVVectorReuse ctor:: memory binding failed due to size mismatch", );
661 this->bReused =
true;
666 return this->bReused;
703 return _pitch /
sizeof(T);
711 ncvAssertReturn(dst.
_pitch == this->_pitch &&
712 dst.
_height == this->_height, NCV_MEM_COPY_ERROR);
718 this->_pitch * this->_height >= howMuch &&
719 howMuch > 0, NCV_MEM_COPY_ERROR);
721 ncvAssertReturn((this->
_ptr !=
nullptr || this->
_memtype == NCVMemoryTypeNone) &&
722 (dst.
_ptr !=
nullptr || dst.
_memtype == NCVMemoryTypeNone), NCV_NULL_PTR);
724 NCVStatus ncvStat = NCV_SUCCESS;
725 if (this->
_memtype != NCVMemoryTypeNone)
728 this->_ptr, this->_memtype,
737 ncvAssertReturn(this->
width() >= roi.
width && this->height() >= roi.
height &&
739 ncvAssertReturn((this->
_ptr != NULL || this->
_memtype == NCVMemoryTypeNone) &&
740 (dst.
_ptr != NULL || dst.
_memtype == NCVMemoryTypeNone), NCV_NULL_PTR);
742 NCVStatus ncvStat = NCV_SUCCESS;
743 if (this->
_memtype != NCVMemoryTypeNone)
746 this->_ptr, this->_pitch, this->_memtype,
753 T &
at(Ncv32u x, Ncv32u y)
const
755 NcvBool bOutRange = (x >= this->
_width || y >= this->
_height);
756 ncvAssertPrintCheck(!bOutRange,
"Error addressing matrix at [" << x <<
", " << y <<
"]");
761 return ((T *)((Ncv8u *)this->
_ptr + y * this->
_pitch))[x];
799 this->allocatedMem.
clear();
801 Ncv32u widthBytes =
width *
sizeof(T);
802 Ncv32u pitchBytes = alignUp(widthBytes, allocator.
alignment());
806 ncvAssertPrintReturn(
pitch >= pitchBytes &&
808 "NCVMatrixAlloc ctor:: incorrect pitch passed", );
812 Ncv32u requiredAllocSize = pitchBytes *
height;
814 ncvStat = allocator.
alloc(this->allocatedMem, requiredAllocSize);
815 ncvAssertPrintReturn(ncvStat == NCV_SUCCESS,
"NCVMatrixAlloc ctor:: alloc failed", );
818 this->_width =
width;
820 this->_pitch = pitchBytes;
821 this->_memtype = this->allocatedMem.begin.memtype;
828 ncvStat = allocator.
dealloc(this->allocatedMem);
829 ncvAssertPrintCheck(ncvStat == NCV_SUCCESS,
"NCVMatrixAlloc dtor:: dealloc failed");
869 this->bReused =
false;
872 Ncv32u widthBytes =
width *
sizeof(T);
873 Ncv32u pitchBytes = alignUp(widthBytes, alignment);
877 if (!bSkipPitchCheck)
879 ncvAssertPrintReturn(
pitch >= pitchBytes &&
880 (
pitch & (alignment - 1)) == 0,
881 "NCVMatrixReuse ctor:: incorrect pitch passed", );
885 ncvAssertPrintReturn(
pitch >= widthBytes,
"NCVMatrixReuse ctor:: incorrect pitch passed", );
890 ncvAssertPrintReturn(pitchBytes *
height <= memSegment.
size, \
891 "NCVMatrixReuse ctor:: memory binding failed due to size mismatch", );
895 this->
_pitch = pitchBytes;
899 this->bReused =
true;
904 this->bReused =
false;
907 ncvAssertPrintReturn(roi.
x < mat.
width() && roi.
y < mat.
height() && \
909 "NCVMatrixReuse ctor:: memory binding failed due to mismatching ROI and source matrix dims", );
917 this->bReused =
true;
922 return this->bReused;
933 NCV_EXPORTS NCVStatus ncvGroupRectangles_host(
NCVVector<NcvRect32u> &hypotheses, Ncv32u &numHypotheses,
// Rectangle-drawing entry points for 8-bit and 32-bit images. Each takes the destination
// image (pointer, stride, width, height), an array of `numRects` rectangles and the
// colour value to draw with, and returns an NCVStatus.
// NOTE(review): the h_/d_ pointer prefixes presumably mean host and device memory
// respectively; the unit of `dstStride` (elements vs. bytes) is not visible here - confirm.
936 NCV_EXPORTS NCVStatus ncvDrawRects_8u_host(Ncv8u *h_dst, Ncv32u dstStride, Ncv32u dstWidth, Ncv32u dstHeight,
937 NcvRect32u *h_rects, Ncv32u numRects, Ncv8u color);
939 NCV_EXPORTS NCVStatus ncvDrawRects_32u_host(Ncv32u *h_dst, Ncv32u dstStride, Ncv32u dstWidth, Ncv32u dstHeight,
940 NcvRect32u *h_rects, Ncv32u numRects, Ncv32u color);
// The _device variants additionally take the CUDA stream to use.
942 NCV_EXPORTS NCVStatus ncvDrawRects_8u_device(Ncv8u *d_dst, Ncv32u dstStride, Ncv32u dstWidth, Ncv32u dstHeight,
943 NcvRect32u *d_rects, Ncv32u numRects, Ncv8u color, cudaStream_t cuStream);
945 NCV_EXPORTS NCVStatus ncvDrawRects_32u_device(Ncv32u *d_dst, Ncv32u dstStride, Ncv32u dstWidth, Ncv32u dstHeight,
946 NcvRect32u *d_rects, Ncv32u numRects, Ncv32u color, cudaStream_t cuStream);
// Clamping macros. Arguments are fully parenthesised but may be evaluated more than
// once, so do not pass expressions with side effects.
// CLAMP(x,a,b): yields b when x > b, a when x < a, otherwise x.
948 #define CLAMP(x,a,b) ( (x) > (b) ? (b) : ( (x) < (a) ? (a) : (x) ) )
// CLAMP_TOP(x,a): upper bound only - yields a when x > a, otherwise x.
949 #define CLAMP_TOP(x, a) (((x) > (a)) ? (a) : (x))
// CLAMP_BOTTOM(x,a): lower bound only - yields a when x < a, otherwise x.
950 #define CLAMP_BOTTOM(x, a) (((x) < (a)) ? (a) : (x))
// CLAMP_0_255(x): CLAMP with the fixed range [0, 255].
951 #define CLAMP_0_255(x) CLAMP(x,0,255)
// Macros that wrap a function in an anonymous struct so that it can be declared
// where a plain function definition is not allowed.
// SUB_BEGIN(type, name): opens an unnamed struct and begins the declaration of an
//   `__inline` member function `name` returning `type`; the parameter list and body
//   are written by the user directly after the macro.
// SUB_END(name): closes the struct and declares an object of it, also called `name`.
// SUB_CALL(name): expands to `name.name`, i.e. the member function on that object;
//   the call arguments follow the macro.
953 #define SUB_BEGIN(type, name) struct { __inline type name
954 #define SUB_END(name) } name;
955 #define SUB_CALL(name) name.name
// Square of x. The argument is evaluated twice, so avoid expressions with side effects.
957 #define SQR(x) ((x)*(x))
// Declares `NCVMatrixAlloc<type> name(alloc, width, height)` in the current scope and
// then, via ncvAssertReturn, returns `err` from the enclosing function if
// name.isMemAllocated() is false.
// The expansion is two statements and already ends with a semicolon. It is deliberately
// not wrapped in a block, since `name` must stay visible to the code that follows, so it
// cannot be used as the unbraced body of an if/else/loop.
959 #define ncvSafeMatAlloc(name, type, alloc, width, height, err) \
960 NCVMatrixAlloc<type> name(alloc, width, height); \
961 ncvAssertReturn(name.isMemAllocated(), err);
963 #endif // PCL_GPU_PEOPLE__NCV_HPP_