41 #ifndef PCL_GPU_PEOPLE__NCV_HPP_
42 #define PCL_GPU_PEOPLE__NCV_HPP_
44 #if (defined _WIN32 || defined WINCE) && defined CVAPI_EXPORTS
45 #define NCV_EXPORTS __declspec(dllexport)
51 #define WIN32_LEAN_AND_MEAN
54 #include <cuda_runtime.h>
57 #include <pcl/console/print.h>
// Two-level token pasting. The outer macro exists so that its arguments
// (for example __LINE__) are macro-expanded first; the inner macro then
// joins the two expanded tokens with ##.
#define NCV_CT_PREP_PASTE_AUX(lhs, rhs) lhs##rhs
#define NCV_CT_PREP_PASTE(lhs, rhs)     NCV_CT_PREP_PASTE_AUX(lhs, rhs)
/**
 * Compile-time assertion: compilation fails when X evaluates to false.
 *
 * Implemented with the language-level static_assert instead of the older
 * typedef/sizeof trick, so a failure reports the offending expression and no
 * helper typedef is emitted per use. X is converted to bool explicitly, as the
 * previous implementation did, so any constant expression convertible to bool
 * is accepted.
 *
 * Usage is unchanged: NCV_CT_ASSERT(expr);  (the caller supplies the ';').
 */
#define NCV_CT_ASSERT(X) \
    static_assert(static_cast<bool>(X), "NCV_CT_ASSERT failed: " #X)
96 #if !defined(__align__) && !defined(__CUDACC__)
97 #if defined(_WIN32) || defined(_WIN64)
98 #define __align__(n) __declspec(align(n))
99 #elif defined(__unix__)
100 #define __align__(n) __attribute__((__aligned__(n)))
110 using NcvBool = bool;
111 using Ncv64s =
long long;
113 #if defined(__APPLE__) && !defined(__CUDACC__)
114 using Ncv64u = std::uint64_t;
116 using Ncv64u =
unsigned long long;
120 using Ncv32u =
unsigned int;
121 using Ncv16s = short;
122 using Ncv16u =
unsigned short;
124 using Ncv8u =
unsigned char;
125 using Ncv32f = float;
126 using Ncv64f = double;
191 NCV_CT_ASSERT(
sizeof(NcvBool) <= 4);
192 NCV_CT_ASSERT(
sizeof(Ncv64s) == 8);
193 NCV_CT_ASSERT(
sizeof(Ncv64u) == 8);
194 NCV_CT_ASSERT(
sizeof(Ncv32s) == 4);
195 NCV_CT_ASSERT(
sizeof(Ncv32u) == 4);
196 NCV_CT_ASSERT(
sizeof(Ncv16s) == 2);
197 NCV_CT_ASSERT(
sizeof(Ncv16u) == 2);
198 NCV_CT_ASSERT(
sizeof(Ncv8s) == 1);
199 NCV_CT_ASSERT(
sizeof(Ncv8u) == 1);
200 NCV_CT_ASSERT(
sizeof(Ncv32f) == 4);
201 NCV_CT_ASSERT(
sizeof(Ncv64f) == 8);
202 NCV_CT_ASSERT(
sizeof(
NcvRect8u) ==
sizeof(Ncv32u));
203 NCV_CT_ASSERT(
sizeof(
NcvRect32s) == 4 *
sizeof(Ncv32s));
204 NCV_CT_ASSERT(
sizeof(
NcvRect32u) == 4 *
sizeof(Ncv32u));
205 NCV_CT_ASSERT(
sizeof(
NcvSize32u) == 2 *
sizeof(Ncv32u));
215 constexpr Ncv32u K_WARP_SIZE = 32;
216 constexpr Ncv32u K_LOG2_WARP_SIZE = 5;
224 NCV_EXPORTS
void ncvDebugOutput(
const std::string &msg);
226 using NCVDebugOutputHandler = void (
const std::string &);
228 NCV_EXPORTS
void ncvSetDebugOutputHandler(NCVDebugOutputHandler* func);
230 #define ncvAssertPrintCheck(pred, msg) \
235 std::ostringstream oss; \
236 oss << "NCV Assertion Failed: " << msg << ", file=" << __FILE__ << ", line=" << __LINE__ << std::endl; \
237 ncvDebugOutput(oss.str()); \
241 #define ncvAssertPrintReturn(pred, msg, err) \
244 ncvAssertPrintCheck(pred, msg); \
245 if (!(pred)) return err; \
/**
 * Checks pred; on failure reports "retcode=<err as int>" through
 * ncvAssertPrintReturn and returns err from the enclosing function.
 *
 * err is parenthesised before the int cast so that compound arguments
 * (e.g. `cond ? A : B` or `base + offset`) are converted as a whole rather
 * than having the cast bind to their first operand inside the stream
 * expression.
 */
#define ncvAssertReturn(pred, err) \
    ncvAssertPrintReturn(pred, "retcode=" << (int)(err), err)
251 #define ncvAssertReturnNcvStat(ncvOp) \
254 NCVStatus _ncvStat = ncvOp; \
255 ncvAssertPrintReturn(NCV_SUCCESS==_ncvStat, "NcvStat=" << (int)_ncvStat, _ncvStat); \
258 #define ncvAssertCUDAReturn(cudacall, errCode) \
261 cudaError_t res = cudacall; \
262 ncvAssertPrintReturn(cudaSuccess==res, "cudaError_t=" << res, errCode); \
265 #define ncvAssertCUDALastErrorReturn(errCode) \
268 cudaError_t res = cudaGetLastError(); \
269 ncvAssertPrintReturn(cudaSuccess==res, "cudaError_t=" << res, errCode); \
286 NCV_INCONSISTENT_INPUT,
287 NCV_TEXTURE_BIND_ERROR,
288 NCV_DIMENSIONS_INVALID,
294 NCV_ALLOCATOR_NOT_INITIALIZED,
295 NCV_ALLOCATOR_BAD_ALLOC,
296 NCV_ALLOCATOR_BAD_DEALLOC,
297 NCV_ALLOCATOR_INSUFFICIENT_CAPACITY,
298 NCV_ALLOCATOR_DEALLOC_ORDER,
299 NCV_ALLOCATOR_BAD_REUSE,
302 NCV_MEM_RESIDENCE_ERROR,
303 NCV_MEM_INSUFFICIENT_CAPACITY,
305 NCV_HAAR_INVALID_PIXEL_STEP,
306 NCV_HAAR_TOO_MANY_FEATURES_IN_CLASSIFIER,
307 NCV_HAAR_TOO_MANY_FEATURES_IN_CASCADE,
308 NCV_HAAR_TOO_LARGE_FEATURES,
309 NCV_HAAR_XML_LOADING_EXCEPTION,
311 NCV_NOIMPL_HAAR_TILTED_FEATURES,
314 NCV_WARNING_HAAR_DETECTIONS_VECTOR_OVERFLOW,
317 NPPST_SUCCESS = NCV_SUCCESS,
319 NPPST_CUDA_KERNEL_EXECUTION_ERROR,
320 NPPST_NULL_POINTER_ERROR,
321 NPPST_TEXTURE_BIND_ERROR,
330 NPPST_MEM_INSUFFICIENT_BUFFER,
331 NPPST_MEM_RESIDENCE_ERROR,
332 NPPST_MEM_INTERNAL_ERROR,
337 using NCVStatus = Ncv32u;
// Declares the local flag __ncv_skip_cond, initialised from the given
// condition; NCV_SKIP_COND_BEGIN tests this flag. The expansion carries no
// trailing ';' - the caller supplies it.
#define NCV_SET_SKIP_COND(cond) bool __ncv_skip_cond = (cond)
342 #define NCV_RESET_SKIP_COND(x) \
// Opens a block that runs only while __ncv_skip_cond is false. The brace
// opened here must be closed by the caller's matching end macro.
#define NCV_SKIP_COND_BEGIN if (!__ncv_skip_cond) {
348 #define NCV_SKIP_COND_END \
362 NCV_EXPORTS Ncv32u alignUp(Ncv32u what, Ncv32u alignment);
371 NCVMemoryTypeHostPageable,
372 NCVMemoryTypeHostPinned,
444 NCVMemoryType _memType;
449 std::size_t currentSize;
450 std::size_t _maxSize;
451 NcvBool bReusesMemory;
480 NCVMemoryType _memType;
482 std::size_t currentSize;
483 std::size_t _maxSize;
490 NCV_EXPORTS NCVStatus memSegCopyHelper(
void *dst, NCVMemoryType dstType,
491 const void *src, NCVMemoryType srcType,
492 std::size_t sz, cudaStream_t cuStream);
495 NCV_EXPORTS NCVStatus memSegCopyHelper2D(
void *dst, Ncv32u dstPitch, NCVMemoryType dstType,
496 const void *src, Ncv32u srcPitch, NCVMemoryType srcType,
497 Ncv32u widthbytes, Ncv32u height, cudaStream_t cuStream);
526 ncvAssertReturn(dst.
_length == this->_length, NCV_MEM_COPY_ERROR);
527 howMuch = this->
_length *
sizeof(T);
531 ncvAssertReturn(dst.
_length *
sizeof(T) >= howMuch &&
532 this->_length *
sizeof(T) >= howMuch &&
533 howMuch > 0, NCV_MEM_COPY_ERROR);
535 ncvAssertReturn((this->
_ptr !=
nullptr || this->
_memtype == NCVMemoryTypeNone) &&
536 (dst.
_ptr !=
nullptr || dst.
_memtype == NCVMemoryTypeNone), NCV_NULL_PTR);
538 NCVStatus ncvStat = NCV_SUCCESS;
539 if (this->
_memtype != NCVMemoryTypeNone)
542 this->_ptr, this->_memtype,
580 this->allocatedMem.
clear();
582 ncvStat = allocator.
alloc(this->allocatedMem,
length *
sizeof(T));
583 ncvAssertPrintReturn(ncvStat == NCV_SUCCESS,
"NCVVectorAlloc ctor:: alloc failed", );
587 this->_memtype = this->allocatedMem.begin.memtype;
594 ncvStat = allocator.
dealloc(this->allocatedMem);
595 ncvAssertPrintCheck(ncvStat == NCV_SUCCESS,
"NCVVectorAlloc dtor:: dealloc failed");
634 this->bReused =
false;
641 this->bReused =
true;
646 this->bReused =
false;
649 ncvAssertPrintReturn(
length *
sizeof(T) <= memSegment.
size, \
650 "NCVVectorReuse ctor:: memory binding failed due to size mismatch", );
656 this->bReused =
true;
661 return this->bReused;
696 return _pitch /
sizeof(T);
704 ncvAssertReturn(dst.
_pitch == this->_pitch &&
705 dst.
_height == this->_height, NCV_MEM_COPY_ERROR);
711 this->_pitch * this->_height >= howMuch &&
712 howMuch > 0, NCV_MEM_COPY_ERROR);
714 ncvAssertReturn((this->
_ptr !=
nullptr || this->
_memtype == NCVMemoryTypeNone) &&
715 (dst.
_ptr !=
nullptr || dst.
_memtype == NCVMemoryTypeNone), NCV_NULL_PTR);
717 NCVStatus ncvStat = NCV_SUCCESS;
718 if (this->
_memtype != NCVMemoryTypeNone)
721 this->_ptr, this->_memtype,
730 ncvAssertReturn(this->
width() >= roi.
width && this->height() >= roi.
height &&
732 ncvAssertReturn((this->
_ptr != NULL || this->
_memtype == NCVMemoryTypeNone) &&
733 (dst.
_ptr != NULL || dst.
_memtype == NCVMemoryTypeNone), NCV_NULL_PTR);
735 NCVStatus ncvStat = NCV_SUCCESS;
736 if (this->
_memtype != NCVMemoryTypeNone)
739 this->_ptr, this->_pitch, this->_memtype,
746 T &
at(Ncv32u x, Ncv32u y)
const
748 NcvBool bOutRange = (x >= this->
_width || y >= this->
_height);
749 ncvAssertPrintCheck(!bOutRange,
"Error addressing matrix at [" << x <<
", " << y <<
"]");
754 return ((T *)((Ncv8u *)this->
_ptr + y * this->
_pitch))[x];
792 this->allocatedMem.
clear();
794 Ncv32u widthBytes =
width *
sizeof(T);
795 Ncv32u pitchBytes = alignUp(widthBytes, allocator.
alignment());
799 ncvAssertPrintReturn(
pitch >= pitchBytes &&
801 "NCVMatrixAlloc ctor:: incorrect pitch passed", );
805 Ncv32u requiredAllocSize = pitchBytes *
height;
807 ncvStat = allocator.
alloc(this->allocatedMem, requiredAllocSize);
808 ncvAssertPrintReturn(ncvStat == NCV_SUCCESS,
"NCVMatrixAlloc ctor:: alloc failed", );
811 this->_width =
width;
813 this->_pitch = pitchBytes;
814 this->_memtype = this->allocatedMem.begin.memtype;
821 ncvStat = allocator.
dealloc(this->allocatedMem);
822 ncvAssertPrintCheck(ncvStat == NCV_SUCCESS,
"NCVMatrixAlloc dtor:: dealloc failed");
862 this->bReused =
false;
865 Ncv32u widthBytes =
width *
sizeof(T);
866 Ncv32u pitchBytes = alignUp(widthBytes, alignment);
870 if (!bSkipPitchCheck)
872 ncvAssertPrintReturn(
pitch >= pitchBytes &&
873 (
pitch & (alignment - 1)) == 0,
874 "NCVMatrixReuse ctor:: incorrect pitch passed", );
878 ncvAssertPrintReturn(
pitch >= widthBytes,
"NCVMatrixReuse ctor:: incorrect pitch passed", );
883 ncvAssertPrintReturn(pitchBytes *
height <= memSegment.
size, \
884 "NCVMatrixReuse ctor:: memory binding failed due to size mismatch", );
888 this->
_pitch = pitchBytes;
892 this->bReused =
true;
897 this->bReused =
false;
900 ncvAssertPrintReturn(roi.
x < mat.
width() && roi.
y < mat.
height() && \
902 "NCVMatrixReuse ctor:: memory binding failed due to mismatching ROI and source matrix dims", );
910 this->bReused =
true;
915 return this->bReused;
926 NCV_EXPORTS NCVStatus ncvGroupRectangles_host(
NCVVector<NcvRect32u> &hypotheses, Ncv32u &numHypotheses,
929 NCV_EXPORTS NCVStatus ncvDrawRects_8u_host(Ncv8u *h_dst, Ncv32u dstStride, Ncv32u dstWidth, Ncv32u dstHeight,
930 NcvRect32u *h_rects, Ncv32u numRects, Ncv8u color);
932 NCV_EXPORTS NCVStatus ncvDrawRects_32u_host(Ncv32u *h_dst, Ncv32u dstStride, Ncv32u dstWidth, Ncv32u dstHeight,
933 NcvRect32u *h_rects, Ncv32u numRects, Ncv32u color);
935 NCV_EXPORTS NCVStatus ncvDrawRects_8u_device(Ncv8u *d_dst, Ncv32u dstStride, Ncv32u dstWidth, Ncv32u dstHeight,
936 NcvRect32u *d_rects, Ncv32u numRects, Ncv8u color, cudaStream_t cuStream);
938 NCV_EXPORTS NCVStatus ncvDrawRects_32u_device(Ncv32u *d_dst, Ncv32u dstStride, Ncv32u dstWidth, Ncv32u dstHeight,
939 NcvRect32u *d_rects, Ncv32u numRects, Ncv32u color, cudaStream_t cuStream);
// Range-limiting helpers. These are plain macros, so an argument may be
// evaluated more than once - do not pass expressions with side effects.

// Yields hi when val > hi, otherwise lo when val < lo, otherwise val.
#define CLAMP(val, lo, hi) ( (val) > (hi) ? (hi) : ( (val) < (lo) ? (lo) : (val) ) )
// Upper bound only: yields limit when val > limit, otherwise val.
#define CLAMP_TOP(val, limit) (((val) > (limit)) ? (limit) : (val))
// Lower bound only: yields limit when val < limit, otherwise val.
#define CLAMP_BOTTOM(val, limit) (((val) < (limit)) ? (limit) : (val))
// CLAMP specialised to the range [0, 255].
#define CLAMP_0_255(val) CLAMP(val,0,255)
// Local "sub-routine" helpers built from an unnamed struct with a single
// member function:
//   SUB_BEGIN(ret, fn)(args) { body } SUB_END(fn)   defines the routine,
//   SUB_CALL(fn)(args)                              invokes it.
// The struct instance and its member function share the same identifier.
#define SUB_BEGIN(retType, fn) struct { __inline retType fn
#define SUB_END(fn) } fn;
#define SUB_CALL(fn) fn.fn
// Square of the argument. The argument is evaluated twice, so it must not
// have side effects.
#define SQR(val) ((val)*(val))
// Declares `NCVMatrixAlloc<elemType> matName(allocator, cols, rows)` and then,
// via ncvAssertReturn, returns errCode from the enclosing function when
// matName.isMemAllocated() is false.
// The expansion is two statements and already ends in ';', so it must not be
// used as the sole body of an unbraced if/else/for.
#define ncvSafeMatAlloc(matName, elemType, allocator, cols, rows, errCode) \
    NCVMatrixAlloc<elemType> matName(allocator, cols, rows);               \
    ncvAssertReturn(matName.isMemAllocated(), errCode);
INCVMemAllocator (Interface)
virtual NcvBool isInitialized() const =0
virtual NCVStatus alloc(NCVMemSegment &seg, std::size_t size)=0
virtual NCVStatus dealloc(NCVMemSegment &seg)=0
virtual std::size_t maxSize() const =0
virtual Ncv32u alignment() const =0
virtual ~INCVMemAllocator()=default
virtual NcvBool isCounting() const =0
virtual NCVMemoryType memType() const =0
NcvBool isMemAllocated() const
Ncv32u getAllocatorsAlignment() const
NCVMatrixAlloc(INCVMemAllocator &allocator, Ncv32u width, Ncv32u height, Ncv32u pitch=0)
NCVMemSegment getSegment() const
NCVMemoryType memType() const
NCVStatus copySolid(NCVMatrix< T > &dst, cudaStream_t cuStream, std::size_t howMuch=0) const
T & at(Ncv32u x, Ncv32u y) const
NCVStatus copy2D(NCVMatrix< T > &dst, NcvSize32u roi, cudaStream_t cuStream) const
NCVMatrixReuse(const NCVMemSegment &memSegment, Ncv32u alignment, Ncv32u width, Ncv32u height, Ncv32u pitch=0, NcvBool bSkipPitchCheck=false)
NCVMatrixReuse(const NCVMatrix< T > &mat, NcvRect32u roi)
NcvBool isMemReused() const
virtual ~NCVMemNativeAllocator()
NCVMemNativeAllocator(NCVMemoryType memT, Ncv32u alignment)
virtual NCVStatus alloc(NCVMemSegment &seg, std::size_t size)
virtual NcvBool isInitialized() const
virtual Ncv32u alignment() const
virtual NCVStatus dealloc(NCVMemSegment &seg)
virtual std::size_t maxSize() const
virtual NcvBool isCounting() const
virtual NCVMemoryType memType() const
NCVMemStackAllocator(NCVMemoryType memT, std::size_t capacity, Ncv32u alignment, void *reusePtr=nullptr)
virtual ~NCVMemStackAllocator()
virtual NcvBool isCounting() const
virtual NCVStatus alloc(NCVMemSegment &seg, std::size_t size)
virtual NcvBool isInitialized() const
virtual NCVStatus dealloc(NCVMemSegment &seg)
virtual std::size_t maxSize() const
NCVMemStackAllocator(Ncv32u alignment)
virtual NCVMemoryType memType() const
virtual Ncv32u alignment() const
Ncv32u getAllocatorsAlignment() const
NcvBool isMemAllocated() const
NCVVectorAlloc(INCVMemAllocator &allocator, Ncv32u length)
NCVMemSegment getSegment() const
NCVMemoryType memType() const
NCVStatus copySolid(NCVVector< T > &dst, cudaStream_t cuStream, std::size_t howMuch=0) const
std::size_t length() const
NcvBool isMemReused() const
NCVVectorReuse(const NCVMemSegment &memSegment)
NCVVectorReuse(const NCVMemSegment &memSegment, Ncv32u length)
Compile-time assert namespace.
__host__ __device__ NcvPoint2D32s()
__host__ __device__ NcvPoint2D32s(Ncv32s x, Ncv32s y)
__host__ __device__ NcvPoint2D32u()
__host__ __device__ NcvPoint2D32u(Ncv32u x, Ncv32u y)
Ncv32s height
Rectangle height.
Ncv32s width
Rectangle width.
__host__ __device__ NcvRect32s()
Ncv32s x
x-coordinate of upper left corner.
Ncv32s y
y-coordinate of upper left corner.
__host__ __device__ NcvRect32s(Ncv32s x, Ncv32s y, Ncv32s width, Ncv32s height)
Ncv32u x
x-coordinate of upper left corner.
Ncv32u height
Rectangle height.
__host__ __device__ NcvRect32u()
Ncv32u width
Rectangle width.
Ncv32u y
y-coordinate of upper left corner.
__host__ __device__ NcvRect32u(Ncv32u x, Ncv32u y, Ncv32u width, Ncv32u height)
__host__ __device__ NcvRect8u(Ncv8u x, Ncv8u y, Ncv8u width, Ncv8u height)
__host__ __device__ NcvRect8u()
__host__ __device__ NcvSize32s()
Ncv32s height
Rectangle height.
__host__ __device__ NcvSize32s(Ncv32s width, Ncv32s height)
Ncv32s width
Rectangle width.
__host__ __device__ NcvSize32u(Ncv32u width, Ncv32u height)
__host__ __device__ bool operator==(const NcvSize32u &another) const
__host__ __device__ NcvSize32u()
Ncv32u height
Rectangle height.
Ncv32u width
Rectangle width.