Point Cloud Library (PCL)  1.14.1-dev
NCV.hpp
1 /*
2  * Software License Agreement (BSD License)
3  *
4  * Point Cloud Library (PCL) - www.pointclouds.org
5  * Copyright (C) 2009-2010, NVIDIA Corporation, all rights reserved.
6  * Third party copyrights are property of their respective owners.
7  *
8  * All rights reserved.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  *
14  * * Redistributions of source code must retain the above copyright
15  * notice, this list of conditions and the following disclaimer.
16  * * Redistributions in binary form must reproduce the above
17  * copyright notice, this list of conditions and the following
18  * disclaimer in the documentation and/or other materials provided
19  * with the distribution.
20  * * Neither the name of Willow Garage, Inc. nor the names of its
21  * contributors may be used to endorse or promote products derived
22  * from this software without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
25  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
26  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
27  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
28  * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
29  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
30  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
31  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
32  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
34  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35  * POSSIBILITY OF SUCH DAMAGE.
36  *
37  * $Id: $
38  * Ported to PCL by Koen Buys : Attention Work in progress!
39  */
40 
41 #ifndef PCL_GPU_PEOPLE__NCV_HPP_
42 #define PCL_GPU_PEOPLE__NCV_HPP_
43 
44 #if (defined _WIN32 || defined WINCE) && defined CVAPI_EXPORTS
45  #define NCV_EXPORTS __declspec(dllexport)
46 #else
47  #define NCV_EXPORTS
48 #endif
49 
50 #ifdef _WIN32
51  #define WIN32_LEAN_AND_MEAN
52 #endif
53 
54 #include <cuda_runtime.h>
55 #include <sstream>
56 #include <iostream>
57 #include <pcl/console/print.h>
58 
59 //==============================================================================
60 //
61 // Compile-time assert functionality
62 //
63 //==============================================================================
64 
65 /**
66 * Compile-time assert namespace
67 */
68 namespace NcvCTprep
69 {
70  template <bool x>
72 
73  template <>
74  struct CT_ASSERT_FAILURE<true> {};
75 
76  template <int x>
77  struct assertTest{};
78 }
79 
80 #define NCV_CT_PREP_PASTE_AUX(a,b) a##b ///< Concatenation indirection macro
81 #define NCV_CT_PREP_PASTE(a,b) NCV_CT_PREP_PASTE_AUX(a, b) ///< Concatenation macro
82 
83 /**
84 * Performs compile-time assertion of a condition on the file scope
85 */
86 #define NCV_CT_ASSERT(X) \
87  typedef NcvCTprep::assertTest<sizeof(NcvCTprep::CT_ASSERT_FAILURE< (bool)(X) >)> \
88  NCV_CT_PREP_PASTE(__ct_assert_typedef_, __LINE__)
89 
90 //==============================================================================
91 //
92 // Alignment macros
93 //
94 //==============================================================================
95 
96 #if !defined(__align__) && !defined(__CUDACC__)
97  #if defined(_WIN32) || defined(_WIN64)
98  #define __align__(n) __declspec(align(n))
99  #elif defined(__unix__)
100  #define __align__(n) __attribute__((__aligned__(n)))
101  #endif
102 #endif
103 
104 //==============================================================================
105 //
106 // Integral and compound types of guaranteed size
107 //
108 //==============================================================================
109 
110 using NcvBool = bool;
111 using Ncv64s = long long;
112 
113 #if defined(__APPLE__) && !defined(__CUDACC__)
114  using Ncv64u = std::uint64_t;
115 #else
116  using Ncv64u = unsigned long long;
117 #endif
118 
119 using Ncv32s = int;
120 using Ncv32u = unsigned int;
121 using Ncv16s = short;
122 using Ncv16u = unsigned short;
123 using Ncv8s = char;
124 using Ncv8u = unsigned char;
125 using Ncv32f = float;
126 using Ncv64f = double;
127 
128 struct NcvRect8u
129 {
130  Ncv8u x;
131  Ncv8u y;
132  Ncv8u width;
133  Ncv8u height;
134  __host__ __device__ NcvRect8u() : x(0), y(0), width(0), height(0) {};
135  __host__ __device__ NcvRect8u(Ncv8u x, Ncv8u y, Ncv8u width, Ncv8u height) : x(x), y(y), width(width), height(height) {}
136 };
137 
139 {
140  Ncv32s x; ///< x-coordinate of upper left corner.
141  Ncv32s y; ///< y-coordinate of upper left corner.
142  Ncv32s width; ///< Rectangle width.
143  Ncv32s height; ///< Rectangle height.
144  __host__ __device__ NcvRect32s() : x(0), y(0), width(0), height(0) {};
145  __host__ __device__ NcvRect32s(Ncv32s x, Ncv32s y, Ncv32s width, Ncv32s height) : x(x), y(y), width(width), height(height) {}
146 };
147 
149 {
150  Ncv32u x; ///< x-coordinate of upper left corner.
151  Ncv32u y; ///< y-coordinate of upper left corner.
152  Ncv32u width; ///< Rectangle width.
153  Ncv32u height; ///< Rectangle height.
154  __host__ __device__ NcvRect32u() : x(0), y(0), width(0), height(0) {};
155  __host__ __device__ NcvRect32u(Ncv32u x, Ncv32u y, Ncv32u width, Ncv32u height) : x(x), y(y), width(width), height(height) {}
156 };
157 
159 {
160  Ncv32s width; ///< Rectangle width.
161  Ncv32s height; ///< Rectangle height.
162  __host__ __device__ NcvSize32s() : width(0), height(0) {};
163  __host__ __device__ NcvSize32s(Ncv32s width, Ncv32s height) : width(width), height(height) {}
164 };
165 
167 {
168  Ncv32u width; ///< Rectangle width.
169  Ncv32u height; ///< Rectangle height.
170  __host__ __device__ NcvSize32u() : width(0), height(0) {};
171  __host__ __device__ NcvSize32u(Ncv32u width, Ncv32u height) : width(width), height(height) {}
172  __host__ __device__ bool operator == (const NcvSize32u &another) const {return this->width == another.width && this->height == another.height;}
173 };
174 
176 {
177  Ncv32s x; ///< Point X.
178  Ncv32s y; ///< Point Y.
179  __host__ __device__ NcvPoint2D32s() : x(0), y(0) {};
180  __host__ __device__ NcvPoint2D32s(Ncv32s x, Ncv32s y) : x(x), y(y) {}
181 };
182 
184 {
185  Ncv32u x; ///< Point X.
186  Ncv32u y; ///< Point Y.
187  __host__ __device__ NcvPoint2D32u() : x(0), y(0) {};
188  __host__ __device__ NcvPoint2D32u(Ncv32u x, Ncv32u y) : x(x), y(y) {}
189 };
190 
191 NCV_CT_ASSERT(sizeof(NcvBool) <= 4);
192 NCV_CT_ASSERT(sizeof(Ncv64s) == 8);
193 NCV_CT_ASSERT(sizeof(Ncv64u) == 8);
194 NCV_CT_ASSERT(sizeof(Ncv32s) == 4);
195 NCV_CT_ASSERT(sizeof(Ncv32u) == 4);
196 NCV_CT_ASSERT(sizeof(Ncv16s) == 2);
197 NCV_CT_ASSERT(sizeof(Ncv16u) == 2);
198 NCV_CT_ASSERT(sizeof(Ncv8s) == 1);
199 NCV_CT_ASSERT(sizeof(Ncv8u) == 1);
200 NCV_CT_ASSERT(sizeof(Ncv32f) == 4);
201 NCV_CT_ASSERT(sizeof(Ncv64f) == 8);
202 NCV_CT_ASSERT(sizeof(NcvRect8u) == sizeof(Ncv32u));
203 NCV_CT_ASSERT(sizeof(NcvRect32s) == 4 * sizeof(Ncv32s));
204 NCV_CT_ASSERT(sizeof(NcvRect32u) == 4 * sizeof(Ncv32u));
205 NCV_CT_ASSERT(sizeof(NcvSize32u) == 2 * sizeof(Ncv32u));
206 NCV_CT_ASSERT(sizeof(NcvPoint2D32u) == 2 * sizeof(Ncv32u));
207 
208 
209 //==============================================================================
210 //
211 // Persistent constants
212 //
213 //==============================================================================
214 
215 constexpr Ncv32u K_WARP_SIZE = 32;
216 constexpr Ncv32u K_LOG2_WARP_SIZE = 5;
217 
218 //==============================================================================
219 //
220 // Error handling
221 //
222 //==============================================================================
223 
224 NCV_EXPORTS void ncvDebugOutput(const std::string &msg);
225 
226 using NCVDebugOutputHandler = void (const std::string &);
227 
228 NCV_EXPORTS void ncvSetDebugOutputHandler(NCVDebugOutputHandler* func);
229 
230 #define ncvAssertPrintCheck(pred, msg) \
231  do \
232  { \
233  if (!(pred)) \
234  { \
235  std::ostringstream oss; \
236  oss << "NCV Assertion Failed: " << msg << ", file=" << __FILE__ << ", line=" << __LINE__ << std::endl; \
237  ncvDebugOutput(oss.str()); \
238  } \
239  } while (0)
240 
241 #define ncvAssertPrintReturn(pred, msg, err) \
242  do \
243  { \
244  ncvAssertPrintCheck(pred, msg); \
245  if (!(pred)) return err; \
246  } while (0)
247 
248 #define ncvAssertReturn(pred, err) \
249  ncvAssertPrintReturn(pred, "retcode=" << (int)err, err)
250 
251 #define ncvAssertReturnNcvStat(ncvOp) \
252  do \
253  { \
254  NCVStatus _ncvStat = ncvOp; \
255  ncvAssertPrintReturn(NCV_SUCCESS==_ncvStat, "NcvStat=" << (int)_ncvStat, _ncvStat); \
256  } while (0)
257 
258 #define ncvAssertCUDAReturn(cudacall, errCode) \
259  do \
260  { \
261  cudaError_t res = cudacall; \
262  ncvAssertPrintReturn(cudaSuccess==res, "cudaError_t=" << res, errCode); \
263  } while (0)
264 
265 #define ncvAssertCUDALastErrorReturn(errCode) \
266  do \
267  { \
268  cudaError_t res = cudaGetLastError(); \
269  ncvAssertPrintReturn(cudaSuccess==res, "cudaError_t=" << res, errCode); \
270  } while (0)
271 
272 /**
273 * \brief Return-codes for status notification, errors and warnings
274 */
275 enum
276 {
277  //NCV statuses
278  NCV_SUCCESS,
279  NCV_UNKNOWN_ERROR,
280 
281  NCV_CUDA_ERROR,
282  NCV_NPP_ERROR,
283  NCV_FILE_ERROR,
284 
285  NCV_NULL_PTR,
286  NCV_INCONSISTENT_INPUT,
287  NCV_TEXTURE_BIND_ERROR,
288  NCV_DIMENSIONS_INVALID,
289 
290  NCV_INVALID_ROI,
291  NCV_INVALID_STEP,
292  NCV_INVALID_SCALE,
293 
294  NCV_ALLOCATOR_NOT_INITIALIZED,
295  NCV_ALLOCATOR_BAD_ALLOC,
296  NCV_ALLOCATOR_BAD_DEALLOC,
297  NCV_ALLOCATOR_INSUFFICIENT_CAPACITY,
298  NCV_ALLOCATOR_DEALLOC_ORDER,
299  NCV_ALLOCATOR_BAD_REUSE,
300 
301  NCV_MEM_COPY_ERROR,
302  NCV_MEM_RESIDENCE_ERROR,
303  NCV_MEM_INSUFFICIENT_CAPACITY,
304 
305  NCV_HAAR_INVALID_PIXEL_STEP,
306  NCV_HAAR_TOO_MANY_FEATURES_IN_CLASSIFIER,
307  NCV_HAAR_TOO_MANY_FEATURES_IN_CASCADE,
308  NCV_HAAR_TOO_LARGE_FEATURES,
309  NCV_HAAR_XML_LOADING_EXCEPTION,
310 
311  NCV_NOIMPL_HAAR_TILTED_FEATURES,
312  NCV_NOT_IMPLEMENTED,
313 
314  NCV_WARNING_HAAR_DETECTIONS_VECTOR_OVERFLOW,
315 
316  //NPP statuses
317  NPPST_SUCCESS = NCV_SUCCESS, ///< Successful operation (same as NPP_NO_ERROR)
318  NPPST_ERROR, ///< Unknown error
319  NPPST_CUDA_KERNEL_EXECUTION_ERROR, ///< CUDA kernel execution error
320  NPPST_NULL_POINTER_ERROR, ///< NULL pointer argument error
321  NPPST_TEXTURE_BIND_ERROR, ///< CUDA texture binding error or non-zero offset returned
322  NPPST_MEMCPY_ERROR, ///< CUDA memory copy error
323  NPPST_MEM_ALLOC_ERR, ///< CUDA memory allocation error
324  NPPST_MEMFREE_ERR, ///< CUDA memory deallocation error
325 
326  //NPPST statuses
327  NPPST_INVALID_ROI, ///< Invalid region of interest argument
328  NPPST_INVALID_STEP, ///< Invalid image lines step argument (check sign, alignment, relation to image width)
329  NPPST_INVALID_SCALE, ///< Invalid scale parameter passed
330  NPPST_MEM_INSUFFICIENT_BUFFER, ///< Insufficient user-allocated buffer
331  NPPST_MEM_RESIDENCE_ERROR, ///< Memory residence error detected (check if pointers should be device or pinned)
332  NPPST_MEM_INTERNAL_ERROR, ///< Internal memory management error
333 
334  NCV_LAST_STATUS ///< Marker to continue error numeration in other files
335 };
336 
337 using NCVStatus = Ncv32u;
338 
339 #define NCV_SET_SKIP_COND(x) \
340  bool __ncv_skip_cond = x
341 
342 #define NCV_RESET_SKIP_COND(x) \
343  __ncv_skip_cond = x
344 
345 #define NCV_SKIP_COND_BEGIN \
346  if (!__ncv_skip_cond) {
347 
348 #define NCV_SKIP_COND_END \
349  }
350 
351 
352 //==============================================================================
353 //
354 // Memory management classes template compound types
355 //
356 //==============================================================================
357 
358 
359 /**
360 * Calculates the aligned top bound value
361 */
362 NCV_EXPORTS Ncv32u alignUp(Ncv32u what, Ncv32u alignment);
363 
364 
365 /**
366 * NCVMemoryType
367 */
368 enum NCVMemoryType
369 {
370  NCVMemoryTypeNone,
371  NCVMemoryTypeHostPageable,
372  NCVMemoryTypeHostPinned,
373  NCVMemoryTypeDevice
374 };
375 
376 
377 /**
378 * NCVMemPtr
379 */
380 struct NCV_EXPORTS NCVMemPtr
381 {
382  void *ptr;
383  NCVMemoryType memtype;
384  void clear();
385 };
386 
387 
388 /**
389 * NCVMemSegment
390 */
391 struct NCV_EXPORTS NCVMemSegment
392 {
394  std::size_t size;
395  void clear();
396 };
397 
398 
399 /**
400 * INCVMemAllocator (Interface)
401 */
402 class NCV_EXPORTS INCVMemAllocator
403 {
404 public:
405  virtual ~INCVMemAllocator() = default;
406 
407  virtual NCVStatus alloc(NCVMemSegment &seg, std::size_t size) = 0;
408  virtual NCVStatus dealloc(NCVMemSegment &seg) = 0;
409 
410  virtual NcvBool isInitialized() const = 0;
411  virtual NcvBool isCounting() const = 0;
412 
413  virtual NCVMemoryType memType() const = 0;
414  virtual Ncv32u alignment() const = 0;
415  virtual std::size_t maxSize() const = 0;
416 };
417 
418 /**
419 * NCVMemStackAllocator
420 */
421 class NCV_EXPORTS NCVMemStackAllocator : public INCVMemAllocator
422 {
425 
426 public:
427 
428  explicit NCVMemStackAllocator(Ncv32u alignment);
429  NCVMemStackAllocator(NCVMemoryType memT, std::size_t capacity, Ncv32u alignment, void *reusePtr=nullptr);
431 
432  virtual NCVStatus alloc(NCVMemSegment &seg, std::size_t size);
433  virtual NCVStatus dealloc(NCVMemSegment &seg);
434 
435  virtual NcvBool isInitialized() const;
436  virtual NcvBool isCounting() const;
437 
438  virtual NCVMemoryType memType() const;
439  virtual Ncv32u alignment() const;
440  virtual std::size_t maxSize() const;
441 
442 private:
443 
444  NCVMemoryType _memType;
445  Ncv32u _alignment;
446  Ncv8u *allocBegin;
447  Ncv8u *begin;
448  Ncv8u *end;
449  std::size_t currentSize;
450  std::size_t _maxSize;
451  NcvBool bReusesMemory;
452 };
453 
454 
455 /**
456 * NCVMemNativeAllocator
457 */
458 class NCV_EXPORTS NCVMemNativeAllocator : public INCVMemAllocator
459 {
460 public:
461 
462  NCVMemNativeAllocator(NCVMemoryType memT, Ncv32u alignment);
464 
465  virtual NCVStatus alloc(NCVMemSegment &seg, std::size_t size);
466  virtual NCVStatus dealloc(NCVMemSegment &seg);
467 
468  virtual NcvBool isInitialized() const;
469  virtual NcvBool isCounting() const;
470 
471  virtual NCVMemoryType memType() const;
472  virtual Ncv32u alignment() const;
473  virtual std::size_t maxSize() const;
474 
475 private:
476 
479 
480  NCVMemoryType _memType;
481  Ncv32u _alignment;
482  std::size_t currentSize;
483  std::size_t _maxSize;
484 };
485 
486 
487 /**
488 * Copy dispatchers
489 */
490 NCV_EXPORTS NCVStatus memSegCopyHelper(void *dst, NCVMemoryType dstType,
491  const void *src, NCVMemoryType srcType,
492  std::size_t sz, cudaStream_t cuStream);
493 
494 
495 NCV_EXPORTS NCVStatus memSegCopyHelper2D(void *dst, Ncv32u dstPitch, NCVMemoryType dstType,
496  const void *src, Ncv32u srcPitch, NCVMemoryType srcType,
497  Ncv32u widthbytes, Ncv32u height, cudaStream_t cuStream);
498 
499 
500 /**
501 * NCVVector (1D)
502 */
503 template <class T>
505 {
506  NCVVector(const NCVVector &) = delete;
507 
508 public:
509 
511  {
512  clear();
513  }
514 
515  void clear()
516  {
517  _ptr = nullptr;
518  _length = 0;
519  _memtype = NCVMemoryTypeNone;
520  }
521 
522  NCVStatus copySolid(NCVVector<T> &dst, cudaStream_t cuStream, std::size_t howMuch=0) const
523  {
524  if (howMuch == 0)
525  {
526  ncvAssertReturn(dst._length == this->_length, NCV_MEM_COPY_ERROR);
527  howMuch = this->_length * sizeof(T);
528  }
529  else
530  {
531  ncvAssertReturn(dst._length * sizeof(T) >= howMuch &&
532  this->_length * sizeof(T) >= howMuch &&
533  howMuch > 0, NCV_MEM_COPY_ERROR);
534  }
535  ncvAssertReturn((this->_ptr != nullptr || this->_memtype == NCVMemoryTypeNone) &&
536  (dst._ptr != nullptr || dst._memtype == NCVMemoryTypeNone), NCV_NULL_PTR);
537 
538  NCVStatus ncvStat = NCV_SUCCESS;
539  if (this->_memtype != NCVMemoryTypeNone)
540  {
541  ncvStat = memSegCopyHelper(dst._ptr, dst._memtype,
542  this->_ptr, this->_memtype,
543  howMuch, cuStream);
544  }
545 
546  return ncvStat;
547  }
548 
549  T *ptr() const {return this->_ptr;}
550  std::size_t length() const {return this->_length;}
551  NCVMemoryType memType() const {return this->_memtype;}
552 
553 protected:
554 
555  T *_ptr;
556  std::size_t _length;
557  NCVMemoryType _memtype;
558 };
559 
560 
561 /**
562 * NCVVectorAlloc
563 */
564 template <class T>
565 class NCVVectorAlloc : public NCVVector<T>
566 {
567  NCVVectorAlloc() = delete;
568  NCVVectorAlloc(const NCVVectorAlloc &) = delete;
569  NCVVectorAlloc& operator=(const NCVVectorAlloc<T>&) = delete;
570 
571 public:
572 
574  :
575  allocator(allocator)
576  {
577  NCVStatus ncvStat;
578 
579  this->clear();
580  this->allocatedMem.clear();
581 
582  ncvStat = allocator.alloc(this->allocatedMem, length * sizeof(T));
583  ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "NCVVectorAlloc ctor:: alloc failed", );
584 
585  this->_ptr = (T *)this->allocatedMem.begin.ptr;
586  this->_length = length;
587  this->_memtype = this->allocatedMem.begin.memtype;
588  }
589 
591  {
592  NCVStatus ncvStat;
593 
594  ncvStat = allocator.dealloc(this->allocatedMem);
595  ncvAssertPrintCheck(ncvStat == NCV_SUCCESS, "NCVVectorAlloc dtor:: dealloc failed");
596 
597  this->clear();
598  }
599 
600  NcvBool isMemAllocated() const
601  {
602  return (this->allocatedMem.begin.ptr != nullptr) || (this->allocator.isCounting());
603  }
604 
605  Ncv32u getAllocatorsAlignment() const
606  {
607  return allocator.alignment();
608  }
609 
611  {
612  return allocatedMem;
613  }
614 
615 private:
616  INCVMemAllocator &allocator;
617  NCVMemSegment allocatedMem;
618 };
619 
620 
621 /**
622 * NCVVectorReuse
623 */
624 template <class T>
625 class NCVVectorReuse : public NCVVector<T>
626 {
627  NCVVectorReuse() = delete;
628  NCVVectorReuse(const NCVVectorReuse &) = delete;
629 
630 public:
631 
632  explicit NCVVectorReuse(const NCVMemSegment &memSegment)
633  {
634  this->bReused = false;
635  this->clear();
636 
637  this->_length = memSegment.size / sizeof(T);
638  this->_ptr = (T *)memSegment.begin.ptr;
639  this->_memtype = memSegment.begin.memtype;
640 
641  this->bReused = true;
642  }
643 
644  NCVVectorReuse(const NCVMemSegment &memSegment, Ncv32u length)
645  {
646  this->bReused = false;
647  this->clear();
648 
649  ncvAssertPrintReturn(length * sizeof(T) <= memSegment.size, \
650  "NCVVectorReuse ctor:: memory binding failed due to size mismatch", );
651 
652  this->_length = length;
653  this->_ptr = (T *)memSegment.begin.ptr;
654  this->_memtype = memSegment.begin.memtype;
655 
656  this->bReused = true;
657  }
658 
659  NcvBool isMemReused() const
660  {
661  return this->bReused;
662  }
663 
664 private:
665 
666  NcvBool bReused;
667 };
668 
669 
670 /**
671 * NCVMatrix (2D)
672 */
673 template <class T>
675 {
676  NCVMatrix(const NCVMatrix &) = delete;
677 
678 public:
679 
681  {
682  clear();
683  }
684 
685  void clear()
686  {
687  _ptr = nullptr;
688  _pitch = 0;
689  _width = 0;
690  _height = 0;
691  _memtype = NCVMemoryTypeNone;
692  }
693 
694  Ncv32u stride() const
695  {
696  return _pitch / sizeof(T);
697  }
698 
699  //a side effect of this function is that it copies everything in a single chunk, so the "padding" will be overwritten
700  NCVStatus copySolid(NCVMatrix<T> &dst, cudaStream_t cuStream, std::size_t howMuch=0) const
701  {
702  if (howMuch == 0)
703  {
704  ncvAssertReturn(dst._pitch == this->_pitch &&
705  dst._height == this->_height, NCV_MEM_COPY_ERROR);
706  howMuch = this->_pitch * this->_height;
707  }
708  else
709  {
710  ncvAssertReturn(dst._pitch * dst._height >= howMuch &&
711  this->_pitch * this->_height >= howMuch &&
712  howMuch > 0, NCV_MEM_COPY_ERROR);
713  }
714  ncvAssertReturn((this->_ptr != nullptr || this->_memtype == NCVMemoryTypeNone) &&
715  (dst._ptr != nullptr || dst._memtype == NCVMemoryTypeNone), NCV_NULL_PTR);
716 
717  NCVStatus ncvStat = NCV_SUCCESS;
718  if (this->_memtype != NCVMemoryTypeNone)
719  {
720  ncvStat = memSegCopyHelper(dst._ptr, dst._memtype,
721  this->_ptr, this->_memtype,
722  howMuch, cuStream);
723  }
724 
725  return ncvStat;
726  }
727 
728  NCVStatus copy2D(NCVMatrix<T> &dst, NcvSize32u roi, cudaStream_t cuStream) const
729  {
730  ncvAssertReturn(this->width() >= roi.width && this->height() >= roi.height &&
731  dst.width() >= roi.width && dst.height() >= roi.height, NCV_MEM_COPY_ERROR);
732  ncvAssertReturn((this->_ptr != NULL || this->_memtype == NCVMemoryTypeNone) &&
733  (dst._ptr != NULL || dst._memtype == NCVMemoryTypeNone), NCV_NULL_PTR);
734 
735  NCVStatus ncvStat = NCV_SUCCESS;
736  if (this->_memtype != NCVMemoryTypeNone)
737  {
738  ncvStat = memSegCopyHelper2D(dst._ptr, dst._pitch, dst._memtype,
739  this->_ptr, this->_pitch, this->_memtype,
740  roi.width * sizeof(T), roi.height, cuStream);
741  }
742 
743  return ncvStat;
744  }
745 
746  T &at(Ncv32u x, Ncv32u y) const
747  {
748  NcvBool bOutRange = (x >= this->_width || y >= this->_height);
749  ncvAssertPrintCheck(!bOutRange, "Error addressing matrix at [" << x << ", " << y << "]");
750  if (bOutRange)
751  {
752  return *this->_ptr;
753  }
754  return ((T *)((Ncv8u *)this->_ptr + y * this->_pitch))[x];
755  }
756 
757  T *ptr() const {return this->_ptr;}
758  Ncv32u width() const {return this->_width;}
759  Ncv32u height() const {return this->_height;}
760  NcvSize32u size() const {return NcvSize32u(this->_width, this->_height);}
761  Ncv32u pitch() const {return this->_pitch;}
762  NCVMemoryType memType() const {return this->_memtype;}
763 
764 protected:
765 
766  T *_ptr;
767  Ncv32u _width;
768  Ncv32u _height;
769  Ncv32u _pitch;
770  NCVMemoryType _memtype;
771 };
772 
773 
774 /**
775 * NCVMatrixAlloc
776 */
777 template <class T>
778 class NCVMatrixAlloc : public NCVMatrix<T>
779 {
780  NCVMatrixAlloc() = delete;
781  NCVMatrixAlloc(const NCVMatrixAlloc &) = delete;
782  NCVMatrixAlloc& operator=(const NCVMatrixAlloc &) = delete;
783 public:
784 
785  NCVMatrixAlloc(INCVMemAllocator &allocator, Ncv32u width, Ncv32u height, Ncv32u pitch=0)
786  :
787  allocator(allocator)
788  {
789  NCVStatus ncvStat;
790 
791  this->clear();
792  this->allocatedMem.clear();
793 
794  Ncv32u widthBytes = width * sizeof(T);
795  Ncv32u pitchBytes = alignUp(widthBytes, allocator.alignment());
796 
797  if (pitch != 0)
798  {
799  ncvAssertPrintReturn(pitch >= pitchBytes &&
800  (pitch & (allocator.alignment() - 1)) == 0,
801  "NCVMatrixAlloc ctor:: incorrect pitch passed", );
802  pitchBytes = pitch;
803  }
804 
805  Ncv32u requiredAllocSize = pitchBytes * height;
806 
807  ncvStat = allocator.alloc(this->allocatedMem, requiredAllocSize);
808  ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "NCVMatrixAlloc ctor:: alloc failed", );
809 
810  this->_ptr = (T *)this->allocatedMem.begin.ptr;
811  this->_width = width;
812  this->_height = height;
813  this->_pitch = pitchBytes;
814  this->_memtype = this->allocatedMem.begin.memtype;
815  }
816 
818  {
819  NCVStatus ncvStat;
820 
821  ncvStat = allocator.dealloc(this->allocatedMem);
822  ncvAssertPrintCheck(ncvStat == NCV_SUCCESS, "NCVMatrixAlloc dtor:: dealloc failed");
823 
824  this->clear();
825  }
826 
827  NcvBool isMemAllocated() const
828  {
829  return (this->allocatedMem.begin.ptr != nullptr) || (this->allocator.isCounting());
830  }
831 
832  Ncv32u getAllocatorsAlignment() const
833  {
834  return allocator.alignment();
835  }
836 
838  {
839  return allocatedMem;
840  }
841 
842 private:
843 
844  INCVMemAllocator &allocator;
845  NCVMemSegment allocatedMem;
846 };
847 
848 
849 /**
850 * NCVMatrixReuse
851 */
852 template <class T>
853 class NCVMatrixReuse : public NCVMatrix<T>
854 {
855  NCVMatrixReuse() = delete;
856  NCVMatrixReuse(const NCVMatrixReuse &) = delete;
857 
858 public:
859 
860  NCVMatrixReuse(const NCVMemSegment &memSegment, Ncv32u alignment, Ncv32u width, Ncv32u height, Ncv32u pitch=0, NcvBool bSkipPitchCheck=false)
861  {
862  this->bReused = false;
863  this->clear();
864 
865  Ncv32u widthBytes = width * sizeof(T);
866  Ncv32u pitchBytes = alignUp(widthBytes, alignment);
867 
868  if (pitch != 0)
869  {
870  if (!bSkipPitchCheck)
871  {
872  ncvAssertPrintReturn(pitch >= pitchBytes &&
873  (pitch & (alignment - 1)) == 0,
874  "NCVMatrixReuse ctor:: incorrect pitch passed", );
875  }
876  else
877  {
878  ncvAssertPrintReturn(pitch >= widthBytes, "NCVMatrixReuse ctor:: incorrect pitch passed", );
879  }
880  pitchBytes = pitch;
881  }
882 
883  ncvAssertPrintReturn(pitchBytes * height <= memSegment.size, \
884  "NCVMatrixReuse ctor:: memory binding failed due to size mismatch", );
885 
886  this->_width = width;
887  this->_height = height;
888  this->_pitch = pitchBytes;
889  this->_ptr = (T *)memSegment.begin.ptr;
890  this->_memtype = memSegment.begin.memtype;
891 
892  this->bReused = true;
893  }
894 
896  {
897  this->bReused = false;
898  this->clear();
899 
900  ncvAssertPrintReturn(roi.x < mat.width() && roi.y < mat.height() && \
901  roi.x + roi.width <= mat.width() && roi.y + roi.height <= mat.height(),
902  "NCVMatrixReuse ctor:: memory binding failed due to mismatching ROI and source matrix dims", );
903 
904  this->_width = roi.width;
905  this->_height = roi.height;
906  this->_pitch = mat.pitch();
907  this->_ptr = &mat.at(roi.x, roi.y);
908  this->_memtype = mat.memType();
909 
910  this->bReused = true;
911  }
912 
913  NcvBool isMemReused() const
914  {
915  return this->bReused;
916  }
917 
918 private:
919 
920  NcvBool bReused;
921 };
922 
923 /**
924 * Operations with rectangles
925 */
926 NCV_EXPORTS NCVStatus ncvGroupRectangles_host(NCVVector<NcvRect32u> &hypotheses, Ncv32u &numHypotheses,
927  Ncv32u minNeighbors, Ncv32f intersectEps, NCVVector<Ncv32u> *hypothesesWeights);
928 
929 NCV_EXPORTS NCVStatus ncvDrawRects_8u_host(Ncv8u *h_dst, Ncv32u dstStride, Ncv32u dstWidth, Ncv32u dstHeight,
930  NcvRect32u *h_rects, Ncv32u numRects, Ncv8u color);
931 
932 NCV_EXPORTS NCVStatus ncvDrawRects_32u_host(Ncv32u *h_dst, Ncv32u dstStride, Ncv32u dstWidth, Ncv32u dstHeight,
933  NcvRect32u *h_rects, Ncv32u numRects, Ncv32u color);
934 
935 NCV_EXPORTS NCVStatus ncvDrawRects_8u_device(Ncv8u *d_dst, Ncv32u dstStride, Ncv32u dstWidth, Ncv32u dstHeight,
936  NcvRect32u *d_rects, Ncv32u numRects, Ncv8u color, cudaStream_t cuStream);
937 
938 NCV_EXPORTS NCVStatus ncvDrawRects_32u_device(Ncv32u *d_dst, Ncv32u dstStride, Ncv32u dstWidth, Ncv32u dstHeight,
939  NcvRect32u *d_rects, Ncv32u numRects, Ncv32u color, cudaStream_t cuStream);
940 
941 #define CLAMP(x,a,b) ( (x) > (b) ? (b) : ( (x) < (a) ? (a) : (x) ) )
942 #define CLAMP_TOP(x, a) (((x) > (a)) ? (a) : (x))
943 #define CLAMP_BOTTOM(x, a) (((x) < (a)) ? (a) : (x))
944 #define CLAMP_0_255(x) CLAMP(x,0,255)
945 
946 #define SUB_BEGIN(type, name) struct { __inline type name
947 #define SUB_END(name) } name;
948 #define SUB_CALL(name) name.name
949 
950 #define SQR(x) ((x)*(x))
951 
952 #define ncvSafeMatAlloc(name, type, alloc, width, height, err) \
953  NCVMatrixAlloc<type> name(alloc, width, height); \
954  ncvAssertReturn(name.isMemAllocated(), err);
955 
956 #endif // PCL_GPU_PEOPLE__NCV_HPP_
INCVMemAllocator (Interface)
Definition: NCV.hpp:403
virtual NcvBool isInitialized() const =0
virtual NCVStatus alloc(NCVMemSegment &seg, std::size_t size)=0
virtual NCVStatus dealloc(NCVMemSegment &seg)=0
virtual std::size_t maxSize() const =0
virtual Ncv32u alignment() const =0
virtual ~INCVMemAllocator()=default
virtual NcvBool isCounting() const =0
virtual NCVMemoryType memType() const =0
NCVMatrixAlloc.
Definition: NCV.hpp:779
NcvBool isMemAllocated() const
Definition: NCV.hpp:827
Ncv32u getAllocatorsAlignment() const
Definition: NCV.hpp:832
NCVMatrixAlloc(INCVMemAllocator &allocator, Ncv32u width, Ncv32u height, Ncv32u pitch=0)
Definition: NCV.hpp:785
~NCVMatrixAlloc()
Definition: NCV.hpp:817
NCVMemSegment getSegment() const
Definition: NCV.hpp:837
NCVMatrix (2D)
Definition: NCV.hpp:675
NCVMemoryType memType() const
Definition: NCV.hpp:762
NCVMemoryType _memtype
Definition: NCV.hpp:770
T * _ptr
Definition: NCV.hpp:766
Ncv32u _pitch
Definition: NCV.hpp:769
void clear()
Definition: NCV.hpp:685
NCVStatus copySolid(NCVMatrix< T > &dst, cudaStream_t cuStream, std::size_t howMuch=0) const
Definition: NCV.hpp:700
NcvSize32u size() const
Definition: NCV.hpp:760
Ncv32u width() const
Definition: NCV.hpp:758
Ncv32u _height
Definition: NCV.hpp:768
T * ptr() const
Definition: NCV.hpp:757
T & at(Ncv32u x, Ncv32u y) const
Definition: NCV.hpp:746
Ncv32u _width
Definition: NCV.hpp:767
NCVMatrix()
Definition: NCV.hpp:680
NCVStatus copy2D(NCVMatrix< T > &dst, NcvSize32u roi, cudaStream_t cuStream) const
Definition: NCV.hpp:728
Ncv32u height() const
Definition: NCV.hpp:759
Ncv32u stride() const
Definition: NCV.hpp:694
Ncv32u pitch() const
Definition: NCV.hpp:761
NCVMatrixReuse.
Definition: NCV.hpp:854
NCVMatrixReuse(const NCVMemSegment &memSegment, Ncv32u alignment, Ncv32u width, Ncv32u height, Ncv32u pitch=0, NcvBool bSkipPitchCheck=false)
Definition: NCV.hpp:860
NCVMatrixReuse(const NCVMatrix< T > &mat, NcvRect32u roi)
Definition: NCV.hpp:895
NcvBool isMemReused() const
Definition: NCV.hpp:913
NCVMemNativeAllocator.
Definition: NCV.hpp:459
virtual ~NCVMemNativeAllocator()
NCVMemNativeAllocator(NCVMemoryType memT, Ncv32u alignment)
virtual NCVStatus alloc(NCVMemSegment &seg, std::size_t size)
virtual NcvBool isInitialized() const
virtual Ncv32u alignment() const
virtual NCVStatus dealloc(NCVMemSegment &seg)
virtual std::size_t maxSize() const
virtual NcvBool isCounting() const
virtual NCVMemoryType memType() const
NCVMemStackAllocator.
Definition: NCV.hpp:422
NCVMemStackAllocator(NCVMemoryType memT, std::size_t capacity, Ncv32u alignment, void *reusePtr=nullptr)
virtual ~NCVMemStackAllocator()
virtual NcvBool isCounting() const
virtual NCVStatus alloc(NCVMemSegment &seg, std::size_t size)
virtual NcvBool isInitialized() const
virtual NCVStatus dealloc(NCVMemSegment &seg)
virtual std::size_t maxSize() const
NCVMemStackAllocator(Ncv32u alignment)
virtual NCVMemoryType memType() const
virtual Ncv32u alignment() const
NCVVectorAlloc.
Definition: NCV.hpp:566
Ncv32u getAllocatorsAlignment() const
Definition: NCV.hpp:605
~NCVVectorAlloc()
Definition: NCV.hpp:590
NcvBool isMemAllocated() const
Definition: NCV.hpp:600
NCVVectorAlloc(INCVMemAllocator &allocator, Ncv32u length)
Definition: NCV.hpp:573
NCVMemSegment getSegment() const
Definition: NCV.hpp:610
NCVVector (1D)
Definition: NCV.hpp:505
T * _ptr
Definition: NCV.hpp:555
NCVVector()
Definition: NCV.hpp:510
std::size_t _length
Definition: NCV.hpp:556
NCVMemoryType memType() const
Definition: NCV.hpp:551
void clear()
Definition: NCV.hpp:515
T * ptr() const
Definition: NCV.hpp:549
NCVStatus copySolid(NCVVector< T > &dst, cudaStream_t cuStream, std::size_t howMuch=0) const
Definition: NCV.hpp:522
NCVMemoryType _memtype
Definition: NCV.hpp:557
std::size_t length() const
Definition: NCV.hpp:550
NCVVectorReuse.
Definition: NCV.hpp:626
NcvBool isMemReused() const
Definition: NCV.hpp:659
NCVVectorReuse(const NCVMemSegment &memSegment)
Definition: NCV.hpp:632
NCVVectorReuse(const NCVMemSegment &memSegment, Ncv32u length)
Definition: NCV.hpp:644
Compile-time assert namespace.
Definition: NCV.hpp:69
NCVMemPtr.
Definition: NCV.hpp:381
NCVMemoryType memtype
Definition: NCV.hpp:383
void clear()
void * ptr
Definition: NCV.hpp:382
NCVMemSegment.
Definition: NCV.hpp:392
std::size_t size
Definition: NCV.hpp:394
NCVMemPtr begin
Definition: NCV.hpp:393
__host__ __device__ NcvPoint2D32s()
Definition: NCV.hpp:179
Ncv32s x
Point X.
Definition: NCV.hpp:177
__host__ __device__ NcvPoint2D32s(Ncv32s x, Ncv32s y)
Definition: NCV.hpp:180
Ncv32s y
Point Y.
Definition: NCV.hpp:178
Ncv32u x
Point X.
Definition: NCV.hpp:185
Ncv32u y
Point Y.
Definition: NCV.hpp:186
__host__ __device__ NcvPoint2D32u()
Definition: NCV.hpp:187
__host__ __device__ NcvPoint2D32u(Ncv32u x, Ncv32u y)
Definition: NCV.hpp:188
Ncv32s height
Rectangle height.
Definition: NCV.hpp:143
Ncv32s width
Rectangle width.
Definition: NCV.hpp:142
__host__ __device__ NcvRect32s()
Definition: NCV.hpp:144
Ncv32s x
x-coordinate of upper left corner.
Definition: NCV.hpp:140
Ncv32s y
y-coordinate of upper left corner.
Definition: NCV.hpp:141
__host__ __device__ NcvRect32s(Ncv32s x, Ncv32s y, Ncv32s width, Ncv32s height)
Definition: NCV.hpp:145
Ncv32u x
x-coordinate of upper left corner.
Definition: NCV.hpp:150
Ncv32u height
Rectangle height.
Definition: NCV.hpp:153
__host__ __device__ NcvRect32u()
Definition: NCV.hpp:154
Ncv32u width
Rectangle width.
Definition: NCV.hpp:152
Ncv32u y
y-coordinate of upper left corner.
Definition: NCV.hpp:151
__host__ __device__ NcvRect32u(Ncv32u x, Ncv32u y, Ncv32u width, Ncv32u height)
Definition: NCV.hpp:155
__host__ __device__ NcvRect8u(Ncv8u x, Ncv8u y, Ncv8u width, Ncv8u height)
Definition: NCV.hpp:135
Ncv8u y
Definition: NCV.hpp:131
__host__ __device__ NcvRect8u()
Definition: NCV.hpp:134
Ncv8u width
Definition: NCV.hpp:132
Ncv8u height
Definition: NCV.hpp:133
Ncv8u x
Definition: NCV.hpp:130
__host__ __device__ NcvSize32s()
Definition: NCV.hpp:162
Ncv32s height
Rectangle height.
Definition: NCV.hpp:161
__host__ __device__ NcvSize32s(Ncv32s width, Ncv32s height)
Definition: NCV.hpp:163
Ncv32s width
Rectangle width.
Definition: NCV.hpp:160
__host__ __device__ NcvSize32u(Ncv32u width, Ncv32u height)
Definition: NCV.hpp:171
__host__ __device__ bool operator==(const NcvSize32u &another) const
Definition: NCV.hpp:172
__host__ __device__ NcvSize32u()
Definition: NCV.hpp:170
Ncv32u height
Rectangle height.
Definition: NCV.hpp:169
Ncv32u width
Rectangle width.
Definition: NCV.hpp:168