Caffe2 - C++ API
A deep learning, cross-platform ML framework
mkl_memory.h
1 #ifndef CAFFE2_UTILS_MKL_MKL_MEMORY_H_
2 #define CAFFE2_UTILS_MKL_MKL_MEMORY_H_
3 
4 #include <string>
5 #include <vector>
6 #include <mutex>
7 
8 #include "caffe2/core/flags.h" // for CAFFE2_DECLARE_bool
9 #include "caffe2/core/tensor.h" // for TIndex
10 #include "caffe2/mkl/utils/mkl_dnn_cppwrapper.h"
11 
12 // A global boolean variable that controls the behavior when we call View() on
13 // an MKLMemory: if it is set true, then the View() function will actually
14 // change the underlying storage. If it is set false, an implicit copy is
15 // triggered but the original storage is not affected.
16 CAFFE2_DECLARE_bool(caffe2_mkl_implicit_layout_change);
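// Usage sketch: how this flag interacts with MKLMemory<T>::View() (defined
// later in this file). The memory, layout and primitive names here are
// hypothetical placeholders.
//
//   FLAGS_caffe2_mkl_implicit_layout_change = true;
//   auto v1 = mkl_memory.View(wanted_layout, primitive, type);  // may rebase storage
//
//   FLAGS_caffe2_mkl_implicit_layout_change = false;
//   auto v2 = mkl_memory.View(wanted_layout, primitive, type);  // copies; storage kept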
17 
18 namespace caffe2 {
19 namespace mkl {
20 
21 template <typename T>
22 class PrimitiveWrapper {
23  public:
24  PrimitiveWrapper() {}
25  // Creates a primitive wrapper from an existing primitive. The wrapper
26  // takes over ownership.
27  explicit PrimitiveWrapper(dnnPrimitive_t primitive) : primitive_(primitive) {}
28 
29  template <typename Creator, typename FirstArg, typename... Args>
30  PrimitiveWrapper(Creator creator, FirstArg&& arg, Args&&... args) {
31  creator(&primitive_, arg, args...);
32  }
33 
34  ~PrimitiveWrapper() {
35  if (primitive_) {
36  MKLDNN_CHECK(dnnDelete<T>(primitive_));
37  }
38  }
39 
40  template <typename Creator, typename... Args>
41  void Reset(Creator creator, Args&&... args) {
42  if (primitive_) {
43  MKLDNN_SAFE_CALL(dnnDelete<T>(primitive_));
44  }
45  creator(&primitive_, args...);
46  }
47 
48  void Reset() {
49  if (primitive_) {
50  MKLDNN_SAFE_CALL(dnnDelete<T>(primitive_));
51  primitive_ = nullptr;
52  }
53  }
54 
55  operator dnnPrimitive_t() const {
56  return primitive_;
57  }
58 
59  private:
60  dnnPrimitive_t primitive_ = 0;
61  DISABLE_COPY_AND_ASSIGN(PrimitiveWrapper);
62 };
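// Usage sketch: wrapping a layout-conversion primitive, mirroring how
// MKLMemory uses PrimitiveWrapper below. src_layout and dst_layout are
// hypothetical dnnLayout_t handles.
//
//   PrimitiveWrapper<float> convert(
//       dnnConversionCreate<float>, src_layout, dst_layout);
//   // `convert` converts implicitly to dnnPrimitive_t, and dnnDelete<float>()
//   // is invoked automatically when it goes out of scope.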
63 
64 template <typename T>
65 class LayoutWrapper {
66  public:
67  LayoutWrapper() {}
68  // Create a user layout from a TensorCPU with the given shapes.
69  explicit LayoutWrapper(const TensorCPU& tensor) {
70  Reset(tensor);
71  }
72 
73  // Create an internal layout from the primitive and type.
74  LayoutWrapper(const dnnPrimitive_t primitive, const dnnResourceType_t type) {
75  Reset(primitive, type);
76  }
77 
78  // Create a user layout from the given dimension, size and strides.
79  LayoutWrapper(
80  const size_t dimension,
81  const size_t size[],
82  const size_t strides[]) {
83  Reset(dimension, size, strides);
84  }
85 
86  // Destructs the layout wrapper.
87  ~LayoutWrapper() {
88  if (layout_)
89  MKLDNN_CHECK(dnnLayoutDelete<T>(layout_));
90  }
91 
92  // Create a user layout from a TensorCPU with the given shapes.
93  void Reset(const TensorCPU& tensor) {
94  if (layout_)
95  MKLDNN_CHECK(dnnLayoutDelete<T>(layout_));
96  CAFFE_ENFORCE(tensor.size(), "Cannot reset with an empty tensor.");
97  size_t dimension = tensor.ndim();
98  vector<size_t> size(dimension);
99  vector<size_t> strides(dimension);
100  for (int i = 0; i < dimension; ++i) {
101  size[i] = tensor.dim(dimension - i - 1);
102  strides[i] = (i == 0) ? 1 : strides[i - 1] * size[i - 1];
103  }
104  MKLDNN_SAFE_CALL(dnnLayoutCreate<T>(&layout_, dimension, size.data(), strides.data()));
105  }
106 
107  // Create an internal layout from the primitive and type.
108  void Reset(const dnnPrimitive_t primitive, const dnnResourceType_t type) {
109  CAFFE_ENFORCE(primitive, "Cannot reset with an unknown primitive.");
110  CAFFE_ENFORCE(
111  type != dnnResourceNumber,
112  "Cannot reset with an unknown resource number.");
113  if (layout_) {
114  MKLDNN_CHECK(dnnLayoutDelete<T>(layout_));
115  }
116  MKLDNN_SAFE_CALL(
117  dnnLayoutCreateFromPrimitive<T>(&layout_, primitive, type));
118  }
119 
120  // Create a user layout from the given dimension, size and strides.
121  void
122  Reset(const size_t dimension, const size_t size[], const size_t strides[]) {
123  if (layout_)
124  MKLDNN_CHECK(dnnLayoutDelete<T>(layout_));
125  MKLDNN_SAFE_CALL(dnnLayoutCreate<T>(&layout_, dimension, size, strides));
126  }
127 
128  void Reset() {
129  if (layout_) {
130  MKLDNN_CHECK(dnnLayoutDelete<T>(layout_));
131  layout_ = nullptr;
132  }
133  }
134 
135  operator dnnLayout_t() const {
136  return layout_;
137  }
138 
139  private:
140  dnnLayout_t layout_ = 0;
141  DISABLE_COPY_AND_ASSIGN(LayoutWrapper);
142 };
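// Usage sketch: building a plain (user) layout from a TensorCPU and an
// internal layout from a primitive. conv_primitive is a hypothetical
// dnnPrimitive_t; dnnResourceSrc is one of MKL's standard resource types.
//
//   TensorCPU tensor(vector<TIndex>{2, 3, 4});
//   LayoutWrapper<float> user_layout(tensor);
//   LayoutWrapper<float> internal_layout(conv_primitive, dnnResourceSrc);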
143 
144 /**
145  * @brief A wrapper around an opaque MKL internal resource that has certain
146  * layouts and conversion primitives set up. It bridges Caffe2 TensorCPU
147  * storage (the plain "user" layout) and MKL's internal layouts, converting
148  * between the two on demand.
149  */
150 template <typename T>
151 class MKLMemory {
152  public:
153  // Initializes an empty MKLMemory.
154  MKLMemory() {}
155  // Initialize an MKLMemory with the given size, strides, dnn
156  // primitive and type.
157  MKLMemory(
158  const size_t dimension,
159  const size_t size[],
160  const size_t strides[],
161  const dnnPrimitive_t primitive = nullptr,
162  const dnnResourceType_t type = dnnResourceNumber,
163  bool share_mem_if_possible = false) {
164  Reset(dimension, size, strides, primitive, type, share_mem_if_possible);
165  }
166 
167  // Initialize an MKLMemory, with the given dimension assuming a C-contiguous
168  // storage.
169  template <typename IndexType>
170  explicit MKLMemory(
171  const vector<IndexType>& dims,
172  const dnnPrimitive_t primitive = nullptr,
173  const dnnResourceType_t type = dnnResourceNumber,
174  bool share_mem_if_possible = false) {
175  Reset(dims, primitive, type, share_mem_if_possible);
176  }
177 
178  // Initialize an MKLMemory with the given size, strides, dnn
179  // primitive and type.
180  void Reset(
181  const size_t dimension,
182  const size_t size[],
183  const size_t strides[],
184  const dnnPrimitive_t primitive = nullptr,
185  const dnnResourceType_t type = dnnResourceNumber,
186  bool share_mem_if_possible = false) {
187  buffer_.reset();
188  dims_.resize(dimension);
189  size_ = 1;
190  for (int i = 0; i < dimension; ++i) {
191  dims_[i] = size[dimension - 1 - i];
192  size_ *= dims_[i];
193  }
194  user_layout_.Reset(dimension, size, strides);
195  if (primitive) {
196  layout_.Reset(primitive, type);
197  } else {
198  layout_.Reset(dimension, size, strides);
199  }
200  convert_in_.Reset(dnnConversionCreate<T>, user_layout_, layout_);
201  convert_out_.Reset(dnnConversionCreate<T>, layout_, user_layout_);
202  share_mem_if_possible_ = share_mem_if_possible;
203  layout_is_user_layout_ = dnnLayoutCompare<T>(layout_, user_layout_);
204  VLOG(2) << "layout is user layout? " << layout_is_user_layout_;
205  if (!share_mem_if_possible_) {
206  // If we are not going to share memory, we will simply allocate
207  // memory upfront.
208  buffer();
209  }
210  }
211 
212  // Initialize an MKLMemory, with the given dimension assuming a C-contiguous
213  // storage.
214  template <typename IndexType>
215  void Reset(
216  const vector<IndexType>& dims,
217  const dnnPrimitive_t primitive = nullptr,
218  const dnnResourceType_t type = dnnResourceNumber,
219  bool share_mem_if_possible = false) {
220  buffer_.reset();
221  dims_.resize(dims.size());
222  size_ = 1;
223  for (int i = 0; i < dims.size(); ++i) {
224  dims_[i] = dims[i];
225  size_ *= dims_[i];
226  }
227  size_t dimension = dims.size();
228  vector<size_t> size(dimension);
229  vector<size_t> strides(dimension);
230  for (int i = 0; i < dimension; ++i) {
231  size[i] = dims[dimension - i - 1];
232  strides[i] = (i == 0) ? 1 : strides[i - 1] * size[i - 1];
233  }
234  user_layout_.Reset(dims.size(), size.data(), strides.data());
235  if (primitive) {
236  layout_.Reset(primitive, type);
237  } else {
238  layout_.Reset(dimension, size.data(), strides.data());
239  }
240  convert_in_.Reset(dnnConversionCreate<T>, user_layout_, layout_);
241  convert_out_.Reset(dnnConversionCreate<T>, layout_, user_layout_);
242  share_mem_if_possible_ = share_mem_if_possible;
243  layout_is_user_layout_ = dnnLayoutCompare<T>(layout_, user_layout_);
244  VLOG(2) << "layout is user layout? " << layout_is_user_layout_;
245  if (!share_mem_if_possible_) {
246  // If we are not going to share memory, we will simply allocate
247  // memory upfront.
248  buffer();
249  }
250  }
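// Usage sketch: resetting an MKLMemory so that its internal layout is the one
// a primitive expects for its output. conv_primitive is hypothetical.
//
//   MKLMemory<float> m;
//   m.Reset(vector<TIndex>{8, 64, 16, 16}, conv_primitive, dnnResourceDst,
//           /*share_mem_if_possible=*/false);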
251 
252  void Reset() {
253  buffer_.reset();
254  dims_.clear();
255  size_ = 0;
256  user_layout_.Reset();
257  layout_.Reset();
258  convert_in_.Reset();
259  convert_out_.Reset();
260  }
261 
262  /**
263   * Resizes the tensor without touching underlying storage.
264   * The new dimensions must multiply out to the same total size.
265   */
266  template <typename IndexType>
267  void Reshape(const vector<IndexType>& dims) {
268  CAFFE_ENFORCE(
269  layout_is_user_layout_,
270  "Reshape is not allowed for custom layouts. "
271  "Convert to plain layout before invoking Reshape().");
272 
273  TIndex new_size = 1;
274  for (auto i = 0; i < dims.size(); ++i) {
275  CAFFE_ENFORCE_GE_WITH_CALLER(dims[i], 0);
276  new_size *= dims[i];
277  }
278  CAFFE_ENFORCE_WITH_CALLER(
279  new_size == size_,
280  "New size and old size are not equal. Reshape is not possible.");
281 
282  vector<TIndex> new_dims(dims.size());
283  vector<size_t> size(dims.size());
284  vector<size_t> strides(dims.size());
285  for (int i = 0; i < dims.size(); ++i) {
286  new_dims[i] = dims[i];
287  size[i] = dims[dims.size() - i - 1];
288  strides[i] = (i == 0) ? 1 : strides[i - 1] * size[i - 1];
289  }
290  dims_ = new_dims;
291  user_layout_.Reset(dims.size(), size.data(), strides.data());
292  layout_.Reset(dims.size(), size.data(), strides.data());
293  convert_in_.Reset(dnnConversionCreate<T>, user_layout_, layout_);
294  convert_out_.Reset(dnnConversionCreate<T>, layout_, user_layout_);
295  }
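// Usage sketch: Reshape() only relabels the dimensions; the total number of
// items must stay the same and the layout must be the plain user layout.
//
//   MKLMemory<float> m(vector<TIndex>{2, 3, 4});
//   m.Reshape(vector<TIndex>{6, 4});   // fine: 2 * 3 * 4 == 6 * 4
//   // m.Reshape(vector<TIndex>{5, 5}) would fail the size check above.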
296 
297  // Destructs the MKLMemory.
298  ~MKLMemory() {}
299 
300  void CopyFrom(const void* ptr) {
301  if (share_mem_if_possible_ && layout_is_user_layout_) {
302  VLOG(2) << "Sharing underlying memory and skip copy.";
303  buffer_.reset(const_cast<void*>(ptr), [](void*) -> void {});
304  } else if (size_ == 0) {
305  VLOG(2) << "Cannot copy into empty MKL buffer.";
306  } else {
307  VLOG(2) << "Copying external content.";
308  MKLDNN_SAFE_CALL(dnnConversionExecute<T>(
309  convert_in_, const_cast<void*>(ptr), buffer()));
310  }
311  }
312 
313  void CopyFrom(const TensorCPU& tensor) {
314  CAFFE_ENFORCE_EQ(
315  tensor.dims(),
316  dims_,
317  "Dims do not match the expected dims of the resource.");
318  CopyFrom(tensor.template data<T>());
319  }
320 
321  void CopyFrom(const MKLMemory<T>& other) {
322  CAFFE_ENFORCE_EQ(
323  other.dims(),
324  dims_,
325  "Dims do not match the expected dims of the resource.");
326 
327  if (share_mem_if_possible_ && dnnLayoutCompare<T>(other.layout_, layout_)) {
328  buffer_ = other.buffer_;
329  } else if (size_ == 0) {
330  VLOG(2) << "Cannot copy between empty MKL buffers";
331  } else {
332  PrimitiveWrapper<T> convert(
333  dnnConversionCreate<T>, other.layout_, layout_);
334  MKLDNN_SAFE_CALL(
335  dnnConversionExecute<T>(convert, other.buffer(), buffer()));
336  }
337  }
338 
339  bool ShareFromRaw(const void* ptr) {
340  if (share_mem_if_possible_ && layout_is_user_layout_) {
341  buffer_.reset(const_cast<void*>(ptr), [](void*) -> void {});
342  return true;
343  } else {
344  return false;
345  }
346  }
347 
348  bool ShareFromTensor(const TensorCPU& tensor) {
349  CAFFE_ENFORCE_EQ(
350  tensor.dims(),
351  dims_,
352  "Dims do not match the expected dims of the resource.");
353  return ShareFromRaw(tensor.template data<T>());
354  }
355 
356  bool ShareFrom(const MKLMemory<T>& other) {
357  if (share_mem_if_possible_ && dnnLayoutCompare<T>(other.layout_, layout_)) {
358  VLOG(2) << "Sharing underlying memory.";
359  buffer_ = other.buffer_;
360  if (!buffer_.get()) {
361  VLOG(2) << "Warning: the source MKLMemory has no content yet, so the "
362  "sharing actually has no effect.";
363  }
364  return true;
365  } else {
366  VLOG(2) << "Not sharing underlying memory.";
367  return false;
368  }
369  }
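// Usage sketch: zero-copy sharing only happens when the memory was created
// with share_mem_if_possible = true and the layouts are compatible; otherwise
// these calls return false and a CopyFrom() is needed instead.
//
//   TensorCPU t(vector<TIndex>{4, 4});
//   t.mutable_data<float>();  // make sure the tensor owns storage
//   MKLMemory<float> m(t.dims(), nullptr, dnnResourceNumber,
//                      /*share_mem_if_possible=*/true);
//   bool shared = m.ShareFromTensor(t);  // true here: plain layouts match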
370 
371  void CopyTo(void* ptr) const {
372  if (buffer_.get() == ptr) {
373  // This is already mapping to the same memory region. Skip copy.
374  VLOG(2) << "CopyTo does not need actual copying, as we are sharing "
375  "memory with the output.";
376  return;
377  }
378  CAFFE_ENFORCE(
379  buffer_.get(), "Cannot copy out from an uninitialized MKLMemory.");
380  VLOG(2) << "Copy to external memory.";
381  MKLDNN_SAFE_CALL(dnnConversionExecute<T>(convert_out_, buffer_.get(), ptr));
382  }
383 
384  void CopyTo(TensorCPU* tensor) const {
385  if (tensor->size() > 0 && buffer_.get() == tensor->mutable_data<T>()) {
386  // This is already mapping to the same memory region. Skip copy.
387  VLOG(2) << "CopyTo does not need actual copying, as we are sharing "
388  "memory with the output.";
389  return;
390  }
391  tensor->Resize(dims_);
392  CopyTo(tensor->mutable_data<T>());
393  }
394 
395  // Copies to another MKL memory.
396  //
397  // This function resets `other` if its dims differ, then converts layouts.
398  void CopyTo(
399  MKLMemory<T>* other,
400  const dnnPrimitive_t primitive = nullptr,
401  const dnnResourceType_t type = dnnResourceNumber) {
402  if (buffer_ && buffer_.get() == other->buffer_.get()) {
403  CAFFE_ENFORCE(
404  dnnLayoutCompare<T>(other->layout_, layout_),
405  "MKLMemory layout does not match, despite in-place buffers");
406  CAFFE_ENFORCE(
407  other->dims() == dims(),
408  "MKLMemory dimensions do not match, despite in-place buffers");
409  VLOG(2) << "CopyTo does not need actual copying, as we are sharing "
410  "memory with the output.";
411  // This is already mapping to the same memory region. Skip copy.
412  return;
413  }
414  // TODO(jiayq): if primitive creation is a big overhead and we will be
415  // consistently copying stuff with fixed src and dst layouts, consider
416  // making a cache for the primitive below.
417  VLOG(2) << "CopyTo requires copying. Performing direct copy.";
418  if (dims() != other->dims()) {
419  other->Reset(dims(), primitive, type);
420  }
421  if (size_ == 0) {
422  VLOG(2) << "Cannot copy between empty MKL buffers.";
423  return;
424  }
425  CAFFE_ENFORCE(
426  buffer_.get(), "Cannot copy out from an uninitialized MKLMemory.");
427  PrimitiveWrapper<T> convert(
428  dnnConversionCreate<T>, layout_, other->layout_);
429  MKLDNN_SAFE_CALL(
430  dnnConversionExecute<T>(convert, buffer_.get(), other->buffer()));
431  }
432 
433  inline void* buffer() {
434  if (buffer_ == nullptr) {
435  CAFFE_ENFORCE(
436  layout_ != nullptr, "Trying to allocate buffer but layout is empty.");
437  if (size_ == 0) {
438  VLOG(2) << "Cannot allocate empty MKL buffer.";
439  return buffer_.get();
440  }
441  void* allocated = nullptr;
442  MKLDNN_SAFE_CALL(dnnAllocateBuffer<T>(&allocated, layout_));
443  buffer_.reset(allocated, [](void* ptr) -> void {
444  MKLDNN_CHECK(dnnReleaseBuffer<T>(ptr));
445  });
446  }
447  return buffer_.get();
448  }
449 
450  // MKLDNN does not use const void* even for the inputs, so we will
451  // have to use void* and rely on the underlying implementation to make
452  // sure that the buffer is actually not changed.
453  inline void* buffer() const {
454  CAFFE_ENFORCE(
455  buffer_ != nullptr, "Trying to refer to an unallocated buffer.");
456  return buffer_.get();
457  }
458 
459  inline const vector<TIndex>& dims() const {
460  return dims_;
461  }
462 
463  inline const int ndim() const { return dims_.size(); }
464 
465  inline int dim32(const int i) const {
466  CAFFE_ENFORCE_LT(dims_.at(i), std::numeric_limits<int>::max());
467  return static_cast<int>(dims_[i]);
468  }
469 
469 
470  /**
471   * Returns the size (i.e., the number of items) in the buffer.
472   */
473  inline TIndex size() const {
474  return size_;
475  }
476 
477  /**
478   * Returns the i-th dimension of the tensor.
479   *
480   * An out-of-range index throws, since checked access (dims_.at) is used.
481   */
482  inline TIndex dim(const int i) const {
483  return dims_.at(i);
484  }
485 
486  inline const LayoutWrapper<T>& layout() const {
487  return layout_;
488  }
489 
490  inline bool is_user_layout() const {
491  return layout_is_user_layout_;
492  }
493 
494  // Returns a view of the content. We mark this function const, but note
495  // that the returned std::shared_ptr is not const protected - user discretion
496  // is recommended for correctness.
497  std::shared_ptr<void> View(
498  dnnLayout_t layout_wanted,
499  dnnPrimitive_t primitive = nullptr,
500  dnnResourceType_t type = dnnResourceNumber) const {
501  std::lock_guard<std::mutex> lock(buffer_lock_);
502  if (dnnLayoutCompare<T>(layout_wanted, layout_)) {
503  // If they are the same, return the original content.
504  VLOG(2) << "Creating a view without the need of copying.";
505  return std::shared_ptr<void>(buffer_);
506  } else {
507  void* temp_buffer;
508  VLOG(2) << "Creating a view with copying.";
509  MKLDNN_SAFE_CALL(dnnAllocateBuffer<T>(&temp_buffer, layout_wanted));
510  PrimitiveWrapper<T> convert(
511  dnnConversionCreate<T>, layout_, layout_wanted);
512  MKLDNN_SAFE_CALL(dnnConversionExecute<T>(
513  convert, buffer_.get(), temp_buffer));
514  if (primitive && FLAGS_caffe2_mkl_implicit_layout_change) {
515  VLOG(2) << "Implicit layout change set. "
516  "Changing the underlying storage.";
517  // We will need to call Reset to set up all the member variables.
518  // This is not thread safe, so we might want to double check if this
519  // makes sense in actual use cases.
520  const_cast<MKLMemory<T>*>(this)->Reset(
521  dims_, primitive, type, share_mem_if_possible_);
522  CAFFE_ENFORCE(dnnLayoutCompare<T>(layout_wanted, layout_),
523  "You passed in a target layout that is not "
524  "generated by the given primitive and type.");
525  buffer_.reset(temp_buffer, [](void* ptr) -> void {
526  MKLDNN_CHECK(dnnReleaseBuffer<T>(ptr));
527  });
528  return std::shared_ptr<void>(buffer_);
529  } else {
530  return std::shared_ptr<void>(temp_buffer, [](void* ptr) -> void {
531  MKLDNN_CHECK(dnnReleaseBuffer<T>(ptr));
532  });
533  }
534  }
535  }
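// Usage sketch: asking for the content in the layout a primitive expects.
// m and conv_primitive are hypothetical. The returned shared_ptr either
// aliases m's buffer (layouts already match) or owns a converted copy.
//
//   LayoutWrapper<float> wanted(conv_primitive, dnnResourceSrc);
//   std::shared_ptr<void> input = m.View(wanted, conv_primitive, dnnResourceSrc);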
536 
537  private:
538  bool share_mem_if_possible_;
539  bool layout_is_user_layout_;
540  // The internal buffer in the specific dnn layout.
541  // It is marked mutable but any modification in a const function should
542  // be accompanied by the buffer lock, see the View() function.
543  mutable std::shared_ptr<void> buffer_;
544  // A mutex to control the access of buffer in the View() function.
545  mutable std::mutex buffer_lock_;
546  // The dimensions in the same order as Caffe2 does. This is used to
547  // interface with C2.
548  vector<TIndex> dims_;
549  // Number of items in the buffer.
550  TIndex size_ = -1;
551  // The user dnn layout.
552  LayoutWrapper<T> user_layout_;
553  // The internal dnn layout.
554  LayoutWrapper<T> layout_;
555  // The primitive to use to convert from user layout to internal layout
556  PrimitiveWrapper<T> convert_in_;
557  // The primitive to use to convert from internal layout to user layout
558  PrimitiveWrapper<T> convert_out_;
559 
560  DISABLE_COPY_AND_ASSIGN(MKLMemory);
561 };
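// Usage sketch: a typical round trip between Caffe2 tensors and MKLMemory.
// The dimensions are hypothetical.
//
//   TensorCPU src(vector<TIndex>{16, 3, 32, 32});
//   src.mutable_data<float>();            // allocate (and normally fill) the source
//   MKLMemory<float> buf(src.dims());     // plain layout, storage allocated upfront
//   buf.CopyFrom(src);                    // user layout -> internal layout
//   TensorCPU dst;
//   buf.CopyTo(&dst);                     // resizes dst and converts back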
562 
563 template <typename T>
564 class MKLWorkspace {
565  public:
566  MKLWorkspace(const LayoutWrapper<T>& layout) {
567  MKLDNN_SAFE_CALL(mkl::dnnAllocateBuffer<T>(&buffer_, layout));
568  }
569  ~MKLWorkspace() {
570  dnnReleaseBuffer<T>(buffer_);
571  }
572  T* buffer() {
573  return reinterpret_cast<T*>(buffer_);
574  }
575 
576  private:
577  void* buffer_;
578  DISABLE_COPY_AND_ASSIGN(MKLWorkspace);
579 };
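// Usage sketch: allocating the scratch workspace some primitives require.
// pool_primitive is hypothetical; dnnResourceWorkspace is MKL's resource type
// for workspace buffers.
//
//   LayoutWrapper<float> ws_layout(pool_primitive, dnnResourceWorkspace);
//   MKLWorkspace<float> workspace(ws_layout);
//   float* scratch = workspace.buffer();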
580 
581 } // namespace mkl
582 } // namespace caffe2
583 
584 #endif // CAFFE2_UTILS_MKL_MKL_MEMORY_H_