Caffe2 - C++ API
A deep learning, cross platform ML framework
math.h
1 #ifndef CAFFE2_UTILS_MATH_H_
2 #define CAFFE2_UTILS_MATH_H_
3 // This is a simple translation from the old Caffe math interfaces. We aim to
4 // still keep it simple, so all platforms would be able to support it fairly
5 // easily.
6 
7 // We include the cblas header here so that we can obtain the macros from cblas.
8 extern "C" {
9 #include "caffe2/utils/cblas.h"
10 }
11 
12 #ifdef CAFFE2_USE_ACCELERATE
13 #include <Accelerate/Accelerate.h>
14 #endif // CAFFE2_USE_ACCELERATE
15 
16 #include "caffe2/core/common.h"
17 #include "caffe2/core/types.h"
18 
19 #ifndef __CUDACC__
20 #include "Eigen/Core"
21 #include "Eigen/Dense"
22 #endif
23 
24 namespace caffe2 {
25 
// Forward declaration of the Tensor class template, so that signatures in this
// header can take Tensor<Context>* parameters.
template <typename Context> class Tensor;
28 
// Tag type meaning "no specific engine requested". It has no members; it is
// the default argument for the Engine template parameter of the Gemm-family
// declarations in this header.
class DefaultEngine {
};
32 
33 #ifndef __CUDACC__
34 // Common Eigen types that we will often use
35 template <typename T>
36 using EigenMatrixMap =
37  Eigen::Map<Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic> >;
38 template <typename T>
39 using EigenArrayMap =
40  Eigen::Map<Eigen::Array<T, Eigen::Dynamic, Eigen::Dynamic> >;
41 template <typename T>
42 using EigenVectorMap = Eigen::Map<Eigen::Matrix<T, Eigen::Dynamic, 1> >;
43 template <typename T>
44 using EigenVectorArrayMap = Eigen::Map<Eigen::Array<T, Eigen::Dynamic, 1> >;
45 template <typename T>
46 using ConstEigenMatrixMap =
47  Eigen::Map<const Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic> >;
48 template <typename T>
49 using ConstEigenArrayMap =
50  Eigen::Map<const Eigen::Array<T, Eigen::Dynamic, Eigen::Dynamic> >;
51 template <typename T>
52 using ConstEigenVectorMap =
53  Eigen::Map<const Eigen::Matrix<T, Eigen::Dynamic, 1> >;
54 template <typename T>
55 using ConstEigenVectorArrayMap =
56  Eigen::Map<const Eigen::Array<T, Eigen::Dynamic, 1> >;
57 #endif
58 
59 namespace math {
60 
// Elementwise unary operations over N values: each reads from x and writes to
// y (SinCos writes two outputs, ys and yc). Only the declarations live here.
// NOTE(review): the per-function formulas below are inferred from the function
// names; confirm them against the Context-specific definitions.

// Presumably y[i] = exp(x[i]).
template <typename T, class Context>
void Exp(
    const int N,
    const T* x,
    T* y,
    Context* context);

// Presumably y[i] = log(x[i]).
template <typename T, class Context>
void Log(
    const int N,
    const T* x,
    T* y,
    Context* context);

// Presumably y[i] = cos(x[i]).
template <typename T, class Context>
void Cos(
    const int N,
    const T* x,
    T* y,
    Context* context);

// Presumably y[i] = sin(x[i]).
template <typename T, class Context>
void Sin(
    const int N,
    const T* x,
    T* y,
    Context* context);

// Presumably ys[i] = sin(x[i]) and yc[i] = cos(x[i]).
template <typename T, class Context>
void SinCos(
    const int N,
    const T* x,
    T* ys,
    T* yc,
    Context* context);

// Presumably y[i] = |x[i]|.
template <typename T, class Context>
void Abs(
    const int N,
    const T* x,
    T* y,
    Context* context);

// Presumably y[i] = sqrt(x[i]).
template <typename T, class Context>
void Sqrt(
    const int N,
    const T* x,
    T* y,
    Context* context);

// Presumably y[i] = 1 / sqrt(x[i]).
template <typename T, class Context>
void InvSqrt(
    const int N,
    const T* x,
    T* y,
    Context* context);

// Presumably y[i] = x[i] * x[i].
template <typename T, class Context>
void Sqr(
    const int N,
    const T* x,
    T* y,
    Context* context);
79 
// Elementwise unary operation over N values, reading x and writing y.
// NOTE(review): presumably y[i] = !x[i]; the definition is not in this header.
template <typename T, class Context>
void Not(
    const int N,
    const T* x,
    T* y,
    Context* context);
82 
// Elementwise operation over N values with a vector input a, a scalar b and an
// output y.
// NOTE(review): presumably y[i] = a[i] raised to the power b; confirm against
// the definition.
template <typename T, class Context>
void Powx(
    const int N,
    const T* a,
    const T b,
    T* y,
    Context* context);
85 
// Declares two function templates for a binary operation with a bool output:
//   name(N, a, b, y, context)
//   name##ToRow(M, N, a, b, y, context)
// The macro is local to this header: it is #undef'd right after the
// declarations below.
// NOTE(review): presumably `name` works elementwise on N values and the ToRow
// form applies b against each row of an M x N input a; confirm against the
// definitions.
#define CAFFE2_DECLARE_BINARY_OP_BINARY_RESULT(name) \
  template <typename T, class Context> \
  void name(const int N, const T* a, const T* b, bool* y, Context* context); \
  template <typename T, class Context> \
  void name##ToRow( \
      const int M, const int N, const T* a, const T* b, bool* y, \
      Context* context);

// Comparisons: LT, LE, GT, GE (and their *ToRow forms).
CAFFE2_DECLARE_BINARY_OP_BINARY_RESULT(LT);
CAFFE2_DECLARE_BINARY_OP_BINARY_RESULT(LE);
CAFFE2_DECLARE_BINARY_OP_BINARY_RESULT(GT);
CAFFE2_DECLARE_BINARY_OP_BINARY_RESULT(GE);

// Logical operations: And, Or, Xor (and their *ToRow forms).
CAFFE2_DECLARE_BINARY_OP_BINARY_RESULT(And);
CAFFE2_DECLARE_BINARY_OP_BINARY_RESULT(Or);
CAFFE2_DECLARE_BINARY_OP_BINARY_RESULT(Xor);

#undef CAFFE2_DECLARE_BINARY_OP_BINARY_RESULT
108 
// Declares four function templates for a binary operation whose output has the
// same element type T as its inputs:
//   name(N, a, b, y, context)
//   name##ToRow(M, N, a, b, y, context)
//   name##ToRow(M, N, x, y, context)
//   name##ToCol(M, N, x, y, context)
// The macro is local to this header: it is #undef'd right after the
// declarations below.
// NOTE(review): presumably the five-argument ToRow/ToCol forms update the
// M x N buffer y in place using the vector x; confirm against the definitions.
#define CAFFE2_DECLARE_BINARY_OP(name) \
  template <typename T, class Context> \
  void name(const int N, const T* a, const T* b, T* y, Context* context); \
  template <typename T, class Context> \
  void name##ToRow( \
      const int M, const int N, const T* a, const T* b, T* y, \
      Context* context); \
  template <typename T, class Context> \
  void name##ToRow( \
      const int M, const int N, const T* x, T* y, Context* context); \
  template <typename T, class Context> \
  void name##ToCol( \
      const int M, const int N, const T* x, T* y, Context* context);

// Arithmetic operations: Add, Sub, Mul, Div (with *ToRow and *ToCol forms).
CAFFE2_DECLARE_BINARY_OP(Add);
CAFFE2_DECLARE_BINARY_OP(Sub);
CAFFE2_DECLARE_BINARY_OP(Mul);
CAFFE2_DECLARE_BINARY_OP(Div);

#undef CAFFE2_DECLARE_BINARY_OP
133 
134 template <typename T, class Context>
135 void ReduceMin(
136  const int N,
137  const T* x,
138  T* y,
139  Tensor<Context>* scratch_ptr,
140  Context* context);
141 template <typename T, class Context>
142 void ReduceMax(
143  const int N,
144  const T* x,
145  T* y,
146  Tensor<Context>* scratch_ptr,
147  Context* context);
148 
// Elementwise-adds a batch of sub-tensors into the output y.
//   N      - number of elements to add, i.e. the size of y
//   first  - input data
//   stripe - the stripe length
//   batch  - NOTE(review): presumably the number of sub-tensors; confirm
template <typename T, class Context>
void AddStripedBatch(
    const int N, const T* first, T* y,
    const int stripe, const int batch,
    Context* context);
159 
// For an N*D matrix x, computes the maximum of every row and stores the
// results in the N-dimensional vector y.
template <typename T, class Context>
void RowwiseMax(
    const int N,
    const int D,
    const T* x,
    T* y,
    Context* context);
165 
// For an N*D matrix x, computes the maximum of every column and stores the
// results in the D-dimensional vector y.
template <typename T, class Context>
void ColwiseMax(
    const int N,
    const int D,
    const T* x,
    T* y,
    Context* context);
171 
// Elementwise maximum of two vectors: z[i] = max(x[i], y[i]).
template <typename T, class Context>
void ElemwiseMax(
    const int N,
    const T* x,
    const T* y,
    T* z,
    Context* context);
175 
// Elementwise maximum of a vector and a scalar: y[i] = max(x[i], alpha).
// Note that alpha is always a float, independent of T.
template <typename T, class Context>
void Maximum(
    const int N, const float alpha, const T* x, T* y, Context* context);
184 
// Transposes tensor X, whose dimensions are x_dims, according to axes and
// writes the result into tensor Y, whose dimensions are y_dims.
// NOTE(review): presumably x_dims, y_dims and axes each hold num_axes entries
// and data_size is the total element count; confirm against the definition.
template <typename T, class Context>
void Transpose(
    const int num_axes,
    const int* x_dims, const int* y_dims, const int* axes,
    const int data_size,
    const T* X, T* Y,
    Context* context);
197 
198 // Decaf gemm provides a simpler interface to the gemm functions, with the
199 // limitation that the data has to be contiguous in memory.
200 template <typename T, class Context, class Engine = DefaultEngine>
201 void Gemm(
202  const CBLAS_TRANSPOSE TransA,
203  const CBLAS_TRANSPOSE TransB,
204  const int M,
205  const int N,
206  const int K,
207  const float alpha,
208  const T* A,
209  const T* B,
210  const float beta,
211  T* C,
212  Context* context,
213  TensorProto::DataType math_type = TensorProto_DataType_FLOAT);
214 
215 // We also provide a gemm that has explicit lda, ldb and ldc specified.
216 // In most cases you probably want to use the function above, though.
217 template <typename T, class Context, class Engine = DefaultEngine>
218 void GemmEx(
219  const CBLAS_TRANSPOSE TransA,
220  const CBLAS_TRANSPOSE TransB,
221  const int M,
222  const int N,
223  const int K,
224  const T alpha,
225  const T* A,
226  const int lda,
227  const T* B,
228  const int ldb,
229  const T beta,
230  T* C,
231  const int ldc,
232  Context* context);
233 
234 // GemmBatched provides a simple abstraction into library routines
235 template <typename T, class Context, class Engine = DefaultEngine>
236 void GemmBatched(
237  const CBLAS_TRANSPOSE TransA,
238  const CBLAS_TRANSPOSE TransB,
239  const int batch_size,
240  const int M,
241  const int N,
242  const int K,
243  const float alpha,
244  const T* A,
245  const T* B,
246  const float beta,
247  T* C,
248  Context* context,
249  Tensor<Context>* scratch = nullptr,
250  TensorProto::DataType math_type = TensorProto_DataType_FLOAT);
251 
252 // Gemv always takes in a M*N matrix A, and depending on whether we set TransA
253 // to Trans, the output is:
254 // CblasNoTrans: x is an N dim vector and y is an M dim vector.
255 // CblasTrans: x is an M dim vector and y is an N dim vector.
256 template <typename T, class Context, class Engine = DefaultEngine>
257 void Gemv(
258  const CBLAS_TRANSPOSE TransA,
259  const int M,
260  const int N,
261  const float alpha,
262  const T* A,
263  const T* x,
264  const float beta,
265  T* y,
266  Context* context,
267  TensorProto::DataType math_type = TensorProto_DataType_FLOAT);
268 
// Takes a count N, a scalar alpha and an output buffer X.
// NOTE(review): presumably fills X[0..N) with alpha; the definition is not in
// this header.
template <typename T, class Context>
void Set(
    const size_t N,
    const T alpha,
    T* X,
    Context* context);
271 
// Takes a count n, two bounds a and b, and an output buffer r.
// NOTE(review): presumably fills r[0..n) with uniform random values drawn
// between a and b; whether the bounds are inclusive is not visible here.
template <typename T, class Context>
void RandUniform(
    const size_t n,
    const T a,
    const T b,
    T* r,
    Context* context);
274 
// Like RandUniform, but additionally takes a list `avoid` with m entries.
// NOTE(review): presumably writes n distinct values between a and b to r, none
// of which appear in avoid[0..m); confirm against the definition.
template <typename T, class Context>
void RandUniformUnique(
    const size_t n, const T a, const T b, T* r,
    const size_t m, const T* avoid,
    Context* context);
284 
// Takes a count n, a mean, a standard deviation std and an output buffer r.
// NOTE(review): presumably fills r[0..n) with normally distributed samples;
// the definition is not in this header.
template <typename T, class Context>
void RandGaussian(
    const size_t n, const T mean, const T std, T* r, Context* context);
292 
// Dot product of the N-element vectors a and b; the result is written to the
// single value y points to.
template <typename T, class Context>
void Dot(
    const int N,
    const T* a,
    const T* b,
    T* y,
    Context* context);
296 
297 // Sum of vector x, and writes the result to a single value y.
298 template <typename T, class Context>
299 void Sum(const int N, const T* x, T* y, Context* context,
300  Tensor<Context>* scratch_ptr = nullptr);
301 
302 // Sum of squares of vector x, and writes the result to a single value y.
303 template <typename T, class Context>
304 void SumSqr(
305  const int N,
306  const T* x,
307  T* y,
308  Context* context,
309  Tensor<Context>* scratch_ptr = nullptr);
310 
// Index selection over the rows of an N*D matrix x: produces the N-dimensional
// vector y holding the selected data, using the indices in idx.
// NOTE(review): presumably y[i] = x[i * D + idx[i]]; confirm against the
// definition.
template <typename T, class Context>
void Select(
    const int N,
    const int D,
    const T* x,
    const int* idx,
    T* y,
    Context* context);
316 
// Scale with the factor alpha passed by value.
// NOTE(review): presumably y[i] = alpha * x[i] for i in [0, N); the definition
// is not in this header.
template <typename T, class Context>
void Scale(
    const int N,
    const float alpha,
    const T* x,
    T* y,
    Context* context);

// Scale with the factor alpha passed by pointer. In contrast to the by-value
// overload, a pointer alpha is assumed to live on the Context device (for
// example on the GPU).
template <typename T, class Context>
void Scale(
    const int N,
    const float* alpha,
    const T* x,
    T* y,
    Context* context);
325 
// Axpy with the factor alpha passed by value.
// NOTE(review): presumably y[i] += alpha * x[i] for i in [0, N), as in BLAS
// axpy; the definition is not in this header.
template <typename T, class Context>
void Axpy(
    const int N,
    const float alpha,
    const T* x,
    T* y,
    Context* context);

// Axpy with the factor alpha passed by pointer. In contrast to the by-value
// overload, a pointer alpha is assumed to live on the Context device (for
// example on the GPU).
template <typename T, class Context>
void Axpy(
    const int N,
    const float* alpha,
    const T* x,
    T* y,
    Context* context);
334 
// Takes a float factor alpha for x and a factor b of type T, plus the
// input x and the in/out buffer y.
// NOTE(review): presumably y[i] = alpha * x[i] + b * y[i] for i in [0, N);
// confirm against the definition.
template <typename T, class Context>
void Axpby(
    const int N, const float alpha, const T* x, const T b, T* y,
    Context* context);
343 
// N-dimensional image-to-column declaration: reads data_img and writes
// data_col. Shapes, kernel, stride, dilation and padding are passed as int
// arrays; accumulate_output defaults to false.
// NOTE(review): presumably N is the number of spatial dimensions (the length
// of the per-axis arrays) and `order` selects the storage order; confirm
// against the definitions.
template <typename T, class Context, int order>
void Im2colNd(
    const T* data_img,
    const int* im_shape, const int* col_shape,
    const int img_size, const int col_size,
    const int* kernel_shape, const int* stride,
    const int* dilation, const int* pad,
    const int N,
    T* data_col,
    Context* context,
    bool accumulate_output = false);
359 
// N-dimensional column-to-image declaration: reads data_col and writes
// data_img. Shapes, kernel, stride, dilation and padding are passed as int
// arrays.
// NOTE(review): presumably the inverse mapping of Im2colNd, with N the number
// of spatial dimensions and `order` selecting the storage order; confirm
// against the definitions.
template <typename T, class Context, int order>
void Col2imNd(
    const T* data_col,
    const int* img_shape, const int* col_shape,
    const int img_size, const int col_size,
    const int* kernel_shape, const int* stride,
    const int* dilation, const int* pad,
    const int N,
    T* data_img,
    Context* context);
374 
// 2-D image-to-column declaration: reads data_im and writes data_col. The
// image is described by channels/height/width; the kernel, dilation and stride
// are given per axis (h, w), and padding per side (top, left, bottom, right).
// NOTE(review): `order` presumably selects the storage order of the image;
// confirm against the definitions.
template <typename T, class Context, int order>
void Im2col(
    const T* data_im,
    const int channels, const int height, const int width,
    const int kernel_h, const int kernel_w,
    const int dilation_h, const int dilation_w,
    const int pad_t, const int pad_l, const int pad_b, const int pad_r,
    const int stride_h, const int stride_w,
    T* data_col,
    Context* context);
393 
// 2-D column-to-image declaration: reads data_col and writes data_im. The
// image is described by channels/height/width; the patch, dilation and stride
// are given per axis (h, w), and padding per side (top, left, bottom, right).
// NOTE(review): presumably the inverse mapping of Im2col, with `order`
// selecting the storage order; confirm against the definitions.
template <typename T, class Context, int order>
void Col2im(
    const T* data_col,
    const int channels, const int height, const int width,
    const int patch_h, const int patch_w,
    const int dilation_h, const int dilation_w,
    const int pad_t, const int pad_l, const int pad_b, const int pad_r,
    const int stride_h, const int stride_w,
    T* data_im,
    Context* context);
412 
// Applies one bias value per channel to every element of that channel in the
// input image.
//   bias          - the per-channel bias values
//   bias_channels - NOTE(review): presumably the number of channels; confirm
//   image_size    - H * W
//   image         - the image buffer, passed as a non-const pointer
template <typename T, class Context>
void BiasCHW(
    const T* bias, const int bias_channels, const int image_size, T* image,
    Context* context);
422 
423 template <class Context>
424 void CopyMatrix(
425  const size_t item_size,
426  const int M,
427  const int N,
428  const void* A,
429  const int lda,
430  void* B,
431  const int ldb,
432  Context* context,
433  TypeMeta::TypedCopy copy = nullptr);
434 
// Takes a count N, a source A and a destination B.
// NOTE(review): presumably copies A[0..N) into B; the definition is not in
// this header.
template <typename T, class Context>
void CopyVector(
    const int N,
    const T* A,
    T* B,
    Context* context);
437 
// Range check "0 <= a && a < b" done with a single comparison.
// Both operands are converted to unsigned first. b is a signed value that is
// expected to be positive, so after conversion it stays below 0x800...; a
// negative a, on the other hand, converts to a value above 0x800... and
// therefore can never compare less than b. One unsigned comparison thus
// replaces the two signed ones.
inline bool is_a_ge_zero_and_a_lt_b(int a, int b) {
  const unsigned wrapped_a = static_cast<unsigned>(a);
  const unsigned upper_bound = static_cast<unsigned>(b);
  return wrapped_a < upper_bound;
}
449 
// Ceiling division: returns ceil(a / b), computed as (a + b - 1) / b.
// The intermediate sum can overflow or underflow; it is the caller's job to
// pick arguments for which it does not.
template <typename T>
constexpr T divUp(T a, T b) {
  return (a + b - static_cast<T>(1)) / b;
}
456 
457 // Rounds a up to the next highest multiple of b. User must be careful
458 // to ensure that there is no overflow or underflow in the calculation
459 // of divUp.
460 template <typename T>
461 constexpr T roundUp(T a, T b) {
462  return divUp<T>(a, b) * b;
463 }
464 
// Returns floor(log2(n)) for a positive integer n.
// Implemented as a constexpr recursion: n is halved until it is <= 1 and p
// counts the halvings. For n <= 1 (including zero and negative values) the
// accumulator p is returned unchanged, i.e. 0 when called with the default.
//   n - the value to take the logarithm of
//   p - running result; callers normally leave it at its default of 0
template <typename T>
constexpr int integerLog2(T n, int p = 0) {
  return (n <= 1) ? p : integerLog2(n / 2, p + 1);
}

// Returns true if v is a power of two. Only positive values qualify: zero and
// negative inputs yield false.
// A power of two has exactly one bit set, so clearing the lowest set bit with
// v & (v - 1) must leave zero.
template <typename T>
constexpr bool integerIsPowerOf2(T v) {
  return (v > 0) && ((v & (v - 1)) == 0);
}

// Returns the next highest power of two for an integer type, i.e. the smallest
// power of two strictly greater than v (for v >= 1): 2 * v when v already is a
// power of two, otherwise 1 << (floor(log2(v)) + 1). For v <= 0 the result is
// 2. The caller must make sure the result fits into T.
// integerIsPowerOf2 is defined directly above because this function calls it;
// without a visible declaration every instantiation fails to compile.
template <typename T>
constexpr T integerNextHighestPowerOf2(T v) {
  return integerIsPowerOf2(v)
      ? static_cast<T>(2) * v
      : (static_cast<T>(1) << (integerLog2(v) + 1));
}
476 
477 } // namespace math
478 } // namespace caffe2
479 
480 #include "caffe2/utils/math-detail.h"
481 #endif // CAFFE2_UTILS_MATH_H_
Definition: types.h:72
Tensor is the basic class in Caffe2 that stores a contiguous memory with its shape information...
Definition: tensor.h:93
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...