Caffe2 - C++ API
A deep learning, cross-platform ML framework
pow_op.cc
#include "caffe2/operators/pow_op.h"
#include "caffe2/utils/math.h"
// NumericTypes and SameTypeAsInput are defined in the header file below:
//#include "caffe2/operators/elementwise_op.h"
#include <cmath> // for std::fabs in the gradient maker below
#include <Eigen/Core>

namespace caffe2 {

#define EIGEN_POW(x, y) (x.pow(y))

struct EigenPowFunctor {
  template <int b_is_scalar, typename T1, typename T2, typename R>
  inline void
  Run(size_t n, const T1* a, const T2* b, T2 e, R* out, CPUContext*) {
    if (b == NULL) {
      // Exponent passed by value as a scalar argument: out = a^e.
      EigenVectorArrayMap<R>(out, n) =
          EIGEN_POW((ConstEigenVectorArrayMap<T1>(a, n)), (e));
    } else {
      if (b_is_scalar) {
        // Exponent given as a one-element tensor: out = a^b[0].
        EigenVectorArrayMap<R>(out, n) =
            EIGEN_POW((ConstEigenVectorArrayMap<T1>(a, n)), (b[0]));
      } else {
        // Elementwise exponent tensor of the same length: out[i] = a[i]^b[i].
        EigenVectorArrayMap<R>(out, n) = EIGEN_POW(
            (ConstEigenVectorArrayMap<T1>(a, n)),
            (ConstEigenVectorArrayMap<T2>(b, n)));
      }
    }
  }
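  // Illustrative scalar-loop equivalent of the three cases above:
  //   out[i] = std::pow(a[i], b == NULL ? e : (b_is_scalar ? b[0] : b[i]));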
  template <typename T1, typename T2, typename R>
  void RunWithBroadcast(
      const T1* a,
      const T2* b,
      R* out,
      size_t pre,
      size_t n,
      CPUContext*) {
    // 'a' is viewed as an (n x pre) column-major array, so each length-n
    // block is raised elementwise to the n exponents in 'b'.
    EigenArrayMap<R>(out, n, pre) = EIGEN_POW(
        (ConstEigenArrayMap<T1>(a, n, pre)),
        (ConstEigenVectorArrayMap<T2>(b, n)).rowwise().replicate(pre));
    /*
    // The colwise() form below only allows elementary ops such as +, -, *
    // and /, not operations such as pow, exp and log:
    EIGEN_POW(
        (ConstEigenArrayMap<T>(a, n, pre).colwise()),
        (ConstEigenVectorArrayMap<T>(b, n)));
    */
  }
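  // Illustrative loop form of the broadcast above, assuming 'a' stores
  // 'pre' contiguous blocks of 'n' elements each:
  //   for (size_t i = 0; i < pre; ++i)
  //     for (size_t j = 0; j < n; ++j)
  //       out[i * n + j] = std::pow(a[i * n + j], b[j]);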
  template <typename T1, typename T2, typename R>
  void RunWithBroadcast2(
      const T1* a,
      const T2* b,
      R* out,
      size_t pre,
      size_t n,
      size_t post,
      CPUContext*) {
    // Broadcast along a middle axis: each (post x n) block of 'a' is raised
    // columnwise to the n exponents in 'b'.
    for (size_t i = 0; i < pre; ++i) {
      EigenArrayMap<R>(out + i * n * post, post, n) = EIGEN_POW(
          (ConstEigenArrayMap<T1>(a + i * n * post, post, n)),
          (Eigen::Map<const Eigen::Array<T2, 1, Eigen::Dynamic>>(b, n))
              .colwise()
              .replicate(post));
      /*
      // The rowwise() form below only allows elementary ops such as +, -, *
      // and /, not operations such as pow, exp and log:
      EIGEN_POW(
          (ConstEigenArrayMap<T>(a + i * n * post, post, n).rowwise()),
          (Eigen::Map<const Eigen::Array<T, 1, Eigen::Dynamic>>(b, n)));
      */
    }
  }
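  // Illustrative loop form, with 'b' broadcast along both 'pre' and 'post':
  //   for (size_t i = 0; i < pre; ++i)
  //     for (size_t j = 0; j < n; ++j)
  //       for (size_t k = 0; k < post; ++k)
  //         out[(i * n + j) * post + k] =
  //             std::pow(a[(i * n + j) * post + k], b[j]);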
};
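For readers new to the Eigen mapping idiom the functor relies on, here is a minimal, self-contained sketch of the same pattern. The buffer names and values are invented for the example, and it assumes an Eigen version (3.3+) whose array .pow() accepts array-valued exponents, as the code above already requires:

#include <Eigen/Core>
#include <cstdio>

int main() {
  float a[4] = {1.f, 2.f, 3.f, 4.f};
  float b[4] = {2.f, 2.f, 0.5f, 3.f};
  float out[4];
  // Map the raw buffers as Eigen arrays and apply .pow() elementwise,
  // which is what EIGEN_POW expands to inside Run().
  Eigen::Map<Eigen::ArrayXf>(out, 4) =
      Eigen::Map<const Eigen::ArrayXf>(a, 4).pow(
          Eigen::Map<const Eigen::ArrayXf>(b, 4));
  for (float v : out) {
    std::printf("%g\n", v); // prints 1, 4, 1.73205 and 64
  }
  return 0;
}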

REGISTER_CPU_OPERATOR(
    Pow,
    PowOp<
        TensorTypes<float> /*NumericTypes*/,
        CPUContext,
        EigenPowFunctor,
        SameTypeAsInput>);

OPERATOR_SCHEMA(Pow)
    .NumInputs(1, 2)
    .NumOutputs(1)
    .Arg("exponent", "The exponent of the power function.")
    .AllowInplace({{0, 0}, {1, 0}})
    .IdenticalTypeAndShapeOfInput(0)
    .SetDoc(R"DOC(
Pow takes input data (Tensor<T>) and an argument exponent, which can be a
scalar or another tensor. It produces one output tensor (Tensor<T>), in which
the function `f(x) = x^exponent` is applied to the input elementwise.
)DOC")
    .Input(0, "X", "Input tensor of any shape")
    .Input(1, "exponent", "The exponent of the power function.")
    .Output(0, "Y", "Output tensor (same size as X)");
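A minimal usage sketch of the operator as registered above, in its scalar-exponent form. It assumes the standalone Caffe2 C++ API (Workspace, TensorCPU, and the proto helpers already used in this file); the tensor accessors changed across Caffe2 versions, so treat this as illustrative rather than canonical:

#include "caffe2/core/operator.h"
#include "caffe2/core/workspace.h"
#include "caffe2/utils/proto_utils.h"

void RunPowExample() {
  caffe2::Workspace ws;
  // Fill the input blob X = {1, 2, 3}.
  auto* t = ws.CreateBlob("X")->GetMutable<caffe2::TensorCPU>();
  t->Resize(3);
  float* x = t->mutable_data<float>();
  x[0] = 1.f;
  x[1] = 2.f;
  x[2] = 3.f;
  // Scalar-exponent form: Y = X^2, driven by the "exponent" argument.
  caffe2::OperatorDef def = caffe2::CreateOperatorDef(
      "Pow",
      "",
      std::vector<std::string>{"X"},
      std::vector<std::string>{"Y"},
      std::vector<caffe2::Argument>{
          caffe2::MakeArgument<float>("exponent", 2.f)});
  ws.RunOperatorOnce(def); // Y now holds {1, 4, 9}
}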

class GetPowGradient : public GradientMakerBase {
  using GradientMakerBase::GradientMakerBase;
  vector<OperatorDef> GetGradientDefs() override {
    ArgumentHelper arg_helper(def_);
    if (arg_helper.HasArgument("exponent")) { // exponent is a scalar argument
      // function f(w,a) = w^a
      // gradient operator with respect to the first input tensor:
      // df/dw = a * w^(a-1) (all operations are component-wise)
      float exponent = arg_helper.GetSingleArgument<float>("exponent", 0.0);
      Argument scale_arg;
      scale_arg.set_name("scale");
      scale_arg.set_f(exponent);
      Argument pow_arg;
      pow_arg.set_name("exponent");
      if (I(0) != O(0)) {
        pow_arg.set_f(exponent - 1);
      } else {
        LOG(WARNING) << "In-place Pow gradient, possible loss of precision";
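        // In-place case: I(0) now holds O(0) = w^a, so w^(a-1) has to be
        // recovered as (w^a)^((a-1)/a); hence the division by the
        // (necessarily nonzero) exponent below.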
        constexpr float kEps = 1e-12f;
        CAFFE_ENFORCE(std::fabs(exponent) > kEps);
        pow_arg.set_f((exponent - 1) / exponent);
      }
      return vector<OperatorDef>{CreateOperatorDef(
                                     "Pow",
                                     "",
                                     std::vector<string>{I(0)},
                                     std::vector<string>{GI(0)},
                                     std::vector<Argument>{pow_arg}),
                                 CreateOperatorDef(
                                     "Mul",
                                     "",
                                     std::vector<string>{GI(0), GO(0)},
                                     std::vector<string>{GI(0)}),
                                 CreateOperatorDef(
                                     "Scale",
                                     "",
                                     std::vector<string>{GI(0)},
                                     std::vector<string>{GI(0)},
                                     std::vector<Argument>{scale_arg})};
      /*
      // Alternative gradient computation: df/dw = a * w^a / w = a * O(0)/I(0)
      return vector<OperatorDef>{CreateOperatorDef(
                                     "Div",
                                     "",
                                     std::vector<string>{O(0), I(0)},
                                     std::vector<string>{GI(0)}),
                                 CreateOperatorDef(
                                     "Mul",
                                     "",
                                     std::vector<string>{GI(0), GO(0)},
                                     std::vector<string>{GI(0)}),
                                 CreateOperatorDef(
                                     "Scale",
                                     "",
                                     std::vector<string>{GI(0)},
                                     std::vector<string>{GI(0)},
                                     std::vector<Argument>{scale_arg})};
      */
    } else { // second input is a tensor
      CAFFE_ENFORCE(
          Def().input(0) != Def().output(0) &&
              Def().input(1) != Def().output(0),
          "Gradient computation cannot be carried out if Pow uses in-place "
          "computation: ",
          ProtoDebugString(Def()));
      vector<OperatorDef> grad_ops;
      Argument one_arg;
      one_arg.set_name("value");
      one_arg.set_f(1);
      Argument broadcast, axis, axis_str, order;
      bool bflag = ArgumentHelper::HasArgument(Def(), "broadcast");

      if (bflag) {
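        // Note: bflag is exactly HasArgument(Def(), "broadcast"), so the
        // check just below always takes its first branch; it is kept for
        // symmetry with the other argument lookups.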
        if (ArgumentHelper::HasArgument(Def(), "broadcast")) {
          broadcast = GetArgument(Def(), "broadcast");
        } else {
          broadcast = MakeArgument<int>("broadcast", 0);
        }
        if (ArgumentHelper::HasArgument(Def(), "axis")) {
          axis = GetArgument(Def(), "axis");
        } else {
          axis = MakeArgument<int>("axis", -1);
        }
        if (ArgumentHelper::HasArgument(Def(), "axis_str")) {
          axis_str = GetArgument(Def(), "axis_str");
        } else {
          axis_str = MakeArgument<string>("axis_str", "");
        }
        if (ArgumentHelper::HasArgument(Def(), "order")) {
          order = GetArgument(Def(), "order");
        } else {
          order = MakeArgument<string>("order", "NCHW");
        }
      }

      // function f(w,a) = w^a
      // gradient operator with respect to the first input tensor:
      // df/dw = a * w^(a-1) (all operations are component-wise)
      // GI(1) is used as scratch: first filled with ones, then set to a - 1.
      grad_ops.push_back(CreateOperatorDef(
          "ConstantFill",
          "",
          std::vector<string>{I(1)},
          std::vector<string>{GI(1)},
          std::vector<Argument>{one_arg}));
      grad_ops.push_back(CreateOperatorDef(
          "Sub",
          "",
          std::vector<string>{I(1), GI(1)},
          std::vector<string>{GI(1)}));
      // GI(0) = w^(a-1)
      if (bflag) {
        grad_ops.push_back(CreateOperatorDef(
            "Pow",
            "",
            std::vector<string>{I(0), GI(1)},
            std::vector<string>{GI(0)},
            vector<Argument>{broadcast, axis, axis_str, order}));
      } else {
        grad_ops.push_back(CreateOperatorDef(
            "Pow",
            "",
            std::vector<string>{I(0), GI(1)},
            std::vector<string>{GI(0)}));
      }

      // GI(0) *= GO(0), then GI(0) *= a
      grad_ops.push_back(CreateOperatorDef(
          "Mul",
          "",
          std::vector<string>{GI(0), GO(0)},
          std::vector<string>{GI(0)}));
      if (bflag) {
        grad_ops.push_back(CreateOperatorDef(
            "Mul",
            "",
            std::vector<string>{GI(0), I(1)},
            std::vector<string>{GI(0)},
            vector<Argument>{broadcast, axis, axis_str, order}));
      } else {
        grad_ops.push_back(CreateOperatorDef(
            "Mul",
            "",
            std::vector<string>{GI(0), I(1)},
            std::vector<string>{GI(0)}));
      }
      /*
      // Alternative gradient computation (no broadcast support):
      // df/dw = a * w^a / w, i.e. I(1) * O(0) / I(0)
      grad_ops.push_back(CreateOperatorDef(
          "Div",
          "",
          std::vector<string>{O(0), I(0)},
          std::vector<string>{GI(0)}));
      grad_ops.push_back(CreateOperatorDef(
          "Mul",
          "",
          std::vector<string>{GI(0), GO(0)},
          std::vector<string>{GI(0)}));
      grad_ops.push_back(CreateOperatorDef(
          "Mul",
          "",
          std::vector<string>{GI(0), I(1)},
          std::vector<string>{GI(0)}));
      */
      // gradient operator with respect to the second input tensor:
      // df/da = w^a * ln w (all operations are component-wise)
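      // Since the in-place case was ruled out above, O(0) = w^a is still
      // available: df/da is assembled as Log(I(0)) * O(0) * GO(0) and, when
      // broadcasting, summed back down to the shape of I(1).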
      /*
      // reset GI(1) to zero
      Argument zero_arg;
      zero_arg.set_name("value");
      zero_arg.set_f(0);
      grad_ops.push_back(CreateOperatorDef(
          "ConstantFill",
          "",
          std::vector<string>{I(1)},
          std::vector<string>{GI(1)},
          std::vector<Argument>{zero_arg}));
      */
      grad_ops.push_back(CreateOperatorDef(
          "Log",
          "",
          std::vector<string>{I(0)},
          std::vector<string>{GI(1) + "_autogen_pre_red"}));
      grad_ops.push_back(CreateOperatorDef(
          "Mul",
          "",
          std::vector<string>{GI(1) + "_autogen_pre_red", O(0)},
          std::vector<string>{GI(1) + "_autogen_pre_red"}));
      if (bflag) {
        grad_ops.push_back(CreateOperatorDef(
            "Mul",
            "",
            std::vector<string>{GI(1) + "_autogen_pre_red", GO(0)},
            std::vector<string>{GI(1) + "_autogen_pre_red"}));
        grad_ops.push_back(CreateOperatorDef(
            "SumReduceLike",
            "",
            vector<string>{GI(1) + "_autogen_pre_red", I(1)},
            vector<string>{GI(1)},
            vector<Argument>{axis, axis_str, order}));
      } else {
        grad_ops.push_back(CreateOperatorDef(
            "Mul",
            "",
            std::vector<string>{GI(1) + "_autogen_pre_red", GO(0)},
            std::vector<string>{GI(1)}));
      }

      return grad_ops;
    }
  }

  // Argument `shape` is no longer needed in backprop.
  bool CopyArguments() const override {
    return false;
  }
};

REGISTER_GRADIENT(Pow, GetPowGradient);

} // namespace caffe2