1 #include "caffe2/operators/softmax_op.h" 2 #include "caffe2/operators/softmax_shared.h" 8 bool SoftmaxOp<float, CPUContext>::RunOnDevice() {
  const auto canonical_axis = X.canonical_axis_index(axis_);
  const int N = X.size_to_dim(canonical_axis);
  const int D = X.size_from_dim(canonical_axis);
  Y->ResizeLike(X);
  float* Ydata = Y->mutable_data<float>();
  // Resize the scratch buffers when the batch or feature size changes.
  if (scale_.size() != N) {
    scale_.Resize(N);
  }
  if (rowmax_.size() != N) {
    rowmax_.Resize(N);
  }
  if (sum_multiplier_.size() != D) {
    sum_multiplier_.Resize(D);
    math::Set<float, CPUContext>(
        D, 1.f, sum_multiplier_.mutable_data<float>(), &context_);
  }
  // SoftmaxCPU is the shared helper declared in softmax_shared.h; the `false`
  // flag selects the plain (non-logarithmic) softmax.
  SoftmaxCPU(
      context_,
      N,
      D,
      X.data<float>(),
      Ydata,
      scale_.mutable_data<float>(),
      sum_multiplier_.data<float>(),
      false,
      rowmax_.mutable_data<float>());
  return true;
}
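
// For reference, a naive equivalent of the non-logarithmic path computed by
// SoftmaxCPU (a sketch, assuming rowmax_ holds the per-row maxima used for
// numerical stability, and writing Xdata for X.data<float>()):
//
//   for (int i = 0; i < N; ++i) {
//     float m = Xdata[i * D];
//     for (int j = 1; j < D; ++j) m = std::max(m, Xdata[i * D + j]);
//     float sum = 0.f;
//     for (int j = 0; j < D; ++j) {
//       Ydata[i * D + j] = std::exp(Xdata[i * D + j] - m);
//       sum += Ydata[i * D + j];
//     }
//     for (int j = 0; j < D; ++j) Ydata[i * D + j] /= sum;
//   }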

// Implementation for the CPU context.
template <>
bool SoftmaxGradientOp<float, CPUContext>::RunOnDevice() {
  auto& Y = Input(0);
  auto& dY = Input(1);
  auto* dX = Output(0);
  const auto canonical_axis = Y.canonical_axis_index(axis_);
  const int N = Y.size_to_dim(canonical_axis);
  const int D = Y.size_from_dim(canonical_axis);
  if (scale_.size() != N) {
    scale_.Resize(N);
  }
  if (sum_multiplier_.size() != D) {
    sum_multiplier_.Resize(D);
    math::Set<float, CPUContext>(
        D, 1.f, sum_multiplier_.mutable_data<float>(), &context_);
  }
  dX->ResizeLike(Y);
  const float* Ydata = Y.data<float>();
  const float* dYdata = dY.data<float>();
  float* dXdata = dX->mutable_data<float>();
  // Start from dX = dY; the correction terms are applied below.
  context_.Copy<float, CPUContext, CPUContext>(Y.size(), dYdata, dXdata);
  float* scaledata = scale_.mutable_data<float>();
  // scale_[i] = dot(Y_i, dY_i) for each of the N rows.
  for (int i = 0; i < N; ++i) {
    math::Dot<float, CPUContext>(
        D, Ydata + i * D, dYdata + i * D, scaledata + i, &context_);
  }
  // Rank-one update dX -= scale * 1^T broadcast-subtracts the per-row dot
  // products, and the elementwise product with Y completes the gradient.
  math::Gemm<float, CPUContext>(
      CblasNoTrans,
      CblasNoTrans,
      N,
      D,
      1,
      -1,
      scaledata,
      sum_multiplier_.data<float>(),
      1,
      dXdata,
      &context_);
  math::Mul<float, CPUContext>(Y.size(), dXdata, Ydata, dXdata, &context_);
  return true;
}
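
// For reference, the Dot/Gemm/Mul sequence above is equivalent to this naive
// per-row loop (an expository sketch, not part of the operator): the softmax
// Jacobian-vector product is dX_i = Y_i * (dY_i - dot(Y_i, dY_i)).
//
//   for (int i = 0; i < N; ++i) {
//     float s = 0.f;
//     for (int j = 0; j < D; ++j) s += Ydata[i * D + j] * dYdata[i * D + j];
//     for (int j = 0; j < D; ++j)
//       dXdata[i * D + j] = Ydata[i * D + j] * (dYdata[i * D + j] - s);
//   }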

REGISTER_CPU_OPERATOR(Softmax, SoftmaxOp<float, CPUContext>);
REGISTER_CPU_OPERATOR(SoftmaxGradient, SoftmaxGradientOp<float, CPUContext>);

OPERATOR_SCHEMA(Softmax)
    .NumInputs(1)
    .NumOutputs(1)
    .IdenticalTypeAndShape()
    .SetDoc(R"DOC(
The operator computes the softmax normalized values for each layer in the batch
of the given input. The input is a 2-D tensor (Tensor<float>) of size
(batch_size x input_feature_dimensions). The output tensor has the same shape
and contains the softmax normalized values of the corresponding input.

X does not need to explicitly be a 2D vector; rather, it will be coerced into
one. For an arbitrary n-dimensional tensor
X \in [a_0, a_1, ..., a_{k-1}, a_k, ..., a_{n-1}], where k is the axis
provided, X will be coerced into a 2-dimensional tensor with dimensions
[a_0 * ... * a_{k-1}, a_k * ... * a_{n-1}]. For the default case where axis=1,
this means the X tensor will be coerced into a 2D tensor of dimensions
[a_0, a_1 * ... * a_{n-1}], where a_0 is often the batch size. In this
situation, we must have a_0 = N and a_1 * ... * a_{n-1} = D. Each of these
dimensions must be matched correctly, or else the operator will throw errors.
)DOC")
    .Arg(
        "axis",
        "(int) default to 1; describes the axis of the inputs when coerced "
        "to 2D; defaults to one because the 0th axis most likely describes "
        "the batch_size")
    .Input(
        0,
        "input",
        "The input tensor that's coerced into a 2D matrix of size (NxD) "
        "as described above.")
    .Output(
        0,
        "output",
        "The softmax normalized output values with the same "
        "shape as input tensor.")
    .InheritOnnxSchema("Softmax");

// Input: Y, dY. Output: dX.
OPERATOR_SCHEMA(SoftmaxGradient).NumInputs(2).NumOutputs(1);

class GetSoftmaxGradient : public GradientMakerBase {
  using GradientMakerBase::GradientMakerBase;
  // Binds the forward output Y (O(0)) and the gradient flowing into it
  // (GO(0)) as inputs of the gradient op, which produces the gradient of
  // the forward input (GI(0)).
  vector<OperatorDef> GetGradientDefs() override {
    return SingleGradientDef(
        def_.type() + "Gradient",
        "",
        vector<string>{O(0), GO(0)},
        vector<string>{GI(0)});
  }
};
REGISTER_GRADIENT(Softmax, GetSoftmaxGradient);

} // namespace caffe2