Caffe2 - C++ API
A deep learning, cross-platform ML framework
spatial_softmax_with_loss_op.cc
#include "spatial_softmax_with_loss_op.h"
#include "softmax_shared.h"

namespace caffe2 {

REGISTER_CPU_OPERATOR(
    SpatialSoftmaxWithLoss,
    SpatialSoftmaxWithLossOp<float, CPUContext>);
REGISTER_CPU_OPERATOR(
    SpatialSoftmaxWithLossGradient,
    SpatialSoftmaxWithLossGradientOp<float, CPUContext>);

// Input: X (logits), T (labels); Output: P (probs), avg_loss
OPERATOR_SCHEMA(SpatialSoftmaxWithLoss)
    .NumInputs(2, 3)
    .NumOutputs(2)
    .TensorInferenceFunction(
        [](const OperatorDef& def, const vector<TensorShape>& in) {
          ArgumentHelper helper(def);
          vector<TensorShape> out(2);

          auto logits = in[0]; // Tensor with shape [batch_size, num_classes, H, W]
          auto labels = in[1]; // Tensor with shape [batch_size, H, W]
          auto batch_size = logits.dims().Get(0);
          auto num_classes = logits.dims().Get(1);

          CAFFE_ENFORCE_EQ(logits.dims_size(), 4);
          CAFFE_ENFORCE_EQ(labels.dims_size(), 3);
          out[0].set_data_type(logits.data_type());
          out[0].add_dims(batch_size);
          out[0].add_dims(num_classes);
          out[0].add_dims(in[0].dims(2));
          out[0].add_dims(in[0].dims(3));
          // Output 1 (the loss) is a scalar, so no dims are added
          return out;
        })
    .SetDoc(R"DOC(
Combined Spatial Softmax and Cross-Entropy loss operator.
Similar to SoftmaxWithLoss, this operator computes the spatial softmax
normalized values for each layer in the batch of the given input, after which
cross-entropy loss is computed. This operator is numerically more stable than
separate Softmax and CrossEntropy ops. The inputs are a 4-D tensor
(Tensor<float>) of size (batch_size x num_classes x H x W) and a 3-D tensor of
labels (ground truth) of size (batch_size x H x W).
Output is a tensor with the probability for each label at each pixel for each
example (N x D x H x W) and the averaged loss (scalar).
For spatial softmax, weighting is by the x,y position of the input.
)DOC")
    .Input(0, "logits", "Unscaled log probabilities")
    .Input(1, "labels", "Ground truth")
    .Input(
        2,
        "weight_tensor",
        "Optional blob to be used to weight the samples for the loss. With "
        "spatial set, weighting is by x,y of the input")
    .Output(0, "softmax", "Tensor with softmax probabilities")
    .Output(1, "loss", "Average loss");
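
// Worked example of the per-pixel computation (one pixel, D = 3, values
// rounded): logits (1, 2, 3) give max = 3 and
//   P = (e^-2, e^-1, e^0) / (e^-2 + e^-1 + e^0) ~ (0.090, 0.245, 0.665);
// if the pixel's label is 2, it contributes -log(0.665) ~ 0.408 to the
// weighted average loss.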

// Input: X, T, [weights], P, dY; Output: dX
OPERATOR_SCHEMA(SpatialSoftmaxWithLossGradient).NumOutputs(1);

#define DONT_CARE (-1)

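// Forward pass: for each example i and spatial location (y, x), softmax is
// taken over the D class channels,
//   P[i][c][y][x] = exp(X[i][c][y][x] - max_c) / sum_c' exp(X[i][c'][y][x] - max_c),
// and the loss is the weighted average of -log(P) at the labeled class over
// all pixels not marked DONT_CARE.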
template <>
bool SpatialSoftmaxWithLossOp<float, CPUContext>::RunOnDevice() {
  auto& X = Input(0); // Logits
  auto& T = Input(1); // Labels / targets
  auto* P = Output(0); // Probabilities from softmax
  auto* avg_loss = Output(1); // Average loss
  int N, D;
  N = X.dim32(0);
  D = X.dim32(1);
  P->ResizeLike(X);

  if (sum_multiplier_.size() != D) {
    sum_multiplier_.Resize(D);
    math::Set<float, CPUContext>(
        D, 1.f, sum_multiplier_.mutable_data<float>(), &context_);
  }

  float* Pdata = P->mutable_data<float>();
  const float* weights = (InputSize() > 2 ? Input(2).data<float>() : nullptr);
  CAFFE_ENFORCE_EQ(X.ndim(), 4);
  CAFFE_ENFORCE_EQ(T.ndim(), 3);
  CAFFE_ENFORCE_EQ(T.dim32(0), N);

  int H = X.dim32(2);
  int W = X.dim32(3);

  const float* Xdata = X.data<float>();

  for (int i = 0; i < N; ++i) {
    for (int y = 0; y < H; ++y) {
      for (int x = 0; x < W; ++x) {
        // Subtract max on each cell for numerical reasons
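        // (Softmax is shift-invariant: subtracting the per-cell max leaves
        // the probabilities unchanged while preventing exp() overflow.)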
        float max_val = -1e20f;
        for (int c = 0; c < D; ++c) {
          // TODO optimize
          int idx = i * (H * W * D) + c * (H * W) + y * W + x;
          max_val = std::max(max_val, Xdata[idx]);
        }

        // Exponentiate
        float expsum = 0.0f;
        for (int c = 0; c < D; ++c) {
          int idx = i * (H * W * D) + c * (H * W) + y * W + x;
          float expx = exp(Xdata[idx] - max_val);
          Pdata[idx] = expx;
          expsum += expx;
        }

        // Normalize
        for (int c = 0; c < D; ++c) {
          int idx = i * (H * W * D) + c * (H * W) + y * W + x;
          Pdata[idx] /= expsum;
        }
      }
    }
  }

  // Compute the avg cross-entropy loss
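  // Pixels labeled DONT_CARE (-1) are skipped; every other pixel adds
  // -w * log(P[label]) to the sum and w to total_weight (w = 1 when no
  // weight tensor is given).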
  avg_loss->Resize(vector<TIndex>());
  float* avg_loss_data = avg_loss->mutable_data<float>();
  const int* label_data = T.data<int>();

  float sum_label_xent = 0.0f;
  float total_weight = 0.0f;

  for (int y = 0; y < H; y++) {
    for (int x = 0; x < W; x++) {
      for (int i = 0; i < N; i++) {
        int label_idx = i * H * W + y * W + x;
        int label = label_data[label_idx];
        if (label != DONT_CARE) {
          CAFFE_ENFORCE(
              label < D && label >= 0,
              "Label seems incorrect: label value larger than number of classes: ",
              label,
              " vs ",
              D);
          int idx = i * (H * W * D) + label * (H * W) + y * W + x;
          float w = weights ? weights[label_idx] : 1.0f;
          total_weight += w;
          sum_label_xent += -log(std::max(Pdata[idx], 1e-20f)) * w;
        }
      }
    }
  }
  if (total_weight != 0.0f) {
    *avg_loss_data = sum_label_xent / total_weight;
  } else {
    *avg_loss_data = 0.0f;
  }
  return true;
}

template <>
bool SpatialSoftmaxWithLossGradientOp<float, CPUContext>::RunOnDevice() {
  auto& X = Input(0); // Logits
  auto& T = Input(1); // Labels / targets
  // Input(2) is weights, if given
  auto& P = Input(InputSize() - 2); // Probabilities from softmax
  auto& d_avg_loss = Input(InputSize() - 1); // Gradient w.r.t. avg loss
  auto* dX = Output(0);
  const float* weights = (InputSize() > 4 ? Input(2).data<float>() : nullptr);
  int N, D;
  N = X.dim32(0);
  D = X.dim32(1);
  dX->ResizeLike(X);
  CAFFE_ENFORCE_EQ(T.dim32(0), N);
  CAFFE_ENFORCE_EQ(X.ndim(), 4);
  CAFFE_ENFORCE_EQ(T.ndim(), 3);

  int H = X.dim32(2);
  int W = X.dim32(3);

  const float* Pdata = P.data<float>();
  float* dX_data = dX->mutable_data<float>();
  const int* label_data = T.data<int>();

  // Copy the softmax probabilities into dX: for every class except the
  // correct label, the gradient of the loss w.r.t. the logit equals the
  // softmax probability itself.
  context_.Copy<float, CPUContext, CPUContext>(P.size(), Pdata, dX_data);
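  // Per pixel, the softmax/cross-entropy gradient w.r.t. the logits is
  //   dL/dX[i][c][y][x] = P[i][c][y][x] - (c == label ? 1 : 0);
  // the loop below realizes this by subtracting 1 at the labeled class
  // and applying the optional per-pixel weight.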

  float total_weight = 0.0f;
  for (int y = 0; y < H; ++y) {
    for (int x = 0; x < W; ++x) {
      for (int i = 0; i < N; ++i) {
        int label_idx = i * H * W + y * W + x;
        int label = label_data[label_idx];

        if (label != DONT_CARE) {
          int idx = i * (H * W * D) + label * (H * W) + y * W + x;

          dX_data[idx] -= 1.0f;

          if (weights != nullptr) {
            float weight = weights[label_idx];
            for (int c = 0; c < D; ++c) {
              int k = i * (H * W * D) + c * (H * W) + y * W + x;
              dX_data[k] *= weight;
            }
            total_weight += weight;
          } else {
            total_weight += 1.0f;
          }
        } else {
          // Set the gradient to zero at coordinates labeled don't-care
          for (int c = 0; c < D; ++c) {
            int idx = i * (H * W * D) + c * (H * W) + y * W + x;
            dX_data[idx] = 0;
          }
        }
      }
    }
  }

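  // Two in-place scalings follow: first scale by scale_ / total_weight so
  // the gradient matches the weighted averaging of the forward pass, then
  // multiply elementwise by the incoming scalar gradient d_avg_loss.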
  if (total_weight > 0) {
    math::Scale<float, CPUContext>(
        dX->size(),
        scale_ / total_weight,
        dX->data<float>(),
        dX_data,
        &context_);
  }
  math::Scale<float, CPUContext>(
      dX->size(),
      d_avg_loss.data<float>(),
      dX->data<float>(),
      dX->mutable_data<float>(),
      &context_);
  return true;
}

namespace {
class GetSoftmaxWithLossGradient : public GradientMakerBase {
  using GradientMakerBase::GradientMakerBase;
  vector<OperatorDef> GetGradientDefs() override {
    vector<string> blob_names{
        {I(0), I(1), O(0), GO(1)},
    };
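
    // Blob order is X, T, [weights], P, GO(loss); it must match the
    // gradient op, which reads P and d_avg_loss from the end of its
    // input list.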

    // Add weight blob, if given
    if (def_.input_size() == 3) {
      blob_names.emplace(blob_names.begin() + 2, I(2));
    }
    return SingleGradientDef(
        "SpatialSoftmaxWithLossGradient",
        "",
        blob_names,
        vector<string>{GI(0)});
  }
};

REGISTER_GRADIENT(SpatialSoftmaxWithLoss, GetSoftmaxWithLossGradient);
} // namespace
} // namespace caffe2