// Caffe2 - C++ API: boolean_mask_ops.cc
// A deep learning, cross-platform ML framework.
1 #include "caffe2/operators/boolean_mask_ops.h"
2 #include "caffe2/core/operator.h"
3 #include "caffe2/core/tensor.h"
4 
5 namespace caffe2 {
6 namespace {
7 
8 template <class Context>
9 class BooleanMaskLengthsOp final : public Operator<Context> {
10  public:
11  USE_OPERATOR_CONTEXT_FUNCTIONS;
12  BooleanMaskLengthsOp(const OperatorDef& operator_def, Workspace* ws)
13  : Operator<Context>(operator_def, ws) {}
14 
15  bool RunOnDevice() override {
16  return DispatchHelper<TensorTypes<int32_t, int64_t>>::call(this, Input(0));
17  }
18 
19  template <typename T>
20  bool DoRunWithType() {
21  auto& lengths = Input(0);
22  auto& mask = Input(1);
23  auto* lengthsOut = Output(0);
24  CAFFE_ENFORCE(lengths.ndim() == 1);
25  CAFFE_ENFORCE(mask.ndim() == 1);
26  const auto* lengthsPtr = lengths.template data<T>();
27  const auto* maskPtr = mask.template data<bool>();
28  auto totalLength =
29  std::accumulate(lengthsPtr, lengthsPtr + lengths.size(), 0);
30  CAFFE_ENFORCE(mask.size() == totalLength);
31  lengthsOut->ResizeLike(lengths);
32  auto* lengthsOutPtr = lengthsOut->template mutable_data<T>();
33  int p = 0;
34  for (int i = 0; i < lengths.size(); ++i) {
35  T lengthOut = 0;
36  for (int j = 0; j < lengthsPtr[i]; ++j) {
37  if (maskPtr[p++]) {
38  ++lengthOut;
39  }
40  }
41  lengthsOutPtr[i] = lengthOut;
42  }
43  return true;
44  }
45 };
46 } // namespace
47 
// Masks `data` along its first dimension: output keeps only the slices
// data[i, ...] for which mask[i] is true, preserving order. Optionally
// (when a second output is present) also emits the kept indices.
template <>
bool BooleanMaskOp<CPUContext>::RunOnDevice() {
  auto& data = Input(0);
  auto& mask = Input(1);
  auto* dataOut = Output(0);
  // data may have any rank >= 1; mask is 1D and must match data's first dim.
  CAFFE_ENFORCE(data.ndim() >= 1);
  CAFFE_ENFORCE_EQ(mask.ndim(), 1);
  CAFFE_ENFORCE(data.dims()[0] == mask.dims()[0]);

  // First pass: count true entries to size the output's leading dimension.
  const auto* maskPtr = mask.template data<bool>();
  int numOutputs = 0;
  int outerSize = mask.size();
  for (int i = 0; i < outerSize; ++i) {
    if (maskPtr[i]) {
      ++numOutputs;
    }
  }
  // Output shape = [numOutputs, data.dims()[1:]...], same dtype as data.
  std::vector<TIndex> outShape;
  outShape.push_back(numOutputs);
  outShape.insert(outShape.end(), data.dims().begin() + 1, data.dims().end());
  dataOut->Resize(outShape);
  auto* outPtr = (char*)dataOut->raw_mutable_data(data.meta());

  // Optional second output: int64 indices of the kept rows.
  int64_t* out_vec = nullptr;
  if (OutputSize() == 2) {
    auto* indicesOut = Output(1);
    indicesOut->Resize(numOutputs);
    out_vec = indicesOut->template mutable_data<int64_t>();
  }

  if (numOutputs == 0) {
    return true;
  }
  // Bytes per slice data[i, ...]; used for raw pointer arithmetic below.
  const auto innerSize = data.size_from_dim(1);
  const auto innerSizeBytes = innerSize * data.meta().itemsize();

  // Second pass: coalesce consecutive runs of true mask values so each run
  // is copied with a single CopyItems call. lastStart == -1 means "not
  // currently inside a run of kept rows".
  TIndex lastStart = -1;
  const auto* inPtr = (char*)data.raw_data();
  TIndex outStart = 0;

  // Note the loop intentionally runs one step past outerSize (i == outerSize)
  // so a run that extends to the end of the mask is flushed before break.
  for (TIndex i = 0;; ++i) {
    // mask was true and either a) became false, or b) sequence finished
    if (lastStart != -1 && ((i >= outerSize) || !maskPtr[i])) {
      const auto* src = inPtr + lastStart * innerSizeBytes;
      auto* dst = outPtr + outStart * innerSizeBytes;
      int numItems = i - lastStart;
      // CopyItems handles non-POD element types via data.meta().
      context_.template CopyItems<CPUContext, CPUContext>(
          data.meta(), numItems * innerSize, src, dst);
      outStart += numItems;
      lastStart = -1;
    }
    if (i >= outerSize) {
      break;
    }
    // mask was false and became true
    if (lastStart == -1 && maskPtr[i]) {
      lastStart = i;
    }
    // Record the kept index as soon as it is seen (independent of runs).
    if (maskPtr[i] && OutputSize() == 2) {
      *(out_vec++) = i;
    }
  }
  return true;
}
112 
113 REGISTER_CPU_OPERATOR(BooleanMask, BooleanMaskOp<CPUContext>);
114 REGISTER_CPU_OPERATOR(BooleanMaskLengths, BooleanMaskLengthsOp<CPUContext>);
115 
116 OPERATOR_SCHEMA(BooleanMask)
117  .NumInputs(2)
118  .NumOutputs(1, 2)
119  .SetDoc(R"DOC(
120 Given a data tensor and a 1D boolean mask tensor, returns a tensor containing
121 only the elements corresponding to positions where the mask is true.
122 )DOC")
123  .Input(0, "data", "The 1D, original data tensor.")
124  .Input(1, "mask", "A tensor of bools of same shape as `data`.")
125  .Output(0, "masked_data", "A tensor of same type as `data`.")
126  .Output(1, "masked_indices", "A tensor for indices.");
127 
128 OPERATOR_SCHEMA(BooleanMaskLengths)
129  .NumInputs(2)
130  .NumOutputs(1)
131  .SetDoc(R"DOC(
132 Given a tensor of int32 segment lengths and a mask (boolean) tensor, return
133 the segment lengths of a corresponding segmented tensor after BooleanMask is
134 applied.
135 )DOC")
136  .Input(0, "lengths", "A 1D int32 tensor representing segment lengths.")
137  .Input(1, "mask", "A 1D bool tensor of values to keep.")
138  .Output(0, "masked_lengths", "Segment lengths of a masked tensor.");
139 
140 NO_GRADIENT(BooleanMask)
141 NO_GRADIENT(BooleanMaskLengths);
142 
// Negative infinity: the default fill value for masked-out attention scores
// (so a subsequent softmax assigns them zero weight).
constexpr float minf = -std::numeric_limits<float>::infinity();
144 
// Template this on a functor object so we can generate different
// implementations at compile time and have a better chance of inlining.
//
// Writes out[idx] = fn(row, col, in[idx]) ? fill_val : in[idx] over a
// collapsed 2D view [N, M] of the tensor, or a 3D view [B, N, M] when a
// batch dimension is present (B >= 0).
template <typename Functor>
void MaskWithFunctor(
    size_t N,
    size_t M,
    int B,
    const float* in,
    Functor fn,
    float fill_val,
    float* out) {
  if (B >= 0) { // with batching
    // collapse tensor to 3-dim view [B, N, M] where:
    // B is product of dims up to and including batch
    // N is product of dims between batch and axis, exclusive
    // M is product of dimensions at/after axis
    // then mask each batch [i, :, :] (note that this is N x M matrix)
    for (int i = 0; i < B; ++i) {
      // Flat offset of batch i when [i, :, :] is laid out row-major.
      const size_t base = N * M * static_cast<size_t>(i);
      // size_t loop indices avoid signed/unsigned comparisons against N/M.
      for (size_t j = 0; j < N; ++j) {
        for (size_t k = 0; k < M; ++k) {
          // base + M * j + k is the flat index of entry (row j, col k).
          const size_t idx = base + M * j + k;
          const auto val = in[idx];
          out[idx] =
              (fn(static_cast<int>(j), static_cast<int>(k), val) ? fill_val
                                                                 : val);
        }
      }
    }
  } else { // without batching
    // TODO(T20952436): vector implementation
    // collapse tensor to 2-dim view [N, M], where
    // N is product of dimensions before axis
    // M is product of dimensions at/after axis
    // and mask N by M matrix
    for (size_t i = 0; i < N; ++i) {
      for (size_t j = 0; j < M; ++j) {
        const size_t idx = M * i + j;
        const auto val = in[idx];
        out[idx] =
            (fn(static_cast<int>(i), static_cast<int>(j), val) ? fill_val
                                                               : val);
      }
    }
  }
}
187 
// Repeat masking along continuous segments (right axes) of size D:
// the mask decision fn(i, j, ...) for cell (i, j) of the [N, M] view is
// applied to all D consecutive elements of that cell.
template <typename Functor>
void RepeatedMaskWithFunctor(
    size_t N,
    size_t M,
    int D,
    const float* in,
    Functor fn,
    float fill_val,
    float* out) {
  const size_t d = static_cast<size_t>(D);
  // size_t loop indices avoid signed/unsigned comparisons against N/M.
  for (size_t i = 0; i < N; ++i) {
    for (size_t j = 0; j < M; ++j) {
      // Flat offset of the D-element segment for cell (i, j), row-major.
      const size_t base = M * d * i + d * j;
      for (size_t k = 0; k < d; ++k) {
        const size_t idx = base + k;
        const auto val = in[idx];
        out[idx] =
            (fn(static_cast<int>(i), static_cast<int>(j), val) ? fill_val
                                                               : val);
      }
    }
  }
}
207 
208 namespace {
209 
210 class SequenceFunctor {
211  public:
212  explicit SequenceFunctor(const int* sl, const size_t len)
213  : sl_(sl), len_(len) {}
214  bool operator()(int i, int j, float /* val*/) {
215  CAFFE_ENFORCE(i < len_, "Out of bound.");
216  return j >= sl_[i];
217  }
218 
219  private:
220  const int* sl_;
221  const size_t len_;
222 };
223 
// Masks every column outside the closed window [c[i] - r, c[i] + r]
// centered at c[i] for row i.
class WindowFunctor {
 public:
  explicit WindowFunctor(const int* c, int r) : c(c), r(r) {}
  bool operator()(int i, int j, float /* val*/) {
    const int center = c[i];
    // Keep j only when it lies inside the window; mask otherwise.
    const bool inside = (center - r <= j) && (j <= center + r);
    return !inside;
  }

 private:
  const int* c; // per-row window centers (not owned)
  const int r; // window radius
};
235 
// Masks the strict upper triangle: (i, j) is masked when the column index
// exceeds the row index.
class UpperFunctor {
 public:
  bool operator()(int i, int j, float /* val */) {
    return i < j;
  }
};
242 
// Masks the strict lower triangle: (i, j) is masked when the column index
// is below the row index.
class LowerFunctor {
 public:
  bool operator()(int i, int j, float /* val */) {
    return i > j;
  }
};
249 
// Masks the upper triangle including the diagonal: (i, j) is masked when
// the column index is at or above the row index.
class UpperDiagFunctor {
 public:
  bool operator()(int i, int j, float /* val */) {
    return i <= j;
  }
};
256 
// Masks the lower triangle including the diagonal: (i, j) is masked when
// the column index is at or below the row index.
class LowerDiagFunctor {
 public:
  bool operator()(int i, int j, float /* val */) {
    return i >= j;
  }
};
263 
264 } // namespace
265 
// Only float input is supported on CPU; dispatch resolves T for
// DoRunWithType based on Input(0)'s element type.
template <>
bool SequenceMaskOp<CPUContext>::RunOnDevice() {
  return DispatchHelper<TensorTypes<float>>::call(this, Input(0));
}
270 
// Collapses the input to a 2D (or batched 3D) view and applies the mask
// selected by mode_, writing either fill_val (forward) or 0 (gradient mode)
// into masked positions. See the operator schema for the mode semantics.
template <>
template <class T>
bool SequenceMaskOp<CPUContext>::DoRunWithType() {
  const Tensor<CPUContext>* input = &Input(0);
  const Tensor<CPUContext>* sequence_lengths = nullptr;
  const Tensor<CPUContext>* window_centers = nullptr;

  // The optional second input is interpreted per mode: row sequence lengths
  // for 'sequence', row window centers for 'window'.
  if (mode_ == "sequence") {
    sequence_lengths = &Input(1);
  } else if (mode_ == "window") {
    window_centers = &Input(1);
  }

  auto* output = Output(0);
  output->ResizeLike(*input);

  const auto canonical_axis = input->canonical_axis_index(axis_);

  // canonical_batch is non-negative if batching, -1 otherwise
  int canonical_batch = -1;
  if ((HasArgument("batch"))) {
    canonical_batch = input->canonical_axis_index(batch_);
  }

  // make sure batch < axis
  if (canonical_batch >= 0) {
    CAFFE_ENFORCE_LT(canonical_batch, canonical_axis);
  }

  // if no batch, then left is product of dims up to axis
  // otherwise, left is product of dims between batch and axis
  const int left =
      (canonical_batch >= 0
           ? input->size_between_dim(canonical_batch, canonical_axis)
           : input->size_to_dim(canonical_axis));
  const int right = input->size_from_dim(canonical_axis);

  // product of dims from 1 to batch
  const int batch_dim =
      (canonical_batch >= 0
           ? input->size_to_dim(canonical_batch) * input->dim(canonical_batch)
           : -1);

  // In gradient mode the masked positions must contribute zero gradient,
  // so the fill value is forced to 0 regardless of fill_val_.
  T fill_val = convert::To<float, T>(grad_ ? 0.0f : fill_val_);
  if (mode_ == "sequence") {
    CAFFE_ENFORCE(
        sequence_lengths, "Sequence length not provided for mode 'sequence'!");
    if (HasArgument("repeat_from_axis")) {
      // Repeat the same row mask across all trailing dims from repeat_from_
      // onward; the effective column count shrinks by that repeated size.
      const int canonical_repeat_from =
          input->canonical_axis_index(repeat_from_);
      const int repeated_dims = input->size_from_dim(canonical_repeat_from);
      const int masked_dims = right / repeated_dims;
      RepeatedMaskWithFunctor(
          left,
          masked_dims,
          repeated_dims,
          input->data<T>(),
          SequenceFunctor(
              sequence_lengths->data<int>(), sequence_lengths->size()),
          fill_val,
          output->mutable_data<T>());
    } else {
      MaskWithFunctor(
          left,
          right,
          batch_dim,
          input->data<T>(),
          SequenceFunctor(
              sequence_lengths->data<int>(), sequence_lengths->size()),
          fill_val,
          output->mutable_data<T>());
    }
  } else if (mode_ == "window") {
    // NOTE(review): unlike 'sequence', window_centers is not enforced
    // non-null here — presumably the 2-input schema guarantees Input(1)
    // exists; confirm before relying on it.
    MaskWithFunctor(
        left,
        right,
        batch_dim,
        input->data<T>(),
        WindowFunctor(window_centers->data<int>(), radius_),
        fill_val,
        output->mutable_data<T>());
  } else if (mode_ == "upper") {
    // Masks entries with column index j > row index i (strict upper).
    MaskWithFunctor(
        left,
        right,
        batch_dim,
        input->data<T>(),
        UpperFunctor(),
        fill_val,
        output->mutable_data<T>());
  } else if (mode_ == "lower") {
    // Masks entries with j < i (strict lower).
    MaskWithFunctor(
        left,
        right,
        batch_dim,
        input->data<T>(),
        LowerFunctor(),
        fill_val,
        output->mutable_data<T>());
  } else if (mode_ == "upperdiag") {
    // Masks entries with j >= i (upper triangle including diagonal).
    MaskWithFunctor(
        left,
        right,
        batch_dim,
        input->data<T>(),
        UpperDiagFunctor(),
        fill_val,
        output->mutable_data<T>());
  } else if (mode_ == "lowerdiag") {
    // Masks entries with j <= i (lower triangle including diagonal).
    MaskWithFunctor(
        left,
        right,
        batch_dim,
        input->data<T>(),
        LowerDiagFunctor(),
        fill_val,
        output->mutable_data<T>());
  } else {
    CAFFE_ENFORCE(false, "Unsupported mode for SequenceMaskOp!");
    return false;
  }

  return true;
}
395 
396 REGISTER_CPU_OPERATOR(SequenceMask, SequenceMaskOp<CPUContext>);
397 
398 OPERATOR_SCHEMA(SequenceMask)
399  .NumInputs(1, 2)
400  .NumOutputs(1)
401  .SetDoc(R"DOC(
402 Mask op designed for use in attention mechanisms for sequence modeling tasks.
403 Supports batching: given batch_dim, collapses dims 0 through batch_dim into a
404 single dimension, e.g. if tensor dims are [4,2,1,3,4] and batch_dim=2, first
405 collapse tensor to [4*2*1,3,4], then mask each batch [i,:,:].
406 
407 
408 Two current operating modes:
409 
410 
411 1) Given a 2D input tensor and 1D tensor of sequence lengths, for each row i in
412 the input tensor, set elements in that row to -inf if their column index
413 j >= sequence_lengths[i]. This mode takes two inputs and argument mode =
414 'sequence'
415 
416 
417 2) Triangular mask. Given row index i and column index j, set elements to -inf
418 given the following conditions:
419 
420  mode='upper', x_ij = -inf if j < i
421  mode='lower', x_ij = -inf if j > i
422  mode='upperdiag', x_ij = -inf if j <= i
423  mode='lowerdiag', x_ij = -inf if j >= i
424 
425 This mode takes one input.
426 
427 
428 3) Window Mask. Given a 2D input tensor and 1D tensor of window centers,
429 for each row i in the input tensor, set elements in that row to -inf
430 if their column index j outside [center - radius, center + radius].
431 This mode takes two inputs and argument mode = 'sequence'.
432 Argument 'radius' should be provided.
433 )DOC")
434  .Input(0, "input", "Tensor to apply masking to")
435  .Input(1, "sequence_lengths", "1D Tensor of sequence lengths for mode #1")
436  .Output(0, "masked_tensor", "Input tensor with masking applied")
437  .Arg(
438  "mode",
439  "(string) Mode selection. Possible values: "
440  "'sequence', 'upper', 'lower', 'upperdiag', 'lowerdiag'")
441  .Arg(
442  "axis",
443  "(int) Beginning axis of row elements. All dimensions to the left "
444  "will be treated as row indices and those to the right (inclusive) "
445  "will be treated as column indices in the 2D mask")
446  .Arg("grad", "(bool) operate in gradient mode")
447  .Arg("radius", "(int) radius of windows in window mode")
448  .Arg("batch", "(int) batch dimension of tensor (optional)")
449  .Arg(
450  "repeat_from_axis",
451  "(int) used when mask should be repeated for "
452  "one or more data dimensions (beginning at this axis). "
453  "(currently only supported for sequence mode without batch argument)");
454 
455 class GetSequenceMaskGradient : public GradientMakerBase {
456  using GradientMakerBase::GradientMakerBase;
457  vector<OperatorDef> GetGradientDefs() override {
458  vector<Argument> args;
459  args.reserve(Def().arg().size());
460  for (const auto& x : Def().arg()) {
461  args.push_back(x);
462  }
463  args.push_back(MakeArgument<bool>("grad", true));
464  if (def_.input_size() == 1) {
465  return SingleGradientDef(
466  "SequenceMask",
467  "",
468  vector<string>{GO(0)},
469  vector<string>{GI(0)},
470  args);
471  } else {
472  return SingleGradientDef(
473  "SequenceMask",
474  "",
475  vector<string>{GO(0), I(1)},
476  vector<string>{GI(0)},
477  args);
478  }
479  }
480 
481  bool CopyArguments() const override {
482  return false;
483  }
484 };
485 
// Register the gradient maker so autograd emits the backward SequenceMask op.
REGISTER_GRADIENT(SequenceMask, GetSequenceMaskGradient);
487 
488 } // namespace caffe2
// (Doxygen cross-reference residue removed; TIndex is defined in
// caffe2/core/types.h.)