Caffe2 - C++ API
A deep learning, cross platform ML framework
bbox_transform_op.cc
1 #include "bbox_transform_op.h"
2 #include "caffe2/operators/generate_proposals_op_util_boxes.h"
3 
4 #ifdef CAFFE2_USE_MKL
5 #include "caffe2/mkl/operators/operator_fallback_mkl.h"
6 #endif // CAFFE2_USE_MKL
7 
8 namespace caffe2 {
9 namespace {
10 
11 REGISTER_CPU_OPERATOR(BBoxTransform, BBoxTransformOp<float, CPUContext>);
12 
13 #ifdef CAFFE2_HAS_MKL_DNN
14 REGISTER_MKL_OPERATOR(
15  BBoxTransform,
16  mkl::MKLFallbackOp<BBoxTransformOp<float, CPUContext>>);
17 #endif // CAFFE2_HAS_MKL_DNN
18 
19 // Input: box, delta Output: box
20 OPERATOR_SCHEMA(BBoxTransform)
21  .NumInputs(3)
22  .NumOutputs(1, 2)
23  .SetDoc(R"DOC(
24 Transform proposal bounding boxes to target bounding box using bounding box
25  regression deltas.
26 )DOC")
27  .Arg("weights", "vector<float> weights [wx, wy, ww, wh] for the deltas")
28  .Arg(
29  "apply_scale",
30  "bool (default true), transform the boxes to the scaled image space"
31  " after applying the bbox deltas."
32  "Set to false to match the detectron code, set to true for keypoint"
33  " models and for backward compatibility")
34  .Arg(
35  "correct_transform_coords",
36  "bool (default false), Correct bounding box transform coordates,"
37  " see bbox_transform() in boxes.py "
38  "Set to true to match the detectron code, set to false for backward"
39  " compatibility")
40  .Input(
41  0,
42  "rois",
43  "Bounding box proposals in pixel coordinates, "
44  "Size (M, 4), format [x1, y1, x2, y2], or"
45  "Size (M, 5), format [batch_index, x1, y1, x2, y2]. "
46  "If proposals from multiple images in a batch are present, they "
47  "should be grouped sequentially and in incremental order.")
48  .Input(
49  1,
50  "deltas",
51  "bounding box translations and scales,"
52  "size (M, 4*K), format [dx, dy, dw, dh], K = # classes")
53  .Input(
54  2,
55  "im_info",
56  "Image dimensions, size (batch_size, 3), "
57  "format [img_height, img_width, img_scale]")
58  .Output(
59  0,
60  "box_out",
61  "Pixel coordinates of the transformed bounding boxes,"
62  "Size (M, 4*K), format [x1, y1, x2, y2]")
63  .Output(
64  1,
65  "roi_batch_splits",
66  "Tensor of shape (batch_size) with each element denoting the number "
67  "of RoIs belonging to the corresponding image in batch");
68 
69 SHOULD_NOT_DO_GRADIENT(BBoxTransform);
70 } // namespace
71 
72 template <>
73 bool BBoxTransformOp<float, CPUContext>::RunOnDevice() {
74  const auto& roi_in = Input(0);
75  const auto& delta_in = Input(1);
76  const auto& iminfo_in = Input(2);
77  auto* box_out = Output(0);
78 
79  const int N = roi_in.dim32(0);
80  CAFFE_ENFORCE_EQ(roi_in.ndim(), 2);
81  CAFFE_ENFORCE(roi_in.dim32(1) == 4 || roi_in.dim32(1) == 5);
82 
83  CAFFE_ENFORCE_EQ(delta_in.ndim(), 2);
84  CAFFE_ENFORCE_EQ(delta_in.dim32(0), N);
85  CAFFE_ENFORCE_EQ(delta_in.dim32(1) % 4, 0);
86  const int num_classes = delta_in.dim32(1) / 4;
87 
88  CAFFE_ENFORCE_EQ(iminfo_in.ndim(), 2);
89  CAFFE_ENFORCE_EQ(iminfo_in.dim32(1), 3);
90  const int batch_size = iminfo_in.dim32(0);
91 
92  DCHECK_EQ(weights_.size(), 4);
93 
94  Eigen::Map<const ERArrXXf> boxes0(
95  roi_in.data<float>(), roi_in.dim32(0), roi_in.dim32(1));
96  Eigen::Map<const ERArrXXf> deltas0(
97  delta_in.data<float>(), delta_in.dim32(0), delta_in.dim32(1));
98 
99  // Count the number of RoIs per batch
100  vector<int> num_rois_per_batch(batch_size, 0);
101  if (roi_in.dim32(1) == 4) {
102  CAFFE_ENFORCE_EQ(batch_size, 1);
103  num_rois_per_batch[0] = N;
104  } else {
105  const auto& roi_batch_ids = boxes0.col(0);
106  for (int i = 0; i < roi_batch_ids.size(); ++i) {
107  const int roi_batch_id = roi_batch_ids(i);
108  CAFFE_ENFORCE_LT(roi_batch_id, batch_size);
109  num_rois_per_batch[roi_batch_id]++;
110  }
111  }
112 
113  CAFFE_ENFORCE_EQ(iminfo_in.dims(), (vector<TIndex>{batch_size, 3}));
114  Eigen::Map<const ERArrXXf> iminfo(
115  iminfo_in.data<float>(), iminfo_in.dim(0), iminfo_in.dim(1));
116 
117  box_out->ResizeLike(delta_in);
118  Eigen::Map<ERArrXXf> new_boxes(
119  box_out->mutable_data<float>(), box_out->dim32(0), box_out->dim32(1));
120 
121  // We assume roi_in and delta_in over multiple batches are grouped
122  // together in increasing order as generated by GenerateProposalsOp
123  int offset = 0;
124  for (int i = 0; i < batch_size; ++i) {
125  const int num_rois = num_rois_per_batch[i];
126  const auto& cur_iminfo = iminfo.row(i);
127  const float scale_before = cur_iminfo(2);
128  const float scale_after = apply_scale_ ? cur_iminfo(2) : 1.0;
129  int img_h = int(cur_iminfo(0) / scale_before + 0.5);
130  int img_w = int(cur_iminfo(1) / scale_before + 0.5);
131 
132  const auto& cur_boxes =
133  boxes0.rightCols(4).block(offset, 0, num_rois, 4) / scale_before;
134  for (int k = 0; k < num_classes; k++) {
135  const auto& cur_deltas = deltas0.block(offset, k * 4, num_rois, 4);
136  const auto& trans_boxes = utils::bbox_transform(
137  cur_boxes,
138  cur_deltas,
139  weights_,
140  utils::BBOX_XFORM_CLIP_DEFAULT,
141  correct_transform_coords_);
142  const auto& clip_boxes = utils::clip_boxes(trans_boxes, img_h, img_w);
143  new_boxes.block(offset, k * 4, num_rois, 4) = clip_boxes * scale_after;
144  }
145 
146  offset += num_rois;
147  }
148 
149  if (OutputSize() > 1) {
150  auto* roi_batch_splits = Output(1);
151  roi_batch_splits->Resize(batch_size);
152  Eigen::Map<EArrXf> roi_batch_splits_map(
153  roi_batch_splits->mutable_data<float>(), batch_size);
154  roi_batch_splits_map =
155  Eigen::Map<const EArrXi>(num_rois_per_batch.data(), batch_size)
156  .cast<float>();
157  }
158 
159  return true;
160 }
161 
162 } // namespace caffe2
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...