Caffe2 - C++ API
A deep learning, cross platform ML framework
generate_proposals_op_util_boxes.h
1 #ifndef CAFFE2_OPERATORS_UTILS_BOXES_H_
2 #define CAFFE2_OPERATORS_UTILS_BOXES_H_
3 
4 #include "caffe2/utils/eigen_utils.h"
5 #include "caffe2/utils/math.h"
6 
7 // Bounding box utils for generate_proposals_op
8 // Reference: detectron/lib/utils/boxes.py
9 
10 namespace caffe2 {
11 namespace utils {
12 
// Default upper bound for the log-space width/height scale deltas (dw, dh)
// used by bbox_transform(). The deltas are capped at this value before being
// exponentiated, which limits how much a box can grow in a single transform.
// Value: log(1000 / 16), roughly 4.135.
const float BBOX_XFORM_CLIP_DEFAULT = static_cast<float>(log(1000.0 / 16.0));
16 
17 // Forward transform that maps proposal boxes to ground-truth boxes using
18 // bounding-box regression deltas.
19 // boxes: pixel coordinates of the bounding boxes
20 // size (M, 4), format [x1; y1; x2; y2], x2 >= x1, y2 >= y1
21 // deltas: bounding box translations and scales
22 // size (M, 4), format [dx; dy; dw; dh]
23 // dx, dy: scale-invariant translation of the center of the bounding box
24 // dw, dh: log-space sclaing of the width and height of the bounding box
25 // weights: weights [wx, wy, ww, wh] for the deltas
26 // bbox_xform_clip: minimum bounding box width and height in log-space after
27 // transofmration
28 // correct_transform_coords: Correct bounding box transform coordates. Set to
29 // true to match the detectron code, set to false for backward compatibility
30 // return: pixel coordinates of the bounding boxes
31 // size (M, 4), format [x1; y1; x2; y2]
32 // see "Rich feature hierarchies for accurate object detection and semantic
33 // segmentation" Appendix C for more details
34 // reference: detectron/lib/utils/boxes.py bbox_transform()
35 template <class Derived1, class Derived2>
36 EArrXXt<typename Derived1::Scalar> bbox_transform(
37  const Eigen::ArrayBase<Derived1>& boxes,
38  const Eigen::ArrayBase<Derived2>& deltas,
39  const std::vector<typename Derived2::Scalar>& weights =
40  std::vector<typename Derived2::Scalar>{1.0, 1.0, 1.0, 1.0},
41  const float bbox_xform_clip = BBOX_XFORM_CLIP_DEFAULT,
42  const bool correct_transform_coords = false) {
43  using T = typename Derived1::Scalar;
44  using EArrXX = EArrXXt<T>;
45  using EArrX = EArrXt<T>;
46 
47  if (boxes.rows() == 0) {
48  return EArrXX::Zero(T(0), deltas.cols());
49  }
50 
51  CAFFE_ENFORCE_EQ(boxes.rows(), deltas.rows());
52  CAFFE_ENFORCE_EQ(boxes.cols(), 4);
53  CAFFE_ENFORCE_EQ(deltas.cols(), 4);
54 
55  EArrX widths = boxes.col(2) - boxes.col(0) + T(1.0);
56  EArrX heights = boxes.col(3) - boxes.col(1) + T(1.0);
57  auto ctr_x = boxes.col(0) + T(0.5) * widths;
58  auto ctr_y = boxes.col(1) + T(0.5) * heights;
59 
60  auto dx = deltas.col(0).template cast<T>() / weights[0];
61  auto dy = deltas.col(1).template cast<T>() / weights[1];
62  auto dw =
63  (deltas.col(2).template cast<T>() / weights[2]).cwiseMin(bbox_xform_clip);
64  auto dh =
65  (deltas.col(3).template cast<T>() / weights[3]).cwiseMin(bbox_xform_clip);
66 
67  EArrX pred_ctr_x = dx * widths + ctr_x;
68  EArrX pred_ctr_y = dy * heights + ctr_y;
69  EArrX pred_w = dw.exp() * widths;
70  EArrX pred_h = dh.exp() * heights;
71 
72  T offset(correct_transform_coords ? 1.0 : 0.0);
73 
74  EArrXX pred_boxes = EArrXX::Zero(deltas.rows(), deltas.cols());
75  // x1
76  pred_boxes.col(0) = pred_ctr_x - T(0.5) * pred_w;
77  // y1
78  pred_boxes.col(1) = pred_ctr_y - T(0.5) * pred_h;
79  // x2
80  pred_boxes.col(2) = pred_ctr_x + T(0.5) * pred_w - offset;
81  // y2
82  pred_boxes.col(3) = pred_ctr_y + T(0.5) * pred_h - offset;
83 
84  return pred_boxes;
85 }
86 
87 // Clip boxes to image boundaries
88 // boxes: pixel coordinates of bounding box, size (M * 4)
89 template <class Derived>
90 EArrXXt<typename Derived::Scalar>
91 clip_boxes(const Eigen::ArrayBase<Derived>& boxes, int height, int width) {
92  CAFFE_ENFORCE_EQ(boxes.cols(), 4);
93 
94  EArrXXt<typename Derived::Scalar> ret(boxes.rows(), boxes.cols());
95 
96  // x1 >= 0 && x1 < width
97  ret.col(0) = boxes.col(0).cwiseMin(width - 1).cwiseMax(0);
98  // y1 >= 0 && y1 < height
99  ret.col(1) = boxes.col(1).cwiseMin(height - 1).cwiseMax(0);
100  // x2 >= 0 && x2 < width
101  ret.col(2) = boxes.col(2).cwiseMin(width - 1).cwiseMax(0);
102  // y2 >= 0 && y2 < height
103  ret.col(3) = boxes.col(3).cwiseMin(height - 1).cwiseMax(0);
104 
105  return ret;
106 }
107 
108 // Only keep boxes with both sides >= min_size and center within the image.
109 // boxes: pixel coordinates of bounding box, size (M * 4)
110 // im_info: [height, width, img_scale]
111 // return: row indices for 'boxes'
112 template <class Derived>
113 std::vector<int> filter_boxes(
114  const Eigen::ArrayBase<Derived>& boxes,
115  double min_size,
116  const Eigen::Array3f& im_info) {
117  CAFFE_ENFORCE_EQ(boxes.cols(), 4);
118 
119  // Scale min_size to match image scale
120  min_size *= im_info[2];
121 
122  using T = typename Derived::Scalar;
123  using EArrX = EArrXt<T>;
124 
125  EArrX ws = boxes.col(2) - boxes.col(0) + T(1);
126  EArrX hs = boxes.col(3) - boxes.col(1) + T(1);
127  EArrX x_ctr = boxes.col(0) + ws / T(2);
128  EArrX y_ctr = boxes.col(1) + hs / T(2);
129 
130  EArrXb keep = (ws >= min_size) && (hs >= min_size) &&
131  (x_ctr < T(im_info[1])) && (y_ctr < T(im_info[0]));
132 
133  return GetArrayIndices(keep);
134 }
135 
136 } // namespace utils
137 } // namespace caffe2
138 
139 #endif // CAFFE2_OPERATORS_UTILS_BOXES_H_
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...