1 #include "caffe2/operators/generate_proposals_op.h" 2 #include "caffe2/operators/generate_proposals_op_util_boxes.h" 3 #include "generate_proposals_op_util_nms.h" 6 #include "caffe2/mkl/operators/operator_fallback_mkl.h" 7 #endif // CAFFE2_USE_MKL 15 size_t ComputeStartIndex(
// ComputeStartIndex: folds the per-dimension coordinates in `index` into one
// flat element offset for `tensor` (return type size_t).
//   tensor - tensor whose dimension sizes define the scaling factors.
//   index  - one coordinate per tensor dimension.
16 const TensorCPU& tensor,
17 const std::vector<int>& index) {
// Debug-only check that exactly one coordinate is supplied per dimension.
18 DCHECK_EQ(index.size(), tensor.ndim());
// Each coordinate is scaled by tensor.size_from_dim(i + 1) and accumulated
// into `ret`. Presumably size_from_dim(i + 1) is the element count of all
// trailing dimensions, i.e. a row-major stride -- TODO confirm against the
// Tensor API. The declaration and return of `ret` are not visible in this
// excerpt.
// NOTE(review): `i` is a signed int compared against the unsigned
// index.size().
21 for (
int i = 0; i < index.size(); i++) {
22 ret += index[i] * tensor.size_from_dim(i + 1);
// GetSubTensorView: builds a utils::ConstTensorView<T> over the part of
// `tensor` that starts at index `dim0_start_index` along dimension 0.
//   tensor           - source tensor; the view is constructed from a pointer
//                      into this tensor's data.
//   dim0_start_index - coordinate along dimension 0; no range check on it is
//                      visible in this excerpt.
// The resulting view has the input's dimensions minus the leading one.
30 utils::ConstTensorView<T> GetSubTensorView(
31 const TensorCPU& tensor,
32 int dim0_start_index) {
// Debug-only check that T is as wide as the tensor's stored element type.
33 DCHECK_EQ(tensor.meta().itemsize(),
sizeof(T));
// Empty tensor: hand back a view with a null data pointer and no dimensions.
35 if (tensor.size() == 0) {
36 return utils::ConstTensorView<T>(
nullptr, {});
// Coordinates (dim0_start_index, 0, ..., 0): the first element of the slice.
39 std::vector<int> start_dims(tensor.ndim(), 0);
// .at(0) throws std::out_of_range if the tensor reports zero dimensions.
40 start_dims.at(0) = dim0_start_index;
// Flat offset of that first element, then the pointer to it.
41 auto st_idx = ComputeStartIndex(tensor, start_dims);
42 auto ptr = tensor.data<T>() + st_idx;
// Shape of the view: every input dimension except the first.
44 auto& input_dims = tensor.dims();
45 std::vector<int> ret_dims(input_dims.begin() + 1, input_dims.end());
47 utils::ConstTensorView<T> ret(ptr, ret_dims);
// ComputeAllAnchors: adds a per-cell shift to every base anchor and returns
// the combined matrix (K rows, A * 4 columns, per the expression that builds
// all_anchors_vec below).
// NOTE(review): the remaining parameters (height, width, feat_stride are used
// below) are declared on lines not visible in this excerpt.
55 ERMatXf ComputeAllAnchors(
56 const TensorCPU& anchors,
// K: number of grid cells. A: size of the first dimension of `anchors`.
60 const auto K = height * width;
61 const auto A = anchors.dim(0);
// shift_x: `width` evenly spaced values from 0 to width - 1, scaled by
// feat_stride, then tiled `height` times along the row direction.
63 ERMatXf shift_x = (ERVecXf::LinSpaced(width, 0.0, width - 1.0) * feat_stride)
64 .replicate(height, 1);
// shift_y: `height` evenly spaced values from 0 to height - 1, scaled by
// feat_stride. The call that completes this expression is not visible here.
65 ERMatXf shift_y = (EVecXf::LinSpaced(height, 0.0, height - 1.0) * feat_stride)
// shifts is K x 4 and is filled from the flattened shift_x / shift_y buffers
// in the order x, y, x, y -- presumably one K-element column each (assumes
// ConstEigenVectorMap is a column vector; TODO confirm).
67 Eigen::MatrixXf shifts(K, 4);
68 shifts << ConstEigenVectorMap<float>(shift_x.data(), shift_x.size()),
69 ConstEigenVectorMap<float>(shift_y.data(), shift_y.size()),
70 ConstEigenVectorMap<float>(shift_x.data(), shift_x.size()),
71 ConstEigenVectorMap<float>(shift_y.data(), shift_y.size());
// View the anchor data as a single row of A * 4 floats (assumes `anchors`
// holds at least A * 4 floats -- the caller visible in this file enforces
// dims {A, 4}).
79 ConstEigenMatrixMap<float> anchors_vec(
80 anchors.template data<float>(), 1, A * 4);
// Tile the anchor row K times vertically and each shift row A times
// horizontally, then add: entry (k, 4 * a + c) = anchor a, coord c + shift of
// cell k, coord c.
87 ERMatXf all_anchors_vec =
88 anchors_vec.replicate(K, 1) + shifts.rowwise().replicate(A);
93 return all_anchors_vec;
// ProposalsForOneImage: for a single image, turns anchors + box deltas into
// candidate boxes, clips and filters them, ranks them by score, applies NMS,
// and writes the surviving boxes and scores to out_boxes / out_probs.
//   im_info            - three per-image values; elements 0 and 1 are passed
//                        to clip_boxes and the whole array to filter_boxes.
//   all_anchors        - one row per (cell, anchor); row count must equal
//                        H * W * A.
//   bbox_deltas_tensor - 3-D view with dimensions (4 * A, H, W).
//   scores_tensor      - 3-D view with dimensions (A, H, W).
//   out_probs          - receives the scores of the kept boxes.
// NOTE(review): the out_boxes parameter used at the end is declared on a line
// not visible in this excerpt.
99 void GenerateProposalsOp<CPUContext>::ProposalsForOneImage(
100 const Eigen::Array3f& im_info,
101 const Eigen::Map<const ERMatXf>& all_anchors,
102 const utils::ConstTensorView<float>& bbox_deltas_tensor,
103 const utils::ConstTensorView<float>& scores_tensor,
105 EArrXf* out_probs)
const {
// Local aliases for the operator's configured thresholds.
106 const auto& pre_nms_topN = rpn_pre_nms_topN_;
107 const auto& post_nms_topN = rpn_post_nms_topN_;
108 const auto& nms_thresh = rpn_nms_thresh_;
109 const auto& min_size = rpn_min_size_;
// Deltas must be 3-D with a leading dimension that is a multiple of 4;
// A is that dimension divided by 4, H and W are the other two dimensions.
117 CAFFE_ENFORCE_EQ(bbox_deltas_tensor.ndim(), 3);
118 CAFFE_ENFORCE_EQ(bbox_deltas_tensor.dim(0) % 4, 0);
119 auto A = bbox_deltas_tensor.dim(0) / 4;
120 auto H = bbox_deltas_tensor.dim(1);
121 auto W = bbox_deltas_tensor.dim(2);
// bbox_deltas owns (H * W * A) x 4 floats. It is written through a
// (H * W) x (4 * A) map from the input viewed as (A * 4) x (H * W).
// NOTE(review): the end of the right-hand side is not visible here --
// presumably a transpose, as in the scores copy below.
124 ERArrXXf bbox_deltas(H * W * A, 4);
125 Eigen::Map<ERMatXf>(bbox_deltas.data(), H * W, 4 * A) =
126 Eigen::Map<const ERMatXf>(bbox_deltas_tensor.data(), A * 4, H * W)
// One delta row per anchor row.
128 CAFFE_ENFORCE_EQ(bbox_deltas.rows(), all_anchors.rows());
// Scores must be 3-D with dimensions exactly {A, H, W}.
134 CAFFE_ENFORCE_EQ(scores_tensor.ndim(), 3);
135 CAFFE_ENFORCE_EQ(scores_tensor.dims(), (vector<int>{A, H, W}));
// Flatten the scores: the (A, H * W) input is transposed into a
// (H * W, A) layout stored in the 1-D array `scores`.
138 EArrXf scores(scores_tensor.size());
139 Eigen::Map<ERMatXf>(scores.data(), H * W, A) =
140 Eigen::Map<const ERMatXf>(scores_tensor.data(), A, H * W).transpose();
// All four weights are 1.0. The leading arguments of the bbox_transform call
// are on lines not visible in this excerpt.
143 static const std::vector<float> bbox_weights{1.0, 1.0, 1.0, 1.0};
144 auto proposals = utils::bbox_transform(
148 utils::BBOX_XFORM_CLIP_DEFAULT,
149 correct_transform_coords_);
// Clip the boxes using the first two im_info values.
153 proposals = utils::clip_boxes(proposals, im_info[0], im_info[1]);
// `keep` holds what filter_boxes returns for min_size; it is used as a list
// of row indices below, and can never be longer than the score array.
156 auto keep = utils::filter_boxes(proposals, min_size, im_info);
157 DCHECK_LE(keep.size(), scores.size());
// Order the kept indices by descending score.
161 std::sort(keep.begin(), keep.end(), [&scores](
int lhs,
int rhs) {
162 return scores[lhs] > scores[rhs];
// Keep only the top pre_nms_topN candidates; a non-positive value disables
// the cut.
165 if (pre_nms_topN > 0 && pre_nms_topN < keep.size()) {
166 keep.resize(pre_nms_topN);
// NMS: post_nms_topN is forwarded only when it is positive and smaller than
// the candidate count; the second call omits it. NOTE(review): the `else`
// separating the two calls is not visible in this excerpt.
172 if (post_nms_topN > 0 && post_nms_topN < keep.size()) {
173 keep = utils::nms_cpu(proposals, scores, keep, nms_thresh, post_nms_topN);
175 keep = utils::nms_cpu(proposals, scores, keep, nms_thresh);
// Gather the selected box rows and their scores into the outputs.
179 utils::GetSubArrayRows(proposals, utils::AsEArrXt(keep), out_boxes);
180 utils::GetSubArray(scores, utils::AsEArrXt(keep), out_probs);
// RunOnDevice: validates the four inputs, computes the anchors once, then
// appends each image's proposals and scores to the two outputs.
// Inputs:  0 scores, 1 bbox_deltas, 2 im_info, 3 anchors.
// Outputs: 0 rois (5 columns per row), 1 rois_probs.
// NOTE(review): several macro openers, the ProposalsForOneImage argument list
// and the end of the function are on lines not visible in this excerpt.
184 bool GenerateProposalsOp<CPUContext>::RunOnDevice() {
185 const auto& scores = Input(0);
186 const auto& bbox_deltas = Input(1);
187 const auto& im_info_tensor = Input(2);
188 const auto& anchors = Input(3);
189 auto* out_rois = Output(0);
190 auto* out_rois_probs = Output(1);
// scores must be a 4-D float tensor: (num_images, A, height, width).
192 CAFFE_ENFORCE_EQ(scores.ndim(), 4, scores.ndim());
193 CAFFE_ENFORCE(scores.template IsType<float>(), scores.meta().name());
194 const auto num_images = scores.dim(0);
195 const auto A = scores.dim(1);
196 const auto height = scores.dim(2);
197 const auto width = scores.dim(3);
198 const auto K = height * width;
// Expected bbox_deltas shape: (num_images, 4 * A, height, width).
202 bbox_deltas.dims(), (vector<TIndex>{num_images, 4 * A, height, width}));
// im_info must be a float tensor of shape (num_images, 3).
205 CAFFE_ENFORCE_EQ(im_info_tensor.dims(), (vector<TIndex>{num_images, 3}));
207 im_info_tensor.template IsType<float>(), im_info_tensor.meta().name());
// anchors must be a float tensor of shape (A, 4).
210 CAFFE_ENFORCE_EQ(anchors.dims(), (vector<TIndex>{A, 4}));
211 CAFFE_ENFORCE(anchors.template IsType<float>(), anchors.meta().name());
// The anchors are computed once, outside the per-image loop, and viewed as
// (K * A) rows of 4 values.
214 auto all_anchors_vec =
215 utils::ComputeAllAnchors(anchors, height, width, feat_stride_);
216 Eigen::Map<const ERMatXf> all_anchors(all_anchors_vec.data(), K * A, 4);
// 2-D view over the im_info data: one row per image.
218 Eigen::Map<const ERArrXXf> im_info(
219 im_info_tensor.data<
float>(),
220 im_info_tensor.dim(0),
221 im_info_tensor.dim(1));
// Both outputs start with zero rows and grow inside the loop.
223 const int roi_col_count = 5;
224 out_rois->Resize(0, roi_col_count);
225 out_rois_probs->Resize(0);
// Per image: take that image's im_info row and its slices of bbox_deltas and
// scores.
228 for (
int i = 0; i < num_images; i++) {
229 auto cur_im_info = im_info.row(i);
230 auto cur_bbox_deltas = GetSubTensorView<float>(bbox_deltas, i);
231 auto cur_scores = GetSubTensorView<float>(scores, i);
// im_i_boxes (used below) is declared on a line not visible here; presumably
// it is filled by this call -- TODO confirm.
235 ProposalsForOneImage(
// csz: number of box rows for this image. cur_start_idx: rows already present
// in out_rois, i.e. where this image's rows begin.
243 int csz = im_i_boxes.rows();
244 int cur_start_idx = out_rois->dim(0);
// Grow both outputs by csz entries. The literal 50 is presumably a growth
// percentage for Tensor::Extend -- TODO confirm against the Tensor API.
246 out_rois->Extend(csz, 50, &context_);
247 out_rois_probs->Extend(csz, 50, &context_);
// Map the newly added rows: the offset skips cur_start_idx rows of
// roi_col_count floats each.
250 Eigen::Map<ERArrXXf> cur_rois(
251 out_rois->mutable_data<
float>() + cur_start_idx * roi_col_count,
// Column 0 is set to the image index i (stored as a float); columns 1-4
// receive the boxes.
254 cur_rois.col(0).setConstant(i);
255 cur_rois.block(0, 1, csz, 4) = im_i_boxes;
// Writes csz values into out_rois_probs starting at cur_start_idx; the
// right-hand side of this assignment is not visible in this excerpt.
259 out_rois_probs->mutable_data<
float>() + cur_start_idx, csz) =
// Registers GenerateProposalsOp<CPUContext> as the CPU implementation of the
// operator named GenerateProposals.
268 REGISTER_CPU_OPERATOR(GenerateProposals, GenerateProposalsOp<CPUContext>);
// The same class is registered a second time under the name
// GenerateProposalsCPP (presumably an alias kept for existing callers --
// TODO confirm).
270 REGISTER_CPU_OPERATOR(GenerateProposalsCPP, GenerateProposalsOp<CPUContext>);
// MKL registrations, compiled only when CAFFE2_HAS_MKL_DNN is defined. Both
// wrap the CPU implementation in mkl::MKLFallbackOp.
// NOTE(review): the operator name argument of the first registration is on a
// line not visible in this excerpt.
272 #ifdef CAFFE2_HAS_MKL_DNN 273 REGISTER_MKL_OPERATOR(
275 mkl::MKLFallbackOp<GenerateProposalsOp<CPUContext>>);
// Second MKL registration, under the name GenerateProposalsCPP.
277 REGISTER_MKL_OPERATOR(
278 GenerateProposalsCPP,
279 mkl::MKLFallbackOp<GenerateProposalsOp<CPUContext>>);
280 #endif // CAFFE2_HAS_MKL_DNN 282 OPERATOR_SCHEMA(GenerateProposals)
286 Generate bounding box proposals for Faster RCNN. The proposals are generated for 287 a list of images based on image score 'score', bounding box regression result 288 'deltas' as well as predefined bounding box shapes 'anchors'. Greedy 289 non-maximum suppression is applied to generate the final bounding boxes. 291 .Arg("spatial_scale",
"(float) spatial scale")
292 .Arg(
"pre_nms_topN",
"(int) RPN_PRE_NMS_TOP_N")
293 .Arg(
"post_nms_topN",
"(int) RPN_POST_NMS_TOP_N")
294 .Arg(
"nms_thresh",
"(float) RPN_NMS_THRESH")
295 .Arg(
"min_size",
"(float) RPN_MIN_SIZE")
296 .Input(0,
"scores",
"Scores from conv layer, size (img_count, A, H, W)")
300 "Bounding box deltas from conv layer, " 301 "size (img_count, 4 * A, H, W)")
305 "Image info, size (img_count, 3), " 306 "format (height, width, scale)")
307 .Input(3,
"anchors",
"Bounding box anchors, size (A, 4)")
311 "Proposals, size (n x 5), " 312 "format (image_index, x1, y1, x2, y2)")
313 .Output(1,
"rois_probs",
"scores of proposals, size (n)");
// Schema for the GenerateProposalsCPP name: only the input count (4) and
// output count (2) are declared; no argument or input/output descriptions.
315 OPERATOR_SCHEMA(GenerateProposalsCPP).NumInputs(4).NumOutputs(2);
// Both operator names are passed to SHOULD_NOT_DO_GRADIENT (presumably this
// declares that no gradient should be requested for them -- TODO confirm
// against the Caffe2 gradient registry).
317 SHOULD_NOT_DO_GRADIENT(GenerateProposals);
319 SHOULD_NOT_DO_GRADIENT(GenerateProposalsCPP);
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...