// NOTE(review): this extraction is heavily garbled — the leading integers on
// many lines are original-file line numbers fused into the text, and the
// jumps between them show that source lines are missing. Code below is kept
// byte-identical; only comments are added.
//
// Header guard + includes for the ImageInputOp operator header, followed by
// fragments of the ImageInputOp<Context> class declaration (label-type enum
// values, method declarations, and member fields).
2 #ifndef CAFFE2_IMAGE_IMAGE_INPUT_OP_H_ 3 #define CAFFE2_IMAGE_IMAGE_INPUT_OP_H_ 5 #include <opencv2/opencv.hpp> 10 #include "caffe/proto/caffe.pb.h" 11 #include "caffe2/core/db.h" 12 #include "caffe2/utils/cast.h" 13 #include "caffe2/utils/math.h" 14 #include "caffe2/utils/thread_pool.h" 15 #include "caffe2/operators/prefetch_op.h" 16 #include "caffe2/image/transform_gpu.h" 22 template <
class Context>
// LABEL_TYPE enum values (SINGLE_LABEL = 0 is presumably declared on a
// missing line — TODO confirm against the original header).
33 MULTI_LABEL_SPARSE = 1,
34 MULTI_LABEL_DENSE = 2,
35 MULTI_LABEL_WEIGHTED_SPARSE = 3,
36 SINGLE_LABEL_WEIGHTED = 4
41 enum SCALE_JITTER_TYPE {
48 using OperatorBase::OutputSize;
// Prefetch/copy hooks overridden from PrefetchOperator (declaration only).
57 bool Prefetch()
override;
58 bool CopyPrefetched()
override;
// Per-image bounding-box parameters; fields of the anonymous structs are on
// lines missing from this extraction.
61 using BoundingBox =
struct {
72 using PerImageArg =
struct {
73 BoundingBox bounding_params;
// Decodes one DB value into an image, label, and per-image info.
76 bool GetImageAndLabelAndInfoFromDBValue(
77 const string& value, cv::Mat* img, PerImageArg& info,
int item_id,
78 std::mt19937* randgen);
// Full decode + augmentation path (float output).
79 void DecodeAndTransform(
80 const std::string& value,
float *image_data,
int item_id,
81 const int channels, std::size_t thread_index);
// Decode + crop/transpose only (uint8 output, for GPU-side transform).
82 void DecodeAndTransposeOnly(
83 const std::string& value, uint8_t *image_data,
int item_id,
84 const int channels, std::size_t thread_index);
// Member fields: DB reader, prefetch buffers, augmentation knobs, etc.
86 unique_ptr<db::DBReader> owned_reader_;
91 vector<TensorCPU> prefetched_additional_outputs_;
94 vector<Tensor<Context>> prefetched_additional_outputs_on_device_;
96 PerImageArg default_arg_;
98 LABEL_TYPE label_type_;
103 float img_saturation_;
104 float img_brightness_;
106 bool color_lighting_;
107 float color_lighting_std_;
108 std::vector<std::vector<float>> color_lighting_eigvecs_;
109 std::vector<float> color_lighting_eigvals_;
110 SCALE_JITTER_TYPE scale_jitter_type_;
118 std::vector<float> mean_;
119 std::vector<float> std_;
124 bool use_caffe_datum_;
126 bool mean_std_copied_ =
false;
129 int num_decode_threads_;
130 int additional_inputs_offset_;
131 int additional_inputs_count_;
132 std::shared_ptr<TaskThreadPool> thread_pool_;
135 TensorProto_DataType output_type_;
138 vector<int> random_scale_;
139 bool random_scaling_;
143 std::vector<std::mt19937> randgen_per_thread_;
// ImageInputOp constructor (original lines ~146-382). Reads all operator
// arguments (batch size, label type, color/scale/crop/mirror settings,
// color-jitter and color-lighting parameters), validates them with
// CAFFE_ENFORCE, logs the configuration, seeds one RNG per decode thread,
// and pre-sizes the prefetch tensors.
// NOTE(review): many initializer-list entries and enclosing expressions are
// on lines missing from this extraction; code is kept byte-identical.
146 template <
class Context>
148 const OperatorDef& operator_def,
152 prefetched_additional_outputs_(OutputSize() - 2),
153 prefetched_additional_outputs_on_device_(OutputSize() - 2),
155 OperatorBase::template GetSingleArgument<int>(
"batch_size", 0)),
156 label_type_(static_cast<LABEL_TYPE>(
157 OperatorBase::template GetSingleArgument<int>(
"label_type", 0))),
159 OperatorBase::template GetSingleArgument<int>(
"num_labels", 0)),
160 color_(OperatorBase::template GetSingleArgument<int>(
"color", 1)),
162 OperatorBase::template GetSingleArgument<int>(
"color_jitter", 0)),
163 img_saturation_(OperatorBase::template GetSingleArgument<float>(
166 img_brightness_(OperatorBase::template GetSingleArgument<float>(
170 OperatorBase::template GetSingleArgument<float>(
"img_contrast", 0.4)),
172 OperatorBase::template GetSingleArgument<int>(
"color_lighting", 0)),
173 color_lighting_std_(OperatorBase::template GetSingleArgument<float>(
174 "color_lighting_std",
176 scale_jitter_type_(static_cast<SCALE_JITTER_TYPE>(
177 OperatorBase::template GetSingleArgument<int>(
180 scale_(OperatorBase::template GetSingleArgument<int>(
"scale", -1)),
181 minsize_(OperatorBase::template GetSingleArgument<int>(
"minsize", -1)),
182 warp_(OperatorBase::template GetSingleArgument<int>(
"warp", 0)),
183 crop_(OperatorBase::template GetSingleArgument<int>(
"crop", -1)),
184 mirror_(OperatorBase::template GetSingleArgument<int>(
"mirror", 0)),
185 is_test_(OperatorBase::template GetSingleArgument<int>(
186 OpSchema::Arg_IsTest,
189 OperatorBase::template GetSingleArgument<int>(
"use_caffe_datum", 0)),
190 gpu_transform_(OperatorBase::template GetSingleArgument<int>(
194 OperatorBase::template GetSingleArgument<int>(
"decode_threads", 4)),
// One shared thread pool sized to the decode-thread count.
195 thread_pool_(std::make_shared<TaskThreadPool>(num_decode_threads_)),
198 cast::GetCastDataType(
ArgumentHelper(operator_def),
"output_type")),
200 OperatorBase::template GetRepeatedArgument<int>(
"random_scale", {-1,-1})) {
// random_scale of {-1,-1} (the default) disables random scaling;
// otherwise the lower bound becomes the effective minsize.
201 if ((random_scale_[0] == -1) || (random_scale_[1] == -1)) {
202 random_scaling_ =
false;
204 random_scaling_ =
true;
205 minsize_ = random_scale_[0];
// Per-channel mean/std, falling back to a single scalar argument.
208 mean_ = OperatorBase::template GetRepeatedArgument<float>(
210 {OperatorBase::template GetSingleArgument<float>(
"mean", 0.)});
212 std_ = OperatorBase::template GetRepeatedArgument<float>(
214 {OperatorBase::template GetSingleArgument<float>(
"std", 1.)});
// Outputs beyond (image, label) are "additional outputs".
216 vector<int> additional_output_sizes =
217 OperatorBase::template GetRepeatedArgument<int>(
218 "output_sizes", vector<int>(OutputSize() - 2, 1));
219 additional_inputs_count_ = OutputSize() - 2;
// Default bounding box; -1 in any field marks it invalid (checked below).
221 default_arg_.bounding_params = {
223 OperatorBase::template GetSingleArgument<int>(
"bounding_ymin", -1),
224 OperatorBase::template GetSingleArgument<int>(
"bounding_xmin", -1),
225 OperatorBase::template GetSingleArgument<int>(
"bounding_height", -1),
226 OperatorBase::template GetSingleArgument<int>(
"bounding_width", -1),
// Legacy path: no input blob means the op owns its own DB reader.
229 if (operator_def.input_size() == 0) {
230 LOG(ERROR) <<
"You are using an old ImageInputOp format that creates " 231 "a local db reader. Consider moving to the new style " 232 "that takes in a DBReader blob instead.";
234 OperatorBase::template GetSingleArgument<string>(
"db",
"");
235 CAFFE_ENFORCE_GT(db_name.size(), 0,
"Must specify a db name.");
237 OperatorBase::template GetSingleArgument<string>(
238 "db_type",
"leveldb"),
240 reader_ = owned_reader_.get();
// Hard-coded PCA eigenvectors/eigenvalues for AlexNet-style color
// lighting augmentation (values match the ImageNet statistics used in
// this codebase — TODO confirm provenance).
244 color_lighting_eigvecs_.push_back(
245 std::vector<float>{-144.7125, 183.396, 102.2295});
246 color_lighting_eigvecs_.push_back(
247 std::vector<float>{-148.104, -1.1475, -207.57});
248 color_lighting_eigvecs_.push_back(
249 std::vector<float>{-148.818, -177.174, 107.1765});
251 color_lighting_eigvals_ = std::vector<float>{0.2175, 0.0188, 0.0045};
// Argument validation.
253 CAFFE_ENFORCE_GT(batch_size_, 0,
"Batch size should be nonnegative.");
254 if (use_caffe_datum_) {
255 CAFFE_ENFORCE(label_type_ == SINGLE_LABEL || label_type_ == SINGLE_LABEL_WEIGHTED,
256 "Caffe datum only supports single integer label");
258 if (label_type_ != SINGLE_LABEL && label_type_ != SINGLE_LABEL_WEIGHTED) {
259 CAFFE_ENFORCE_GT(num_labels_, 0,
260 "Number of labels must be set for using either sparse label indices or dense label embedding.");
// Weighted label types carry an extra weight proto, shifting where the
// additional-output protos start in each TensorProtos record.
262 if (label_type_ == MULTI_LABEL_WEIGHTED_SPARSE ||
263 label_type_ == SINGLE_LABEL_WEIGHTED) {
264 additional_inputs_offset_ = 3;
266 additional_inputs_offset_ = 2;
// Exactly one of scale/minsize must be set, and it must cover the crop.
268 CAFFE_ENFORCE((scale_ > 0) != (minsize_ > 0),
269 "Must provide one and only one of scaling or minsize");
270 CAFFE_ENFORCE_GT(crop_, 0,
"Must provide the cropping value.");
272 scale_ > 0 ? scale_ : minsize_,
273 crop_,
"The scale/minsize value must be no smaller than the crop value.");
278 "The mean and std. dev vectors must be of the same size.");
279 CAFFE_ENFORCE(mean_.size() == 1 || mean_.size() == 3,
280 "The mean and std. dev vectors must be of size 1 or 3");
282 !use_caffe_datum_ || OutputSize() == 2,
283 "There can only be 2 outputs if the Caffe datum format is used");
285 additional_output_sizes.size() == OutputSize() - 2,
286 "If the output sizes are specified, they must be specified for all " 287 "additional outputs");
289 CAFFE_ENFORCE(random_scale_.size() == 2,
290 "Must provide [scale_min, scale_max]");
291 CAFFE_ENFORCE_GE(random_scale_[1], random_scale_[0],
292 "random scale must provide a range [min, max]");
// Any negative bounding-box field invalidates the default box.
294 if (default_arg_.bounding_params.ymin < 0
295 || default_arg_.bounding_params.xmin < 0
296 || default_arg_.bounding_params.height < 0
297 || default_arg_.bounding_params.width < 0) {
298 default_arg_.bounding_params.valid =
false;
300 default_arg_.bounding_params.valid =
true;
// Broadcast scalar mean/std to 3 channels.
303 if (mean_.size() == 1) {
305 mean_.resize(3, mean_[0]);
306 std_.resize(3, std_[0]);
// Log the effective configuration for debugging.
309 LOG(INFO) <<
"Creating an image input op with the following setting: ";
310 LOG(INFO) <<
" Using " << num_decode_threads_ <<
" CPU threads;";
311 if (gpu_transform_) {
312 LOG(INFO) <<
" Performing transformation on GPU";
314 LOG(INFO) <<
" Outputting in batches of " << batch_size_ <<
" images;";
315 LOG(INFO) <<
" Treating input image as " 316 << (color_ ?
"color " :
"grayscale ") <<
"image;";
317 if (default_arg_.bounding_params.valid) {
318 LOG(INFO) <<
" Applying a default bounding box of Y [" 319 << default_arg_.bounding_params.ymin <<
"; " 320 << default_arg_.bounding_params.ymin +
321 default_arg_.bounding_params.height
323 << default_arg_.bounding_params.xmin <<
"; " 324 << default_arg_.bounding_params.xmin +
325 default_arg_.bounding_params.width
328 if (scale_ > 0 && !random_scaling_) {
329 LOG(INFO) <<
" Scaling image to " << scale_
330 << (warp_ ?
" with " :
" without ") <<
"warping;";
332 if (random_scaling_) {
334 LOG(INFO) <<
" Randomly scaling shortest side between " 335 << random_scale_[0] <<
" and " 339 LOG(INFO) <<
" Ensuring minimum image size of " << minsize_
340 << (warp_ ?
" with " :
" without ") <<
"warping;";
343 LOG(INFO) <<
" " << (is_test_ ?
"Central" :
"Random")
344 <<
" cropping image to " << crop_
345 << (mirror_ ?
" with " :
" without ") <<
"random mirroring;";
346 LOG(INFO) <<
"Label Type: " << label_type_;
347 LOG(INFO) <<
"Num Labels: " << num_labels_;
349 auto mit = mean_.begin();
350 auto sit = std_.begin();
353 mit != mean_.end() && sit != std_.end();
355 LOG(INFO) <<
" Default [Channel " << i <<
"] Subtract mean " << *mit
356 <<
" and divide by std " << *sit <<
".";
360 LOG(INFO) <<
" Outputting images as " 361 << OperatorBase::template GetSingleArgument<string>(
"output_type",
"unknown") <<
".";
// Seed one deterministic-per-run RNG per decode thread from a single
// time-seeded meta generator.
363 std::mt19937 meta_randgen(time(
nullptr));
364 for (
int i = 0; i < num_decode_threads_; ++i) {
365 randgen_per_thread_.emplace_back(meta_randgen());
// Pre-size the prefetch tensors: image, label (2D for multi-label,
// 1D otherwise), and each additional output.
371 TIndex(color_ ? 3 : 1));
372 if (label_type_ != SINGLE_LABEL && label_type_ != SINGLE_LABEL_WEIGHTED) {
373 prefetched_label_.
Resize(TIndex(batch_size_), TIndex(num_labels_));
375 prefetched_label_.
Resize(vector<TIndex>(1, batch_size_));
378 for (
int i = 0; i < additional_output_sizes.size(); ++i) {
379 prefetched_additional_outputs_[i].Resize(
380 TIndex(batch_size_), TIndex(additional_output_sizes[i]));
// Inception-style random sized crop: tries up to 10 times to sample a crop
// whose area is 8%-100% of the image and whose aspect ratio is in
// [3/4, 4/3]; on success crops and (per the cv::Size(crop, crop) call)
// resizes to crop x crop, returning true. Returns false if no attempt fits,
// so the caller falls back to conventional scaling.
// NOTE(review): the resize call and loop exit are on lines missing from
// this extraction; code kept byte-identical.
385 template <
class Context>
386 bool RandomSizedCropping(
389 std::mt19937* randgen
392 bool inception_scale_jitter =
false;
393 int im_height = img->rows, im_width = img->cols;
394 int area = im_height * im_width;
395 std::uniform_real_distribution<> area_dis(0.08, 1.0);
396 std::uniform_real_distribution<> aspect_ratio_dis(3.0 / 4.0, 4.0 / 3.0);
399 for (
int i = 0; i < 10; ++i) {
400 int target_area = int(ceil(area_dis(*randgen) * area));
401 float aspect_ratio = aspect_ratio_dis(*randgen);
// Candidate height/width from target area and aspect ratio.
402 int nh = floor(std::sqrt(((
float)target_area / aspect_ratio)));
403 int nw = floor(std::sqrt(((
float)target_area * aspect_ratio)));
404 if (nh >= 1 && nh <= im_height && nw >=1 && nw <= im_width) {
// Uniform random placement of the candidate rectangle.
405 int height_offset = std::uniform_int_distribution<>(
406 0, im_height - nh)(*randgen);
407 int width_offset = std::uniform_int_distribution<>(
408 0,im_width - nw)(*randgen);
409 cv::Rect ROI(width_offset, height_offset, nw, nh);
410 cropping = (*img)(ROI);
414 cv::Size(crop, crop),
419 inception_scale_jitter =
true;
423 return inception_scale_jitter;
// Parses one serialized DB record into: a decoded cv::Mat image, the label
// written into prefetched_label_ at item_id, any additional outputs, and an
// optional per-record bounding box in `info`. Supports two record formats:
// legacy Caffe Datum (single int label) and caffe2 TensorProtos
// (proto 0 = image, proto 1 = label, optional weight proto, additional
// output protos, optional trailing INT32 bounding-box proto).
// After decoding it converts color<->gray as needed, applies the bounding
// box, and scales the image (inception jitter, random scaling, or
// scale/minsize). NOTE(review): numerous lines (branch closers, resize
// calls) are missing from this extraction; code kept byte-identical.
426 template <
class Context>
432 std::mt19937* randgen) {
// --- Legacy Caffe Datum path ---
442 if (use_caffe_datum_) {
445 CAFFE_ENFORCE(datum.ParseFromString(value));
// Datum carries a single integer label.
447 prefetched_label_.
mutable_data<
int>()[item_id] = datum.label();
448 if (datum.encoded()) {
// Encoded bytes: decode with OpenCV (JPEG/PNG/etc.).
455 const_cast<char*
>(datum.data().data())),
456 color_ ? CV_LOAD_IMAGE_COLOR : CV_LOAD_IMAGE_GRAYSCALE);
459 CAFFE_ENFORCE(datum.channels() == 3 || datum.channels() == 1);
461 int src_c = datum.channels();
463 datum.height(), datum.width(), (src_c == 3) ? CV_8UC3 : CV_8UC1);
// Raw single-channel data is a straight memcpy ...
466 memcpy(src.ptr<uchar>(0), datum.data().data(), datum.data().size());
// ... while 3-channel Datum data is planar (CHW) and must be
// interleaved into the HWC cv::Mat.
470 for (
int c = 0; c < 3; ++c) {
471 const char* datum_buffer =
472 datum.data().data() + datum.height() * datum.width() * c;
473 uchar* ptr = src.ptr<uchar>(0) + c;
474 for (
int h = 0; h < datum.height(); ++h) {
475 for (
int w = 0; w < datum.width(); ++w) {
476 *ptr = *(datum_buffer++);
// --- TensorProtos path ---
486 CAFFE_ENFORCE(protos.ParseFromString(value));
487 const TensorProto& image_proto = protos.protos(0);
488 const TensorProto& label_proto = protos.protos(1);
489 vector<TensorProto> additional_output_protos;
490 int start = additional_inputs_offset_;
491 int end = start + additional_inputs_count_;
492 for (
int i = start; i < end; ++i) {
493 additional_output_protos.push_back(protos.protos(i));
// An extra trailing proto (if present) is a per-record bounding box:
// [ymin, xmin, height, width] as INT32.
496 if (protos.protos_size() == end + 1) {
498 const TensorProto& bounding_proto = protos.protos(end);
499 DCHECK_EQ(bounding_proto.data_type(), TensorProto::INT32);
500 DCHECK_EQ(bounding_proto.int32_data_size(), 4);
501 info.bounding_params.valid =
true;
502 info.bounding_params.ymin = bounding_proto.int32_data(0);
503 info.bounding_params.xmin = bounding_proto.int32_data(1);
504 info.bounding_params.height = bounding_proto.int32_data(2);
505 info.bounding_params.width = bounding_proto.int32_data(3);
// Image proto: STRING = encoded image bytes; BYTE = raw HWC pixels.
508 if (image_proto.data_type() == TensorProto::STRING) {
510 DCHECK_EQ(image_proto.string_data_size(), 1);
511 const string& encoded_image_str = image_proto.string_data(0);
512 int encoded_size = encoded_image_str.size();
519 const_cast<char*>(encoded_image_str.data())),
520 color_ ? CV_LOAD_IMAGE_COLOR : CV_LOAD_IMAGE_GRAYSCALE);
521 }
else if (image_proto.data_type() == TensorProto::BYTE) {
523 int src_c = (image_proto.dims_size() == 3) ? image_proto.dims(2) : 1;
524 CAFFE_ENFORCE(src_c == 3 || src_c == 1);
529 (src_c == 3) ? CV_8UC3 : CV_8UC1);
532 image_proto.byte_data().data(),
533 image_proto.byte_data().size());
535 LOG(FATAL) <<
"Unknown image data type.";
// Label proto: FLOAT or INT32, interpreted per label_type_.
538 if (label_proto.data_type() == TensorProto::FLOAT) {
539 if (label_type_ == SINGLE_LABEL || label_type_ == SINGLE_LABEL_WEIGHTED) {
540 DCHECK_EQ(label_proto.float_data_size(), 1);
542 label_proto.float_data(0);
543 }
else if (label_type_ == MULTI_LABEL_SPARSE) {
// Sparse indices -> dense one-hot row of length num_labels_.
544 float* label_data = prefetched_label_.
mutable_data<
float>() +
545 item_id * num_labels_;
546 memset(label_data, 0,
sizeof(
float) * num_labels_);
547 for (
int i = 0; i < label_proto.float_data_size(); ++i) {
548 label_data[(int)label_proto.float_data(i)] = 1.0;
550 }
else if (label_type_ == MULTI_LABEL_WEIGHTED_SPARSE) {
// Sparse indices with per-index weights from proto 2.
551 const TensorProto& weight_proto = protos.protos(2);
553 prefetched_label_.
mutable_data<
float>() + item_id * num_labels_;
554 memset(label_data, 0,
sizeof(
float) * num_labels_);
555 for (
int i = 0; i < label_proto.float_data_size(); ++i) {
556 label_data[(int)label_proto.float_data(i)] =
557 weight_proto.float_data(i);
559 }
else if (label_type_ == MULTI_LABEL_DENSE) {
// Dense embedding copied verbatim.
560 CAFFE_ENFORCE(label_proto.float_data_size() == num_labels_);
561 float* label_data = prefetched_label_.
mutable_data<
float>() +
562 item_id * num_labels_;
563 for (
int i = 0; i < label_proto.float_data_size(); ++i) {
564 label_data[i] = label_proto.float_data(i);
567 LOG(ERROR) <<
"Unknown label type:" << label_type_;
// INT32 labels: same four cases as FLOAT above.
569 }
else if (label_proto.data_type() == TensorProto::INT32) {
570 if (label_type_ == SINGLE_LABEL || label_type_ == SINGLE_LABEL_WEIGHTED) {
571 DCHECK_EQ(label_proto.int32_data_size(), 1);
573 label_proto.int32_data(0);
574 }
else if (label_type_ == MULTI_LABEL_SPARSE) {
575 int* label_data = prefetched_label_.
mutable_data<
int>() +
576 item_id * num_labels_;
577 memset(label_data, 0,
sizeof(
int) * num_labels_);
578 for (
int i = 0; i < label_proto.int32_data_size(); ++i) {
579 label_data[label_proto.int32_data(i)] = 1;
581 }
else if (label_type_ == MULTI_LABEL_WEIGHTED_SPARSE) {
582 const TensorProto& weight_proto = protos.protos(2);
584 prefetched_label_.
mutable_data<
float>() + item_id * num_labels_;
585 memset(label_data, 0,
sizeof(
float) * num_labels_);
586 for (
int i = 0; i < label_proto.int32_data_size(); ++i) {
587 label_data[label_proto.int32_data(i)] = weight_proto.float_data(i);
589 }
else if (label_type_ == MULTI_LABEL_DENSE) {
590 CAFFE_ENFORCE(label_proto.int32_data_size() == num_labels_);
591 int* label_data = prefetched_label_.
mutable_data<
int>() +
592 item_id * num_labels_;
593 for (
int i = 0; i < label_proto.int32_data_size(); ++i) {
594 label_data[i] = label_proto.int32_data(i);
597 LOG(ERROR) <<
"Unknown label type:" << label_type_;
600 LOG(FATAL) <<
"Unsupported label data type.";
// Copy each additional output proto into its prefetch buffer, typed by
// the proto's data type (FLOAT / INT32 / INT64).
603 for (
int i = 0; i < additional_output_protos.size(); ++i) {
604 auto additional_output_proto = additional_output_protos[i];
606 if (additional_output_proto.data_type() == TensorProto::FLOAT) {
607 float* additional_output =
608 prefetched_additional_outputs_[i].template mutable_data<float>() +
609 item_id * additional_output_proto.float_data_size();
611 for (
int j = 0; j < additional_output_proto.float_data_size(); ++j) {
612 additional_output[j] = additional_output_proto.float_data(j);
614 }
else if (additional_output_proto.data_type() == TensorProto::INT32) {
615 int* additional_output =
616 prefetched_additional_outputs_[i].template mutable_data<int>() +
617 item_id * additional_output_proto.int32_data_size();
619 for (
int j = 0; j < additional_output_proto.int32_data_size(); ++j) {
620 additional_output[j] = additional_output_proto.int32_data(j);
622 }
else if (additional_output_proto.data_type() == TensorProto::INT64) {
623 int64_t* additional_output =
624 prefetched_additional_outputs_[i].template mutable_data<int64_t>() +
625 item_id * additional_output_proto.int64_data_size();
627 for (
int j = 0; j < additional_output_proto.int64_data_size(); ++j) {
628 additional_output[j] = additional_output_proto.int64_data(j);
632 LOG(FATAL) <<
"Unsupported output type.";
// Convert channel count to match the requested color mode.
640 int out_c = color_ ? 3 : 1;
641 if (out_c == src.channels()) {
644 cv::cvtColor(src, *img, (out_c == 1) ? CV_BGR2GRAY : CV_GRAY2BGR);
648 CAFFE_ENFORCE(img->isContinuous());
// Invalidate a bounding box that does not fit inside the image.
653 if (info.bounding_params.valid
654 && (src.rows < info.bounding_params.ymin + info.bounding_params.height
655 || src.cols < info.bounding_params.xmin + info.bounding_params.width
657 info.bounding_params.valid =
false;
661 if (info.bounding_params.valid) {
663 cv::Rect bounding_box(info.bounding_params.xmin, info.bounding_params.ymin,
664 info.bounding_params.width, info.bounding_params.height);
665 *img = (*img)(bounding_box);
// Scale jitter: try inception-style random sized crop first.
679 bool inception_scale_jitter =
false;
680 if (scale_jitter_type_ == INCEPTION_STYLE) {
683 inception_scale_jitter = RandomSizedCropping<Context>(img, crop_, randgen);
// Conventional scaling path (also the fallback when inception jitter
// failed to find a valid crop).
688 if ((scale_jitter_type_ == NO_SCALE_JITTER) ||
689 (scale_jitter_type_ == INCEPTION_STYLE && !inception_scale_jitter)) {
690 int scaled_width, scaled_height;
691 int scale_to_use = scale_ > 0 ? scale_ : minsize_;
694 if (random_scaling_) {
695 scale_to_use = std::uniform_int_distribution<>(random_scale_[0],
696 random_scale_[1])(*randgen);
// Without warping, scale the shorter side to scale_to_use and keep
// the aspect ratio for the other side.
700 scaled_width = scale_to_use;
701 scaled_height = scale_to_use;
702 }
else if (img->rows > img->cols) {
703 scaled_width = scale_to_use;
705 static_cast<float>(img->rows) * scale_to_use / img->cols;
707 scaled_height = scale_to_use;
709 static_cast<float>(img->cols) * scale_to_use / img->rows;
// Resize only when the size actually changes (or minsize must grow).
712 (scaled_height != img->rows || scaled_width != img->cols))
713 || (scaled_height > img->rows || scaled_width > img->cols)) {
723 cv::Size(scaled_width, scaled_height),
// Saturation augmentation (name grounded by the ColorJitter call site
// below): blends each BGR pixel toward its grayscale value with a random
// alpha drawn from U(-alpha_rand, alpha_rand) (presumably offset by 1 on a
// missing line — TODO confirm). Operates in place on an img_size x
// img_size, 3-channel interleaved float buffer.
735 template <
class Context>
739 const float alpha_rand,
740 std::mt19937* randgen
743 std::uniform_real_distribution<float>(-alpha_rand, alpha_rand)(*randgen);
746 for (
int h = 0; h < img_size; ++h) {
747 for (
int w = 0; w < img_size; ++w) {
// ITU-R BT.601 luma weights; channel order here is B, G, R.
748 float gray_color = img[3 * p] * 0.114f + img[3 * p + 1] * 0.587f +
749 img[3 * p + 2] * 0.299f;
750 for (
int c = 0; c < 3; ++c) {
751 img[3 * p + c] = img[3 * p + c] * alpha + gray_color * (1.0f - alpha);
// Brightness augmentation (name grounded by the ColorJitter call site):
// scales every channel of every pixel by a single random alpha drawn from
// U(-alpha_rand, alpha_rand) (presumably offset by 1 on a missing line —
// TODO confirm). In-place on an img_size x img_size 3-channel float buffer.
// NOTE(review): the innermost multiply statement is on a line missing from
// this extraction.
759 template <
class Context>
763 const float alpha_rand,
764 std::mt19937* randgen
767 std::uniform_real_distribution<float>(-alpha_rand, alpha_rand)(*randgen);
769 for (
int h = 0; h < img_size; ++h) {
770 for (
int w = 0; w < img_size; ++w) {
771 for (
int c = 0; c < 3; ++c) {
// Contrast augmentation (name grounded by the ColorJitter call site):
// first computes the mean grayscale value of the whole image (BT.601
// weights, BGR order), then blends every channel toward that mean with a
// random alpha from U(-alpha_rand, alpha_rand) (presumably offset by 1 on
// a missing line — TODO confirm). In-place on a 3-channel float buffer.
779 template <
class Context>
783 const float alpha_rand,
784 std::mt19937* randgen
// Pass 1: accumulate the global gray mean.
788 for (
int h = 0; h < img_size; ++h) {
789 for (
int w = 0; w < img_size; ++w) {
791 gray_mean += img[3 * p] * 0.114f + img[3 * p + 1] * 0.587f +
792 img[3 * p + 2] * 0.299f;
796 gray_mean /= (img_size * img_size);
799 std::uniform_real_distribution<float>(-alpha_rand, alpha_rand)(*randgen);
// Pass 2: blend each sample toward the gray mean.
801 for (
int h = 0; h < img_size; ++h) {
802 for (
int w = 0; w < img_size; ++w) {
803 for (
int c = 0; c < 3; ++c) {
804 img[p] = img[p] * alpha + gray_mean * (1.0f - alpha);
// ColorJitter: applies Saturation, Brightness, and Contrast in a random
// order. NOTE(review): std::srand here is redundant — the shuffle uses a
// separately seeded std::default_random_engine, not rand(); flagging only,
// since this extraction is too incomplete for a safe code change. Seeding
// from the system clock also makes the jitter order independent of the
// per-thread mt19937, so it is not reproducible from that generator's seed.
812 template <
class Context>
816 const float saturation,
817 const float brightness,
818 const float contrast,
819 std::mt19937* randgen
821 std::srand (
unsigned(std::time(0)));
822 std::vector<int> jitter_order{0, 1, 2};
// Shuffle the order in which the three jitters are applied.
824 unsigned seed = std::chrono::system_clock::now().time_since_epoch().count();
825 std::shuffle(jitter_order.begin(), jitter_order.end(),
826 std::default_random_engine(seed));
828 for (
int i = 0; i < 3; ++i) {
829 if (jitter_order[i] == 0) {
830 Saturation<Context>(img, img_size, saturation, randgen);
831 }
else if (jitter_order[i] == 1) {
832 Brightness<Context>(img, img_size, brightness, randgen);
834 Contrast<Context>(img, img_size, contrast, randgen);
// ColorLighting: AlexNet-style PCA color augmentation. Draws three alphas
// from N(0, alpha_std), forms a per-channel RGB delta as
// delta_rgb[i] = sum_j eigvecs[i][j] * eigvals[j] * alphas[j],
// and adds it to every pixel. The `2 - c` index reverses channel order
// (the buffer is BGR while eigvecs are RGB — presumably; TODO confirm).
840 template <
class Context>
844 const float alpha_std,
845 const std::vector<std::vector<float>>& eigvecs,
846 const std::vector<float>& eigvals,
847 std::mt19937* randgen
849 std::normal_distribution<float> d(0, alpha_std);
850 std::vector<float> alphas(3);
851 for (
int i = 0; i < 3; ++i) {
852 alphas[i] = d(*randgen);
// Project the random alphas through the PCA basis.
855 std::vector<float> delta_rgb(3, 0.0);
856 for (
int i = 0; i < 3; ++i) {
857 for (
int j = 0; j < 3; ++j) {
858 delta_rgb[i] += eigvecs[i][j] * eigvals[j] * alphas[j];
// Apply the same delta to every pixel.
863 for (
int h = 0; h < img_size; ++h) {
864 for (
int w = 0; w < img_size; ++w) {
865 for (
int c = 0; c < 3; ++c) {
866 img[p++] += delta_rgb[2 - c];
// ColorNormalization: per-channel normalization of an interleaved float
// image: img[p] = (img[p] - mean[c]) * std[c]. Note the MULTIPLY by std[c]
// — the caller evidently passes reciprocal std values (1/sigma);
// TODO confirm against the caller's preparation of std_.
875 template <
class Context>
876 void ColorNormalization(
880 const std::vector<float>& mean,
881 const std::vector<float>&
std 884 for (
int h = 0; h < img_size; ++h) {
885 for (
int w = 0; w < img_size; ++w) {
886 for (
int c = 0; c < channels; ++c) {
887 img[p] = (img[p] - mean[c]) * std[c];
// TransformImage (name grounded by the DecodeAndTransform call site):
// full CPU augmentation pipeline for one image. Crops crop x crop from the
// scaled image (center crop in test mode, random offset otherwise),
// optionally mirrors horizontally, converts uint8 pixels to float into
// image_data (interleaved HWC), then applies color jitter, color lighting,
// and mean/std color normalization. Jitter/lighting run only for 3-channel
// images and never in test mode.
895 template <
class Context>
897 const cv::Mat& scaled_img,
900 const bool color_jitter,
901 const float saturation,
902 const float brightness,
903 const float contrast,
904 const bool color_lighting,
905 const float color_lighting_std,
906 const std::vector<std::vector<float>>& color_lighting_eigvecs,
907 const std::vector<float>& color_lighting_eigvals,
910 const std::vector<float>& mean,
911 const std::vector<float>& std,
912 std::mt19937* randgen,
913 std::bernoulli_distribution* mirror_this_image,
914 bool is_test =
false) {
// The scaled image must be at least crop x crop.
916 scaled_img.rows, crop,
"Image height must be bigger than crop.");
918 scaled_img.cols, crop,
"Image width must be bigger than crop.");
// Center crop for test, uniform-random crop for training.
921 int width_offset, height_offset;
923 width_offset = (scaled_img.cols - crop) / 2;
924 height_offset = (scaled_img.rows - crop) / 2;
927 std::uniform_int_distribution<>(0, scaled_img.cols - crop)(*randgen);
929 std::uniform_int_distribution<>(0, scaled_img.rows - crop)(*randgen);
932 float* image_data_ptr = image_data;
// Mirrored copy: walk columns right-to-left.
933 if (!is_test && mirror && (*mirror_this_image)(*randgen)) {
935 for (
int h = height_offset; h < height_offset + crop; ++h) {
936 for (
int w = width_offset + crop - 1; w >= width_offset; --w) {
937 const uint8_t* cv_data = scaled_img.ptr(h) + w * channels;
938 for (
int c = 0; c < channels; ++c) {
939 *(image_data_ptr++) = static_cast<float>(cv_data[c]);
// Straight copy: columns left-to-right.
945 for (
int h = height_offset; h < height_offset + crop; ++h) {
946 for (
int w = width_offset; w < width_offset + crop; ++w) {
947 const uint8_t* cv_data = scaled_img.ptr(h) + w * channels;
948 for (
int c = 0; c < channels; ++c) {
949 *(image_data_ptr++) = static_cast<float>(cv_data[c]);
// Train-time color augmentations (3-channel only).
955 if (color_jitter && channels == 3 && !is_test) {
956 ColorJitter<Context>(image_data, crop, saturation, brightness, contrast,
959 if (color_lighting && channels == 3 && !is_test) {
960 ColorLighting<Context>(image_data, crop, color_lighting_std,
961 color_lighting_eigvecs, color_lighting_eigvals, randgen);
// Always normalize by mean/std (applies in test mode too).
966 ColorNormalization<Context>(image_data, crop, channels, mean, std);
// CropTransposeImage: uint8 crop-and-mirror counterpart of TransformImage,
// used on the GPU-transform path — it copies raw bytes (no float
// conversion, no jitter/normalization; those happen later on the GPU).
// Center crop in test mode, random crop otherwise; optional horizontal
// mirror. Note the mirror check here does not test is_test_, unlike
// TransformImage — presumably callers gate on it upstream; TODO confirm.
971 template <
class Context>
972 void CropTransposeImage(
const cv::Mat& scaled_img,
const int channels,
973 uint8_t *cropped_data,
const int crop,
974 const bool mirror, std::mt19937 *randgen,
975 std::bernoulli_distribution *mirror_this_image,
976 bool is_test =
false) {
// The scaled image must be at least crop x crop.
978 scaled_img.rows, crop,
"Image height must be bigger than crop.");
980 scaled_img.cols, crop,
"Image width must be bigger than crop.");
// Center crop for test, uniform-random crop for training.
983 int width_offset, height_offset;
985 width_offset = (scaled_img.cols - crop) / 2;
986 height_offset = (scaled_img.rows - crop) / 2;
989 std::uniform_int_distribution<>(0, scaled_img.cols - crop)(*randgen);
991 std::uniform_int_distribution<>(0, scaled_img.rows - crop)(*randgen);
// Mirrored byte copy (columns right-to-left).
994 if (mirror && (*mirror_this_image)(*randgen)) {
996 for (
int h = height_offset; h < height_offset + crop; ++h) {
997 for (
int w = width_offset + crop - 1; w >= width_offset; --w) {
998 const uint8_t* cv_data = scaled_img.ptr(h) + w*channels;
999 for (
int c = 0; c < channels; ++c) {
1000 *(cropped_data++) = cv_data[c];
// Straight byte copy (columns left-to-right).
1006 for (
int h = height_offset; h < height_offset + crop; ++h) {
1007 for (
int w = width_offset; w < width_offset + crop; ++w) {
1008 const uint8_t* cv_data = scaled_img.ptr(h) + w*channels;
1009 for (
int c = 0; c < channels; ++c) {
1010 *(cropped_data++) = cv_data[c];
// DecodeAndTransform: per-thread worker for the CPU pipeline. Decodes one
// DB record into a cv::Mat (also filling the label/additional outputs for
// item_id), then runs the full TransformImage augmentation into the float
// output slot. thread_index selects this thread's dedicated mt19937 so
// workers never share RNG state.
1019 template <
class Context>
1021 const std::string& value,
float *image_data,
int item_id,
1022 const int channels, std::size_t thread_index) {
1024 CAFFE_ENFORCE((
int)thread_index < num_decode_threads_);
// Fair coin for the horizontal-mirror decision.
1026 std::bernoulli_distribution mirror_this_image(0.5f);
1027 std::mt19937* randgen = &(randgen_per_thread_[thread_index]);
1032 CHECK(GetImageAndLabelAndInfoFromDBValue(value, &img, info, item_id,
1036 TransformImage<Context>(img, channels, image_data,
1037 color_jitter_, img_saturation_, img_brightness_, img_contrast_,
1038 color_lighting_, color_lighting_std_, color_lighting_eigvecs_,
1039 color_lighting_eigvals_, crop_, mirror_, mean_, std_,
1040 randgen, &mirror_this_image, is_test_);
// DecodeAndTransposeOnly: per-thread worker for the GPU-transform pipeline.
// Decodes one DB record, then only crops/mirrors raw uint8 bytes via
// CropTransposeImage; color normalization happens later on the GPU
// (see CopyPrefetched / TransformOnGPU).
1043 template <
class Context>
1045 const std::string& value, uint8_t *image_data,
int item_id,
1046 const int channels, std::size_t thread_index) {
1048 CAFFE_ENFORCE((
int)thread_index < num_decode_threads_);
// Fair coin for the horizontal-mirror decision.
1050 std::bernoulli_distribution mirror_this_image(0.5f);
1051 std::mt19937* randgen = &(randgen_per_thread_[thread_index]);
1056 CHECK(GetImageAndLabelAndInfoFromDBValue(value, &img, info, item_id,
1060 CropTransposeImage<Context>(img, channels, image_data, crop_, mirror_,
1061 randgen, &mirror_this_image, is_test_);
// Prefetch: fills one batch. Reads batch_size_ records from the DBReader,
// and for each record schedules a decode task on the thread pool —
// DecodeAndTransposeOnly (uint8 slot) when gpu_transform_ is on, otherwise
// DecodeAndTransform (float slot). On the first record of a TensorProtos
// batch it inspects the label/additional-output dtypes to materialize the
// prefetch buffers with the right element type. Waits for all workers,
// then (for non-CPU contexts) copies the prefetched tensors to the device.
1065 template <
class Context>
// Lazily bind to the input DBReader blob when we don't own a reader.
1067 if (!owned_reader_.get()) {
1071 reader_ = &OperatorBase::Input<db::DBReader>(0);
1073 const int channels = color_ ? 3 : 1;
1075 if (gpu_transform_) {
1085 for (
int item_id = 0; item_id < batch_size_; ++item_id) {
1086 std::string key, value;
1090 reader_->
Read(&key, &value);
// First item: pre-type the label and additional-output buffers so the
// worker threads' mutable_data<T>() calls don't race on reallocation —
// presumably; TODO confirm intent.
1093 if( item_id == 0 ) {
1094 if( use_caffe_datum_ ) {
1097 TensorProtos protos;
1098 CAFFE_ENFORCE(protos.ParseFromString(value));
1099 TensorProto_DataType labeldt = protos.protos(1).data_type();
1100 if( labeldt == TensorProto::INT32 ) {
1102 }
else if ( labeldt == TensorProto::FLOAT) {
1105 LOG(FATAL) <<
"Unsupported label type.";
1108 for (
int i = 0; i < additional_inputs_count_; ++i) {
1109 int index = additional_inputs_offset_ + i;
1110 TensorProto additional_output_proto = protos.protos(index);
1112 if (additional_output_proto.data_type() == TensorProto::FLOAT) {
1113 prefetched_additional_outputs_[i].template mutable_data<float>();
1115 additional_output_proto.data_type() == TensorProto::INT32) {
1116 prefetched_additional_outputs_[i].template mutable_data<int>();
1118 additional_output_proto.data_type() == TensorProto::INT64) {
1119 prefetched_additional_outputs_[i].template mutable_data<int64_t>();
1121 LOG(FATAL) <<
"Unsupported output type.";
// Dispatch this record's decode to the pool; each task writes its own
// crop_*crop_*channels slice of the prefetched image tensor.
1129 if (gpu_transform_) {
1131 uint8_t* image_data = prefetched_image_.
mutable_data<uint8_t>() +
1132 crop_ * crop_ * channels * item_id;
1133 thread_pool_->runTaskWithID(std::bind(
1140 std::placeholders::_1));
1142 float* image_data = prefetched_image_.
mutable_data<
float>() +
1143 crop_ * crop_ * channels * item_id;
1144 thread_pool_->runTaskWithID(std::bind(
1151 std::placeholders::_1));
// Block until every decode task for this batch has finished.
1154 thread_pool_->waitWorkComplete();
// Stage the batch on the device for non-CPU contexts.
1158 if (!std::is_same<Context, CPUContext>::value) {
1159 prefetched_image_on_device_.CopyFrom(prefetched_image_, &context_);
1160 prefetched_label_on_device_.CopyFrom(prefetched_label_, &context_);
1162 for (
int i = 0; i < prefetched_additional_outputs_on_device_.size(); ++i) {
1163 prefetched_additional_outputs_on_device_[i].CopyFrom(
1164 prefetched_additional_outputs_[i], &context_);
// CopyPrefetched: moves the prefetched batch into the operator's outputs.
// CPU context: straight copies. Device context: either runs the deferred
// mean/std normalization on the GPU (TransformOnGPU, float or float16
// output; the mean/std vectors are uploaded once and cached via
// mean_std_copied_) or, without gpu_transform_, copies the staged device
// tensors directly.
1170 template <
class Context>
1172 auto* image_output = OperatorBase::Output<Tensor<Context> >(0);
1173 auto* label_output = OperatorBase::Output<Tensor<Context> >(1);
1174 vector<Tensor<Context>*> additional_outputs_output;
1176 for (
int i = 2; i < OutputSize(); ++i) {
1177 additional_outputs_output.push_back(
// CPU path: copy prefetch buffers straight to outputs.
1183 if (std::is_same<Context, CPUContext>::value) {
1184 image_output->CopyFrom(prefetched_image_, &context_);
1185 label_output->CopyFrom(prefetched_label_, &context_);
1187 for (
int i = 0; i < additional_outputs_output.size(); ++i) {
1188 additional_outputs_output[i]->CopyFrom(
1189 prefetched_additional_outputs_[i], &context_);
// Device path with GPU-side transform.
1193 if (gpu_transform_) {
// Upload mean/std to device buffers only once per op lifetime.
1194 if (!mean_std_copied_) {
1195 mean_gpu_.Resize(mean_.size());
1196 std_gpu_.Resize(std_.size());
1198 context_.template Copy<float, CPUContext, Context>(
1199 mean_.size(), mean_.data(), mean_gpu_.template mutable_data<float>());
1200 context_.template Copy<float, CPUContext, Context>(
1201 std_.size(), std_.data(), std_gpu_.template mutable_data<float>());
1202 mean_std_copied_ =
true;
// Normalize uint8 -> float/float16 on the GPU.
1205 if (output_type_ == TensorProto_DataType_FLOAT) {
1206 TransformOnGPU<uint8_t,float,Context>(prefetched_image_on_device_,
1207 image_output, mean_gpu_,
1208 std_gpu_, &context_);
1209 }
else if (output_type_ == TensorProto_DataType_FLOAT16) {
1210 TransformOnGPU<uint8_t,float16,Context>(prefetched_image_on_device_,
1211 image_output, mean_gpu_,
1212 std_gpu_, &context_);
// No GPU transform: the staged device image is already final.
1217 image_output->CopyFrom(prefetched_image_on_device_, &context_);
1219 label_output->CopyFrom(prefetched_label_on_device_, &context_);
1221 for (
int i = 0; i < additional_outputs_output.size(); ++i) {
1222 additional_outputs_output[i]->CopyFrom(
1223 prefetched_additional_outputs_on_device_[i], &context_);
1230 #endif // CAFFE2_IMAGE_IMAGE_INPUT_OP_H_ void Read(string *key, string *value) const
Read a set of key and value from the db and move to next.
A reader wrapper for DB that also allows us to serialize it.
The CPU Context, representing the bare minimum of what a Context class in Caffe2 should implement...
A helper class to index into arguments.
T * mutable_data()
Returns a typed pointer of the underlying storage.
Workspace is a class that holds all the related objects created during runtime: (1) all blobs...
void Resize(Ts...dim_source)
Resizes a tensor.
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...