1 #include "caffe2/core/operator.h" 5 #include "caffe2/core/logging.h" 6 #include "caffe2/core/net.h" 7 #include "caffe2/core/operator_gradient.h" 8 #include "caffe2/core/tensor.h" 9 #include "caffe2/core/types.h" 10 #include "caffe2/core/workspace.h" 12 #include "caffe2/proto/caffe2.pb.h" 13 #include "caffe2/utils/proto_utils.h" 14 #include "caffe2/utils/string_utils.h" 17 caffe2_operator_max_engine_name_length,
19 "Maximum engine name length to be stored");
21 caffe2_disable_implicit_engine_preference,
23 "If set, disable implicit engine preferences. This is useful for unit " 24 "testing and debugging cases.");
28 OperatorBase::OperatorBase(
const OperatorDef& operator_def, Workspace* ws)
30 operator_def_(
std::make_shared<OperatorDef>(operator_def)),
32 operator_def.has_device_option() ? operator_def.device_option()
34 event_(
caffe2::make_unique<Event>(device_option_)) {
35 for (
const string& input_str : operator_def.input()) {
36 auto* blob = ws->GetBlob(input_str);
41 ": Encountered a non-existing input blob: ",
43 inputs_.push_back(blob);
46 GetOperatorLogger()(operator_def);
48 for (
const string& output_str : operator_def.output()) {
49 outputs_.push_back(CHECK_NOTNULL(ws->CreateBlob(output_str)));
53 vector<TensorShape> OperatorBase::InputTensorShapes() {
54 vector<TensorShape> tps;
55 for (
const auto& blob : inputs_) {
56 tps.push_back(GetTensorShapeOfBlob(blob));
63 PerOpEnginePrefType& g_per_op_engine_pref() {
64 static auto* g_per_op_engine_pref_ =
new PerOpEnginePrefType();
65 return *g_per_op_engine_pref_;
68 GlobalEnginePrefType& g_global_engine_pref() {
69 static auto* g_global_engine_pref_ =
70 new GlobalEnginePrefType{{DeviceType::CUDA, {
"CUDNN"}}};
71 return *g_global_engine_pref_;
74 unique_ptr<OperatorBase> TryCreateOperator(
75 const string& key,
const OperatorDef& operator_def, Workspace* ws) {
76 const auto& type = operator_def.device_option().device_type();
78 gDeviceTypeRegistry()->count(type),
82 OperatorRegistry* registry = gDeviceTypeRegistry()->at(type);
83 VLOG(1) <<
"Creating operator with device type " << type;
85 return registry->Create(key, operator_def, ws);
86 }
catch (
const UnsupportedOperatorFeature& err) {
87 LOG(WARNING) <<
"Operator " << operator_def.type()
88 <<
" does not support the requested feature. Msg: " 90 <<
". Proto is: " << ProtoDebugString(operator_def);
95 unique_ptr<OperatorBase> _CreateOperator(
96 const OperatorDef& operator_def,
98 static StaticLinkingProtector g_protector;
99 const auto& op_type = operator_def.type();
100 const auto& device_type = operator_def.device_option().device_type();
102 #ifndef CAFFE2_NO_OPERATOR_SCHEMA 104 auto* schema = OpSchemaRegistry::Schema(op_type);
107 schema->Verify(operator_def),
108 "Operator def did not pass schema checking: ",
109 ProtoDebugString(operator_def));
114 LOG(ERROR) <<
"Cannot find operator schema for " << op_type
115 <<
". Will skip schema checking.";
120 std::vector<std::string> engines{};
121 if (operator_def.engine().size()) {
122 const auto op_def_engines = split(
',', operator_def.engine());
123 engines.insert(engines.end(), op_def_engines.begin(), op_def_engines.end());
125 if (!FLAGS_caffe2_disable_implicit_engine_preference &&
126 g_per_op_engine_pref().count(device_type) &&
127 g_per_op_engine_pref()[device_type].count(op_type)) {
128 const auto& preferred_engines =
129 g_per_op_engine_pref()[device_type][op_type];
130 VLOG(2) <<
"Inserting per-op engine preference: " << preferred_engines;
132 engines.end(), preferred_engines.begin(), preferred_engines.end());
134 if (!FLAGS_caffe2_disable_implicit_engine_preference &&
135 g_global_engine_pref().count(device_type)) {
136 const auto& preferred_engines = g_global_engine_pref()[device_type];
137 VLOG(2) <<
"Inserting global engine preference: " << preferred_engines;
139 engines.end(), preferred_engines.begin(), preferred_engines.end());
141 for (
const auto& engine : engines) {
142 const std::string key = OpRegistryKey(op_type, engine);
143 VLOG(1) <<
"Trying to create operator " << op_type <<
" with engine " 145 auto op = TryCreateOperator(key, operator_def, ws);
147 if (engine.size() <= FLAGS_caffe2_operator_max_engine_name_length) {
148 op->annotate_engine(engine);
151 engine.substr(0, FLAGS_caffe2_operator_max_engine_name_length));
157 VLOG(1) <<
"Engine " << engine
158 <<
" is not available for operator " << op_type <<
".";
161 if (operator_def.engine().size() && !VLOG_IS_ON(1)) {
162 static int log_occurrences = 0;
163 if (log_occurrences <= 64) {
165 LOG(INFO) <<
"Engine " << operator_def.engine()
166 <<
" is not available for operator " << op_type <<
".";
169 VLOG(1) <<
"Using default implementation.";
172 auto op = TryCreateOperator(op_type, operator_def, ws);
175 "Cannot create operator of type '",
178 DeviceTypeName(device_type),
179 "'. Verify that implementation for the corresponding device exist. It " 180 "might also happen if the binary is not linked with the operator " 181 "implementation code. If Python frontend is used it might happen if " 182 "dyndep.InitOpsLibrary call is missing. Operator def: ",
183 ProtoDebugString(operator_def));
// Builds the registry lookup key for an (op type, engine) pair. An empty or
// "DEFAULT" engine maps to the bare op type; otherwise the engine name is
// appended with an "_ENGINE_" separator.
const std::string OpRegistryKey(
    const std::string& op_type,
    const std::string& engine) {
  if (engine == "" || engine == "DEFAULT") {
    return op_type;
  } else {
    return op_type + "_ENGINE_" + engine;
  }
}
199 void SetPerOpEnginePref(
const PerOpEnginePrefType& per_op_engine_pref) {
200 for (
const auto& device_pref_pair : per_op_engine_pref) {
201 const auto& device_type = device_pref_pair.first;
203 gDeviceTypeRegistry()->count(device_type),
207 auto* registry = gDeviceTypeRegistry()->at(device_type);
209 for (
const auto& op_pref_pair : device_pref_pair.second) {
210 const auto& op_type = op_pref_pair.first;
212 registry->Has(op_type),
215 " not registered in ",
220 g_per_op_engine_pref() = per_op_engine_pref;
223 void SetGlobalEnginePref(
const GlobalEnginePrefType& global_engine_pref) {
224 for (
const auto& device_pref_pair : global_engine_pref) {
225 const auto& device_type = device_pref_pair.first;
227 gDeviceTypeRegistry()->count(device_type),
232 g_global_engine_pref() = global_engine_pref;
236 const PerOpEnginePrefType& per_op_engine_pref,
237 const GlobalEnginePrefType& global_engine_pref) {
238 SetPerOpEnginePref(per_op_engine_pref);
239 SetGlobalEnginePref(global_engine_pref);
242 void SetOpEnginePref(
243 const std::string& op_type,
244 const CaffeMap<int, EnginePrefType>& op_pref) {
245 for (
const auto& device_pref_pair : op_pref) {
246 const auto& device_type = device_pref_pair.first;
248 gDeviceTypeRegistry()->count(device_type),
253 gDeviceTypeRegistry()->at(device_type)->Has(op_type),
256 " not registered in ",
259 g_per_op_engine_pref()[device_type][op_type] = device_pref_pair.second;
263 unique_ptr<OperatorBase> CreateOperator(
264 const OperatorDef& operator_def,
268 auto op = _CreateOperator(operator_def, ws);
269 op->set_net_position(net_position);
272 if (net_position != 0) {
273 VLOG(1) <<
"Operator constructor with net position " << net_position
275 ws->last_failed_op_net_position = net_position;
277 VLOG(1) <<
"Failed operator constructor doesn't have an id set";
283 std::map<int32_t, OperatorRegistry*>* gDeviceTypeRegistry() {
284 static std::map<int32_t, OperatorRegistry*> g_device_type_registry;
285 return &g_device_type_registry;
288 CAFFE_DEFINE_REGISTRY(
293 CAFFE_REGISTER_DEVICE_TYPE(DeviceType::CPU, CPUOperatorRegistry);
295 CAFFE_DEFINE_REGISTRY(
296 CUDAOperatorRegistry,
300 CAFFE_REGISTER_DEVICE_TYPE(DeviceType::CUDA, CUDAOperatorRegistry);
302 CAFFE_DEFINE_REGISTRY(
305 const OperatorDef&,
const vector<GradientWrapper>&);
308 const OperatorDef& def,
const vector<GradientWrapper>& g_output) {
309 std::unique_ptr<GradientMakerBase> maker(
310 GradientRegistry()->Create(def.type(), def, g_output));
312 "Gradient maker for operator ", def.type(),
" not implemented.");
315 if (maker->CopyDeviceOption() && def.has_device_option()) {
316 for (OperatorDef& grad_def : meta.ops_) {
317 grad_def.mutable_device_option()->CopyFrom(def.device_option());
321 if (maker->CopyEngine() && def.has_engine()) {
322 for (OperatorDef& grad_def : meta.ops_) {
323 grad_def.set_engine(def.engine());
327 if (maker->CopyArguments() && def.arg_size()) {
328 for (OperatorDef& grad_def : meta.ops_) {
329 for (
auto& arg : def.arg()) {
330 grad_def.add_arg()->CopyFrom(arg);
335 for (
const OperatorDef& grad_def : meta.ops_) {
336 VLOG(1) <<
"Gradient ops: " << ProtoDebugString(grad_def);
340 CAFFE_ENFORCE_EQ(meta.g_input_.size(), def.input_size());
341 VLOG(1) <<
"Gradients:";
345 if (!grad.IsDense() && !grad.IsSparse()) {
346 VLOG(1) <<
"\t [no gradient]";
347 }
else if (grad.IsDense()) {
348 VLOG(1) <<
"\t [dense]" << grad.dense_;
351 grad.indices_.size() && grad.values_.size(),
352 "For sparse gradient, one should set both indices and values. " 353 "Currently we have: (" +
354 grad.indices_ +
", " + grad.values_ +
").");
355 VLOG(1) <<
"\t [sparse] " << grad.indices_ <<
", " << grad.values_;
361 static TensorShapes InferBlobShapesAndTypes(
362 CaffeMap<string, TensorShape>& blob_desc,
363 const vector<std::unique_ptr<NetDef>>& nets) {
364 for (
auto& defptr : nets) {
366 CaffeMap<string, string> unmatched_sum_blobs;
367 CaffeMap<string, TensorShape> reshape_cache;
369 for (
const OperatorDef& op : defptr.get()->op()) {
371 if (op.type().find(
"Dequeue") != std::string::npos ||
372 op.type().find(
"Enqueue") != std::string::npos) {
376 vector<TensorShape> input_desc;
377 bool found_all =
true;
378 for (
const string& in : op.input()) {
379 auto inp_desc = blob_desc.find(in);
380 if (inp_desc == blob_desc.end()) {
381 LOG(WARNING) <<
"Shape and type inference failed for input: " << in
382 <<
" for op " << op.type() <<
", skipping.";
386 input_desc.push_back(inp_desc->second);
391 auto op_schema = OpSchemaRegistry::Schema(op.type());
392 if (op_schema ==
nullptr) {
393 LOG(WARNING) <<
"Shape inference failed, no schema for: " << op.type();
400 if (op.type() ==
"Sum") {
401 TensorShape sum_shape;
402 for (
auto inp : op.input()) {
403 auto it = blob_desc.find(inp);
404 if (it != blob_desc.end() && !it->second.unknown_shape()) {
405 if (it->second.dims_size() > 0) {
406 sum_shape = blob_desc[inp];
411 for (
auto inp : op.input()) {
412 auto it = blob_desc.find(inp);
413 if (it == blob_desc.end() || it->second.unknown_shape()) {
414 blob_desc[inp] = sum_shape;
415 if (sum_shape.dims_size() == 0) {
417 unmatched_sum_blobs[inp] = op.output(0);
423 if (op.type() ==
"Reshape" && op.is_gradient_op()) {
424 CAFFE_ENFORCE(reshape_cache.find(op.input(1)) != reshape_cache.end());
425 TensorShape cached = reshape_cache[op.input(1)];
426 blob_desc[op.output(0)] = cached;
430 std::vector<TensorShape> out;
432 out = op_schema->InferTensor(op, input_desc);
433 if (op.is_gradient_op() && out.size()) {
439 CaffeMap<string, string> grads_to_params =
440 GradientMakerBase::MatchGradsToParams(op);
442 for (
int i = 0; i < out.size(); i++) {
443 if (out[i].unknown_shape()) {
444 std::string gradout = op.output(i);
446 if (grads_to_params.find(gradout) != grads_to_params.end()) {
447 std::string var = grads_to_params[gradout];
448 if (blob_desc.find(var) != blob_desc.end()) {
449 out[i] = blob_desc[var];
456 if (op.type() ==
"Reshape") {
459 reshape_cache[op.output(1)] = input_desc[0];
463 LOG(ERROR) <<
"Shape inference error: " << enf.msg();
464 LOG(ERROR) <<
"Operator: " << ProtoDebugString(op) << std::endl;
465 LOG(ERROR) <<
"Returning empty results.";
471 if (out.size() != op.output_size()) {
472 if (op.type() ==
"Slice") {
475 "For Slice operator, either shape of all output blobs are " 476 "inferred or shape of none can be inferred.");
479 "Invalid shape inference for operator ",
483 " outputs, but got ",
487 for (
int i = 0; i < out.size(); i++) {
488 blob_desc[op.output(i)] = out[i];
493 for (
auto& unmatched : unmatched_sum_blobs) {
494 if (blob_desc.find(unmatched.second) != blob_desc.end()) {
495 blob_desc[unmatched.first] = blob_desc[unmatched.second];
501 for (
auto kv : blob_desc) {
502 TensorShape& tp = kv.second;
503 TensorShape* tpnew = tps.add_shapes();
505 tpnew->set_name(kv.first);
510 TensorShape GetTensorShapeOfBlob(
const Blob* b) {
511 TypeCall type_fun = GetTypeCallFunction(b->
meta().
id());
512 TensorInfoCall tensor_info_fun = GetTensorInfoFunction(b->
meta().
id());
516 tp.set_data_type(TypeMetaToDataType(type_fun(b->GetRaw())));
518 if (tensor_info_fun) {
521 DeviceOption _device;
523 tensor_info_fun(b->GetRaw(), &_shares_data, &_capacity, &_device);
524 for (
auto d : shape) {
528 tp.set_unknown_shape(
true);
533 TensorShapes InferBlobShapesAndTypesFromWorkspace(
535 const vector<std::unique_ptr<NetDef>>& nets) {
536 CaffeMap<string, TensorShape> blob_desc;
538 const std::vector<string>& ws_blobs = ws->
Blobs();
539 for (
const auto& s : ws_blobs) {
541 TensorShape tp = GetTensorShapeOfBlob(b);
544 return InferBlobShapesAndTypes(blob_desc, nets);
547 TensorShapes InferBlobShapesAndTypesFromMap(
548 const CaffeMap<std::string, std::vector<TIndex>>& blob_dimensions,
549 const vector<std::unique_ptr<NetDef>>& nets) {
550 CaffeMap<string, TensorShape> blob_desc;
552 for (
const auto& blob : blob_dimensions) {
554 for (
auto d : blob.second) {
555 CAFFE_ENFORCE_GT(d, 0);
558 blob_desc[blob.first] = tp;
560 return InferBlobShapesAndTypes(blob_desc, nets);
563 std::map<string, std::pair<DeviceOption, DeviceOption>> ValidateTensorDevices(
565 const OperatorDef& op_def) {
566 std::map<string, std::pair<DeviceOption, DeviceOption>> mismatches;
567 DeviceOption op_device = op_def.device_option();
569 #ifndef CAFFE2_NO_OPERATOR_SCHEMA 571 auto op_schema = OpSchemaRegistry::Schema(op_def.type());
572 if (op_schema !=
nullptr) {
573 if (op_schema->inputs_can_cross_devices()) {
577 #endif // CAFFE2_NO_OPERATOR_SCHEMA 579 auto Check = [&](
const Blob& blob, std::string blob_name) {
580 TensorInfoCall tensor_info_fun = GetTensorInfoFunction(blob.
meta().
id());
581 if (tensor_info_fun) {
584 DeviceOption blob_device;
586 const_cast<Blob&>(blob).GetRaw(),
591 if (blob_device.device_type() == CUDA &&
592 blob_device.cuda_gpu_id() != op_device.cuda_gpu_id()) {
593 mismatches[blob_name] = std::make_pair(op_device, blob_device);
599 for (
int i = 0; i < op.InputSize(); i++) {
600 Check(op.InputBlob(i), op_def.input(i));
602 for (
int i = 0; i < op.OutputSize(); i++) {
603 Check(*op.OutputBlob(i), op_def.output(i));
608 std::set<std::string> GetRegisteredOperators() {
609 std::set<std::string> all_keys;
612 for (
const auto& name : CPUOperatorRegistry()->Keys()) {
613 all_keys.emplace(name);
616 for (
const auto& name : CUDAOperatorRegistry()->Keys()) {
617 all_keys.emplace(name);
Blob is a general container that hosts a typed pointer.
Workspace is a class that holds all the related objects created during runtime: (1) all blobs...
const Blob * GetBlob(const string &name) const
Gets the blob with the given name as a const pointer.
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...
const TypeMeta & meta() const
Returns the meta info of the blob.
GradientOpsMeta GetGradientForOp(const OperatorDef &def, const vector< GradientWrapper > &g_output)
Gets the GradientOpsMeta for the given operator def.
vector< string > Blobs() const
Return a list of blob names.