Caffe2 - C++ API
A deep learning, cross-platform ML framework
operator.cc
#include "caffe2/core/operator.h"

#include <algorithm>

#include "caffe2/core/logging.h"
#include "caffe2/core/net.h"
#include "caffe2/core/operator_gradient.h"
#include "caffe2/core/tensor.h"
#include "caffe2/core/types.h"
#include "caffe2/core/workspace.h"

#include "caffe2/proto/caffe2.pb.h"
#include "caffe2/utils/proto_utils.h"
#include "caffe2/utils/string_utils.h"

CAFFE2_DEFINE_int(
    caffe2_operator_max_engine_name_length,
    10,
    "Maximum engine name length to be stored");
CAFFE2_DEFINE_bool(
    caffe2_disable_implicit_engine_preference,
    false,
    "If set, disable implicit engine preferences. This is useful for unit "
    "testing and debugging cases.");

namespace caffe2 {

OperatorBase::OperatorBase(const OperatorDef& operator_def, Workspace* ws)
    : operator_ws_(ws),
      operator_def_(std::make_shared<OperatorDef>(operator_def)),
      device_option_(
          operator_def.has_device_option() ? operator_def.device_option()
                                           : DeviceOption()),
      event_(caffe2::make_unique<Event>(device_option_)) {
  for (const string& input_str : operator_def.input()) {
    auto* blob = ws->GetBlob(input_str);
    CAFFE_ENFORCE(
        blob != nullptr,
        "op ",
        operator_def.type(),
        ": Encountered a non-existing input blob: ",
        input_str);
    inputs_.push_back(blob);
  }

  GetOperatorLogger()(operator_def);

  for (const string& output_str : operator_def.output()) {
    outputs_.push_back(CHECK_NOTNULL(ws->CreateBlob(output_str)));
  }
}
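// Illustration (not part of the original file): the constructor above
// resolves every input name against the workspace and creates the output
// blobs on demand. A minimal OperatorDef, written as a text-format proto,
// might look like the sketch below; the blob names "X" and "Y" are
// hypothetical.
//
//   type: "Relu"
//   input: "X"
//   output: "Y"
//   engine: "CUDNN"
//   device_option { device_type: 1 }  # 1 == DeviceType::CUDA in caffe2.proto
//
// Construction fails with CAFFE_ENFORCE unless "X" already exists in the
// workspace; "Y" is created if it is missing.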

vector<TensorShape> OperatorBase::InputTensorShapes() {
  vector<TensorShape> tps;
  for (const auto& blob : inputs_) {
    tps.push_back(GetTensorShapeOfBlob(blob));
  }
  return tps;
}

namespace {

PerOpEnginePrefType& g_per_op_engine_pref() {
  static auto* g_per_op_engine_pref_ = new PerOpEnginePrefType();
  return *g_per_op_engine_pref_;
}

GlobalEnginePrefType& g_global_engine_pref() {
  static auto* g_global_engine_pref_ =
      new GlobalEnginePrefType{{DeviceType::CUDA, {"CUDNN"}}};
  return *g_global_engine_pref_;
}

unique_ptr<OperatorBase> TryCreateOperator(
    const string& key, const OperatorDef& operator_def, Workspace* ws) {
  const auto& type = operator_def.device_option().device_type();
  CAFFE_ENFORCE(
      gDeviceTypeRegistry()->count(type),
      "Device type ",
      type,
      " not registered.");
  OperatorRegistry* registry = gDeviceTypeRegistry()->at(type);
  VLOG(1) << "Creating operator with device type " << type;
  try {
    return registry->Create(key, operator_def, ws);
  } catch (const UnsupportedOperatorFeature& err) {
    LOG(WARNING) << "Operator " << operator_def.type()
                 << " does not support the requested feature. Msg: "
                 << err.what()
                 << ". Proto is: " << ProtoDebugString(operator_def);
    return nullptr;
  }
}

unique_ptr<OperatorBase> _CreateOperator(
    const OperatorDef& operator_def,
    Workspace* ws) {
  static StaticLinkingProtector g_protector;
  const auto& op_type = operator_def.type();
  const auto& device_type = operator_def.device_option().device_type();

#ifndef CAFFE2_NO_OPERATOR_SCHEMA
  // first, check with OpSchema if the operator is legal.
  auto* schema = OpSchemaRegistry::Schema(op_type);
  if (schema) {
    CAFFE_ENFORCE(
        schema->Verify(operator_def),
        "Operator def did not pass schema checking: ",
        ProtoDebugString(operator_def));
  } else {
    // We would like to recommend every op to register its schema, so if there
    // is not one, we print a LOG_ERROR. But we will still allow the operator
    // to be constructed.
    LOG(ERROR) << "Cannot find operator schema for " << op_type
               << ". Will skip schema checking.";
  }
#endif

  // second try engines specified in the operator_def and preferred engines
  std::vector<std::string> engines{};
  if (operator_def.engine().size()) {
    const auto op_def_engines = split(',', operator_def.engine());
    engines.insert(engines.end(), op_def_engines.begin(), op_def_engines.end());
  }
  if (!FLAGS_caffe2_disable_implicit_engine_preference &&
      g_per_op_engine_pref().count(device_type) &&
      g_per_op_engine_pref()[device_type].count(op_type)) {
    const auto& preferred_engines =
        g_per_op_engine_pref()[device_type][op_type];
    VLOG(2) << "Inserting per-op engine preference: " << preferred_engines;
    engines.insert(
        engines.end(), preferred_engines.begin(), preferred_engines.end());
  }
  if (!FLAGS_caffe2_disable_implicit_engine_preference &&
      g_global_engine_pref().count(device_type)) {
    const auto& preferred_engines = g_global_engine_pref()[device_type];
    VLOG(2) << "Inserting global engine preference: " << preferred_engines;
    engines.insert(
        engines.end(), preferred_engines.begin(), preferred_engines.end());
  }
  for (const auto& engine : engines) {
    const std::string key = OpRegistryKey(op_type, engine);
    VLOG(1) << "Trying to create operator " << op_type << " with engine "
            << engine;
    auto op = TryCreateOperator(key, operator_def, ws);
    if (op) {
      if (engine.size() <= FLAGS_caffe2_operator_max_engine_name_length) {
        op->annotate_engine(engine);
      } else {
        op->annotate_engine(
            engine.substr(0, FLAGS_caffe2_operator_max_engine_name_length));
      }
      return op;
    } else {
      // If the above fails, we will just return the normal case with the
      // default implementation.
      VLOG(1) << "Engine " << engine
              << " is not available for operator " << op_type << ".";
    }
  }
  if (operator_def.engine().size() && !VLOG_IS_ON(1)) {
    static int log_occurrences = 0;
    if (log_occurrences <= 64) {
      ++log_occurrences;
      LOG(INFO) << "Engine " << operator_def.engine()
                << " is not available for operator " << op_type << ".";
    }
  }
  VLOG(1) << "Using default implementation.";

  // Lastly, if the engine does not work here, try using the default engine.
  auto op = TryCreateOperator(op_type, operator_def, ws);
  CAFFE_ENFORCE(
      op,
      "Cannot create operator of type '",
      op_type,
      "' on the device '",
      DeviceTypeName(device_type),
      "'. Verify that an implementation for the corresponding device exists. "
      "It might also happen if the binary is not linked with the operator "
      "implementation code. If the Python frontend is used, it might happen "
      "if the dyndep.InitOpsLibrary call is missing. Operator def: ",
      ProtoDebugString(operator_def));
  return op;
}

} // namespace

const std::string OpRegistryKey(
    const std::string& op_type,
    const std::string& engine) {
  if (engine == "" || engine == "DEFAULT") {
    return op_type;
  } else {
    return op_type + "_ENGINE_" + engine;
  }
}
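// Illustration (not part of the original file): engine-qualified registry
// keys simply concatenate the op type and engine name, so, for example:
//
//   OpRegistryKey("Conv", "CUDNN");    // -> "Conv_ENGINE_CUDNN"
//   OpRegistryKey("Conv", "DEFAULT");  // -> "Conv"
//   OpRegistryKey("Conv", "");         // -> "Conv"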

void SetPerOpEnginePref(const PerOpEnginePrefType& per_op_engine_pref) {
  for (const auto& device_pref_pair : per_op_engine_pref) {
    const auto& device_type = device_pref_pair.first;
    CAFFE_ENFORCE(
        gDeviceTypeRegistry()->count(device_type),
        "Device type ",
        device_type,
        " not registered.");
    auto* registry = gDeviceTypeRegistry()->at(device_type);

    for (const auto& op_pref_pair : device_pref_pair.second) {
      const auto& op_type = op_pref_pair.first;
      CAFFE_ENFORCE(
          registry->Has(op_type),
          "Operator type ",
          op_type,
          " not registered in ",
          device_type,
          " registry.");
    }
  }
  g_per_op_engine_pref() = per_op_engine_pref;
}

void SetGlobalEnginePref(const GlobalEnginePrefType& global_engine_pref) {
  for (const auto& device_pref_pair : global_engine_pref) {
    const auto& device_type = device_pref_pair.first;
    CAFFE_ENFORCE(
        gDeviceTypeRegistry()->count(device_type),
        "Device type ",
        device_type,
        " not registered.");
  }
  g_global_engine_pref() = global_engine_pref;
}

void SetEnginePref(
    const PerOpEnginePrefType& per_op_engine_pref,
    const GlobalEnginePrefType& global_engine_pref) {
  SetPerOpEnginePref(per_op_engine_pref);
  SetGlobalEnginePref(global_engine_pref);
}

void SetOpEnginePref(
    const std::string& op_type,
    const CaffeMap<int, EnginePrefType>& op_pref) {
  for (const auto& device_pref_pair : op_pref) {
    const auto& device_type = device_pref_pair.first;
    CAFFE_ENFORCE(
        gDeviceTypeRegistry()->count(device_type),
        "Device type ",
        device_type,
        " not registered.");
    CAFFE_ENFORCE(
        gDeviceTypeRegistry()->at(device_type)->Has(op_type),
        "Operator type ",
        op_type,
        " not registered in ",
        device_type,
        " registry.");
    g_per_op_engine_pref()[device_type][op_type] = device_pref_pair.second;
  }
}
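// Illustration (not part of the original file): a caller can steer engine
// selection before operators are created. "Conv" is just an example op type;
// the calls mirror the setter signatures above.
//
//   // Prefer CUDNN, then the default, for every CUDA operator:
//   SetGlobalEnginePref({{DeviceType::CUDA, {"CUDNN"}}});
//   // Prefer CUDNN only for Conv on CUDA:
//   SetOpEnginePref("Conv", {{DeviceType::CUDA, {"CUDNN"}}});
//
// When --caffe2_disable_implicit_engine_preference is set, these preference
// maps are ignored and only engines listed in the OperatorDef are tried.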

unique_ptr<OperatorBase> CreateOperator(
    const OperatorDef& operator_def,
    Workspace* ws,
    int net_position) {
  try {
    auto op = _CreateOperator(operator_def, ws);
    op->set_net_position(net_position);
    return op;
  } catch (...) {
    if (net_position != 0) {
      VLOG(1) << "Operator constructor with net position " << net_position
              << " failed";
      ws->last_failed_op_net_position = net_position;
    } else {
      VLOG(1) << "Failed operator constructor doesn't have an id set";
    }
    throw;
  }
}
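// Illustration (not part of the original file): a minimal end-to-end sketch
// of creating and running an operator. The op type "Relu", the blob names,
// and the explicit net_position value are assumptions.
//
//   Workspace ws;
//   ws.CreateBlob("X");  // would normally be filled with a tensor first
//   OperatorDef def;
//   def.set_type("Relu");
//   def.add_input("X");
//   def.add_output("Y");
//   auto op = CreateOperator(def, &ws, /*net_position=*/0);
//   op->Run();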

std::map<int32_t, OperatorRegistry*>* gDeviceTypeRegistry() {
  static std::map<int32_t, OperatorRegistry*> g_device_type_registry;
  return &g_device_type_registry;
}

CAFFE_DEFINE_REGISTRY(
    CPUOperatorRegistry,
    OperatorBase,
    const OperatorDef&,
    Workspace*);
CAFFE_REGISTER_DEVICE_TYPE(DeviceType::CPU, CPUOperatorRegistry);

CAFFE_DEFINE_REGISTRY(
    CUDAOperatorRegistry,
    OperatorBase,
    const OperatorDef&,
    Workspace*);
CAFFE_REGISTER_DEVICE_TYPE(DeviceType::CUDA, CUDAOperatorRegistry);

CAFFE_DEFINE_REGISTRY(
    GradientRegistry,
    GradientMakerBase,
    const OperatorDef&, const vector<GradientWrapper>&);
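// Illustration (not part of the original file): operator implementations are
// added to these registries elsewhere, typically via the registration macros
// from operator.h. A hypothetical MyReluOp would be registered roughly like:
//
//   REGISTER_CPU_OPERATOR(MyRelu, MyReluOp);
//   REGISTER_CUDA_OPERATOR(MyRelu, MyReluCUDAOp);
//   REGISTER_CUDNN_OPERATOR(MyRelu, MyReluCuDNNOp);  // key "MyRelu_ENGINE_CUDNN"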

GradientOpsMeta GetGradientForOp(
    const OperatorDef& def, const vector<GradientWrapper>& g_output) {
  std::unique_ptr<GradientMakerBase> maker(
      GradientRegistry()->Create(def.type(), def, g_output));
  CAFFE_ENFORCE(maker,
      "Gradient maker for operator ", def.type(), " not implemented.");
  GradientOpsMeta meta = maker->Get();
  // Copy device option, engine, and arguments if needed.
  if (maker->CopyDeviceOption() && def.has_device_option()) {
    for (OperatorDef& grad_def : meta.ops_) {
      grad_def.mutable_device_option()->CopyFrom(def.device_option());
    }
  }
  // Copy engine if needed.
  if (maker->CopyEngine() && def.has_engine()) {
    for (OperatorDef& grad_def : meta.ops_) {
      grad_def.set_engine(def.engine());
    }
  }
  // Copy arguments if needed.
  if (maker->CopyArguments() && def.arg_size()) {
    for (OperatorDef& grad_def : meta.ops_) {
      for (auto& arg : def.arg()) {
        grad_def.add_arg()->CopyFrom(arg);
      }
    }
  }
  // VLOG for debugging purposes.
  for (const OperatorDef& grad_def : meta.ops_) {
    VLOG(1) << "Gradient ops: " << ProtoDebugString(grad_def);
  }
  // Check if the gradient computation has returned the right size for the
  // gradient vector.
  CAFFE_ENFORCE_EQ(meta.g_input_.size(), def.input_size());
  VLOG(1) << "Gradients:";
  for (const GradientWrapper& grad : meta.g_input_) {
    // The gradient should either be (1) not set, or (2) dense, or (3) sparse,
    // but cannot be both dense and sparse.
    if (!grad.IsDense() && !grad.IsSparse()) {
      VLOG(1) << "\t [no gradient]";
    } else if (grad.IsDense()) {
      VLOG(1) << "\t [dense]" << grad.dense_;
    } else {
      CAFFE_ENFORCE(
          grad.indices_.size() && grad.values_.size(),
          "For sparse gradient, one should set both indices and values. "
          "Currently we have: (" +
              grad.indices_ + ", " + grad.values_ + ").");
      VLOG(1) << "\t [sparse] " << grad.indices_ << ", " << grad.values_;
    }
  }
  return meta;
}
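// Illustration (not part of the original file): a sketch of asking for the
// gradient of an op. The forward def and the output-gradient name "dY" are
// hypothetical; GradientWrapper's dense_ field is set as in the checks above.
//
//   vector<GradientWrapper> g_output(1);
//   g_output[0].dense_ = "dY";
//   GradientOpsMeta meta = GetGradientForOp(forward_def, g_output);
//   // meta.ops_ now holds the gradient OperatorDefs, and meta.g_input_
//   // names the gradient (if any) for each forward input.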

static TensorShapes InferBlobShapesAndTypes(
    CaffeMap<string, TensorShape>& blob_desc,
    const vector<std::unique_ptr<NetDef>>& nets) {
  for (auto& defptr : nets) {
    // Hack to work with auto split gradients
    CaffeMap<string, string> unmatched_sum_blobs;
    CaffeMap<string, TensorShape> reshape_cache;

    for (const OperatorDef& op : defptr.get()->op()) {
      // Hack to ignore queues
      if (op.type().find("Dequeue") != std::string::npos ||
          op.type().find("Enqueue") != std::string::npos) {
        continue;
      }

      vector<TensorShape> input_desc;
      bool found_all = true;
      for (const string& in : op.input()) {
        auto inp_desc = blob_desc.find(in);
        if (inp_desc == blob_desc.end()) {
          LOG(WARNING) << "Shape and type inference failed for input: " << in
                       << " for op " << op.type() << ", skipping.";
          found_all = false;
          break;
        }
        input_desc.push_back(inp_desc->second);
      }
      if (!found_all) {
        continue;
      }
      auto op_schema = OpSchemaRegistry::Schema(op.type());
      if (op_schema == nullptr) {
        LOG(WARNING) << "Shape inference failed, no schema for: " << op.type();
        continue;
      }

      // Special handling for Sum, since it is used with autosplits, which
      // have a different naming convention. Assuming that all Sum inputs
      // must be of the same size, we can infer their shapes.
      if (op.type() == "Sum") {
        TensorShape sum_shape;
        for (auto inp : op.input()) {
          auto it = blob_desc.find(inp);
          if (it != blob_desc.end() && !it->second.unknown_shape()) {
            if (it->second.dims_size() > 0) {
              sum_shape = blob_desc[inp];
              break;
            }
          }
        }
        for (auto inp : op.input()) {
          auto it = blob_desc.find(inp);
          if (it == blob_desc.end() || it->second.unknown_shape()) {
            blob_desc[inp] = sum_shape;
            if (sum_shape.dims_size() == 0) {
              // Match later with the output
              unmatched_sum_blobs[inp] = op.output(0);
            }
          }
        }
      }

      if (op.type() == "Reshape" && op.is_gradient_op()) {
        CAFFE_ENFORCE(reshape_cache.find(op.input(1)) != reshape_cache.end());
        TensorShape cached = reshape_cache[op.input(1)];
        blob_desc[op.output(0)] = cached;
        continue;
      }

      std::vector<TensorShape> out;
      try {
        out = op_schema->InferTensor(op, input_desc);
        if (op.is_gradient_op() && out.size()) {
          // Special handling for gradient ops. We can assume gradients are
          // of the same size as their corresponding variables. Basing this
          // on string matching is a bit ugly, but the connection between a
          // variable and its gradient is not specified explicitly.

          CaffeMap<string, string> grads_to_params =
              GradientMakerBase::MatchGradsToParams(op);

          for (int i = 0; i < out.size(); i++) {
            if (out[i].unknown_shape()) {
              std::string gradout = op.output(i);

              if (grads_to_params.find(gradout) != grads_to_params.end()) {
                std::string var = grads_to_params[gradout];
                if (blob_desc.find(var) != blob_desc.end()) {
                  out[i] = blob_desc[var];
                }
              }
            }
          }
        }

        if (op.type() == "Reshape") {
          // Reshape stores the original input shape in its second output
          // blob. We need this for gradient reshape.
          reshape_cache[op.output(1)] = input_desc[0];
        }

      } catch (::caffe2::EnforceNotMet& enf) {
        LOG(ERROR) << "Shape inference error: " << enf.msg();
        LOG(ERROR) << "Operator: " << ProtoDebugString(op) << std::endl;
        LOG(ERROR) << "Returning empty results.";

        TensorShapes tps;
        return tps;
      }

      if (out.size() != op.output_size()) {
        if (op.type() == "Slice") {
          CAFFE_ENFORCE(
              out.size() == 0,
              "For the Slice operator, either the shapes of all output blobs "
              "are inferred or none can be inferred.");
        } else {
          CAFFE_THROW(
              "Invalid shape inference for operator ",
              op.type(),
              ". Expected ",
              op.output_size(),
              " outputs, but got ",
              out.size());
        }
      } else {
        for (int i = 0; i < out.size(); i++) {
          blob_desc[op.output(i)] = out[i];
        }
      }
    } // net.ops

    for (auto& unmatched : unmatched_sum_blobs) {
      if (blob_desc.find(unmatched.second) != blob_desc.end()) {
        blob_desc[unmatched.first] = blob_desc[unmatched.second];
      }
    }

  } // nets
  TensorShapes tps;
  for (auto kv : blob_desc) {
    TensorShape& tp = kv.second;
    TensorShape* tpnew = tps.add_shapes();
    tpnew->CopyFrom(tp);
    tpnew->set_name(kv.first);
  }
  return tps;
}

TensorShape GetTensorShapeOfBlob(const Blob* b) {
  TypeCall type_fun = GetTypeCallFunction(b->meta().id());
  TensorInfoCall tensor_info_fun = GetTensorInfoFunction(b->meta().id());
  TensorShape tp;

  if (type_fun) {
    tp.set_data_type(TypeMetaToDataType(type_fun(b->GetRaw())));
  }
  if (tensor_info_fun) {
    bool _shares_data;
    size_t _capacity;
    DeviceOption _device;
    auto shape =
        tensor_info_fun(b->GetRaw(), &_shares_data, &_capacity, &_device);
    for (auto d : shape) {
      tp.add_dims(d);
    }
  } else {
    tp.set_unknown_shape(true);
  }
  return tp;
}

TensorShapes InferBlobShapesAndTypesFromWorkspace(
    Workspace* ws,
    const vector<std::unique_ptr<NetDef>>& nets) {
  CaffeMap<string, TensorShape> blob_desc;
  // Populate shapes from the workspace
  const std::vector<string>& ws_blobs = ws->Blobs();
  for (const auto& s : ws_blobs) {
    Blob* b = ws->GetBlob(s);
    TensorShape tp = GetTensorShapeOfBlob(b);
    blob_desc[s] = tp;
  }
  return InferBlobShapesAndTypes(blob_desc, nets);
}

TensorShapes InferBlobShapesAndTypesFromMap(
    const CaffeMap<std::string, std::vector<TIndex>>& blob_dimensions,
    const vector<std::unique_ptr<NetDef>>& nets) {
  CaffeMap<string, TensorShape> blob_desc;
  // Populate shapes from known blobs
  for (const auto& blob : blob_dimensions) {
    TensorShape tp;
    for (auto d : blob.second) {
      CAFFE_ENFORCE_GT(d, 0);
      tp.add_dims(d);
    }
    blob_desc[blob.first] = tp;
  }
  return InferBlobShapesAndTypes(blob_desc, nets);
}
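// Illustration (not part of the original file): a sketch of shape inference
// from externally known input dimensions. The blob name "data" and its
// dimensions are hypothetical; net_def is an already-parsed NetDef.
//
//   CaffeMap<std::string, std::vector<TIndex>> dims;
//   dims["data"] = {64, 3, 224, 224};
//   vector<std::unique_ptr<NetDef>> nets;
//   nets.emplace_back(caffe2::make_unique<NetDef>(net_def));
//   TensorShapes shapes = InferBlobShapesAndTypesFromMap(dims, nets);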

std::map<string, std::pair<DeviceOption, DeviceOption>> ValidateTensorDevices(
    OperatorBase& op,
    const OperatorDef& op_def) {
  std::map<string, std::pair<DeviceOption, DeviceOption>> mismatches;
  DeviceOption op_device = op_def.device_option();

#ifndef CAFFE2_NO_OPERATOR_SCHEMA
  // Check from op schema if this op is used for crossing devices
  auto op_schema = OpSchemaRegistry::Schema(op_def.type());
  if (op_schema != nullptr) {
    if (op_schema->inputs_can_cross_devices()) {
      return mismatches;
    }
  }
#endif // CAFFE2_NO_OPERATOR_SCHEMA

  auto Check = [&](const Blob& blob, std::string blob_name) {
    TensorInfoCall tensor_info_fun = GetTensorInfoFunction(blob.meta().id());
    if (tensor_info_fun) {
      bool _shares_data;
      size_t _capacity;
      DeviceOption blob_device;
      tensor_info_fun(
          const_cast<Blob&>(blob).GetRaw(),
          &_shares_data,
          &_capacity,
          &blob_device);

      if (blob_device.device_type() == CUDA &&
          blob_device.cuda_gpu_id() != op_device.cuda_gpu_id()) {
        mismatches[blob_name] = std::make_pair(op_device, blob_device);
      }
    }
  };

  // Check that inputs have same device type as the op
  for (int i = 0; i < op.InputSize(); i++) {
    Check(op.InputBlob(i), op_def.input(i));
  }
  for (int i = 0; i < op.OutputSize(); i++) {
    Check(*op.OutputBlob(i), op_def.output(i));
  }
  return mismatches;
}

std::set<std::string> GetRegisteredOperators() {
  std::set<std::string> all_keys;

  // CPU operators
  for (const auto& name : CPUOperatorRegistry()->Keys()) {
    all_keys.emplace(name);
  }
  // CUDA operators
  for (const auto& name : CUDAOperatorRegistry()->Keys()) {
    all_keys.emplace(name);
  }

  return all_keys;
}

} // namespace caffe2
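// Illustration (not part of the original file): enumerating every operator
// key currently linked into the binary, including engine-qualified ones such
// as "Conv_ENGINE_CUDNN".
//
//   for (const auto& name : caffe2::GetRegisteredOperators()) {
//     std::cout << name << std::endl;
//   }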