// Caffe2 - C++ API
// A deep learning, cross-platform ML framework.
// profile_observer_gpu.cc
17 #include "caffe2/core/logging.h"
18 #include "profile_observer.h"
19 
20 namespace caffe2 {
21 
22 void ProfileOperatorObserver::Dump() const {
23  static std::mutex loggingMutex;
24  std::lock_guard<std::mutex> lock(loggingMutex);
25 
26  LOG(INFO) << "--------- Starting operator " << subject_->debug_def().type()
27  << " op#" << getId() << " ---------";
28  for (int i = 0; i < subject_->InputSize(); ++i) {
29  if (subject_->InputIsType<TensorCPU>(i)) {
30  const auto& tensor = subject_->Input<TensorCPU>(i);
31  const auto& name = subject_->debug_def().input(i);
32  TensorPrinter printer(name);
33  LOG(INFO) << "Input " << i << ": " << printer.MetaStr(tensor);
34  } else if (subject_->InputIsType<TensorCUDA>(i)) {
35  const auto& tensor = subject_->Input<TensorCUDA>(i);
36  const auto& name = subject_->debug_def().input(i);
37  TensorPrinter printer(name);
38  LOG(INFO) << "Input " << i << ": " << printer.MetaStr(tensor);
39  }
40  }
41 
42  int a = 0;
43  for (const auto& arg : subject_->debug_def().arg()) {
44  LOG(INFO) << "Argument " << a << ": " << arg.ShortDebugString();
45  ++a;
46  }
47 
48  for (int o = 0; o < subject_->OutputSize(); ++o) {
49  if (subject_->OutputIsType<TensorCPU>(o)) {
50  auto* tensor = subject_->Output<TensorCPU>(o);
51  const auto& name = subject_->debug_def().output(o);
52  TensorPrinter printer(name);
53  LOG(INFO) << "Output " << o << ": " << printer.MetaStr(*tensor);
54  } else if (subject_->OutputIsType<TensorCUDA>(o)) {
55  auto* tensor = subject_->Output<TensorCUDA>(o);
56  const auto& name = subject_->debug_def().output(o);
57  TensorPrinter printer(name);
58  LOG(INFO) << "Output " << o << ": " << printer.MetaStr(*tensor);
59  }
60  }
61 
62  LOG(INFO) << "--------- Finished operator " << subject_->debug_def().type()
63  << " in " << run_time_ << " ms ---------";
64 }
65 
66 void ProfileOperatorObserver::Start() {
67  auto cudaOp = dynamic_cast_if_rtti<const Operator<CUDAContext>*>(subject_);
68  if (cudaOp) {
69  auto context = cudaOp->getContext();
70  int device;
71  cudaGetDevice(&device);
72 
73  cudaSetDevice(context->cuda_gpu_id());
74  cudaEventCreate(&start_);
75  cudaEventRecord(start_, context->cuda_stream());
76 
77  cudaSetDevice(device);
78 
79  cudaError_t error = cudaGetLastError();
80  if (error != cudaSuccess) {
81  CAFFE_THROW("Encountered CUDA error Start: ", cudaGetErrorString(error));
82  }
83  } else {
84  start_time_ = timer_.MilliSeconds();
85  }
86 }
87 
88 void ProfileOperatorObserver::Stop() {
89  auto cudaOp = dynamic_cast_if_rtti<const Operator<CUDAContext>*>(subject_);
90  if (cudaOp) {
91  auto context = cudaOp->getContext();
92  int device;
93  cudaGetDevice(&device);
94 
95  cudaSetDevice(context->cuda_gpu_id());
96  cudaEventCreate(&stop_);
97  cudaEventRecord(stop_, context->cuda_stream());
98  cudaEventSynchronize(stop_);
99  cudaEventElapsedTime(&run_time_, start_, stop_);
100  cudaEventDestroy(start_);
101  cudaEventDestroy(stop_);
102 
103  cudaSetDevice(device);
104 
105  cudaError_t error = cudaGetLastError();
106  if (error != cudaSuccess) {
107  CAFFE_THROW("Encountered CUDA error Stop: ", cudaGetErrorString(error));
108  }
109  } else {
110  run_time_ = timer_.MilliSeconds() - start_time_;
111  }
112 
113  Dump();
114 }
115 
116 std::unique_ptr<ObserverBase<OperatorBase>> ProfileOperatorObserver::rnnCopy(
117  OperatorBase* subject,
118  int rnn_order) const {
119  return std::unique_ptr<ObserverBase<OperatorBase>>(
120  new ProfileOperatorObserver(
121  subject, netObserver_, net_position_, rnn_order));
122 }
123 } // namespace caffe2
// Documentation-index cross references (extraction residue, kept as comments):
// - "A global dictionary that holds information about what Caffe2 modules
//   have been loaded in the current runtime."
// - float MilliSeconds(): returns the elapsed time in milliseconds
//   (definition: timer.h:32).