17 #include "caffe2/core/logging.h" 18 #include "profile_observer.h" 22 void ProfileOperatorObserver::Dump()
const {
23 static std::mutex loggingMutex;
24 std::lock_guard<std::mutex> lock(loggingMutex);
26 LOG(INFO) <<
"--------- Starting operator " << subject_->debug_def().type()
27 <<
" op#" << getId() <<
" ---------";
28 for (
int i = 0; i < subject_->InputSize(); ++i) {
29 if (subject_->InputIsType<TensorCPU>(i)) {
30 const auto& tensor = subject_->Input<TensorCPU>(i);
31 const auto& name = subject_->debug_def().input(i);
32 TensorPrinter printer(name);
33 LOG(INFO) <<
"Input " << i <<
": " << printer.MetaStr(tensor);
34 }
else if (subject_->InputIsType<TensorCUDA>(i)) {
35 const auto& tensor = subject_->Input<TensorCUDA>(i);
36 const auto& name = subject_->debug_def().input(i);
37 TensorPrinter printer(name);
38 LOG(INFO) <<
"Input " << i <<
": " << printer.MetaStr(tensor);
43 for (
const auto& arg : subject_->debug_def().arg()) {
44 LOG(INFO) <<
"Argument " << a <<
": " << arg.ShortDebugString();
48 for (
int o = 0; o < subject_->OutputSize(); ++o) {
49 if (subject_->OutputIsType<TensorCPU>(o)) {
50 auto* tensor = subject_->Output<TensorCPU>(o);
51 const auto& name = subject_->debug_def().output(o);
52 TensorPrinter printer(name);
53 LOG(INFO) <<
"Output " << o <<
": " << printer.MetaStr(*tensor);
54 }
else if (subject_->OutputIsType<TensorCUDA>(o)) {
55 auto* tensor = subject_->Output<TensorCUDA>(o);
56 const auto& name = subject_->debug_def().output(o);
57 TensorPrinter printer(name);
58 LOG(INFO) <<
"Output " << o <<
": " << printer.MetaStr(*tensor);
62 LOG(INFO) <<
"--------- Finished operator " << subject_->debug_def().type()
63 <<
" in " << run_time_ <<
" ms ---------";
66 void ProfileOperatorObserver::Start() {
67 auto cudaOp = dynamic_cast_if_rtti<const Operator<CUDAContext>*>(subject_);
69 auto context = cudaOp->getContext();
71 cudaGetDevice(&device);
73 cudaSetDevice(context->cuda_gpu_id());
74 cudaEventCreate(&start_);
75 cudaEventRecord(start_, context->cuda_stream());
77 cudaSetDevice(device);
79 cudaError_t error = cudaGetLastError();
80 if (error != cudaSuccess) {
81 CAFFE_THROW(
"Encountered CUDA error Start: ", cudaGetErrorString(error));
88 void ProfileOperatorObserver::Stop() {
89 auto cudaOp = dynamic_cast_if_rtti<const Operator<CUDAContext>*>(subject_);
91 auto context = cudaOp->getContext();
93 cudaGetDevice(&device);
95 cudaSetDevice(context->cuda_gpu_id());
96 cudaEventCreate(&stop_);
97 cudaEventRecord(stop_, context->cuda_stream());
98 cudaEventSynchronize(stop_);
99 cudaEventElapsedTime(&run_time_, start_, stop_);
100 cudaEventDestroy(start_);
101 cudaEventDestroy(stop_);
103 cudaSetDevice(device);
105 cudaError_t error = cudaGetLastError();
106 if (error != cudaSuccess) {
107 CAFFE_THROW(
"Encountered CUDA error Stop: ", cudaGetErrorString(error));
116 std::unique_ptr<ObserverBase<OperatorBase>> ProfileOperatorObserver::rnnCopy(
117 OperatorBase* subject,
118 int rnn_order)
const {
119 return std::unique_ptr<ObserverBase<OperatorBase>>(
120 new ProfileOperatorObserver(
121 subject, netObserver_, net_position_, rnn_order));
A global dictionary that holds information about what Caffe2 modules have been loaded in the current runtime.

float MilliSeconds()
Returns the elapsed time in milliseconds.