Caffe2 - C++ API
A deep learning, cross-platform ML framework
net_simple.cc
1 #include "caffe2/core/net_simple.h"
2 #include "caffe2/core/net.h"
3 
4 #include <iostream>
5 #include <set>
6 #include <unordered_map>
7 #include <unordered_set>
8 
9 #include "caffe2/core/operator.h"
10 #include "caffe2/core/static_tracepoint.h"
11 #include "caffe2/core/timer.h"
12 #include "caffe2/proto/caffe2.pb.h"
13 #include "caffe2/utils/proto_utils.h"
14 
15 namespace caffe2 {
16 
17 SimpleNet::SimpleNet(
18  const std::shared_ptr<const NetDef>& net_def,
19  Workspace* ws)
20  : NetBase(net_def, ws) {
21  VLOG(1) << "Constructing SimpleNet " << net_def->name();
22  const bool net_def_has_device_option = net_def->has_device_option();
23  // Initialize the operators
24  for (int idx = 0; idx < net_def->op_size(); ++idx) {
25  const auto& operator_def = net_def->op(idx);
26  VLOG(1) << "Creating operator " << operator_def.name() << ": "
27  << operator_def.type();
28  std::unique_ptr<OperatorBase> op{nullptr};
29  if (!operator_def.has_device_option() && net_def_has_device_option) {
30  // In the case that the operator def does not specify a device option but
31  // the net def has a default option, we copy the device option over to the
32  // operator def.
33  OperatorDef temp_def(operator_def);
34  temp_def.mutable_device_option()->CopyFrom(net_def->device_option());
35  op = CreateOperator(temp_def, ws, idx);
36  } else {
37  op = CreateOperator(operator_def, ws, idx);
38  op->set_debug_def(
39  std::shared_ptr<const OperatorDef>{net_def, &(net_def->op(idx))});
40  }
41  operators_.emplace_back(std::move(op));
42  }
43 }
44 
45 bool SimpleNet::Run() {
46  StartAllObservers();
47  VLOG(1) << "Running net " << name_;
48  for (auto& op : operators_) {
49  VLOG(1) << "Running operator " << op->debug_def().name() << "("
50  << op->debug_def().type() << ").";
51 #ifdef CAFFE2_ENABLE_SDT
52  const auto& op_name = op->debug_def().name().c_str();
53  const auto& op_type = op->debug_def().type().c_str();
54  auto* op_ptr = op.get();
55  const auto& net_name = name_.c_str();
56  CAFFE_SDT(operator_start, net_name, op_name, op_type, op_ptr);
57 #endif
58  bool res = op->Run();
59 #ifdef CAFFE2_ENABLE_SDT
60  CAFFE_SDT(operator_done, net_name, op_name, op_type, op_ptr);
61 #endif
62  if (!res) {
63  LOG(ERROR) << "Operator failed: " << ProtoDebugString(op->debug_def());
64  return false;
65  }
66  }
67  StopAllObservers();
68  return true;
69 }
70 
71 bool SimpleNet::RunAsync() {
72  return Run();
73 }
74 
namespace {

/// Comparison predicate that orders pairs by their `second` member, largest
/// first. Passing it to std::sort yields a descending sort on `second`; the
/// `first` member is ignored.
///
/// @return true when `lhs.second` is strictly greater than `rhs.second`.
template <typename First, typename Second>
bool PairLargerThan(
    const std::pair<First, Second>& lhs,
    const std::pair<First, Second>& rhs) {
  const auto& lhs_value = lhs.second;
  const auto& rhs_value = rhs.second;
  return lhs_value > rhs_value;
}

} // namespace
81 
83  const int warmup_runs,
84  const int main_runs,
85  const bool run_individual) {
86  /* Use std::cout because logging may be disabled */
87  std::cout << "Starting benchmark." << std::endl;
88  std::cout << "Running warmup runs." << std::endl;
89  CAFFE_ENFORCE(
90  warmup_runs >= 0,
91  "Number of warm up runs should be non negative, provided ",
92  warmup_runs,
93  ".");
94  for (int i = 0; i < warmup_runs; ++i) {
95  CAFFE_ENFORCE(Run(), "Warmup run ", i, " has failed.");
96  }
97 
98  std::cout << "Main runs." << std::endl;
99  CAFFE_ENFORCE(
100  main_runs >= 0,
101  "Number of main runs should be non negative, provided ",
102  main_runs,
103  ".");
104  Timer timer;
105  for (int i = 0; i < main_runs; ++i) {
106  CAFFE_ENFORCE(Run(), "Main run ", i, " has failed.");
107  }
108  auto millis = timer.MilliSeconds();
109  std::cout << "Main run finished. Milliseconds per iter: "
110  << millis / main_runs
111  << ". Iters per second: " << 1000.0 * main_runs / millis << std::endl;
112 
113  vector<float> time_per_op(operators_.size(), 0);
114  vector<uint64_t> flops_per_op;
115  vector<uint64_t> memory_bytes_per_op;
116  vector<uint64_t> param_bytes_per_op;
117  CaffeMap<string, float> time_per_op_type;
118  CaffeMap<string, float> flops_per_op_type;
119  CaffeMap<string, float> memory_bytes_per_op_type;
120  CaffeMap<string, float> param_bytes_per_op_type;
121  if (run_individual) {
122  for (int i = 0; i < main_runs; ++i) {
123  for (auto& op : operators_) {
124  op->ResetEvent();
125  }
126  int idx = 0;
127  for (auto& op : operators_) {
128  const string& op_type = op->debug_def().type();
129  if (i == 0) { // Gather flops on the first run.
130  auto* schema = OpSchemaRegistry::Schema(op_type);
131  if (schema && schema->HasCostInferenceFunction()) {
132  vector<TensorShape> shapes = op->InputTensorShapes();
133 
134  OpSchema::Cost cost = schema->InferCost(op->debug_def(), shapes);
135 
136  flops_per_op.emplace_back(cost.flops);
137  memory_bytes_per_op.emplace_back(cost.bytes_moved);
138  param_bytes_per_op.emplace_back(cost.params_bytes);
139 
140  flops_per_op_type[op_type] += cost.flops;
141  memory_bytes_per_op_type[op_type] += cost.bytes_moved;
142  param_bytes_per_op_type[op_type] += cost.params_bytes;
143  }
144  }
145  timer.Start();
146  CAFFE_ENFORCE(
147  op->Run(),
148  "operator ",
149  op->debug_def().name(),
150  "(",
151  op_type,
152  ") has failed.");
153  float spent = timer.MilliSeconds();
154  time_per_op[idx] += spent;
155  time_per_op_type[op_type] += spent;
156  ++idx;
157  }
158  }
159  int idx = 0;
160  for (auto& op : operators_) {
161  const string& op_type = op->debug_def().type();
162  const string& print_name =
163  (op->debug_def().name().size()
164  ? op->debug_def().name()
165  : (op->debug_def().output_size() ? op->debug_def().output(0)
166  : "NO_OUTPUT"));
167  std::stringstream flops_str;
168  if (idx < flops_per_op.size() && flops_per_op[idx]) {
169  flops_str << " (" << to_string(1.0e-9 * flops_per_op[idx]) << " GFLOP, "
170  << to_string(1.0e-6 * flops_per_op[idx] / time_per_op[idx])
171  << " GFLOPS)";
172  }
173  std::stringstream memory_bytes_str;
174  if (idx < memory_bytes_per_op.size() && memory_bytes_per_op[idx]) {
175  memory_bytes_str << " (" << to_string(1.0e-6 * memory_bytes_per_op[idx])
176  << " MB)";
177  }
178  std::stringstream param_bytes_str;
179  if (idx < param_bytes_per_op.size() && param_bytes_per_op[idx]) {
180  memory_bytes_str << " (" << to_string(1.0e-6 * param_bytes_per_op[idx])
181  << " MB)";
182  }
183  std::cout << "Operator #" << idx << " (" << print_name << ", " << op_type
184  << ") " << time_per_op[idx] / main_runs << " ms/iter"
185  << flops_str.str() << memory_bytes_str.str()
186  << param_bytes_str.str() << std::endl;
187  ++idx;
188  }
189  const std::vector<string> metric(
190  {"Time", "FLOP", "Feature Memory", "Parameter Memory"});
191  const std::vector<double> normalizer(
192  {1.0 / main_runs, 1.0e-9, 1.0e-6, 1.0e-6});
193  const std::vector<string> unit({"ms", "GFLOP", "MB", "MB"});
194 
195  std::vector<CaffeMap<string, float>*> metric_per_op_type_vec_vec;
196  metric_per_op_type_vec_vec.emplace_back(&time_per_op_type);
197  metric_per_op_type_vec_vec.emplace_back(&flops_per_op_type);
198  metric_per_op_type_vec_vec.emplace_back(&memory_bytes_per_op_type);
199  metric_per_op_type_vec_vec.emplace_back(&param_bytes_per_op_type);
200  for (int i = 0; i < metric_per_op_type_vec_vec.size(); ++i) {
201  std::cout << metric[i] << " per operator type:" << std::endl;
202  auto* item = metric_per_op_type_vec_vec[i];
203  std::vector<std::pair<string, float>> metric_per_op_type_vec(
204  (*item).begin(), (*item).end());
205  std::sort(
206  metric_per_op_type_vec.begin(),
207  metric_per_op_type_vec.end(),
208  PairLargerThan<string, float>);
209  float total_metric = 0.;
210  for (const auto& op_item : metric_per_op_type_vec) {
211  total_metric += op_item.second * normalizer[i];
212  }
213  for (const auto& op_item : metric_per_op_type_vec) {
214  float percent = 0.;
215  if (total_metric > 0.) {
216  percent = (100.0 * op_item.second * normalizer[i] / total_metric);
217  }
218  std::cout << std::setw(15) << std::setfill(' ')
219  << op_item.second * normalizer[i] << " " << unit[i] << ". "
220  << std::setw(10) << std::setfill(' ') << percent << "%. "
221  << op_item.first << std::endl;
222  }
223  std::cout << std::setw(15) << std::setfill(' ') << total_metric << " "
224  << unit[i] << " in Total" << std::endl;
225  }
226  }
227  // We will reuse time_per_op to return the result of BenchmarkNet.
228  for (int i = 0; i < time_per_op.size(); ++i) {
229  time_per_op[i] /= main_runs;
230  }
231  time_per_op.insert(time_per_op.begin(), millis / main_runs);
232  return time_per_op;
233 }
234 
// Registers SimpleNet through the REGISTER_NET macro under the key `simple`.
// NOTE(review): the macro is defined outside this file; presumably this makes
// SimpleNet selectable by the net type name "simple" — confirm.
235 REGISTER_NET(simple, SimpleNet);
236 
237 } // namespace caffe2
void Start()
Starts a timer.
Definition: timer.h:24
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...
float MilliSeconds()
Returns the elapsed time in milliseconds.
Definition: timer.h:32
vector< float > TEST_Benchmark(const int warmup_runs, const int main_runs, const bool run_individual) override
Benchmarks a network.
Definition: net_simple.cc:82
A simple timer object for measuring time.
Definition: timer.h:16