1 #include "caffe2/core/net_simple.h" 2 #include "caffe2/core/net.h" 6 #include <unordered_map> 7 #include <unordered_set> 9 #include "caffe2/core/operator.h" 10 #include "caffe2/core/static_tracepoint.h" 11 #include "caffe2/core/timer.h" 12 #include "caffe2/proto/caffe2.pb.h" 13 #include "caffe2/utils/proto_utils.h" 18 const std::shared_ptr<const NetDef>& net_def,
// SimpleNet constructor body: walks net_def->op(0..op_size()-1), creates one
// operator per OperatorDef and appends it to operators_ in definition order.
// NOTE(review): this listing elides several original lines (for example the
// opener of the second CreateOperator branch and the call that receives the
// shared_ptr built near the end of the loop) — confirm against the full file.
20 : NetBase(net_def, ws) {
21 VLOG(1) <<
"Constructing SimpleNet " << net_def->name();
// Cached once; consulted for every operator in the loop below.
22 const bool net_def_has_device_option = net_def->has_device_option();
24 for (
int idx = 0; idx < net_def->op_size(); ++idx) {
25 const auto& operator_def = net_def->op(idx);
26 VLOG(1) <<
"Creating operator " << operator_def.name() <<
": " 27 << operator_def.type();
28 std::unique_ptr<OperatorBase> op{
nullptr};
// The operator has no device option of its own but the net does: create it
// from a copy of its definition that carries the net-level device option.
29 if (!operator_def.has_device_option() && net_def_has_device_option) {
33 OperatorDef temp_def(operator_def);
34 temp_def.mutable_device_option()->CopyFrom(net_def->device_option());
35 op = CreateOperator(temp_def, ws, idx);
// Operator created directly from its own, unmodified definition.
// NOTE(review): presumably the else-branch of the check above; the branch
// opener is not visible in this listing.
37 op = CreateOperator(operator_def, ws, idx);
// Aliasing shared_ptr: shares ownership with net_def while pointing at
// op(idx). NOTE(review): the call this expression is an argument of is
// elided here.
39 std::shared_ptr<const OperatorDef>{net_def, &(net_def->op(idx))});
// Ownership of the new operator moves into operators_.
41 operators_.emplace_back(std::move(op));
// Visits every entry of operators_ in order, logging each one at VLOG(1).
// NOTE(review): the lines that actually invoke each operator, test its
// result and return from the function are elided from this listing; only
// the logging and tracing statements are visible.
45 bool SimpleNet::Run() {
47 VLOG(1) <<
"Running net " << name_;
48 for (
auto& op : operators_) {
49 VLOG(1) <<
"Running operator " << op->debug_def().name() <<
"(" 50 << op->debug_def().type() <<
").";
// When CAFFE2_ENABLE_SDT is defined, emit an operator_start probe carrying
// the net name, operator name, operator type and operator pointer.
51 #ifdef CAFFE2_ENABLE_SDT 52 const auto& op_name = op->debug_def().name().c_str();
53 const auto& op_type = op->debug_def().type().c_str();
54 auto* op_ptr = op.get();
55 const auto& net_name = name_.c_str();
56 CAFFE_SDT(operator_start, net_name, op_name, op_type, op_ptr);
// Matching operator_done probe, passing the same four arguments.
59 #ifdef CAFFE2_ENABLE_SDT 60 CAFFE_SDT(operator_done, net_name, op_name, op_type, op_ptr);
// Failure report: logs the operator's full debug definition.
// NOTE(review): the condition guarding this log is not visible here.
63 LOG(ERROR) <<
"Operator failed: " << ProtoDebugString(op->debug_def());
71 bool SimpleNet::RunAsync() {
/**
 * Strict "greater-than" comparison on the second member of two pairs.
 *
 * When supplied as the comparator of a sort, it orders pairs by descending
 * `second`; the `first` members are never inspected, so pairs with equal
 * `second` compare as equivalent.
 *
 * @param x Left-hand pair.
 * @param y Right-hand pair.
 * @return true if and only if x.second > y.second.
 */
template <typename A, typename B>
bool PairLargerThan(const std::pair<A, B>& x, const std::pair<A, B>& y) {
  return x.second > y.second;
}
// SimpleNet::TEST_Benchmark — benchmarks the net and prints progress and
// results to std::cout.
//   warmup_runs    - number of warmup Run() calls; each one is enforced to
//                    succeed.
//   main_runs      - number of main Run() calls; each one is enforced to
//                    succeed, and the count is used to average timings.
//   run_individual - when true, per-operator and per-operator-type
//                    statistics are also collected and printed.
// The visible tail divides every time_per_op entry by main_runs and inserts
// millis / main_runs at the front of that vector.
// NOTE(review): this listing elides a number of original lines (the first
// signature line, the timer calls, the sort call, closing braces, the return
// statement); the surviving text is kept verbatim apart from the fixes
// marked "Fix:" below.
83 const int warmup_runs,
85 const bool run_individual) {
87 std::cout <<
"Starting benchmark." << std::endl;
88 std::cout <<
"Running warmup runs." << std::endl;
91 "Number of warm up runs should be non negative, provided ",
// Warmup phase: any failing Run() aborts the benchmark via CAFFE_ENFORCE.
94 for (
int i = 0; i < warmup_runs; ++i) {
95 CAFFE_ENFORCE(Run(),
"Warmup run ", i,
" has failed.");
98 std::cout <<
"Main runs." << std::endl;
101 "Number of main runs should be non negative, provided ",
// Main phase: same success requirement as the warmup phase.
105 for (
int i = 0; i < main_runs; ++i) {
106 CAFFE_ENFORCE(Run(),
"Main run ", i,
" has failed.");
109 std::cout <<
"Main run finished. Milliseconds per iter: " 110 << millis / main_runs
111 <<
". Iters per second: " << 1000.0 * main_runs / millis << std::endl;
// Accumulators: one slot per operator, plus totals keyed by operator type.
113 vector<float> time_per_op(operators_.size(), 0);
114 vector<uint64_t> flops_per_op;
115 vector<uint64_t> memory_bytes_per_op;
116 vector<uint64_t> param_bytes_per_op;
117 CaffeMap<string, float> time_per_op_type;
118 CaffeMap<string, float> flops_per_op_type;
119 CaffeMap<string, float> memory_bytes_per_op_type;
120 CaffeMap<string, float> param_bytes_per_op_type;
121 if (run_individual) {
122 for (
int i = 0; i < main_runs; ++i) {
123 for (
auto& op : operators_) {
127 for (
auto& op : operators_) {
128 const string& op_type = op->debug_def().type();
// Cost figures are recorded only for operator types whose schema provides a
// cost inference function.
130 auto* schema = OpSchemaRegistry::Schema(op_type);
131 if (schema && schema->HasCostInferenceFunction()) {
132 vector<TensorShape> shapes = op->InputTensorShapes();
136 flops_per_op.emplace_back(cost.flops);
137 memory_bytes_per_op.emplace_back(cost.bytes_moved);
138 param_bytes_per_op.emplace_back(cost.params_bytes);
140 flops_per_op_type[op_type] += cost.flops;
141 memory_bytes_per_op_type[op_type] += cost.bytes_moved;
142 param_bytes_per_op_type[op_type] += cost.params_bytes;
149 op->debug_def().name(),
154 time_per_op[idx] += spent;
155 time_per_op_type[op_type] += spent;
// Per-operator report: one output line per operator.
160 for (
auto& op : operators_) {
161 const string& op_type = op->debug_def().type();
// Display name: the operator's own name if non-empty, otherwise its first
// output when it has one (the final fallback is elided in this listing).
162 const string& print_name =
163 (op->debug_def().name().size()
164 ? op->debug_def().name()
165 : (op->debug_def().output_size() ? op->debug_def().output(0)
167 std::stringstream flops_str;
168 if (idx < flops_per_op.size() && flops_per_op[idx]) {
169 flops_str <<
" (" << to_string(1.0e-9 * flops_per_op[idx]) <<
" GFLOP, " 170 << to_string(1.0e-6 * flops_per_op[idx] / time_per_op[idx])
173 std::stringstream memory_bytes_str;
174 if (idx < memory_bytes_per_op.size() && memory_bytes_per_op[idx]) {
175 memory_bytes_str <<
" (" << to_string(1.0e-6 * memory_bytes_per_op[idx])
178 std::stringstream param_bytes_str;
179 if (idx < param_bytes_per_op.size() && param_bytes_per_op[idx]) {
// Fix: the parameter-bytes text goes into param_bytes_str. It used to be
// appended to memory_bytes_str, which left param_bytes_str always empty.
// The printed line is unchanged because the two streams are emitted back
// to back below.
180 param_bytes_str <<
" (" << to_string(1.0e-6 * param_bytes_per_op[idx])
183 std::cout <<
"Operator #" << idx <<
" (" << print_name <<
", " << op_type
184 <<
") " << time_per_op[idx] / main_runs <<
" ms/iter" 185 << flops_str.str() << memory_bytes_str.str()
186 << param_bytes_str.str() << std::endl;
// Per-type summary tables. The four vectors below are parallel: entry i of
// metric / normalizer / unit describes entry i of metric_per_op_type_vec_vec.
189 const std::vector<string> metric(
190 {
"Time",
"FLOP",
"Feature Memory",
"Parameter Memory"});
// Time is averaged over main_runs; FLOP is scaled by 1e-9 and both memory
// figures by 1e-6 to match the unit labels.
191 const std::vector<double> normalizer(
192 {1.0 / main_runs, 1.0e-9, 1.0e-6, 1.0e-6});
193 const std::vector<string> unit({
"ms",
"GFLOP",
"MB",
"MB"});
195 std::vector<CaffeMap<string, float>*> metric_per_op_type_vec_vec;
196 metric_per_op_type_vec_vec.emplace_back(&time_per_op_type);
197 metric_per_op_type_vec_vec.emplace_back(&flops_per_op_type);
198 metric_per_op_type_vec_vec.emplace_back(&memory_bytes_per_op_type);
// Fix: restored the address-of expression that had been corrupted into a
// pilcrow character.
199 metric_per_op_type_vec_vec.emplace_back(&param_bytes_per_op_type);
// Fix: unsigned index to match the unsigned size() it is compared against.
200 for (
size_t i = 0; i < metric_per_op_type_vec_vec.size(); ++i) {
201 std::cout << metric[i] <<
" per operator type:" << std::endl;
202 auto* item = metric_per_op_type_vec_vec[i];
// Copy the map entries into a vector so they can be ordered by value
// (largest first, per PairLargerThan).
203 std::vector<std::pair<string, float>> metric_per_op_type_vec(
204 (*item).begin(), (*item).end());
206 metric_per_op_type_vec.begin(),
207 metric_per_op_type_vec.end(),
208 PairLargerThan<string, float>);
// First pass: normalized total, used as the denominator for percentages.
209 float total_metric = 0.;
210 for (
const auto& op_item : metric_per_op_type_vec) {
211 total_metric += op_item.second * normalizer[i];
// Second pass: one row per operator type; the percentage is only computed
// when the total is positive.
213 for (
const auto& op_item : metric_per_op_type_vec) {
215 if (total_metric > 0.) {
216 percent = (100.0 * op_item.second * normalizer[i] / total_metric);
218 std::cout << std::setw(15) << std::setfill(
' ')
219 << op_item.second * normalizer[i] <<
" " << unit[i] <<
". " 220 << std::setw(10) << std::setfill(
' ') << percent <<
"%. " 221 << op_item.first << std::endl;
223 std::cout << std::setw(15) << std::setfill(
' ') << total_metric <<
" " 224 << unit[i] <<
" in Total" << std::endl;
// Convert accumulated per-operator times into per-iteration averages, then
// put the whole-net average in front.
// Fix: unsigned index to match the unsigned size() it is compared against.
228 for (
size_t i = 0; i < time_per_op.size(); ++i) {
229 time_per_op[i] /= main_runs;
231 time_per_op.insert(time_per_op.begin(), millis / main_runs);
void Start()
Starts a timer.
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...
float MilliSeconds()
Returns the elapsed time in milliseconds.
vector< float > TEST_Benchmark(const int warmup_runs, const int main_runs, const bool run_individual) override
Benchmarks a network.
A simple timer object for measuring time.