Caffe2 - C++ API
A deep learning, cross platform ML framework
stats_ops.cc
1 #include <chrono>
2 #include <vector>
3 #include "caffe2/core/operator.h"
4 #include "caffe2/core/stats.h"
5 #include "caffe2/core/tensor.h"
6 
7 namespace caffe2 {
8 
9 class StatRegistryCreateOp : public Operator<CPUContext> {
10  public:
11  StatRegistryCreateOp(const OperatorDef& operator_def, Workspace* ws)
12  : Operator(operator_def, ws) {}
13 
14  bool RunOnDevice() override {
15  *OperatorBase::Output<std::unique_ptr<StatRegistry>>(0) =
16  std::unique_ptr<StatRegistry>(new StatRegistry);
17  return true;
18  }
19 };
20 
21 class StatRegistryExportOp : public Operator<CPUContext> {
22  public:
23  StatRegistryExportOp(const OperatorDef& operator_def, Workspace* ws)
24  : Operator(operator_def, ws),
25  reset_(GetSingleArgument<bool>("reset", true)) {}
26 
27  bool RunOnDevice() override {
28  auto registry = InputSize() > 0
29  ? OperatorBase::Input<std::unique_ptr<StatRegistry>>(0).get()
30  : &StatRegistry::get();
31  auto* keys = Output(0);
32  auto* values = Output(1);
33  auto* timestamps = Output(2);
34  auto data = registry->publish(reset_);
35  keys->Resize(data.size());
36  values->Resize(data.size());
37  timestamps->Resize(data.size());
38  auto* pkeys = keys->mutable_data<std::string>();
39  auto* pvals = values->mutable_data<int64_t>();
40  auto* ptimestamps = timestamps->mutable_data<int64_t>();
41  int i = 0;
42  for (const auto& stat : data) {
43  pkeys[i] = std::move(stat.key);
44  pvals[i] = stat.value;
45  ptimestamps[i] =
46  std::chrono::nanoseconds(stat.ts.time_since_epoch()).count();
47  ++i;
48  }
49  return true;
50  }
51 
52  private:
53  bool reset_;
54 };
55 
56 class StatRegistryUpdateOp : public Operator<CPUContext> {
57  public:
58  StatRegistryUpdateOp(const OperatorDef& operator_def, Workspace* ws)
59  : Operator(operator_def, ws) {}
60 
61  bool RunOnDevice() override {
62  const auto& keys = Input(0);
63  const auto& values = Input(1);
64  auto registry = InputSize() == 3
65  ? OperatorBase::Input<std::unique_ptr<StatRegistry>>(2).get()
66  : &StatRegistry::get();
67  CAFFE_ENFORCE_EQ(keys.size(), values.size());
68  ExportedStatList data(keys.size());
69  auto* pkeys = keys.data<std::string>();
70  auto* pvals = values.data<int64_t>();
71  int i = 0;
72  for (auto& stat : data) {
73  stat.key = pkeys[i];
74  stat.value = pvals[i];
75  ++i;
76  }
77  registry->update(data);
78  return true;
79  }
80 };
81 
83  public:
84  explicit TimerInstance(const std::string& name)
85  : running_(false), stat_(name) {}
86 
87  void begin() {
88  CAFFE_ENFORCE(!running_, "Called TimerBegin on an already running timer.");
89  running_ = true;
90  start_ = std::chrono::high_resolution_clock::now();
91  }
92 
93  void end() {
94  CAFFE_ENFORCE(running_, "Called TimerEnd on a stopped timer.");
95  using namespace std::chrono;
96  auto duration = high_resolution_clock::now() - start_;
97  auto nanos = duration_cast<nanoseconds>(duration).count();
98  CAFFE_EVENT(stat_, time_ns, nanos);
99  running_ = false;
100  }
101 
102  int64_t get_ns() {
103  CAFFE_ENFORCE(running_, "Called TimerGet on a stopped timer.");
104  using namespace std::chrono;
105  auto duration = high_resolution_clock::now() - start_;
106  auto nanos = duration_cast<nanoseconds>(duration).count();
107  return nanos;
108  }
109 
110  private:
111  bool running_;
112  std::chrono::high_resolution_clock::time_point start_;
113 
114  struct TimerStat {
115  CAFFE_STAT_CTOR(TimerStat);
116  CAFFE_AVG_EXPORTED_STAT(time_ns);
117  } stat_;
118 };
119 
120 struct TimerBeginOp : public Operator<CPUContext> {
121  TimerBeginOp(const OperatorDef& operator_def, Workspace* ws)
122  : Operator(operator_def, ws),
123  given_name_(GetSingleArgument<std::string>(
124  "counter_name",
125  operator_def.output().Get(0))),
126  timer_([this]() { return given_name_; }()) {}
127 
128  bool RunOnDevice() override {
129  *OperatorBase::Output<TimerInstance*>(0) = &timer_;
130  timer_.begin();
131  return true;
132  }
133 
134  private:
135  const std::string given_name_;
136  TimerInstance timer_;
137 };
138 
139 struct TimerEndOp : public Operator<CPUContext> {
140  TimerEndOp(const OperatorDef& operator_def, Workspace* ws)
141  : Operator(operator_def, ws) {}
142 
143  bool RunOnDevice() override {
144  OperatorBase::Input<TimerInstance*>(0)->end();
145  return true;
146  }
147 };
148 
149 struct TimerGetAndEndOp : public Operator<CPUContext> {
150  TimerGetAndEndOp(const OperatorDef& operator_def, Workspace* ws)
151  : Operator(operator_def, ws) {}
152 
153  bool RunOnDevice() override {
154  int64_t nanos = OperatorBase::Input<TimerInstance*>(0)->get_ns();
155  OperatorBase::Input<TimerInstance*>(0)->end();
156  auto* res = OperatorBase::Output<TensorCPU>(0);
157  res->Resize(1);
158  res->template mutable_data<int64_t>()[0] = nanos;
159  return true;
160  }
161 };
162 
163 struct TimerGetOp : public Operator<CPUContext> {
164  TimerGetOp(const OperatorDef& operator_def, Workspace* ws)
165  : Operator(operator_def, ws) {}
166 
167  bool RunOnDevice() override {
168  int64_t nanos = OperatorBase::Input<TimerInstance*>(0)->get_ns();
169  auto* res = OperatorBase::Output<TensorCPU>(0);
170  res->Resize();
171  res->template mutable_data<int64_t>()[0] = nanos;
172  return true;
173  }
174 };
175 
176 struct CpuUtilizationReportOp : public Operator<CPUContext> {
177  CpuUtilizationReportOp(const OperatorDef& operator_def, Workspace* ws)
178  : Operator(operator_def, ws),
179  statsName_(GetSingleArgument<std::string>("stats_name", "utilization")),
180  stat_([this]() { return statsName_; }()) {}
181 
182  bool RunOnDevice() override {
183  float utilization = Input(0).template data<float>()[0];
184  // Utilization is a float value, but CAFFE_EVENT only keeps int64_t values.
185  // We will keep 100x of the received utilization to maintain accuracy.
186  CAFFE_EVENT(stat_, cpu_utilization, (int)(utilization * 100));
187  return true;
188  }
189 
190  private:
191  std::string statsName_;
192  struct CpuStats {
193  CAFFE_STAT_CTOR(CpuStats);
194  CAFFE_EXPORTED_STAT(cpu_utilization);
195  } stat_;
196 };
197 
// CPU operator registrations: each line binds an operator name to the
// implementing class defined earlier in this file.
// StatRegistry operators.
198 REGISTER_CPU_OPERATOR(StatRegistryCreate, StatRegistryCreateOp);
199 REGISTER_CPU_OPERATOR(StatRegistryUpdate, StatRegistryUpdateOp);
200 REGISTER_CPU_OPERATOR(StatRegistryExport, StatRegistryExportOp);
201 
// Timer and CPU utilization operators.
202 REGISTER_CPU_OPERATOR(TimerBegin, TimerBeginOp);
203 REGISTER_CPU_OPERATOR(TimerEnd, TimerEndOp);
204 REGISTER_CPU_OPERATOR(TimerGetAndEnd, TimerGetAndEndOp);
205 REGISTER_CPU_OPERATOR(TimerGet, TimerGetOp);
206 REGISTER_CPU_OPERATOR(CpuUtilizationReport, CpuUtilizationReportOp);
207 
// Schema for StatRegistryCreate: no inputs, one output (the registry handle).
208 OPERATOR_SCHEMA(StatRegistryCreate)
209  .NumInputs(0)
210  .NumOutputs(1)
211  .SetDoc(R"DOC(
212 Create a StatRegistry object that will contain a map of performance counters
213 keyed by name. A StatRegistry is used to gather and retrieve performance
214 counts throughout the caffe2 codebase.
215 )DOC")
216  .Output(0, "handle", "A Blob pointing to the newly created StatRegistry.");
217 
// Schema for StatRegistryUpdate: two required inputs (keys, values), an
// optional third input (registry handle), and no outputs.
218 OPERATOR_SCHEMA(StatRegistryUpdate)
219  .NumInputs(2, 3)
220  .NumOutputs(0)
221  .SetDoc(R"DOC(
222 Update the given StatRegistry, or the global StatRegistry,
223 with the values of counters for the given keys.
224 )DOC")
225  .Input(0, "keys", "1D string tensor with the key names to update.")
226  .Input(1, "values", "1D int64 tensor with the values to update.")
227  .Input(
228  2,
229  "handle",
230  "If provided, update the given StatRegistry. "
231  "Otherwise, update the global singleton.");
232 
233 OPERATOR_SCHEMA(StatRegistryExport)
234  .NumInputs(0, 1)
235  .NumOutputs(3)
236  .Input(
237  0,
238  "handle",
239  "If provided, export values from given StatRegistry."
240  "Otherwise, export values from the global singleton StatRegistry.")
241  .Output(0, "keys", "1D string tensor with exported key names")
242  .Output(1, "values", "1D int64 tensor with exported values")
243  .Output(2, "timestamps", "The unix timestamp at counter retrieval.")
244  .Arg(
245  "reset",
246  "(default true) Whether to atomically reset the counters afterwards.");
247 
// Schema for TimerBegin: no inputs, one output (the timer pointer), and an
// optional "counter_name" argument.
248 OPERATOR_SCHEMA(TimerBegin)
249  .NumInputs(0)
250  .NumOutputs(1)
251  .SetDoc(R"DOC(
252 Start a wallclock timer, returning a pointer to it.
253 The timer is stopped by calling TimerEnd)DOC")
254  .Arg("counter_name", "Name of the timer. If not provided, use output name.")
255  .Output(0, "timer", "Pointer to timer, to be passed to TimerEnd.");
256 
// Schema for TimerEnd: one input (the timer pointer), no outputs.
257 OPERATOR_SCHEMA(TimerEnd)
258  .NumInputs(1)
259  .NumOutputs(0)
260  .SetDoc("Stop a timer started with TimerBegin, publishing a CAFFE_EVENT")
261  .Input(0, "timer", "Pointer to timer, obtained from TimerBegin.");
262 
// Schema for TimerGetAndEnd: one input (the timer pointer), one output (the
// elapsed nanoseconds).
263 OPERATOR_SCHEMA(TimerGetAndEnd)
264  .NumInputs(1)
265  .NumOutputs(1)
266  .SetDoc(R"DOC(Queries the current time of a timer in nanos, stops the timer
267  publishing a CAFFE_EVENT)DOC")
268  .Input(0, "timer", "Pointer to timer, obtained from TimerBegin.")
269  .Output(0, "nanos", "nanoseconds in int64");
270 
// Schema for TimerGet: one input (the timer pointer), one output (the
// elapsed nanoseconds). Unlike TimerGetAndEnd, the operator does not stop
// the timer.
271 OPERATOR_SCHEMA(TimerGet)
272  .NumInputs(1)
273  .NumOutputs(1)
274  .SetDoc(R"DOC(Queries the current time of a timer in nanos)DOC")
275  .Input(0, "timer", "Pointer to timer, obtained from TimerBegin.")
276  .Output(0, "nanos", "nanoseconds in int64");
277 
// Schema for CpuUtilizationReport: one input (the utilization value), no
// outputs, and a "stats_name" argument. The operator records 100x the input
// value as an integer stat.
278 OPERATOR_SCHEMA(CpuUtilizationReport)
279  .NumInputs(1)
280  .NumOutputs(0)
281  .SetDoc(R"DOC(Report the delta in max CPU utilization observed so far in the
282  plan)DOC")
283  .Input(
284  0,
285  "utilization",
286  "Delta in max CPU utilization observed, in percentage as a float value")
287  .Arg("stats_name", "String name of the stat entry holding CPU utilization");
288 
// Type registrations for the two non-tensor payloads the operators in this
// file place in blobs: a raw timer pointer and an owning registry handle.
289 CAFFE_KNOWN_TYPE(TimerInstance*);
290 CAFFE_KNOWN_TYPE(std::unique_ptr<caffe2::StatRegistry>);
291 } // namespace caffe2
Workspace is a class that holds all the related objects created during runtime: (1) all blobs...
Definition: workspace.h:47
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...
std::vector< ExportedStatValue > ExportedStatList
Holds names and values of counters exported from a StatRegistry.
Definition: stats.h:40
static StatRegistry & get()
Retrieve the singleton StatRegistry, which gets populated through the CAFFE_EVENT macro...
Definition: stats.cc:49
Holds a map of atomic counters keyed by name.
Definition: stats.h:117