Caffe2 - C++ API
A deep learning, cross platform ML framework
adagrad_op.h
1 #pragma once
2 
3 #include "caffe2/core/operator.h"
4 
5 namespace caffe2 {
6 
/**
 * Applies one dense Adagrad step to N consecutive float elements.
 *
 * For every element k:
 *   nh[k] = decay * h[k] + g[k]^2
 *   nw[k] = w[k] + lr[0] * g[k] / (sqrt(nh[k]) + epsilon)
 *
 * @param N        number of elements to process
 * @param w        current parameter values (read)
 * @param g        gradient values (read)
 * @param h        current accumulated squared-gradient history (read)
 * @param nw       updated parameter values (written)
 * @param nh       updated history values (written)
 * @param epsilon  added to the square root in the denominator
 * @param decay    multiplier applied to the previous history value
 * @param lr       pointer to the learning rate; only lr[0] is read
 *
 * The trailing Context pointer is accepted but never used.
 */
template <typename Context>
void adagrad_update(
    int N,
    const float* w,
    const float* g,
    const float* h,
    float* nw,
    float* nh,
    float epsilon,
    float decay,
    const float* lr,
    Context* /*context*/) {
  for (int k = 0; k < N; ++k) {
    const float grad = g[k];
    // Store the new history first; the parameter update uses this fresh value.
    const float moment = decay * h[k] + grad * grad;
    nh[k] = moment;
    nw[k] = w[k] + lr[0] * grad / (std::sqrt(moment) + epsilon);
  }
}
25 
26 template <typename T, class Context>
27 class AdagradOp final : public Operator<Context> {
28  public:
29  USE_OPERATOR_CONTEXT_FUNCTIONS;
30  AdagradOp(const OperatorDef& operator_def, Workspace* ws)
31  : Operator<Context>(operator_def, ws),
32  epsilon_(OperatorBase::GetSingleArgument<T>("epsilon", 1e-5f)),
33  decay_(OperatorBase::GetSingleArgument<T>("decay", 1.0f)) {}
34 
35  bool RunOnDevice() override {
36  CAFFE_ENFORCE(Input(GRAD).size() == Input(MOMENT_1).size());
37  CAFFE_ENFORCE(Input(GRAD).size() == Input(PARAM).size());
38  Output(OUTPUT_PARAM)->ResizeLike(Input(PARAM));
39  Output(OUTPUT_MOMENT_1)->ResizeLike(Input(MOMENT_1));
40  adagrad_update<Context>(
41  Input(GRAD).size(),
42  Input(PARAM).template data<T>(),
43  Input(GRAD).template data<T>(),
44  Input(MOMENT_1).template data<T>(),
45  Output(OUTPUT_PARAM)->template mutable_data<T>(),
46  Output(OUTPUT_MOMENT_1)->template mutable_data<T>(),
47  epsilon_,
48  decay_,
49  Input(LR).template data<T>(),
50  &context_);
51  return true;
52  }
53 
54  protected:
55  T epsilon_;
56  T decay_;
57  INPUT_TAGS(PARAM, MOMENT_1, GRAD, LR);
58  OUTPUT_TAGS(OUTPUT_PARAM, OUTPUT_MOMENT_1);
59 };
60 
61 template <typename T, class Context>
62 class SparseAdagradOp final : public Operator<Context> {
63  public:
64  USE_OPERATOR_CONTEXT_FUNCTIONS;
65  SparseAdagradOp(const OperatorDef& operator_def, Workspace* ws)
66  : Operator<Context>(operator_def, ws),
67  epsilon_(OperatorBase::GetSingleArgument<float>("epsilon", 1e-5f)) {}
68 
69  bool RunOnDevice() override {
70  // Enforce shapes
71  CAFFE_ENFORCE_EQ(Input(PARAM).size(), Input(MOMENT_1).size());
72  CAFFE_ENFORCE_EQ(Input(LR).size(), 1);
73  CAFFE_ENFORCE_EQ(
74  Input(PARAM).size_from_dim(1),
75  Input(GRAD).size_from_dim(Input(INDICES).ndim()));
76 
78  this, Input(INDICES));
79  }
80 
81  template <typename SIndex>
82  bool DoRunWithType() {
83  const auto* lr = Input(LR).template data<T>();
84  const auto* indices = Input(INDICES).template data<SIndex>();
85  const auto* gradIn = Input(GRAD).template data<T>();
86  const auto* paramIn = Input(PARAM).template data<T>();
87  const auto* momentIn = Input(MOMENT_1).template data<T>();
88  auto* paramOut = Output(OUTPUT_PARAM)->template mutable_data<T>();
89  auto* momentOut = Output(OUTPUT_MOMENT_1)->template mutable_data<T>();
90 
91  auto n = Input(INDICES).size();
92  if (n == 0) {
93  return true;
94  }
95 
96  auto block_size = Input(GRAD).size() / n;
97  for (auto i = 0; i < n; ++i) {
98  auto idx = indices[i];
99  if (block_size == 1) {
100  float gi = gradIn[i];
101  float hi = momentOut[idx] = momentIn[idx] + gi * gi;
102  paramOut[idx] = paramIn[idx] + lr[0] * gi / (std::sqrt(hi) + epsilon_);
103  } else {
104  auto offsetI = i * block_size;
105  auto offsetIdx = idx * block_size;
106 
107 #ifndef NDEBUG
108  CAFFE_ENFORCE_GE(
109  Input(PARAM).size(),
110  block_size + offsetIdx,
111  this->debug_def().input(PARAM),
112  ", out of bound, idx:",
113  idx,
114  " for input i:",
115  i,
116  " and block size:",
117  block_size);
118  CAFFE_ENFORCE_GE(
119  Input(GRAD).size(),
120  block_size + offsetI,
121  this->debug_def().input(GRAD),
122  ", out of bound idx, idx:",
123  idx,
124  " for input i:",
125  i);
126 #endif
127  adagrad_update(
128  block_size,
129  paramIn + offsetIdx,
130  gradIn + offsetI,
131  momentIn + offsetIdx,
132  paramOut + offsetIdx,
133  momentOut + offsetIdx,
134  epsilon_,
135  1.0f,
136  lr,
137  &context_);
138  }
139  }
140  return true;
141  }
142 
143  protected:
144  T epsilon_;
145  INPUT_TAGS(PARAM, MOMENT_1, INDICES, GRAD, LR);
146  OUTPUT_TAGS(OUTPUT_PARAM, OUTPUT_MOMENT_1);
147 };
148 
149 template <typename T, class Context>
150 class RowWiseSparseAdagradOp final : public Operator<Context> {
151  public:
152  USE_OPERATOR_CONTEXT_FUNCTIONS;
153  RowWiseSparseAdagradOp(const OperatorDef& operator_def, Workspace* ws)
154  : Operator<Context>(operator_def, ws),
155  epsilon_(OperatorBase::GetSingleArgument<float>("epsilon", 1e-5f)) {}
156 
157  bool RunOnDevice() override {
158  // Enforce shapes
159  CAFFE_ENFORCE_EQ(Input(PARAM).dims()[0], Input(MOMENT_1).size());
160  CAFFE_ENFORCE_EQ(Input(LR).size(), 1);
161  CAFFE_ENFORCE_EQ(
162  Input(PARAM).size_from_dim(1),
163  Input(GRAD).size_from_dim(Input(INDICES).ndim()));
164 
166  this, Input(INDICES));
167  }
168 
169  template <typename SIndex>
170  bool DoRunWithType() {
171  const auto* lr = Input(LR).template data<T>();
172  const auto* indices = Input(INDICES).template data<SIndex>();
173  const auto* gradIn = Input(GRAD).template data<T>();
174  const auto* paramIn = Input(PARAM).template data<T>();
175  const auto* momentIn = Input(MOMENT_1).template data<T>();
176  auto* paramOut = Output(OUTPUT_PARAM)->template mutable_data<T>();
177  auto* momentOut = Output(OUTPUT_MOMENT_1)->template mutable_data<T>();
178 
179  auto n = Input(INDICES).size();
180  if (n == 0) {
181  return true;
182  }
183 
184  auto block_size = Input(GRAD).size() / n;
185 
186  for (auto i = 0; i < n; ++i) {
187  auto idx = indices[i];
188  if (block_size == 1) {
189  float gi = gradIn[i];
190  float hi = momentOut[idx] = momentIn[idx] + gi * gi;
191  paramOut[idx] = paramIn[idx] + lr[0] * gi / (std::sqrt(hi) + epsilon_);
192  } else {
193  auto offsetI = i * block_size;
194  auto offsetIdx = idx * block_size;
195 
196 #ifndef NDEBUG
197  CAFFE_ENFORCE_GE(
198  Input(PARAM).size(),
199  block_size + offsetIdx,
200  this->debug_def().input(PARAM),
201  ", out of bound, idx:",
202  idx,
203  " for input i:",
204  i,
205  " and block size:",
206  block_size);
207  CAFFE_ENFORCE_GE(
208  Input(GRAD).size(),
209  block_size + offsetI,
210  this->debug_def().input(GRAD),
211  ", out of bound idx, idx:",
212  idx,
213  " for input i:",
214  i);
215 #endif
216 
217  const float* w = paramIn + offsetIdx;
218  const float* g = gradIn + offsetI;
219  const float* h = momentIn + idx;
220  float* nw = paramOut + offsetIdx;
221  float* nh = momentOut + idx;
222  float hs = 0.;
223  for (auto j = 0; j < block_size; ++j) {
224  float gj = g[j];
225  hs += gj * gj;
226  }
227  float hi = nh[0] = h[0] + hs / block_size;
228  float step = lr[0] / (std::sqrt(hi) + epsilon_);
229  for (auto j = 0; j < block_size; ++j) {
230  nw[j] = w[j] + g[j] * step;
231  }
232  }
233  }
234  return true;
235  }
236 
237  protected:
238  T epsilon_;
239  INPUT_TAGS(PARAM, MOMENT_1, INDICES, GRAD, LR);
240  OUTPUT_TAGS(OUTPUT_PARAM, OUTPUT_MOMENT_1);
241 };
242 }
Workspace is a class that holds all the related objects created during runtime: (1) all blobs...
Definition: workspace.h:47
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...