Caffe2 - C++ API
A deep learning, cross platform ML framework
learning_rate_functors.h
1 #ifndef CAFFE2_SGD_LEARNING_RATE_FUNCTORS_H_
2 #define CAFFE2_SGD_LEARNING_RATE_FUNCTORS_H_
3 
4 #include "caffe2/core/context.h"
5 #include "caffe2/core/operator.h"
6 
7 namespace caffe2 {
8 
// LearningRateFunctor is the abstract interface shared by all learning rate
// schedules in this file: when fed with an iteration number, it produces the
// learning rate for the corresponding iteration.
template <typename T>
class LearningRateFunctor {
 public:
  // Virtual so that concrete schedules can be destroyed through a pointer or
  // reference to this base class.
  virtual ~LearningRateFunctor() = default;

  // Returns the learning rate value for iteration `iter`.
  virtual T operator()(const int64_t iter) const = 0;
};
17 
18 // Fixed: not changing the learning rate at all.
19 template <typename T>
21  public:
22  T operator()(const int64_t /*iter*/) const override {
23  return 1.;
24  }
25 };
26 
27 // Alter: alternatate learning rate with active_period and inactive_period.
28 // update for for a duration of active_period and then stop for a duration of
29 // inactive_period if active_first, and vice versa
30 template <typename T>
32  public:
34  const int64_t active_period,
35  const int64_t inactive_period,
36  const bool active_first)
37  : active_period_(active_period),
38  inactive_period_(inactive_period),
39  active_first_(active_first) {}
40  T operator()(const int64_t iter) const override {
41  if (iter % (active_period_ + inactive_period_) <
42  (active_first_ ? active_period_ : inactive_period_)) {
43  return active_first_ ? 1. : 0.;
44  } else {
45  return active_first_ ? 0. : 1.;
46  };
47  };
48 
49  int64_t active_period_;
50  int64_t inactive_period_;
51  bool active_first_;
52 };
53 
54 // Step: return gamma ^ (floor(iter / step))
55 template <typename T>
57  public:
58  StepLearningRate(const int stepsize, const T gamma)
59  : stepsize_(stepsize), gamma_(gamma) {}
60  T operator()(const int64_t iter) const override {
61  return std::pow(gamma_, static_cast<T>(iter / stepsize_));
62  }
63 
64  int stepsize_;
65  T gamma_;
66 };
67 
68 // Exp: return gamma ^ iter
69 template <typename T>
71  public:
72  explicit ExpLearningRate(const T gamma) : gamma_(gamma) {}
73  T operator()(const int64_t iter) const override {
74  return std::pow(gamma_, static_cast<T>(iter));
75  }
76 
77  T gamma_;
78 };
79 
80 // Inv: return (1 + gamma * iter) ^ (-power)
81 template <typename T>
83  public:
84  InvLearningRate(const T gamma, const T power)
85  : gamma_(gamma), power_(power) {}
86  T operator()(const int64_t iter) const override {
87  return std::pow(T(1) + gamma_ * iter, -power_);
88  }
89  T gamma_;
90  T power_;
91 };
92 
93 // Poly: return (1 - iter/max_iter) ^ (power)
94 template <typename T>
96  public:
97  PolyLearningRate(const T power, const int64_t max_iter)
98  : power_(power), max_iter_(max_iter) {}
99  T operator()(const int64_t iter) const override {
100  return std::pow(1 - T(iter) / T(max_iter_), power_);
101  }
102  T power_;
103  uint64_t max_iter_;
104 };
105 
106 // LinearWarmup: return max(iter/num_iter, 1)
107 template <typename T>
109  public:
110  LinearWarmupLearningRate(const T start_multiplier, const int64_t num_iter)
111  : start_multiplier_(start_multiplier), num_iter_(num_iter) {}
112  T operator()(const int64_t iter) const override {
113  if (iter >= num_iter_) {
114  return 1.;
115  }
116  return start_multiplier_ + (1. - start_multiplier_) * T(iter) / T(num_iter_);
117  }
118  T start_multiplier_;
119  uint64_t num_iter_;
120 };
121 
122 // ConstantWarmup: return scale when iter < num_iter, and 1 otherwise
123 template <typename T>
125  public:
126  ConstantWarmupLearningRate(const T multiplier, const int64_t num_iter)
127  : multiplier_(multiplier), num_iter_(num_iter) {}
128  T operator()(const int64_t iter) const override {
129  if (iter >= num_iter_) {
130  return 1.;
131  }
132  return T(multiplier_);
133  }
134  T multiplier_;
135  uint64_t num_iter_;
136 };
137 
138 // hill: the learning rate changes according to following 3 stages
139 // 1) linear warmup (increasing) at first num_iter steps from start_multiplier
140 // 2) inverse shrink (decreasing) afterwards (gamma, power)
141 // 3) lower bounded by end_multiplier
142 template <typename T>
144  public:
146  const int64_t num_iter,
147  const T start_multiplier,
148  const T gamma,
149  const T power,
150  const T end_multiplier)
151  : linear_warmup_lr_(start_multiplier, num_iter),
152  inv_lr_(gamma, power),
153  num_iter_(num_iter),
154  end_multiplier_(end_multiplier) {}
155  T operator()(const int64_t iter) const override {
156  if (iter < num_iter_) {
157  return linear_warmup_lr_(iter);
158  } else {
159  return std::max(end_multiplier_, inv_lr_(iter - num_iter_));
160  }
161  }
162  LinearWarmupLearningRate<T> linear_warmup_lr_;
163  InvLearningRate<T> inv_lr_;
164  int64_t num_iter_;
165  T end_multiplier_;
166 };
167 
168 } // namespace caffe2
169 
170 #endif // CAFFE2_SGD_LEARNING_RATE_FUNCTORS_H_
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...