Caffe2 - Python API
A deep learning, cross-platform ML framework
optimizer_test_util.py
## @package optimizer_test_util
# Module caffe2.python.optimizer_test_util
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import unittest
import numpy as np
from caffe2.python import brew, core, workspace, cnn, optimizer
from caffe2.proto import caffe2_pb2
from caffe2.python.modeling.initializers import (
    Initializer, PseudoFP16Initializer)

from caffe2.python.model_helper import ModelHelper

class OptimizerTestBase(object):
    """
    This is an abstract base class.
    Don't inherit from unittest.TestCase, and don't name it 'Test*'.
    Do, however, do these things in classes which inherit from this.
    """

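    # The methods below call build_optimizer(model, **kwargs) and
    # check_optimizer(optimizer), and testGPUDense reads self._skip_gpu;
    # inheriting test cases are expected to provide all three.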
    def _createDense(self, dtype=core.DataType.FLOAT):
        perfect_model = np.array([2, 6, 5, 0, 1]).astype(np.float32)
        np.random.seed(123)  # make test deterministic
        numpy_dtype = np.float32 if dtype == core.DataType.FLOAT else np.float16
        initializer = Initializer if dtype == core.DataType.FLOAT else \
            PseudoFP16Initializer
        data = np.random.randint(
            2,
            size=(20, perfect_model.size)).astype(numpy_dtype)
        label = np.dot(data, perfect_model)[:, np.newaxis]

        model = ModelHelper(name="test", arg_scope={'order': 'NCHW'})
        out = brew.fc(
            model,
            'data', 'fc', perfect_model.size, 1, ('ConstantFill', {}),
            ('ConstantFill', {}), axis=0,
            WeightInitializer=initializer, BiasInitializer=initializer
        )
        if dtype == core.DataType.FLOAT16:
            out = model.HalfToFloat(out, out + "_fp32")
        sq = model.SquaredL2Distance([out, 'label'])
        loss = model.AveragedLoss(sq, "avg_loss")
        grad_map = model.AddGradientOperators([loss])
        self.assertIsInstance(grad_map['fc_w'], core.BlobReference)
        return (model, perfect_model, data, label)

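    # Trains on single examples for 2000 steps; since label = data . perfect_model,
    # the learned 'fc_w' should approach perfect_model (checked with atol=1e-2).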
    def testDense(self):
        model, perfect_model, data, label = self._createDense()
        optimizer = self.build_optimizer(model)

        workspace.FeedBlob('data', data[0])
        workspace.FeedBlob('label', label[0])
        workspace.RunNetOnce(model.param_init_net)
        workspace.CreateNet(model.net, True)
        for _ in range(2000):
            idx = np.random.randint(data.shape[0])
            workspace.FeedBlob('data', data[idx])
            workspace.FeedBlob('label', label[idx])
            workspace.RunNet(model.net.Proto().name)

        np.testing.assert_allclose(
            perfect_model[np.newaxis, :],
            workspace.FetchBlob('fc_w'),
            atol=1e-2
        )
        self.check_optimizer(optimizer)

    @unittest.skipIf(not workspace.has_gpu_support, "No gpu support")
    def testGPUDense(self, dtype=core.DataType.FLOAT):
        device_opt = core.DeviceOption(caffe2_pb2.CUDA, 0)
        with core.DeviceScope(device_opt):
            model, _perfect_model, data, label = self._createDense(dtype)
            if dtype == core.DataType.FLOAT16:
                fc_fp32_for_host = model.HalfToFloat('fc', 'fc_fp32_for_host')
                model.CopyGPUToCPU(fc_fp32_for_host, 'fc_cpu')
            else:
                model.CopyGPUToCPU('fc', 'fc_cpu')
            workspace.FeedBlob('data', data[0])
            workspace.FeedBlob('label', label[0])

        # Add some CPU ops
        brew.fc(model, 'fc_cpu', 'fc2', dim_in=1, dim_out=10, axis=0)

        # Create optimizer in default device scope
        self.build_optimizer(model)

        if self._skip_gpu:
            return

        # Run net to see it does not crash
        workspace.RunNetOnce(model.param_init_net)
        workspace.CreateNet(model.net, True)
        workspace.RunNet(model.net.Proto().name)

    def testSparse(self):
        # To test duplicated indices, we assign two indices to each weight,
        # so each weight may be counted once or twice per example.
        DUPLICATION = 2
        perfect_model = np.array([2, 6, 5, 0, 1]).astype(np.float32)
        np.random.seed(123)  # make test deterministic
        data = np.random.randint(
            2,
            size=(20, perfect_model.size * DUPLICATION)).astype(np.float32)
        label = np.dot(data, np.repeat(perfect_model, DUPLICATION))

        model = cnn.CNNModelHelper("NCHW", name="test")
        # imitate what model wrapper does
        w = model.param_init_net.ConstantFill(
            [], 'w', shape=[perfect_model.size], value=0.0)
        model.params.append(w)
        picked = model.net.Gather([w, 'indices'], 'gather')
        out = model.ReduceFrontSum(picked, 'sum')

        sq = model.SquaredL2Distance([out, 'label'])
        loss = model.AveragedLoss(sq, "avg_loss")
        grad_map = model.AddGradientOperators([loss])
        self.assertIsInstance(grad_map['w'], core.GradientSlice)
        optimizer = self.build_optimizer(model)

        workspace.CreateBlob('indices')
        workspace.CreateBlob('label')

        for indices_type in [np.int32, np.int64]:
            workspace.RunNetOnce(model.param_init_net)
            workspace.CreateNet(model.net, True)
            for _ in range(2000):
                idx = np.random.randint(data.shape[0])
                # transform into indices of binary features
                indices = np.repeat(np.arange(perfect_model.size),
                                    DUPLICATION)[data[idx] == 1]
                if indices.size == 0:
                    continue
                workspace.FeedBlob(
                    'indices',
                    indices.reshape((indices.size,)).astype(indices_type)
                )
                workspace.FeedBlob('label',
                                   np.array(label[idx]).astype(np.float32))
                workspace.RunNet(model.net.Proto().name)

            np.testing.assert_allclose(
                perfect_model,
                workspace.FetchBlob('w'),
                atol=1e-2
            )
        self.check_optimizer(optimizer)


class LRModificationTestBase(object):
    """
    This is an abstract base class.
    Don't inherit from unittest.TestCase, and don't name it 'Test*'.
    Do, however, do these things in classes which inherit from this.
    """

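    # In addition to the OptimizerTestBase contract, the tests below expect
    # build_optimizer to accept max_gradient_norm and allow_lr_injection
    # keyword arguments.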
    def _gradient_ratio_reference(self, model, params, max_gradient_norm):
        from caffe2.python import core
        sum_squared_norms = 0.0
        for param in params:
            grad = (
                model.param_to_grad[param]
                if not isinstance(
                    model.param_to_grad[param],
                    core.GradientSlice,
                ) else model.param_to_grad[param].values
            )
            val = workspace.FetchBlob(grad)
            sum_squared_norms += np.power(np.linalg.norm(val), 2.0)
        global_norm = np.sqrt(sum_squared_norms)
        clip_norm = max_gradient_norm
        norm_ratio = clip_norm / np.maximum(clip_norm, global_norm)
        return norm_ratio

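    # The reference above mirrors the clipping rule the optimizer is expected
    # to apply: global_norm = sqrt(sum_i ||grad_i||^2), and the update is
    # scaled by clip_norm / max(clip_norm, global_norm), so the ratio falls
    # below 1.0 only when the global gradient norm exceeds the threshold.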
    def test_global_norm_based_gradient_clipping(self):
        max_gradient_norm = 1.0
        model, perfect_model, data, label = self._createDense()
        opt = self.build_optimizer(model, max_gradient_norm=max_gradient_norm)

        params = []
        for param in model.GetParams(top_scope=True):
            if param in model.param_to_grad:
                if not isinstance(
                    model.param_to_grad[param],
                    core.GradientSlice,
                ):
                    params.append(param)

        workspace.FeedBlob('data', data[0])
        workspace.FeedBlob('label', label[0])
        workspace.RunNetOnce(model.param_init_net)
        workspace.CreateNet(model.net, True)
        self.assertIsNotNone(opt._lr_multiplier)

        # Run net once
        idx = np.random.randint(data.shape[0])
        workspace.FeedBlob('data', data[idx])
        workspace.FeedBlob('label', label[idx])
        workspace.RunNet(model.net.Proto().name)

        reference = self._gradient_ratio_reference(
            model,
            params,
            max_gradient_norm,
        )
        norm_ratio = workspace.FetchBlob(
            'norm_clipped_grad_update/norm_ratio')
        np.testing.assert_almost_equal(norm_ratio, reference)
        self.assertTrue(
            reference < 1.0, "Bad test, gradient not being scaled."
        )

    def test_lr_injection(self):
        model, perfect_model, data, label = self._createDense()
        opt = self.build_optimizer(
            model, max_gradient_norm=1, allow_lr_injection=True
        )

        workspace.FeedBlob('data', data[0])
        workspace.FeedBlob('label', label[0])
        workspace.RunNetOnce(model.param_init_net)
        workspace.CreateNet(model.net, True)

        # Test that LR injection is initialized properly
        self.assertIsNotNone(opt._lr_multiplier)
        self.assertEqual(optimizer.get_lr_injection(), 1)

        # Test that we're able to modify the value of the lr_injection
        optimizer.set_lr_injection(0)
        self.assertEqual(optimizer.get_lr_injection(), 0)

        # Test that setting the lr_injector properly propagates to the
        # lr_multiplier. Here, both the lr_injector and the norm_ratio
        # affect the lr_multiplier.
        workspace.RunNet(model.net.Proto().name)
        self.assertEqual(workspace.FetchBlob('lr_multiplier'), 0)
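
Usage: a minimal sketch of a concrete test case (not part of this file),
assuming the usual pattern of mixing the base class into a unittest.TestCase
and implementing the build_optimizer and check_optimizer hooks it calls.
The learning rate and the trivial check_optimizer body are illustrative
assumptions; build_sgd is from caffe2.python.optimizer.

import unittest
from caffe2.python import optimizer
from caffe2.python.optimizer_test_util import OptimizerTestBase


class TestSgd(OptimizerTestBase, unittest.TestCase):
    def build_optimizer(self, model, **kwargs):
        self._skip_gpu = False  # let testGPUDense run on GPU builds
        # build_sgd adds plain-SGD update ops for every model parameter
        return optimizer.build_sgd(model, base_learning_rate=0.1, **kwargs)

    def check_optimizer(self, optimizer):
        # illustrative: plain SGD keeps no auxiliary state worth asserting on
        self.assertIsNotNone(optimizer)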