from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import unittest

import numpy as np

from caffe2.python import brew, core, workspace, cnn, optimizer
from caffe2.python.model_helper import ModelHelper
from caffe2.proto import caffe2_pb2
from caffe2.python.modeling.initializers import (
    Initializer, PseudoFP16Initializer)


class OptimizerTestBase(object):
    """
    This is an abstract base class.
    Don't inherit from unittest.TestCase, and don't name it 'Test*'.
    Do, however, do these things in classes which inherit from this.
    (A minimal subclass sketch appears after this class body.)
    """

    def _createDense(self, dtype=core.DataType.FLOAT):
        perfect_model = np.array([2, 6, 5, 0, 1]).astype(np.float32)
        np.random.seed(123)  # make test deterministic
        numpy_dtype = np.float32 if dtype == core.DataType.FLOAT \
            else np.float16
        initializer = Initializer if dtype == core.DataType.FLOAT \
            else PseudoFP16Initializer
        # binary input features; the label is a noiseless linear function
        data = np.random.randint(
            2,
            size=(20, perfect_model.size)).astype(numpy_dtype)
        label = np.dot(data, perfect_model)[:, np.newaxis]

        model = ModelHelper(name="test", arg_scope={'order': 'NCHW'})
        out = brew.fc(
            model,
            'data', 'fc', perfect_model.size, 1, ('ConstantFill', {}),
            ('ConstantFill', {}), axis=0,
            WeightInitializer=initializer, BiasInitializer=initializer
        )
        if dtype == core.DataType.FLOAT16:
            out = model.HalfToFloat(out, out + "_fp32")
        sq = model.SquaredL2Distance([out, 'label'])
        loss = model.AveragedLoss(sq, "avg_loss")
        grad_map = model.AddGradientOperators([loss])
        self.assertIsInstance(grad_map['fc_w'], core.BlobReference)
        return (model, perfect_model, data, label)
    def testDense(self):
        model, perfect_model, data, label = self._createDense()
        optimizer = self.build_optimizer(model)
        workspace.FeedBlob('data', data[0])
        workspace.FeedBlob('label', label[0])
        workspace.RunNetOnce(model.param_init_net)
        workspace.CreateNet(model.net, True)
        for _ in range(2000):
            idx = np.random.randint(data.shape[0])
            workspace.FeedBlob('data', data[idx])
            workspace.FeedBlob('label', label[idx])
            workspace.RunNet(model.net.Proto().name)

        # the fc weights should have converged to the generating model
        np.testing.assert_allclose(
            perfect_model[np.newaxis, :],
            workspace.FetchBlob('fc_w'),
            atol=1e-2
        )
        self.check_optimizer(optimizer)
    @unittest.skipIf(not workspace.has_gpu_support, "No gpu support")
    def testGPUDense(self, dtype=core.DataType.FLOAT):
        device_opt = core.DeviceOption(caffe2_pb2.CUDA, 0)
        with core.DeviceScope(device_opt):
            model, _perfect_model, data, label = self._createDense(dtype)
            if dtype == core.DataType.FLOAT16:
                fc_fp32_for_host = model.HalfToFloat(
                    'fc', 'fc_fp32_for_host')
                model.CopyGPUToCPU(fc_fp32_for_host, 'fc_cpu')
            else:
                model.CopyGPUToCPU('fc', 'fc_cpu')
            workspace.FeedBlob('data', data[0])
            workspace.FeedBlob('label', label[0])

        # add a CPU-side op on the copied blob
        brew.fc(model, 'fc_cpu', 'fc2', dim_in=1, dim_out=10, axis=0)

        # the optimizer is built in the default device scope
        self.build_optimizer(model)

        # run the mixed-device net to make sure it does not crash
        workspace.RunNetOnce(model.param_init_net)
        workspace.CreateNet(model.net, True)
        workspace.RunNet(model.net.Proto().name)
    def testSparse(self):
        # to test duplicated indices we assign two indices to each weight and
        # thus each weight might count once or twice
        DUPLICATION = 2
        perfect_model = np.array([2, 6, 5, 0, 1]).astype(np.float32)
        np.random.seed(123)  # make test deterministic
        data = np.random.randint(
            2,
            size=(20, perfect_model.size * DUPLICATION)).astype(np.float32)
        label = np.dot(data, np.repeat(perfect_model, DUPLICATION))

        model = cnn.CNNModelHelper("NCHW", name="test")
        # imitate what model wrapper does
        w = model.param_init_net.ConstantFill(
            [], 'w', shape=[perfect_model.size], value=0.0)
        model.params.append(w)
        picked = model.net.Gather([w, 'indices'], 'gather')
        out = model.ReduceFrontSum(picked, 'sum')

        sq = model.SquaredL2Distance([out, 'label'])
        loss = model.AveragedLoss(sq, "avg_loss")
        grad_map = model.AddGradientOperators([loss])
        self.assertIsInstance(grad_map['w'], core.GradientSlice)
        optimizer = self.build_optimizer(model)

        workspace.CreateBlob('indices')
        workspace.CreateBlob('label')

        for indices_type in [np.int32, np.int64]:
            workspace.RunNetOnce(model.param_init_net)
            workspace.CreateNet(model.net, True)
            for _ in range(2000):
                idx = np.random.randint(data.shape[0])
                # transform into indices of binary features
                indices = np.repeat(np.arange(perfect_model.size),
                                    DUPLICATION)[data[idx] == 1]
                if indices.size == 0:
                    continue
                workspace.FeedBlob(
                    'indices',
                    indices.reshape((indices.size,)).astype(indices_type)
                )
                workspace.FeedBlob(
                    'label',
                    np.array(label[idx]).astype(np.float32))
                workspace.RunNet(model.net.Proto().name)

            np.testing.assert_allclose(
                perfect_model,
                workspace.FetchBlob('w'),
                atol=1e-2
            )
        self.check_optimizer(optimizer)
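

# A minimal sketch of a concrete subclass, as referenced in the docstring
# above. It is illustrative only: the class name and the SGD settings are
# assumptions, not prescribed by this module. build_optimizer must return
# the optimizer object, and check_optimizer may assert on its state.
#
#     class SgdOptimizerTest(OptimizerTestBase, unittest.TestCase):
#         def build_optimizer(self, model, **kwargs):
#             return optimizer.build_sgd(
#                 model, base_learning_rate=0.1, **kwargs)
#
#         def check_optimizer(self, optimizer):
#             # e.g. inspect optimizer state; a no-op also satisfies the base
#             pass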


class LRModificationTestBase(object):
    """
    This is an abstract base class.
    Don't inherit from unittest.TestCase, and don't name it 'Test*'.
    Do, however, do these things in classes which inherit from this.
    (A minimal subclass sketch appears after this class body.)
    """

    def _gradient_ratio_reference(self, model, params, max_gradient_norm):
        sum_squared_norms = 0.0
        for param in params:
            # for sparse gradients, the norm is taken over the values slice
            grad = (
                model.param_to_grad[param]
                if not isinstance(
                    model.param_to_grad[param],
                    core.GradientSlice,
                ) else model.param_to_grad[param].values
            )
            val = workspace.FetchBlob(grad)
            sum_squared_norms += np.power(np.linalg.norm(val), 2.0)
        global_norm = np.sqrt(sum_squared_norms)
        clip_norm = max_gradient_norm
        norm_ratio = clip_norm / np.maximum(clip_norm, global_norm)
        return norm_ratio
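    # Worked example for the reference above: with clip_norm = 1.0 and
    # global_norm = 2.0, norm_ratio = 1.0 / max(1.0, 2.0) = 0.5, i.e. every
    # gradient is halved; when global_norm <= clip_norm the ratio is 1.0 and
    # the gradients pass through unscaled.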
    def test_global_norm_based_gradient_clipping(self):
        max_gradient_norm = 1.0
        model, perfect_model, data, label = self._createDense()
        opt = self.build_optimizer(model, max_gradient_norm=max_gradient_norm)

        params = []
        for param in model.GetParams(top_scope=True):
            if param in model.param_to_grad:
                if not isinstance(
                    model.param_to_grad[param],
                    core.GradientSlice,
                ):
                    params.append(param)

        workspace.FeedBlob('data', data[0])
        workspace.FeedBlob('label', label[0])
        workspace.RunNetOnce(model.param_init_net)
        workspace.CreateNet(model.net, True)
        self.assertIsNotNone(opt._lr_multiplier)

        # run the net once
        idx = np.random.randint(data.shape[0])
        workspace.FeedBlob('data', data[idx])
        workspace.FeedBlob('label', label[idx])
        workspace.RunNet(model.net.Proto().name)

        reference = self._gradient_ratio_reference(
            model,
            params,
            max_gradient_norm,
        )
        norm_ratio = workspace.FetchBlob(
            'norm_clipped_grad_update/norm_ratio')
        np.testing.assert_almost_equal(norm_ratio, reference)
        self.assertTrue(
            reference < 1.0, "Bad test, gradient not being scaled."
        )

    def test_lr_injection(self):
        model, perfect_model, data, label = self._createDense()
        opt = self.build_optimizer(
            model, max_gradient_norm=1, allow_lr_injection=True
        )

        workspace.FeedBlob('data', data[0])
        workspace.FeedBlob('label', label[0])
        workspace.RunNetOnce(model.param_init_net)
        workspace.CreateNet(model.net, True)

        # the LR injection starts out initialized and neutral (multiplier 1)
        self.assertIsNotNone(opt._lr_multiplier)
        self.assertEqual(optimizer.get_lr_injection(), 1)

        # the injection value can be modified at runtime
        optimizer.set_lr_injection(0)
        self.assertEqual(optimizer.get_lr_injection(), 0)

        # setting the injection to 0 zeroes out the effective LR multiplier
        workspace.RunNet(model.net.Proto().name)
        self.assertEqual(workspace.FetchBlob('lr_multiplier'), 0)
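
# A minimal sketch of a concrete subclass, as referenced in the docstring
# above. Illustrative only: the class name and SGD settings are assumptions.
# It mixes in OptimizerTestBase to provide _createDense, and build_optimizer
# must accept the max_gradient_norm and allow_lr_injection keywords used in
# the tests above.
#
#     class SgdLRModificationTest(
#         OptimizerTestBase, LRModificationTestBase, unittest.TestCase
#     ):
#         def build_optimizer(self, model, **kwargs):
#             return optimizer.build_sgd(
#                 model, base_learning_rate=0.1, **kwargs)
#
#         def check_optimizer(self, optimizer):
#             pass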