3 from __future__
import absolute_import
4 from __future__
import division
5 from __future__
import print_function
6 from __future__
import unicode_literals
11 from caffe2.proto
import caffe2_pb2
def _get_grad_blob(grad_map, input_to_check):
    """Fetch the gradient recorded for ``input_to_check`` from the workspace.

    Args:
        grad_map: mapping from an input blob to its gradient. Each value is
            expected to be either a ``core.BlobReference`` or a
            ``core.GradientSlice``.
        input_to_check: key into ``grad_map``. For slice gradients it is also
            passed as the third input of the densifying operator.

    Returns:
        ``workspace.blobs[grad_blob]`` when the gradient is a single blob,
        otherwise the contents of the temporary ``'tmp_dense_grad'`` blob
        produced from the slice's indices and values.

    Raises:
        KeyError: if ``input_to_check`` has no entry in ``grad_map``.
        AssertionError: if the gradient is neither a blob reference nor a
            gradient slice.
    """
    grad_blob = grad_map[input_to_check]

    if isinstance(grad_blob, core.BlobReference):
        return workspace.blobs[grad_blob]

    # Anything that is not a single blob must be an (indices, values) slice;
    # it is converted into one blob before being read back.
    assert isinstance(grad_blob, core.GradientSlice)
    dense_grad = 'tmp_dense_grad'
    # NOTE(review): the op-type and output arguments of this call were not
    # present in the reviewed text. 'SparseToDense' writing into `dense_grad`
    # is inferred from the (indices, values, shape-reference) input triple and
    # from the blob read back below -- confirm against the upstream file.
    sparse_to_dense_op = core.CreateOperator(
        'SparseToDense',
        [grad_blob.indices, grad_blob.values, input_to_check],
        dense_grad,
    )
    workspace.RunOperatorOnce(sparse_to_dense_op)
    return workspace.blobs[dense_grad]
def _get_grad(net, outputs, outputs_with_grad, input_values, inputs_with_grads):
    """Run a gradient-augmented copy of ``net`` once and collect its results.

    The net is cloned under ``net.Name() + "_copy"``, gradient operators are
    added to the clone for ``outputs_with_grad``, ``input_values`` are written
    into the workspace, and the clone is run once.

    Args:
        net: the net to clone; the original object is not run here.
        outputs: blobs whose values are read back after the run.
        outputs_with_grad: passed to ``AddGradientOperators`` on the clone.
        input_values: optional mapping of blob name to value written into
            ``workspace.blobs`` before running; ``None`` is treated as empty.
        inputs_with_grads: blobs whose gradients are fetched after the run.

    Returns:
        A ``(forward_results, grads, grad_net)`` tuple where
        ``forward_results`` is a list of ``(output, value)`` pairs, ``grads``
        maps each entry of ``inputs_with_grads`` to its gradient value, and
        ``grad_net`` is the cloned net that was run.

    Raises:
        AssertionError: if an entry of ``inputs_with_grads`` has no gradient
            in the gradient map or is not present in the workspace.
    """
    grad_net = net.Clone(net.Name() + "_copy")
    grad_map = grad_net.AddGradientOperators(outputs_with_grad)

    for name, value in (input_values or {}).items():
        workspace.blobs[name] = value

    # Validate every requested input before spending time on the run.
    for input_to_check in inputs_with_grads:
        assert input_to_check in grad_map, (
            '{} has no gradient, cannot check net gradient.'.format(
                input_to_check))
        assert str(input_to_check) in workspace.blobs, (
            '{} is not present in the workspace.'.format(input_to_check))

    workspace.RunNetOnce(grad_net)
    forward_results = [(output, workspace.blobs[output]) for output in outputs]
    grads = {
        input_to_check: _get_grad_blob(grad_map, input_to_check)
        for input_to_check in inputs_with_grads
    }

    return forward_results, grads, grad_net
55 def _assert_close(value1, value2, threshold, err_msg=''):
56 np.testing.assert_allclose(
58 atol=threshold, rtol=threshold,
62 delta = np.abs(value1 - value2).flatten()
63 return np.mean(delta), max(delta)
def CompareNets(nets, outputs, outputs_with_grad_ids,
                inputs_with_grads, input_values=None,
                threshold=0.0000001, print_net_images=False):
    """Check that several nets yield the same outputs and input gradients.

    Each net is run through ``_get_grad``; the results of every net after the
    first are then compared against the first net's results.

    Args:
        nets: sequence of nets to compare.
        outputs: one sequence of output blobs per net, in the same order as
            ``nets``.
        outputs_with_grad_ids: indices into each net's output sequence
            selecting the outputs that gradients are added for.
        inputs_with_grads: input blobs whose gradients are compared.
        input_values: optional mapping of blob name to value, forwarded to
            ``_get_grad``.
        threshold: tolerance forwarded to ``_assert_close``.
        print_net_images: when true, PNG renderings of the nets are written
            to the current directory.

    Raises:
        AssertionError: if the nets differ in the number of outputs, in the
            set of gradient keys, or in any compared value.
    """
    # NOTE(review): the `print_net_images` guards, the `as f: f.write(png)`
    # tails, the `_assert_close(` call heads and the format arguments were
    # not present in the reviewed text and are restored from the surrounding
    # tokens -- confirm against the upstream file.

    def _get_output_with_grad_names(net_outputs):
        return [net_outputs[i] for i in outputs_with_grad_ids]

    if print_net_images:
        for i, net in enumerate(nets):
            png = net_drawer.GetPydotGraph(net).create_png()
            with open("caffe2_net_forward_" + str(i) + net.Name() + ".png",
                      'wb') as f:
                f.write(png)

    results = [
        _get_grad(net, net_outputs,
                  _get_output_with_grad_names(net_outputs),
                  input_values, inputs_with_grads)
        for net, net_outputs in zip(nets, outputs)
    ]

    if print_net_images:
        # Third element of every result is the gradient-augmented clone.
        _, _, backward_nets = zip(*results)
        for i, net in enumerate(backward_nets):
            png = net_drawer.GetPydotGraph(net).create_png()
            with open("caffe2_net_" + str(i) + net.Name() + ".png",
                      'wb') as f:
                f.write(png)

    first_net_results, first_net_grads, _ = results[0]
    for net_results, net_grads, _ in results[1:]:
        assert len(net_results) == len(first_net_results)
        for idx, ((blob1, blob_value1), (blob2, blob_value2)) in enumerate(
                zip(first_net_results, net_results)):
            _assert_close(
                blob_value1, blob_value2, threshold,
                err_msg="Different forward pass results for output id {}. "
                "Corresponding output blobs: {} and {}".format(
                    idx, blob1, blob2))

        assert net_grads.keys() == first_net_grads.keys()
        for blob, blob_grad_value in net_grads.items():
            _assert_close(
                first_net_grads[blob], blob_grad_value, threshold,
                err_msg="Different gradients for input {}".format(blob))
115 def Check(net, outputs_with_grad, input_values,
116 input_to_check, step_size=0.0001,
117 threshold=0.05, print_net=
True):
119 net_results, net_grads, full_net = _get_grad(
120 net, [], outputs_with_grad, input_values, [input_to_check])
121 analytic_grad = net_grads[input_to_check]
123 def GetLoss(new_value):
124 workspace.blobs[input_to_check] = new_value
125 workspace.RunNetOnce(full_net)
127 workspace.blobs[output]
128 for output
in outputs_with_grad
131 def GetValue(dim, delta):
132 input_value = input_values[input_to_check].copy()
133 input_value.flat[dim] += delta
136 grad_estimate = np.zeros_like(input_values[input_to_check])
137 for dim
in range(input_values[input_to_check].size):
138 pos_loss = GetLoss(GetValue(dim, step_size))
139 neg_loss = GetLoss(GetValue(dim, -step_size))
140 grad_estimate.flat[dim] = (pos_loss - neg_loss) / step_size / 2
142 err_msg =
"Error in gradient check for net_copy {}".format(
145 err_msg +=
": {}".format(net.Proto())
147 return _assert_close(analytic_grad, grad_estimate, threshold, err_msg)
150 """A gradient checker in Python. 152 This is not the most efficient way to check gradients, as the Python 153 interface will involve a lot of copies back and forth operations. Use at your 161 device_option=caffe2_pb2.DeviceOption(),
162 workspace_name=
"gradient_check" 170 self, op, grad_ops, x, input_name, grad_name, outputs_with_grads
176 workspace.RunOperatorOnce(op)
179 for idx
in outputs_with_grads:
180 name = op.output[idx]
181 arr = workspace.FetchBlob(name)
182 loss += (arr**2).sum()
186 workspace.RunOperatorsOnce(grad_ops)
188 if isinstance(grad_name, core.GradientSlice):
189 workspace.FeedBlob(
'zeros', np.zeros_like(x, dtype=np.float32))
190 workspace.FeedBlob(
'ones', np.ones(1, dtype=np.float32))
191 gv_cpu_op = core.CreateOperator(
192 'EnsureCPUOutput', grad_name.values, grad_name.values +
'_cpu',
195 gi_cpu_op = core.CreateOperator(
196 'EnsureCPUOutput', grad_name.indices, grad_name.indices +
'_cpu',
199 sparse_to_dense_op = core.CreateOperator(
200 'ScatterWeightedSum',
202 'zeros',
'ones', grad_name.indices +
'_cpu',
203 grad_name.values +
'_cpu',
'ones' 207 workspace.RunOperatorOnce(gv_cpu_op)
208 workspace.RunOperatorOnce(gi_cpu_op)
209 workspace.RunOperatorOnce(sparse_to_dense_op)
210 grad = workspace.FetchBlob(
'zeros')
212 grad = workspace.FetchBlob(grad_name)
222 input_device_options=
None 224 """Checks the operator in a very simple fashion by stacking a sum of 228 op: the operator to be checked. 229 inputs: the input data in numpy arrays. 230 input_to_check: an index specifying which input blob we should 232 outputs_with_grads: indices specifying which output blobs will we 233 need to check gradients with. For these outputs, we will collect a 234 squared sum and also feed in their gradients. 235 grad_operator: the gradient operator. If not given, we will get the 236 gradient operator from the gradient registry. 237 input_device_options: an optional mapping from input names to 238 DeviceOptions (to override the default DeviceOption) 240 boolean: True if it passes, False if it does not pass. 242 if input_device_options
is None:
243 input_device_options = {}
245 old_ws_name = workspace.CurrentWorkspace()
253 grad_ops, g_input = core.GradientRegistry.GetGradientForOp(
254 op, [s +
'_grad' for s
in op.output])
256 dims_to_check = inputs[input_to_check].size
258 for i, arr
in enumerate(inputs):
261 input_device_options.get(
265 input_name = op.input[input_to_check]
266 grad_name = g_input[input_to_check]
268 op, grad_ops, inputs[input_to_check], input_name, grad_name,
271 grad_estimate = np.zeros_like(inputs[input_to_check])
272 if grad_estimate.shape != grad.shape:
274 "Mismatched gradient shapes: estimated ({}), grad ({})".format(
275 grad_estimate.shape, grad.shape))
277 for current_dim
in range(dims_to_check):
279 inputs[input_to_check].flat[current_dim] += self.
_stepsize 281 op, grad_ops, inputs[input_to_check], input_name,
282 grad_name, outputs_with_grads
285 inputs[input_to_check].flat[current_dim] -= self.
_stepsize * 2
287 op, grad_ops, inputs[input_to_check], input_name,
288 grad_name, outputs_with_grads
291 inputs[input_to_check].flat[current_dim] += self.
_stepsize 292 grad_estimate.flat[current_dim] = (
293 pos_loss - neg_loss) / self.
_stepsize / 2
295 fail_mat = ~np.isclose(
298 idx = np.flatnonzero(fail_mat)
299 print(
'Failed. [idx, grad, grad_estimate] are:')
300 print(np.vstack([idx, grad.flat[idx], grad_estimate.flat[idx]]).T)
309 workspace.ResetWorkspace()
310 workspace.SwitchWorkspace(old_ws_name)
311 return ret, grad, grad_estimate
def CheckSimple(self, op, inputs, input_to_check, outputs_with_grads, grad_ops=None, input_device_options=None)
def GetLossAndGrad(self, op, grad_ops, x, input_name, grad_name, outputs_with_grads)