Caffe2 - Python API
A deep learning, cross platform ML framework
gradient_checker.py
## @package gradient_checker
# Module caffe2.python.gradient_checker
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import numpy as np

from caffe2.python import core, workspace, net_drawer
from caffe2.proto import caffe2_pb2


def _get_grad_blob(grad_map, input_to_check):
    grad_blob = grad_map[input_to_check]

    if isinstance(grad_blob, core.BlobReference):
        return workspace.blobs[grad_blob]

    # If grad_blob is not a single blob, it should be a gradient slice.
    # To make it comparable with the estimated gradient, which is dense,
    # we need to first convert grad_blob to a dense gradient.
    assert isinstance(grad_blob, core.GradientSlice)
    dense_grad = 'tmp_dense_grad'
    sparse_to_dense_op = core.CreateOperator(
        'SparseToDense',
        [grad_blob.indices, grad_blob.values, input_to_check],
        dense_grad,
    )
    workspace.RunOperatorOnce(sparse_to_dense_op)
    return workspace.blobs[dense_grad]


def _get_grad(net, outputs, outputs_with_grad, input_values, inputs_with_grads):
    grad_net = net.Clone(net.Name() + "_copy")
    grad_map = grad_net.AddGradientOperators(outputs_with_grad)

    for name, value in (input_values or {}).items():
        workspace.blobs[name] = value

    for input_to_check in inputs_with_grads:
        assert input_to_check in grad_map, (
            '{} has no gradient, cannot check net gradient.'.format(
                input_to_check))
        assert str(input_to_check) in workspace.blobs

    workspace.RunNetOnce(grad_net)
    forward_results = [(output, workspace.blobs[output]) for output in outputs]
    grads = {input_to_check: _get_grad_blob(grad_map, input_to_check)
             for input_to_check in inputs_with_grads}

    return forward_results, grads, grad_net


def _assert_close(value1, value2, threshold, err_msg=''):
    np.testing.assert_allclose(
        value1, value2,
        atol=threshold, rtol=threshold,
        err_msg=err_msg,
    )

    delta = np.abs(value1 - value2).flatten()
    return np.mean(delta), max(delta)


class NetGradientChecker(object):
    @staticmethod
    def CompareNets(nets, outputs, outputs_with_grad_ids,
                    inputs_with_grads, input_values=None,
                    threshold=0.0000001, print_net_images=False):
        def _get_output_with_grad_names(net_outputs):
            return [net_outputs[i] for i in outputs_with_grad_ids]

        if print_net_images:
            for i, net in enumerate(nets):
                png = net_drawer.GetPydotGraph(net).create_png()
                with open("caffe2_net_forward_" + str(i) + net.Name() + ".png",
                          'wb') as f:
                    f.write(png)

        results = [
            _get_grad(net, net_outputs,
                      _get_output_with_grad_names(net_outputs),
                      input_values, inputs_with_grads)
            for net, net_outputs in zip(nets, outputs)
        ]

        if print_net_images:
            _, _, backward_nets = zip(*results)
            for i, net in enumerate(backward_nets):
                png = net_drawer.GetPydotGraph(net).create_png()
                with open("caffe2_net_" + str(i) + net.Name() + ".png",
                          'wb') as f:
                    f.write(png)

        first_net_results, first_net_grads, _ = results[0]
        for net_results, net_grads, _ in results[1:]:
            assert len(net_results) == len(first_net_results)
            for idx, ((blob1, blob_value1), (blob2, blob_value2)) in enumerate(
                    zip(first_net_results, net_results)):
                _assert_close(
                    blob_value1, blob_value2, threshold,
                    err_msg="Different forward pass results for output id {}. "
                    "Corresponding output blobs: {} and {}".format(
                        idx, blob1, blob2))

            assert net_grads.keys() == first_net_grads.keys()
            for blob, blob_grad_value in net_grads.items():
                _assert_close(
                    first_net_grads[blob], blob_grad_value, threshold,
                    err_msg="Different gradients for input {}".format(blob))

    @staticmethod
    def Check(net, outputs_with_grad, input_values,
              input_to_check, step_size=0.0001,
              threshold=0.05, print_net=True):

        net_results, net_grads, full_net = _get_grad(
            net, [], outputs_with_grad, input_values, [input_to_check])
        analytic_grad = net_grads[input_to_check]

        def GetLoss(new_value):
            workspace.blobs[input_to_check] = new_value
            workspace.RunNetOnce(full_net)
            return sum([
                workspace.blobs[output]
                for output in outputs_with_grad
            ]).sum()

        def GetValue(dim, delta):
            input_value = input_values[input_to_check].copy()
            input_value.flat[dim] += delta
            return input_value

        grad_estimate = np.zeros_like(input_values[input_to_check])
        for dim in range(input_values[input_to_check].size):
            pos_loss = GetLoss(GetValue(dim, step_size))
            neg_loss = GetLoss(GetValue(dim, -step_size))
            grad_estimate.flat[dim] = (pos_loss - neg_loss) / step_size / 2

        err_msg = "Error in gradient check for net_copy {}".format(
            net.Name())
        if print_net:
            err_msg += ": {}".format(net.Proto())

        return _assert_close(analytic_grad, grad_estimate, threshold, err_msg)


class GradientChecker(object):
    """A gradient checker in Python.

    This is not the most efficient way to check gradients, as the Python
    interface involves a lot of copying back and forth between numpy arrays
    and the workspace. Use at your own risk.
    """

    def __init__(
        self,
        stepsize,
        threshold,
        device_option=caffe2_pb2.DeviceOption(),
        workspace_name="gradient_check"
    ):
        self._stepsize = stepsize
        self._threshold = threshold
        self._device_option = device_option
        self._workspace_name = workspace_name

    def GetLossAndGrad(
        self, op, grad_ops, x, input_name, grad_name, outputs_with_grads
    ):
        # First, feed in the current input. Note that we are not changing
        # anything else, so we don't need to feed in others.
        workspace.FeedBlob(input_name, x, self._device_option)
        # Run.
        workspace.RunOperatorOnce(op)
        loss = 0.
        # Get the loss, feed in the gradients, and run gradient ops.
        for idx in outputs_with_grads:
            name = op.output[idx]
            arr = workspace.FetchBlob(name)
            loss += (arr**2).sum()
            workspace.FeedBlob(name + '_grad', arr, self._device_option)
        loss /= 2.
        # Run gradient ops.
        workspace.RunOperatorsOnce(grad_ops)
        # Get gradients.
        if isinstance(grad_name, core.GradientSlice):
            workspace.FeedBlob('zeros', np.zeros_like(x, dtype=np.float32))
            workspace.FeedBlob('ones', np.ones(1, dtype=np.float32))
            gv_cpu_op = core.CreateOperator(
                'EnsureCPUOutput', grad_name.values, grad_name.values + '_cpu',
                device_option=self._device_option
            )
            gi_cpu_op = core.CreateOperator(
                'EnsureCPUOutput', grad_name.indices, grad_name.indices + '_cpu',
                device_option=self._device_option
            )
            sparse_to_dense_op = core.CreateOperator(
                'ScatterWeightedSum',
                [
                    'zeros', 'ones', grad_name.indices + '_cpu',
                    grad_name.values + '_cpu', 'ones'
                ],
                'zeros',
            )
            workspace.RunOperatorOnce(gv_cpu_op)
            workspace.RunOperatorOnce(gi_cpu_op)
            workspace.RunOperatorOnce(sparse_to_dense_op)
            grad = workspace.FetchBlob('zeros')
        else:
            grad = workspace.FetchBlob(grad_name)
        return loss, grad

    def CheckSimple(
        self,
        op,
        inputs,
        input_to_check,
        outputs_with_grads,
        grad_ops=None,
        input_device_options=None
    ):
        """Checks the operator in a very simple fashion by stacking a sum of
        squares on the top.

        Inputs:
          op: the operator to be checked.
          inputs: the input data in numpy arrays.
          input_to_check: an index specifying which input blob we should
              check.
          outputs_with_grads: indices specifying which output blobs we will
              need to check gradients with. For these outputs, we will
              collect a squared sum and also feed in their gradients.
          grad_ops: the gradient operators. If not given, we will get them
              from the gradient registry.
          input_device_options: an optional mapping from input names to
              DeviceOptions (to override the default DeviceOption).
        Outputs:
          boolean: True if it passes, False if it does not pass.
        """
        if input_device_options is None:
            input_device_options = {}
        # Entering the checker workspace.
        old_ws_name = workspace.CurrentWorkspace()
        if self._workspace_name != old_ws_name:
            workspace.SwitchWorkspace(self._workspace_name, True)

        op.device_option.CopyFrom(self._device_option)
        if grad_ops is None:
            # TODO(jiayq): use the gradient registration instead of the old
            # hack.
            grad_ops, g_input = core.GradientRegistry.GetGradientForOp(
                op, [s + '_grad' for s in op.output])

        dims_to_check = inputs[input_to_check].size
        # First, feed in the input.
        for i, arr in enumerate(inputs):
            workspace.FeedBlob(
                op.input[i], arr,
                input_device_options.get(
                    op.input[i], self._device_option))

        # Get the loss and gradient for the original.
        input_name = op.input[input_to_check]
        grad_name = g_input[input_to_check]
        loss, grad = self.GetLossAndGrad(
            op, grad_ops, inputs[input_to_check], input_name, grad_name,
            outputs_with_grads
        )
        grad_estimate = np.zeros_like(inputs[input_to_check])
        if grad_estimate.shape != grad.shape:
            raise Exception(
                "Mismatched gradient shapes: estimated ({}), grad ({})".format(
                    grad_estimate.shape, grad.shape))

        for current_dim in range(dims_to_check):
            # Positive perturbation.
            inputs[input_to_check].flat[current_dim] += self._stepsize
            pos_loss, _ = self.GetLossAndGrad(
                op, grad_ops, inputs[input_to_check], input_name,
                grad_name, outputs_with_grads
            )
            # Negative perturbation.
            inputs[input_to_check].flat[current_dim] -= self._stepsize * 2
            neg_loss, _ = self.GetLossAndGrad(
                op, grad_ops, inputs[input_to_check], input_name,
                grad_name, outputs_with_grads
            )
            # Recover the value.
            inputs[input_to_check].flat[current_dim] += self._stepsize
            grad_estimate.flat[current_dim] = (
                pos_loss - neg_loss) / self._stepsize / 2
        # Now, check correctness.
        fail_mat = ~np.isclose(
            grad, grad_estimate, atol=self._threshold, rtol=self._threshold)
        if np.any(fail_mat):
            idx = np.flatnonzero(fail_mat)
            print('Failed. [idx, grad, grad_estimate] are:')
            print(np.vstack([idx, grad.flat[idx], grad_estimate.flat[idx]]).T)
            ret = False
        else:
            ret = True
        # After finishing, clean up.
        if self._workspace_name != old_ws_name:
            # We reset the workspace to make sure everything intermediate is
            # cleaned up. Note that there is no need to delete a workspace -
            # when empty it takes a very limited amount of memory.
            workspace.ResetWorkspace()
            workspace.SwitchWorkspace(old_ws_name)
        return ret, grad, grad_estimate
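
Usage example (not part of the module): a minimal sketch of checking a single operator with GradientChecker.CheckSimple. The operator choice, blob names, and step/threshold values below are illustrative assumptions.

    import numpy as np
    from caffe2.python import core
    from caffe2.python.gradient_checker import GradientChecker

    # The checker stacks a sum-of-squares loss on the checked outputs and
    # compares the registered gradient op against a central-difference estimate.
    checker = GradientChecker(stepsize=0.005, threshold=0.005)
    op = core.CreateOperator('Sigmoid', ['X'], ['Y'])
    X = np.random.randn(4, 5).astype(np.float32)
    passed, analytic_grad, numeric_grad = checker.CheckSimple(
        op, [X], 0, [0])  # inputs=[X], input_to_check=0, outputs_with_grads=[0]
    assert passed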
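
Similarly, NetGradientChecker.Check compares a whole net's analytic gradient for one input blob against a numerical estimate. A sketch, assuming a small hypothetical net and random input values:

    import numpy as np
    from caffe2.python import core
    from caffe2.python.gradient_checker import NetGradientChecker

    net = core.Net("example_net")
    net.FC(["X", "W", "b"], "Y")
    net.Sigmoid("Y", "Z")

    # Raises (via np.testing.assert_allclose) if the analytic gradient of
    # sum(Z) with respect to X disagrees with the central-difference estimate.
    NetGradientChecker.Check(
        net,
        outputs_with_grad=["Z"],
        input_values={
            "X": np.random.randn(2, 3).astype(np.float32),
            "W": np.random.randn(4, 3).astype(np.float32),
            "b": np.random.randn(4).astype(np.float32),
        },
        input_to_check="X",
    )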