"""
The Hypothesis library uses *property-based testing* to check
invariants about the code under test under a variety of random inputs.

The key idea here is to express properties of the code under test
(e.g. that it passes a gradient check, that it implements a reference
function, etc), and then generate random instances and verify they
satisfy these properties.

The main functions of interest are exposed on `HypothesisTestCase`.
You can usually just add a short function in this to generate an
arbitrary number of test cases for your operator.

The key functions are:

- `assertDeviceChecks(devices, op, inputs, outputs)`. This asserts that the
  operator computes the same outputs, regardless of which device it is
  executed on.
- `assertGradientChecks(device, op, inputs, output_,
  outputs_with_grads)`. This implements a standard numerical gradient checker
  for the operator in question.
- `assertReferenceChecks(device, op, inputs, reference)`. This runs the
  reference function (effectively calling `reference(*inputs)`), and compares
  that to the output of the operator.

`hypothesis_test_util.py` exposes some useful pre-built samplers.

- `hu.gcs` - a gradient checker device (`gc`) and device checker devices (`dc`)

- `hu.gcs_cpu_only` - a CPU-only gradient checker device (`gc`) and
  device checker devices (`dc`). Used for when your operator is only
  implemented on the CPU.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

from caffe2.proto import caffe2_pb2
from caffe2.python import (
    workspace, device_checker, gradient_checker, test_util, core)

import contextlib
import copy
import functools
import hypothesis
import hypothesis.extra.numpy
import hypothesis.strategies as st
import logging
import numpy as np
import os
def is_sandcastle():
    """Return True when running under the Sandcastle CI environment.

    Detected either by SANDCASTLE=1 or by TW_JOB_USER being 'sandcastle'.
    """
    if os.getenv('SANDCASTLE') == '1':
        return True
    elif os.getenv('TW_JOB_USER') == 'sandcastle':
        return True
    return False
def is_travis():
    """Return True when running on Travis CI (TRAVIS env var present)."""
    return 'TRAVIS' in os.environ
# Hypothesis run profiles. "sandcastle" is deterministic for CI; "dev" keeps
# example counts small for quick local runs; "debug" runs many more examples.
# NOTE(review): the profile names and max_examples values were lost in
# extraction — the values below are best-effort; confirm against upstream.
hypothesis.settings.register_profile(
    "sandcastle",
    hypothesis.settings(
        derandomize=True,
        suppress_health_check=[hypothesis.HealthCheck.too_slow],
        database=None,
        max_examples=100,
        min_satisfying_examples=1,
        verbosity=hypothesis.Verbosity.verbose))

hypothesis.settings.register_profile(
    "dev",
    hypothesis.settings(
        suppress_health_check=[hypothesis.HealthCheck.too_slow],
        database=None,
        max_examples=10,
        min_satisfying_examples=1,
        verbosity=hypothesis.Verbosity.verbose))

hypothesis.settings.register_profile(
    "debug",
    hypothesis.settings(
        suppress_health_check=[hypothesis.HealthCheck.too_slow],
        database=None,
        max_examples=1000,
        min_satisfying_examples=1,
        verbosity=hypothesis.Verbosity.verbose))
# Pick the active profile: Sandcastle CI always uses its own profile;
# otherwise honour CAFFE2_HYPOTHESIS_PROFILE, defaulting to 'dev'.
hypothesis.settings.load_profile(
    'sandcastle' if is_sandcastle()
    else os.getenv('CAFFE2_HYPOTHESIS_PROFILE', 'dev'))
def dims(min_value=1, max_value=5):
    """Strategy for one tensor dimension size, inclusive of both bounds."""
    bounds = {"min_value": min_value, "max_value": max_value}
    return st.integers(**bounds)
def elements_of_type(dtype=np.float32, filter_=None):
    """Return a hypothesis strategy generating scalar elements of `dtype`.

    Floats are drawn from [-1, 1]; int32/int64 from the non-negative range
    of the type; bools uniformly. An optional `filter_` predicate restricts
    the generated values.

    Raises:
        ValueError: for dtypes with no default element strategy.
    """
    if dtype in (np.float16, np.float32, np.float64):
        elems = st.floats(min_value=-1.0, max_value=1.0)
    elif dtype is np.int32:
        elems = st.integers(min_value=0, max_value=2 ** 31 - 1)
    elif dtype is np.int64:
        elems = st.integers(min_value=0, max_value=2 ** 63 - 1)
    elif dtype in (bool, np.bool_):
        # The original checked `np.bool`, an alias of builtin bool that was
        # removed in modern NumPy; accept both spellings.
        elems = st.booleans()
    else:
        raise ValueError("Unexpected dtype without elements provided")
    return elems if filter_ is None else elems.filter(filter_)
def arrays(dims, dtype=np.float32, elements=None):
    """Strategy generating an ndarray of shape `dims` and dtype `dtype`.

    When `elements` is None, a default per-dtype element strategy is used.
    """
    if elements is None:
        elements = elements_of_type(dtype)
    return hypothesis.extra.numpy.arrays(dtype, dims, elements=elements)
def tensor(min_dim=1, max_dim=4, dtype=np.float32, elements=None, **kwargs):
    """Strategy generating a random tensor of rank min_dim..max_dim.

    Extra kwargs (min_value/max_value) bound each dimension size via `dims`.
    """
    dims_ = st.lists(dims(**kwargs), min_size=min_dim, max_size=max_dim)
    return dims_.flatmap(
        lambda dims: arrays(dims, dtype, elements))
def tensor1d(min_len=1, max_len=64, dtype=np.float32, elements=None):
    """Strategy generating a rank-1 tensor with length in [min_len, max_len]."""
    return tensor(
        min_dim=1,
        max_dim=1,
        dtype=dtype,
        elements=elements,
        min_value=min_len,
        max_value=max_len,
    )
def segment_ids(size, is_sorted):
    """Strategy generating a length-`size` int32 vector of segment ids.

    When is_sorted, ids form a non-decreasing sequence starting at 0 (a
    cumulative sum over random booleans, shifted so the first id is 0);
    otherwise ids are arbitrary in [0, 2 * size].
    """
    if size == 0:
        return st.just(np.empty(shape=[0], dtype=np.int32))
    if is_sorted:
        return arrays(
            [size],
            dtype=np.int32,
            elements=st.booleans()).map(
                lambda x: np.cumsum(x, dtype=np.int32) - x[0])
    else:
        return arrays(
            [size],
            dtype=np.int32,
            elements=st.integers(min_value=0, max_value=2 * size))
def lengths(size, min_segments=None, max_segments=None, **kwargs):
    """Strategy generating an int32 vector of segment lengths summing to `size`.

    First draws the number of segment borders, then places them at random
    positions in [0, size]; lengths are the distances between consecutive
    (sorted) borders, with 0 and `size` always included as borders.
    """
    if min_segments is None:
        min_segments = 0
    if max_segments is None:
        max_segments = size
    assert min_segments >= 0
    assert min_segments <= max_segments
    if size == 0 and max_segments == 0:
        return st.just(np.empty(shape=[0], dtype=np.int32))
    assert max_segments > 0, "size is not 0, need at least one segment"
    return st.integers(
        min_value=max(min_segments - 1, 0), max_value=max_segments - 1
    ).flatmap(
        lambda num_borders:
        hypothesis.extra.numpy.arrays(
            np.int32, num_borders, elements=st.integers(
                min_value=0, max_value=size
            )
        )
    ).map(
        lambda x: np.append(x, np.array([0, size], dtype=np.int32))
    ).map(sorted).map(np.diff)
def segmented_tensor(
    min_dim=1,
    max_dim=4,
    dtype=np.float32,
    is_sorted=True,
    elements=None,
    segment_generator=segment_ids,
    allow_empty=False,
    **kwargs
):
    """Strategy generating a (data, segmentation) pair.

    `data` is a random tensor; the second element is produced by
    `segment_generator(first_dim, is_sorted=...)` (segment ids by default).
    With allow_empty, the first dimension may be 0.
    NOTE(review): parameter order was partially lost in extraction —
    confirm against upstream caffe2.
    """
    gen_empty = st.booleans() if allow_empty else st.just(False)
    data_dims_ = st.lists(dims(**kwargs), min_size=min_dim, max_size=max_dim)
    # Prepend a 0 dimension when the "empty" coin flip comes up.
    data_dims_ = st.tuples(
        gen_empty, data_dims_
    ).map(lambda pair: ([0] if pair[0] else []) + pair[1])
    return data_dims_.flatmap(lambda data_dims: st.tuples(
        arrays(data_dims, dtype, elements),
        segment_generator(data_dims[0], is_sorted=is_sorted),
    ))
def lengths_tensor(min_segments=None, max_segments=None, *args, **kwargs):
    """Like segmented_tensor, but the segmentation is a lengths vector."""
    length_gen = functools.partial(
        lengths,
        min_segments=min_segments,
        max_segments=max_segments,
    )
    return segmented_tensor(*args, segment_generator=length_gen, **kwargs)
def sparse_segmented_tensor(min_dim=1, max_dim=4, dtype=np.float32,
                            is_sorted=True, elements=None, allow_empty=False,
                            segment_generator=segment_ids, itype=np.int64,
                            **kwargs):
    """Strategy generating (data, indices, segmentation) for sparse segment ops.

    `indices` (dtype `itype`) selects rows of `data`; the segmentation is
    produced by `segment_generator` over the number of indices. With
    allow_empty, the index vector may have length 0.
    NOTE(review): the inner st.just branch was lost in extraction and is
    reconstructed — confirm against upstream caffe2.
    """
    gen_empty = st.booleans() if allow_empty else st.just(False)
    data_dims_ = st.lists(dims(**kwargs), min_size=min_dim, max_size=max_dim)
    all_dims_ = st.tuples(gen_empty, data_dims_).flatmap(
        lambda pair: st.tuples(
            st.just(pair[1]),
            (st.integers(min_value=1, max_value=pair[1][0])
             if not pair[0] else st.just(0)),
        ))
    return all_dims_.flatmap(lambda dims: st.tuples(
        arrays(dims[0], dtype, elements),
        arrays(dims[1], dtype=itype, elements=st.integers(
            min_value=0, max_value=dims[0][0] - 1)),
        segment_generator(dims[1], is_sorted=is_sorted),
    ))
def sparse_lengths_tensor(**kwargs):
    """sparse_segmented_tensor specialised to lengths-style segmentation."""
    return sparse_segmented_tensor(segment_generator=lengths, **kwargs)
def tensors(n, min_dim=1, max_dim=4, dtype=np.float32, elements=None, **kwargs):
    """Strategy generating a list of `n` tensors that share one random shape."""
    dims_ = st.lists(dims(**kwargs), min_size=min_dim, max_size=max_dim)
    return dims_.flatmap(
        lambda dims: st.lists(
            arrays(dims, dtype, elements),
            min_size=n,
            max_size=n))
def tensors1d(n, min_len=1, max_len=64, dtype=np.float32, elements=None):
    """Strategy generating `n` rank-1 tensors of one shared random length."""
    return tensors(
        n, 1, 1, dtype, elements, min_value=min_len, max_value=max_len
    )
# Default DeviceOptions used by the checker helpers below. The CUDA option
# is only included when this build of caffe2 has GPU support.
cpu_do = caffe2_pb2.DeviceOption()
gpu_do = caffe2_pb2.DeviceOption(device_type=caffe2_pb2.CUDA)
device_options = [cpu_do] + ([gpu_do] if workspace.has_gpu_support else [])
# The CPU plus one DeviceOption per visible CUDA device.
expanded_device_options = [cpu_do] + (
    [caffe2_pb2.DeviceOption(device_type=caffe2_pb2.CUDA, cuda_gpu_id=i)
     for i in range(workspace.NumCudaDevices())]
    if workspace.has_gpu_support else [])
def device_checker_device_options():
    """Strategy yielding the full device-checker device list as one value."""
    return st.just(device_options)
def gradient_checker_device_option():
    """Strategy sampling one device option for gradient checking."""
    return st.sampled_from(device_options)
# Sampler kwargs bundles for @given: `gc` is a gradient-checker device,
# `dc` a device-checker device list (use as **hu.gcs / **hu.gcs_cpu_only).
gcs = dict(
    gc=gradient_checker_device_option(),
    dc=device_checker_device_options()
)

gcs_cpu_only = dict(gc=st.sampled_from([cpu_do]), dc=st.just([cpu_do]))
gcs_gpu_only = dict(gc=st.sampled_from([gpu_do]), dc=st.just([gpu_do]))
@contextlib.contextmanager
def temp_workspace(name=b"temp_ws"):
    """Run the enclosed code inside a scratch caffe2 workspace.

    Switches to (creating if needed) workspace `name`; on exit resets the
    scratch workspace and switches back to the previously-current one.
    The try/finally ensures the original workspace is restored even when
    the body raises.
    """
    old_ws_name = workspace.CurrentWorkspace()
    workspace.SwitchWorkspace(name, True)
    try:
        yield
    finally:
        workspace.ResetWorkspace()
        workspace.SwitchWorkspace(old_ws_name)
def runOpBenchmark(
    device_option,
    op,
    inputs,
    input_device_options=None,
    iterations=10,
):
    """Benchmark `op` on `device_option`, feeding `inputs`, for `iterations`.

    Returns the result of workspace.BenchmarkNet. Per-blob device options
    may be overridden via `input_device_options` (blob name -> DeviceOption).
    NOTE(review): the def line was lost in extraction; the signature is
    reconstructed from the body — confirm against upstream caffe2.
    """
    if input_device_options is None:
        input_device_options = {}
    op = copy.deepcopy(op)
    op.device_option.CopyFrom(device_option)
    net = caffe2_pb2.NetDef()
    net.op.extend([op])
    net.name = op.name if op.name else "test"

    with temp_workspace():
        for (n, b) in zip(op.input, inputs):
            workspace.FeedBlob(
                n,
                b,
                device_option=input_device_options.get(n, device_option)
            )
        workspace.CreateNet(net)
        ret = workspace.BenchmarkNet(net.name, 1, iterations, True)
    return ret
def assertDeviceChecks(
    self,
    device_options,
    op,
    inputs,
    outputs_to_check,
    input_device_options=None,
    threshold=0.01
):
    """Asserts that the operator computes the same outputs, regardless of
    which device it is executed on.

    Useful for checking the consistency of GPU and CPU
    implementations of operators.

    Usage example:

        @given(inputs=hu.tensors(n=2), in_place=st.booleans(), **hu.gcs)
        def test_sum(self, inputs, in_place, gc, dc):
            op = core.CreateOperator("Sum", ["X1", "X2"],
                                     ["Y" if not in_place else "X1"])
            X1, X2 = inputs
            self.assertDeviceChecks(dc, op, [X1, X2], [0])
    """
    dc = device_checker.DeviceChecker(
        threshold,
        device_options=device_options
    )
    self.assertTrue(
        dc.CheckSimple(op, inputs, outputs_to_check, input_device_options)
    )
def assertGradientChecks(
    self,
    device_option,
    op,
    inputs,
    outputs_to_check,
    outputs_with_grads,
    grad_ops=None,
    threshold=0.005,
    stepsize=0.05,
    input_device_options=None,
):
    """Implements a standard numerical gradient checker for the operator
    in question.

    Useful for checking the consistency of the forward and
    backward implementations of operators.

    Usage example:

        @given(inputs=hu.tensors(n=2), in_place=st.booleans(), **hu.gcs)
        def test_sum(self, inputs, in_place, gc, dc):
            op = core.CreateOperator("Sum", ["X1", "X2"],
                                     ["Y" if not in_place else "X1"])
            X1, X2 = inputs
            self.assertGradientChecks(gc, op, [X1, X2], 0, [0])
    """
    gc = gradient_checker.GradientChecker(
        stepsize=stepsize,
        threshold=threshold,
        device_option=device_option,
        workspace_name=str(device_option),
    )
    res, grad, grad_estimated = gc.CheckSimple(
        op, inputs, outputs_to_check, outputs_with_grads,
        grad_ops=grad_ops,
        input_device_options=input_device_options
    )
    self.assertEqual(grad.shape, grad_estimated.shape)
    self.assertTrue(
        res,
        "Gradient check failed for input " + str(op.input[outputs_to_check])
    )
def _assertGradReferenceChecks(
    self,
    op,
    inputs,
    ref_outputs,
    output_to_grad,
    grad_reference,
    threshold=1e-4,
):
    """Compare the autogenerated backward pass of `op` against
    `grad_reference(output_grad, ref_outputs, inputs)`.

    Supports both dense gradients (a single BlobReference in the grad map)
    and sparse gradients (an object with .values/.indices), in which case
    the reference must return (values, indices) pairs.
    NOTE(review): the dense/sparse branch skeleton was partially lost in
    extraction and is reconstructed — confirm against upstream caffe2.
    """
    grad_blob_name = output_to_grad + '_grad'
    grad_ops, grad_map = core.GradientRegistry.GetBackwardPass(
        [op], {output_to_grad: grad_blob_name})
    output_grad = workspace.FetchBlob(output_to_grad)
    grad_ref_outputs = grad_reference(output_grad, ref_outputs, inputs)
    workspace.FeedBlob(grad_blob_name, workspace.FetchBlob(output_to_grad))
    workspace.RunOperatorsOnce(grad_ops)

    self.assertEqual(len(grad_ref_outputs), len(inputs))
    for (n, ref) in zip(op.input, grad_ref_outputs):
        grad_names = grad_map.get(n)
        if grad_names is None:
            # No gradient flows to this input; the reference must agree.
            self.assertIsNone(ref)
        else:
            if isinstance(grad_names, core.BlobReference):
                # Dense gradient: single blob holding the values.
                ref_vals = ref
                ref_indices = None
                val_name = grad_names
            else:
                # Sparse gradient: separate values and indices blobs.
                ref_vals, ref_indices = ref
                val_name = grad_names.values
            vals = workspace.FetchBlob(str(val_name))
            np.testing.assert_allclose(
                vals,
                ref_vals,
                atol=threshold,
                rtol=threshold,
                err_msg='Gradient {0} (x) is not matching the reference (y)'
                .format(val_name),
            )
            if ref_indices is not None:
                indices = workspace.FetchBlob(str(grad_names.indices))
                np.testing.assert_allclose(indices, ref_indices,
                                           atol=1e-4, rtol=1e-4)
def _assertInferTensorChecks(self, name, shapes, types, output):
    """Validate inferred shape/type for blob `name` against the real output.

    Mismatches only log a warning unless CAFFE2_ASSERT_SHAPEINFERENCE=1,
    since shape/type inference is still best-effort.
    """
    if name not in shapes:
        # No inferred shape/type information for this blob; nothing to do.
        return
    output = workspace.FetchBlob(name)
    if type(output) is np.ndarray:
        if output.dtype == np.dtype('float64'):
            correct_type = caffe2_pb2.TensorProto.DOUBLE
        elif output.dtype == np.dtype('float32'):
            correct_type = caffe2_pb2.TensorProto.FLOAT
        elif output.dtype == np.dtype('int32'):
            correct_type = caffe2_pb2.TensorProto.INT32
        elif output.dtype == np.dtype('int64'):
            correct_type = caffe2_pb2.TensorProto.INT64
        else:
            # BUG FIX: the original formatted `np.dtype` (the class itself)
            # instead of this array's actual dtype.
            correct_type = "unknown {}".format(output.dtype)
    else:
        correct_type = str(type(output))
    try:
        np.testing.assert_array_equal(
            np.array(shapes[name]).astype(np.int32),
            np.array(output.shape).astype(np.int32),
            err_msg='Shape {} mismatch: {} vs. {}'.format(
                name, shapes[name], output.shape))
        # int32 outputs are skipped from type checking.
        # NOTE(review): this guard is reconstructed from fragments — confirm
        # the intended exemption against upstream caffe2.
        if correct_type != caffe2_pb2.TensorProto.INT32:
            np.testing.assert_equal(
                types[name],
                correct_type,
                err_msg='Type {} mismatch: {} vs. {}'.format(
                    name, types[name], correct_type,
                )
            )
    except AssertionError as e:
        # Shape/type inference is best-effort: warn by default, fail hard
        # only when explicitly requested via the env var.
        logging.warning(str(e))
        if os.getenv('CAFFE2_ASSERT_SHAPEINFERENCE') == '1':
            raise e
def assertReferenceChecks(
    self,
    device_option,
    op,
    inputs,
    reference,
    input_device_options=None,
    threshold=1e-4,
    output_to_grad=None,
    grad_reference=None,
    atol=None,
    outputs_to_check=None,
):
    """This runs the reference Python function implementation
    (effectively calling `reference(*inputs)`), and compares that
    to the output of the operator, with an absolute/relative tolerance
    given by the `threshold` parameter.

    Useful for checking the implementation matches the Python
    (typically NumPy) implementation of the same functionality.

    Usage example:

        @given(X=hu.tensor(), inplace=st.booleans(), **hu.gcs)
        def test_softsign(self, X, inplace, gc, dc):
            op = core.CreateOperator(
                "Softsign", ["X"], ["X" if inplace else "Y"])

            def softsign(X):
                return (X / (1 + np.abs(X)),)

            self.assertReferenceChecks(gc, op, [X], softsign)
    """
    if input_device_options is None:
        input_device_options = {}
    if atol is None:
        # Absolute tolerance defaults to the relative threshold.
        atol = threshold

    op = copy.deepcopy(op)
    op.device_option.CopyFrom(device_option)

    with temp_workspace():
        if (len(op.input) > len(inputs)):
            raise ValueError(
                'must supply an input for each input on the op: %s vs %s' %
                (op.input, inputs))
        for (n, b) in zip(op.input, inputs):
            workspace.FeedBlob(
                n,
                b,
                device_option=input_device_options.get(n, device_option)
            )
        net = core.Net("opnet")
        net.Proto().op.extend([op])
        test_shape_inference = False
        try:
            (shapes, types) = workspace.InferShapesAndTypes([net])
            test_shape_inference = True
        except RuntimeError as e:
            # Shape inference is best-effort: warn by default, fail hard
            # only when explicitly requested via the env var.
            logging.warning(str(e))
            if os.getenv('CAFFE2_ASSERT_SHAPEINFERENCE') == '1':
                raise e
        workspace.RunNetOnce(net)
        reference_outputs = reference(*inputs)
        if not (isinstance(reference_outputs, tuple) or
                isinstance(reference_outputs, list)):
            raise RuntimeError(
                "You are providing a wrong reference implementation. A "
                "proper one should return a tuple/list of numpy arrays.")
        if not outputs_to_check:
            self.assertEqual(len(reference_outputs), len(op.output))
            outputs_to_check = list(range(len(op.output)))
        for (output_index, ref) in zip(outputs_to_check, reference_outputs):
            output_blob_name = op.output[output_index]
            output = workspace.FetchBlob(output_blob_name)
            if output.dtype.kind in ('S', 'O'):
                # String/object arrays: exact equality only.
                np.testing.assert_array_equal(output, ref)
            else:
                np.testing.assert_allclose(
                    output, ref, atol=atol, rtol=threshold,
                    err_msg=(
                        'Output {0} is not matching the reference'.format(
                            output_blob_name,
                        )),
                )
            if test_shape_inference:
                self._assertInferTensorChecks(
                    output_blob_name, shapes, types, output)
        if grad_reference is not None:
            assert output_to_grad is not None, \
                "If grad_reference is set," \
                "output_to_grad has to be set as well"
            with core.DeviceScope(device_option):
                self._assertGradReferenceChecks(
                    op, inputs, reference_outputs,
                    output_to_grad, grad_reference,
                    threshold=threshold)
def assertValidationChecks(
    self,
    device_option,
    op,
    inputs,
    validator,
    input_device_options=None,
    as_kwargs=True,
    init_net=None,
):
    """Run `op` once and hand its inputs and outputs to `validator`.

    With as_kwargs (default), the validator is called with one keyword
    argument per blob name; otherwise as validator(inputs=..., outputs=...).
    An optional `init_net` is run before the operator.
    NOTE(review): parts of the signature were lost in extraction and are
    reconstructed from the body — confirm against upstream caffe2.
    """
    if input_device_options is None:
        input_device_options = {}
    if as_kwargs:
        # kwargs-style dispatch needs distinct blob names.
        assert len(set(list(op.input) + list(op.output))) == \
            len(op.input) + len(op.output), \
            "in-place ops are not supported in as_kwargs mode"
    op = copy.deepcopy(op)
    op.device_option.CopyFrom(device_option)

    with temp_workspace():
        for (n, b) in zip(op.input, inputs):
            workspace.FeedBlob(
                n,
                b,
                device_option=input_device_options.get(n, device_option)
            )
        if init_net:
            workspace.RunNetOnce(init_net)
        workspace.RunOperatorOnce(op)
        outputs = [workspace.FetchBlob(n) for n in op.output]
        if as_kwargs:
            validator(**dict(zip(
                list(op.input) + list(op.output), inputs + outputs)))
        else:
            validator(inputs=inputs, outputs=outputs)
def assertRunOpRaises(
    self,
    device_option,
    op,
    inputs,
    input_device_options=None,
    exception=(Exception,),
    regexp=None,
):
    """Assert that running `op` raises `exception`.

    When `regexp` is given, the exception message must also match it.
    """
    if input_device_options is None:
        input_device_options = {}
    op = copy.deepcopy(op)
    op.device_option.CopyFrom(device_option)

    with temp_workspace():
        for (n, b) in zip(op.input, inputs):
            workspace.FeedBlob(
                n,
                b,
                device_option=input_device_options.get(n, device_option)
            )
        if regexp is None:
            self.assertRaises(exception, workspace.RunOperatorOnce, op)
        else:
            self.assertRaisesRegexp(
                exception, regexp, workspace.RunOperatorOnce, op)
def _assertInferTensorChecks(self, name, shapes, types, output)
def _assertGradReferenceChecks(self, op, inputs, ref_outputs, output_to_grad, grad_reference, threshold=1e-4)
def assertReferenceChecks(self, device_option, op, inputs, reference, input_device_options=None, threshold=1e-4, output_to_grad=None, grad_reference=None, atol=None, outputs_to_check=None)
def assertDeviceChecks(self, device_options, op, inputs, outputs_to_check, input_device_options=None, threshold=0.01)
def assertGradientChecks(self, device_option, op, inputs, outputs_to_check, outputs_with_grads, grad_ops=None, threshold=0.005, stepsize=0.05, input_device_options=None)