Caffe2 - Python API
A deep learning, cross-platform ML framework
model_helper.py
1 ## @package model_helper
2 # Module caffe2.python.model_helper
3 from __future__ import absolute_import
4 from __future__ import division
5 from __future__ import print_function
6 from __future__ import unicode_literals
7 
8 from caffe2.python import core, scope, workspace, helpers
9 from caffe2.python.modeling import parameter_info
10 from caffe2.python.modeling.parameter_sharing import (
11  parameter_sharing_context,
12 )
13 from caffe2.python.optimizer_context import (
14  OptimizerContext,
15  DEFAULT_OPTIM,
16 )
17 from caffe2.python.regularizer_context import RegularizerContext
18 
19 from future.utils import viewitems, viewkeys
20 from itertools import chain
21 
22 import logging
23 import six
24 
25 
26 # _known_working_ops are operators that do not need special care.
27 _known_working_ops = [
28  "Accuracy",
29  "Adam",
30  "Add",
31  "Adagrad",
32  "SparseAdagrad",
33  "AveragedLoss",
34  "Cast",
35  "Checkpoint",
36  "ConstantFill",
37  "Copy",
38  "CopyGPUToCPU",
39  "CopyCPUToGPU",
40  "DequeueBlobs",
41  "EnsureCPUOutput",
42  "ExpandDims",
43  "Flatten",
44  "FlattenToVec",
45  "LabelCrossEntropy",
46  "LearningRate",
47  "MakeTwoClass",
48  "MatMul",
49  "NCCLAllreduce",
50  "NHWC2NCHW",
51  "PackSegments",
52  "Print",
53  "PRelu",
54  "ReduceFrontSum",
55  "Scale",
56  "ScatterWeightedSum",
57  "Sigmoid",
58  "SortedSegmentSum",
59  "Snapshot", # Note: snapshot is deprecated, use Checkpoint
60  "Softmax",
61  "SoftmaxWithLoss",
62  "SquaredL2Distance",
63  "Squeeze",
64  "StopGradient",
65  "Summarize",
66  "Tanh",
67  "Transpose",
68  "UnpackSegments",
69  "WeightedSum",
70  "YellowFin"
71 ]
72 
73 
74 class ModelHelper(object):
75  """A helper model so we can manage models more easily. It contains the net def
76  and the parameter storage. You can add an Operator yourself, e.g.
77 
78  model = model_helper.ModelHelper(name="train_net")
79  # init your weight and bias as w and b
80  w = model.param_init_net.XavierFill(...)
81  b = model.param_init_net.ConstantFill(...)
82  fc1 = model.FC([input, w, b], output, **kwargs)
83 
84  or you can use the helper functions in the brew module without manually
85  defining parameter initialization and operators.
86 
87  model = model_helper.ModelHelper(name="train_net")
88  fc1 = brew.fc(model, input, output, dim_in, dim_out, **kwargs)
89 
90  """
91 
92  def __init__(self, name=None, init_params=True, allow_not_known_ops=True,
93  skip_sparse_optim=False, param_model=None, arg_scope=None):
94  self.name = name or "model"
95  self.net = core.Net(self.name)
96 
97  if param_model is not None:
98  self.param_init_net = param_model.param_init_net
99  self.param_to_grad = param_model.param_to_grad
100  self.params = param_model.params
101  self._parameters_info = param_model._parameters_info
102  self._computed_params = param_model._computed_params
103  else:
104  self.param_init_net = core.Net(self.name + '_init')
105  self.param_to_grad = {}
106  self.params = []
107  self._parameters_info = {}
108  self._computed_params = []
109 
110  self._param_info_deprecated = []
111  self._devices = []
112  self.gradient_ops_added = False
113  self.init_params = init_params
114  self.allow_not_known_ops = allow_not_known_ops
115  self.skip_sparse_optim = skip_sparse_optim
116  self.weights = []
117  self.biases = []
118  self._arg_scope = {
119  'order': "NCHW",
120  'use_cudnn': True,
121  'cudnn_exhaustive_search': False,
122  }
123  if arg_scope is not None:
124  # Note that None is not an acceptable value. We do not check for it
125  # here because MakeArgument already performs that check.
126  self._arg_scope.update(arg_scope)
127 
128  @property
129  def arg_scope(self):
130  return self._arg_scope
131 
132  def get_name(self):
133  return self.name
134 
135  def _infer_param_shape(self, param):
136  for op in self.param_init_net.Proto().op:
137  if str(param) in op.output:
138  for arg in op.arg:
139  if arg.name == "shape":
140  return list(arg.ints)
141  return None
142 
143  def _update_param_info_deprecated(self):
144  assert len(self._param_info_deprecated) <= len(self.params)
145  for param in self.params[len(self._param_info_deprecated):]:
146  if not isinstance(param, core.BlobReference):
147  raise ValueError(
148  "Param %s must be a BlobReference!" % str(param))
149  self._param_info_deprecated.append(parameter_info.ParameterInfo(
150  param_id=len(self._param_info_deprecated),
151  param=param,
152  shape=self._infer_param_shape(param)))
153  for info in self._param_info_deprecated:
154  info.grad = self.param_to_grad.get(info.name)
155 
156  def _normalize_tags(self, tags):
157  tags = tags or []
158  return set(tags) if isinstance(tags, list) else set([tags])
159 
160  def create_param(self, param_name, shape, initializer, tags=None):
161  """
162  Creates parameter with a given name and initializer.
163 
164  If param_name is an instance of BlobReference - then this blob will be used
165  to store the parameter (no sharing logic will affect its location).
166 
167  If param_name is a string, then the final blob will be created in the
168  CurrentNameScope with respect to all parameter sharing logic,
169  i.e. 'resolved_name_scope/param_name'.
170 
171  Parameter sharing logic overrides CurrentNameScope according to the
172  rules that are specified through ParameterSharing contexts; all
173  ParameterSharing contexts are applied recursively until there are no
174  extra overrides present, and on each step the best match is
175  applied first.
176 
177  The following examples should clarify the way ParameterSharing logic
178  works:
179 
180  As an example if this function is called with parameter 'w':
181  a. Call from some scope 'global_scope' with no Parameter sharing:
182  'global_scope/w'
183  b. Call from scope 'scope_b', with override {'scope_b': 'scope_a'}:
184  'scope_a/w'
185  c. Call from scope 'scope_a', with override {'scope_a': ''}:
186  'scope_a/w'
187  d. Call from scope 'scope_b/shared', with overrides
188  {'scope_b/shared': 'scope_b', 'scope_b': 'scope_a'}:
189  'scope_a/w'
190  e. Call from scope 'scope_b/unshared', with overrides
191  {'scope_b/shared': 'scope_b', 'scope_b': 'scope_a'}:
192  'scope_a/unshared/w'
193  """
194  # ParameterSharing works only for case when param_name is instance of
195  # a string type. If param_name is a BlobReference - no attempt for
196  # ParameterSharing will be applied.
197  if isinstance(param_name, core.BlobReference):
198  param_name = str(param_name)
199  elif isinstance(param_name, six.string_types):
200  # The parameter name will be prefixed with the current NameScope,
201  # resolved with respect to the parameter sharing of the scopes.
202  param_name = parameter_sharing_context.get_parameter_name(
203  param_name)
204  else:
205  raise TypeError("Unsupported type for param_name")
206 
207  if param_name in self._parameters_info:
208  assert self._parameters_info[param_name].shape == shape
209  return self._parameters_info[param_name].blob
210 
211  param_info = initializer.create_param(
212  param_name=core.BlobReference(param_name),
213  init_net=self.param_init_net,
214  shape=shape,
215  )
216  optim_context = OptimizerContext.current()
217  for tag in self._normalize_tags(tags):
218  if optim_context.has_optimizer(tag):
219  # param_info will check that the optimizer has not already been set
220  param_info.optimizer = optim_context.get_optimizer(tag)
221  if not param_info.optimizer and optim_context.has_optimizer(DEFAULT_OPTIM):
222  param_info.optimizer = optim_context.get_optimizer(DEFAULT_OPTIM)
223 
224  reg_context = RegularizerContext.current()
225  param_info.regularizer = reg_context
226 
227  self._parameters_info[param_name] = param_info
228  # Add param to legacy structs as well, so all other functions for
229  # parameters are still working.
230  self.AddParameter(param_info.blob, tags)
231  return param_info.blob
232 
233  def get_param_info(self, param):
234  assert isinstance(param, core.BlobReference), \
235  "Param {} is not a BlobReference".format(param)
236  return self._parameters_info.get(param, None)
237 
238  # This method is deprecated, use create_param method which
239  # also does parameter initialization when needed
240  def add_param_DEPRECATED(self, param, key=None, shape=None, length=None):
241  logging.warning("add_param method is DEPRECATED")
242  self._update_param_info_deprecated()
243  self.AddParameter(param)
244  if key is not None and self.net.input_record() is not None:
245  idx = self.net.input_record().field_blobs().index(key)
246  key = self.net.input_record().field_names()[idx]
247  shape = shape if shape is not None else self._infer_param_shape(param)
248  if not isinstance(param, core.BlobReference):
249  raise ValueError("Param %s must be a BlobReference!" % str(param))
250  self._param_info_deprecated.append(parameter_info.ParameterInfo(
251  param_id=len(self._param_info_deprecated),
252  param=param,
253  shape=shape,
254  key=key,
255  length=length,
256  ))
257  return self._param_info_deprecated[-1]
258 
259  # This method is deprecated, use get_param_info method
260  def param_info(self, grad_type=None, id=None):
261  logging.info("param_info method is DEPRECATED")
262  self._update_param_info_deprecated()
263  if id is not None:
264  assert grad_type is None
265  info = self._param_info_deprecated[id]
266  assert info.param_id == id
267  return info
268  elif grad_type is not None:
269  return [
270  info for info in self._param_info_deprecated
271  if info.grad_type() == grad_type]
272  else:
273  return self._param_info_deprecated
274 
275  def AddParameter(self, param, tags=None):
276  assert isinstance(param, core.BlobReference)
277  tags = self._normalize_tags(tags)
278  if parameter_info.ParameterTags.COMPUTED_PARAM in tags:
279  self._computed_params.append(param)
280  else:
281  self.params.append(param)
282 
283  if parameter_info.ParameterTags.WEIGHT in tags:
284  self.weights.append(param)
285  if parameter_info.ParameterTags.BIAS in tags:
286  self.biases.append(param)
287 
288  @staticmethod
289  def _NormalizeNamescope(namescope):
290  if namescope is None:
291  return scope.CurrentNameScope()
292  elif namescope == '' or namescope.endswith(scope._NAMESCOPE_SEPARATOR):
293  return namescope
294  else:
295  return namescope + scope._NAMESCOPE_SEPARATOR
296 
297  def GetParams(self, namescope=None, top_scope=False):
298  '''
299  Returns the params in the current namescope
300  '''
301  namescope = ModelHelper._NormalizeNamescope(namescope)
302 
303  if namescope == '':
304  return self.params[:]
305  elif top_scope:
306  return [
307  p for p in self.params
308  if p.GetNameScope().startswith(namescope)
309  ]
310  else:
311  return [p for p in self.params if
312  p.GetNameScope().startswith(namescope)]
313 
314  def Proto(self):
315  return self.net.Proto()
316 
317  def InitProto(self):
318  return self.param_init_net.Proto()
319 
320  def RunAllOnGPU(self, *args, **kwargs):
321  self.param_init_net.RunAllOnGPU(*args, **kwargs)
322  self.net.RunAllOnGPU(*args, **kwargs)
323 
324  def CreateDB(self, blob_out, db, db_type, **kwargs):
325  dbreader = self.param_init_net.CreateDB(
326  [], blob_out, db=db, db_type=db_type, **kwargs)
327  return dbreader
328 
329  def AddGradientOperators(self, *args, **kwargs):
330  if self.gradient_ops_added:
331  raise RuntimeError("You cannot run AddGradientOperators twice.")
332  self.Validate()
333 
334  self.gradient_ops_added = True
335  self.grad_map = self.net.AddGradientOperators(*args, **kwargs)
336  self.param_to_grad = self.get_param_to_grad(self.params)
337 
338  # Populate ParameterInfo for all parameters if missing and add
339  # gradient blob information, so that optimizers can use it.
340  for param, grad in self.param_to_grad.items():
341  param_info = self.get_param_info(param)
342  if param_info:
343  param_info.grad = grad
344  else:
345  self._parameters_info[param] = parameter_info.ParameterInfo(
346  param_id=None,
347  param=param,
348  grad=grad,
349  )
350 
351  return self.grad_map
352 
353  def get_param_to_grad(self, params):
354  '''
355  Given a list of parameters, returns a dict from each parameter
356  to its corresponding gradient.
357  '''
358 
359  param_to_grad = {}
360  if not self.gradient_ops_added:
361  raise RuntimeError("You need to run AddGradientOperators first.")
362  # We need to use empty namescope when creating the gradients
363  # to prevent duplicating the namescope prefix for gradient blobs.
364  for p in params:
365  if str(p) in self.grad_map:
366  param_to_grad[p] = self.grad_map[str(p)]
367  return param_to_grad
368 
369  def GetOptimizationParamInfo(self, params=None):
370  '''
371  Returns the ParameterInfo (including the gradient) for each param.
372  If params is not specified, all parameters will be considered.
373  '''
374  if not self.gradient_ops_added:
375  raise RuntimeError("Need to call AddGradientOperators first")
376 
377  param_to_grad = self.param_to_grad
378  if params:
379  param_to_grad = self.get_param_to_grad(params)
380 
381  return [
382  self.get_param_info(param) for param, grad in viewitems(param_to_grad)
383  if (
384  not self.skip_sparse_optim or
385  not isinstance(grad, core.GradientSlice)
386  )
387  ]
388 
389  def _Validate(self):
390  '''
391  Check for duplicate params
392  '''
393  params_list = [str(p) for p in self.params]
394  params_set = set(params_list)
395 
396  dupes = []
397  if len(params_set) != len(params_list):
398  params_list = sorted(params_list)
399  for j, p in enumerate(params_list):
400  if j > 0 and params_list[j - 1] == p:
401  if p not in dupes:
402  dupes.append(p)
403 
404  return dupes
405 
406  def Validate(self):
407  dupes = self._Validate()
408  assert dupes == [], "Duplicate params: {}".format(dupes)
409 
410  def GetComputedParams(self, namescope=None):
411  '''
412  Returns the computed params in the current namescope. 'Computed params'
413  are parameters that are not optimized via gradient descent but are
414  computed directly from data, such as the running mean and variance
415  of Spatial Batch Normalization.
416  '''
417  namescope = ModelHelper._NormalizeNamescope(namescope)
418 
419  if namescope == '':
420  return self._computed_params[:]
421  else:
422  return [p for p in self._computed_params
423  if p.GetNameScope().startswith(namescope)]
424 
425  def GetAllParams(self, namescope=None):
426  return self.GetParams(namescope) + self.GetComputedParams(namescope)
427 
427 
428  def TensorProtosDBInput(
429  self, unused_blob_in, blob_out, batch_size, db, db_type, **kwargs
430  ):
431  """TensorProtosDBInput."""
432  assert len(unused_blob_in) == 0, \
433  """You cannot pass reader to model_helper.TensorProtosDBInput.
434  Use model.net.TensorProtosDBInput instead to create the op."""
435 
436  return helpers.db_input.db_input(
437  self, blob_out, batch_size, db, db_type, **kwargs)
438 
439  def GetDevices(self):
440  assert len(self._devices) > 0, \
441  "Use data_parallel_model to run model on multiple GPUs."
442  return self._devices
443 
444  def __getattr__(self, op_type):
445  """Catch-all for all other operators, mostly those without params."""
446  if op_type.startswith('__'):
447  raise AttributeError(op_type)
448 
449  if not core.IsOperator(op_type):
450  raise AttributeError(
451  'Method ' + op_type + ' is not a registered operator.' +
452  ' Did you mean: [' +
453  ','.join(workspace.C.nearby_opnames(op_type)) + ']'
454  )
455  if op_type not in _known_working_ops:
456  if not self.allow_not_known_ops:
457  raise AttributeError(
458  "Operator {} is not known to be safe".format(op_type))
459 
460  logging.warning("You are creating an op that the ModelHelper "
461  "does not recognize: {}.".format(op_type))
462  return self.net.__getattr__(op_type)
463 
464  def __dir__(self):
465  return sorted(set(chain(
466  dir(type(self)),
467  viewkeys(self.__dict__),
468  _known_working_ops
469  )))
470 
471 
472 def ExtractPredictorNet(
473  net_proto,
474  input_blobs,
475  output_blobs,
476  device=None,
477  renames=None,
478  disabled_inputs=None,
479 ):
480  '''
481  Takes a model net for training and returns a net which can be
482  used for prediction. In particular, all gradient operators and
483  input operators are removed.
484  @param net_proto protobuf of the net you want to process (net.Proto())
485  @param input_blobs list/set of blob names that are the inputs of predictor
486  @param output_blobs list/set of blob names that are outputs of predictor
487  @param device optional device option that is assigned
488  @param renames dictionary of blob name to a new name (optional)
489  @param disabled_inputs optional set of blobs that are 'switched off'. This
490  will cause branches with those blobs as inputs to be removed
491  '''
492  predict_net = core.Net(net_proto.name + "_predict")
493  predict_proto = predict_net.Proto()
494 
495  orig_external_inputs = set(net_proto.external_input)
496  orig_external_outputs = set(net_proto.external_output)
497  input_blobs = {str(b) for b in input_blobs}
498  known_blobs = set(orig_external_inputs).union(input_blobs)
499  output_blobs = {str(b) for b in output_blobs}
500  external_inputs = set(input_blobs)
501  external_outputs = set(output_blobs)
502 
503  if renames is None:
504  renames = {}
505 
506  if disabled_inputs is not None:
507  known_blobs = known_blobs - set(disabled_inputs)
508 
509  ops = list(net_proto.op)
510 
511  # Find the range of ops that we should include
512  try:
513  first_op_with_input = min(
514  [
515  j for j in range(len(ops))
516  if input_blobs.intersection(ops[j].input) and ops[j].type !=
517  'StopGradient'
518  ]
519  )
520  except ValueError:
521  raise Exception("No ops with input={}".format(input_blobs))
522  try:
523  last_op_with_output = max(
524  [
525  j for j in range(len(ops))
526  if output_blobs.intersection(ops[j].output)
527  ]
528  )
529  except ValueError:
530  raise Exception("No ops with output={}".format(output_blobs))
531 
532  def validate_op(op):
533  # Check that the op does not have is_test = 0 set. This is a common
534  # pitfall with SpatialBN op, at lest.
535  for arg in op.arg:
536  if arg.name == "is_test" and arg.i == 0:
537  raise Exception(
538  "An operator had is_test=0, did you try to extract a " +
539  "predictor from a train model (instead of test model)?" +
540  " Op was: {}".format(str(op))
541  )
542 
543  def rename_list(proto_list):
544  # proto lists don't support assignments
545  new_list = proto_list[:]
546  for j, b in enumerate(new_list):
547  if b in renames:
548  new_list[j] = renames[b]
549 
550  del proto_list[:]
551  proto_list.extend(new_list)
552 
553  # Iterate through the ops and only include those whose inputs
554  # we can satisfy.
555  for op in ops[first_op_with_input:(last_op_with_output + 1)]:
556  if known_blobs.issuperset(op.input):
557 
558  # Special handling for recurrent nets
559  # TODO: when standard argument type for "nets" is introduced,
560  # this can be more general
561  if op.type == 'RecurrentNetwork':
562  for arg in op.arg:
563  if arg.name == 'backward_step_net':
564  arg.ClearField(str('n'))
565  elif arg.name == 'step_net':
566  for step_op in arg.n.op:
567  rename_list(step_op.input)
568  rename_list(step_op.output)
569  if device is not None:
570  step_op.device_option.device_type = device.device_type
571  step_op.device_option.cuda_gpu_id = device.cuda_gpu_id
572 
573  rename_list(arg.n.external_input)
574  rename_list(arg.n.external_output)
575 
576  # Add additional external inputs
577  external_inputs.update(
578  set(arg.n.external_input).intersection(
579  orig_external_inputs
580  )
581  )
582 
583  if device is not None:
584  op.device_option.device_type = device.device_type
585  op.device_option.cuda_gpu_id = device.cuda_gpu_id
586  validate_op(op)
587  predict_proto.op.extend([op])
588  known_blobs.update(op.output)
589  external_inputs.update(
590  set(op.input).intersection(orig_external_inputs)
591  )
592  external_outputs.update(
593  set(op.output).intersection(orig_external_outputs)
594  )
595 
596  else:
597  logging.debug(
598  "Op {} had unknown inputs: {}".format(
599  op.type, set(op.input).difference(known_blobs)
600  )
601  )
602 
603  # Predictor net's external inputs and outputs include only those
604  # that are part of this net.
605  predict_proto.external_input.extend(external_inputs)
606  predict_proto.external_output.extend(external_outputs)
607 
608  rename_list(predict_proto.external_input)
609  rename_list(predict_proto.external_output)
610 
611  renamed_input_blobs = []
612  for b in input_blobs:
613  if b in renames:
614  renamed_input_blobs.append(renames[b])
615  else:
616  renamed_input_blobs.append(b)
617 
618  for op in predict_proto.op:
619  rename_list(op.input)
620  rename_list(op.output)
621 
622  return predict_net, list(
623  set(predict_proto.external_input) - set(renamed_input_blobs)
624  )
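
Example: a minimal sketch of how ModelHelper is typically wired together with the brew helpers and AddGradientOperators. The blob names ("data", "label"), the dimensions, and the random placeholder data below are illustrative assumptions, not part of model_helper.py.

import numpy as np
from caffe2.python import brew, model_helper, workspace

# Feed placeholder inputs so the nets below can actually run.
workspace.FeedBlob("data", np.random.rand(16, 784).astype(np.float32))
workspace.FeedBlob("label", np.random.randint(0, 10, size=16).astype(np.int32))

model = model_helper.ModelHelper(name="train_net")
fc1 = brew.fc(model, "data", "fc1", dim_in=784, dim_out=128)
relu1 = brew.relu(model, fc1, "relu1")
fc2 = brew.fc(model, relu1, "fc2", dim_in=128, dim_out=10)
softmax, loss = model.net.SoftmaxWithLoss([fc2, "label"], ["softmax", "loss"])

# Gradient ops are appended to model.net; model.param_to_grad then maps each
# parameter to its gradient blob for use by optimizers.
model.AddGradientOperators([loss])

workspace.RunNetOnce(model.param_init_net)  # run parameter initialization once
workspace.CreateNet(model.net)
workspace.RunNet(model.net)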
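
Example: creating parameters directly with create_param and explicit tags. This is a hedged sketch of what brew layers do internally; the scope name "fc_layer", the shapes, and the choice of XavierFill/ConstantFill initializers are illustrative assumptions.

from caffe2.python import core, model_helper
from caffe2.python.modeling.initializers import Initializer
from caffe2.python.modeling.parameter_info import ParameterTags

model = model_helper.ModelHelper(name="params_demo")
with core.NameScope("fc_layer"):
    # With no ParameterSharing overrides these resolve to 'fc_layer/w' and
    # 'fc_layer/b'; the fill operators are added to model.param_init_net.
    w = model.create_param(
        param_name="w",
        shape=[10, 4],
        initializer=Initializer("XavierFill"),
        tags=ParameterTags.WEIGHT,
    )
    b = model.create_param(
        param_name="b",
        shape=[10],
        initializer=Initializer("ConstantFill", value=0.0),
        tags=ParameterTags.BIAS,
    )

# The new blobs are tracked in model.params, model.weights and model.biases.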
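
Example: extracting a predictor net with ExtractPredictorNet. A hedged sketch; the tiny model, the blob names, and the rename map are illustrative assumptions.

from caffe2.python import brew, model_helper

# Build a small model to extract from.
model = model_helper.ModelHelper(name="demo")
fc = brew.fc(model, "data", "fc", dim_in=4, dim_out=2)
softmax = brew.softmax(model, fc, "softmax")

predict_net, extra_inputs = model_helper.ExtractPredictorNet(
    net_proto=model.net.Proto(),
    input_blobs=["data"],
    output_blobs=["softmax"],
    renames={"data": "input"},  # optional: expose the input under a new name
)
# predict_net is a core.Net containing only the ops needed to compute
# "softmax" from the (renamed) input; extra_inputs lists the remaining
# external inputs (typically the weight and bias blobs), which must exist
# in the workspace before the predictor can run.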