Caffe2 - Python API
A deep learning, cross platform ML framework
layer_model_helper.py
1 ## @package layer_model_helper
2 # Module caffe2.python.layer_model_helper
3 from __future__ import absolute_import
4 from __future__ import division
5 from __future__ import print_function
6 from __future__ import unicode_literals
7 
8 from caffe2.python import core, model_helper, schema, scope, utils
9 from caffe2.python.modeling.parameter_info import (
10  ParameterInfo,
11 )
12 from caffe2.python.modeling.parameter_sharing import (
13  parameter_sharing_context,
14 )
15 from caffe2.python.modeling.net_modifier import NetModifier
16 
17 from caffe2.python.optimizer import get_param_device
18 from caffe2.python.regularizer import Regularizer
19 from caffe2.python.layers import layers
20 from caffe2.proto import caffe2_pb2
21 from future.utils import viewitems, viewvalues
22 
23 import logging
24 import numpy as np
25 import six
26 import copy
27 logger = logging.getLogger(__name__)
28 
29 
31  """
32  Model helper for building models on top of layers abstractions.
33 
34  Each layer is the abstraction that is higher level than Operator. Layer
35  is responsible for ownership of it's own parameters and can easily be
36  instantiated in multiple nets possible with different sets of ops.
37  As an example: one can easily instantiate predict and train nets from
38  the same set of layers, where predict net will have subset of the
39  operators from train net.
40  """
41 
42  def __init__(self, name, input_feature_schema, trainer_extra_schema,
43  keep_blobs=False):
44  ''' TODO(amalevich): more documentation on input args
45  '''
46 
47  super(LayerModelHelper, self).__init__(name=name)
48  self._layer_names = set()
49  self._layers = []
50  self._param_to_shape = {}
51 
52  # seed default
53  self._seed = None
54  self._sequence_seed = True
55 
56  # optimizer bookkeeping
57  self.param_to_optim = {}
58  self.param_to_reg = {}
59 
60  self._default_optimizer = None
61  self._loss = None
62  self._output_schema = None
63 
64  self._post_grad_net_modifiers = []
65  self._final_net_modifiers = []
66 
67  # breakdown map; breakdown features are categorical (like dense) but not
68  # necessarily used to represent data for training
69  self._breakdown_map = None
70 
71  # Connect Schema to self.net. That particular instance of the schema will
72  # be used for generation of the Layers across the network and will be used
73  # for connection with Readers.
74  self._input_feature_schema = schema.NewRecord(
75  self.net,
76  input_feature_schema
77  ) if not keep_blobs else input_feature_schema.clone()
78  self._trainer_extra_schema = schema.NewRecord(
79  self.net,
80  trainer_extra_schema
81  ) if not keep_blobs else trainer_extra_schema.clone()
82  self._metrics_schema = schema.Struct()
83 
84  self._preproc_output_schema = None
85 
86  self._init_global_constants()
87  self.param_init_net = self.create_init_net('param_init_net')
88  self._initialize_params = True
89 
90  def clear_output_schema(self):
91  self._output_schema = None
92 
93  def set_initialize_params(self, initialize_params):
94  self._initialize_params = initialize_params
95 
96  def add_metric_field(self, name, value):
97  assert name not in self._metrics_schema.fields, (
98  "Try to add metric field twice: {}".format(name))
99  self._metrics_schema = self._metrics_schema + schema.Struct(
100  (name, value)
101  )
102 
103  @staticmethod
104  def _get_global_constant_initializer_op(
105  blob_name, array=None, dtype=None, initializer=None
106  ):
107  # to add a global constant to the model, one first needs to get the
108  # initializer
109  if array is not None:
110  assert initializer is None,\
111  "Only one of array and initializer should be specified"
112  if dtype is None:
113  array = np.array(array)
114  else:
115  array = np.array(array, dtype=dtype)
116 
117  # TODO: make GivenTensor generic
118  op_name = None
119  if array.dtype == np.int32:
120  op_name = 'GivenTensorIntFill'
121  elif array.dtype == np.int64:
122  op_name = 'GivenTensorInt64Fill'
123  elif array.dtype == np.str:
124  op_name = 'GivenTensorStringFill'
125  elif array.dtype == np.bool:
126  op_name = 'GivenTensorBoolFill'
127  else:
128  op_name = 'GivenTensorFill'
129 
130  def initializer(blob_name):
131  return core.CreateOperator(
132  op_name, [],
133  blob_name,
134  shape=array.shape,
135  values=array.flatten().tolist()
136  )
137  else:
138  assert initializer is not None
139  initializer_op = initializer(blob_name)
140  return initializer_op
141 
142  def add_global_constant(
143  self, name, array=None, dtype=None, initializer=None
144  ):
145  assert isinstance(name, six.string_types), (
146  'name should be a string as we are using it as map key')
147  # This is the global namescope for constants. They will be created in all
148  # init_nets, and there should be very few of them.
149  assert name not in self.global_constants, \
150  "%s already added in global_constants" % name
151  blob_name = self.net.NextBlob(name)
152  self.global_constants[name] = blob_name
153  initializer_op = LayerModelHelper._get_global_constant_initializer_op(
154  blob_name, array, dtype, initializer
155  )
156  assert blob_name not in self.global_constant_initializers, \
157  "there is already a initializer op associated with blob %s" % \
158  blob_name
159  self.global_constant_initializers[blob_name] = initializer_op
160  return blob_name
161 
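# Example (a sketch mirroring _init_global_constants below, which registers
# these exact constants):
#
#   one_blob = model.add_global_constant('ONE', 1.0)
#   zero_range = model.add_global_constant('ZERO_RANGE', [0, 0], dtype='int32')
#
# The dtype of the resulting numpy array selects which GivenTensor*Fill op
# materializes the constant in every init net.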
162  def maybe_add_global_constant(self, name, *args, **kwargs):
163  # Adds new global constants ad hoc, avoiding duplication:
164  # if the name was already registered in global_constants, it will not be
165  # added again, even if the intended value differs from the original one.
166 
167  if name in self.global_constants:
168  blob_name = self.global_constants[name]
169  initializer_op = \
170  LayerModelHelper._get_global_constant_initializer_op(
171  blob_name, *args, **kwargs
172  )
173  # check if the original initializer is the same as the one intended
174  # now
175  assert utils.OpAlmostEqual(
176  initializer_op,
177  self.global_constant_initializers[blob_name],
178  'debug_info'
179  ), \
180  "conflict initializers for global constant %s, " \
181  "previous %s, now %s" % (
182  blob_name, str(initializer_op),
183  str(self.global_constant_initializers[blob_name]))
184  return blob_name
185  return self.add_global_constant(name, *args, **kwargs)
186 
187  def _init_global_constants(self):
188  self.global_constants = {}
189  self.global_constant_initializers = {}
190  self.add_global_constant('ONE', 1.0)
191  self.add_global_constant('ZERO', 0.0)
192  self.add_global_constant('ZERO_RANGE', [0, 0], dtype='int32')
193 
194  def _add_global_constants(self, init_net):
195  for initializer_op in viewvalues(self.global_constant_initializers):
196  init_net._net.op.extend([initializer_op])
197 
198  def create_init_net(self, name):
199  init_net = core.Net(name)
200  self._add_global_constants(init_net)
201  return init_net
202 
203  def _validate_param_shape(self, param_name, shape):
204  if param_name not in self._param_to_shape:
205  return
206 
207  ref_shape = self._param_to_shape[param_name]
208 
209  if shape != ref_shape:
210  raise ValueError(
211  "Got inconsistent shapes between shared parameters "
212  "when trying to map a blob in scope {0} to {1}. ref_shape : "
213  " {2}, shape : {3}".format(
214  scope.CurrentNameScope(), param_name, ref_shape, shape)
215  )
216 
217  def create_param(self, param_name, shape, initializer, optimizer=None,
218  ps_param=None, regularizer=None):
219  if isinstance(param_name, core.BlobReference):
220  param_name = str(param_name)
221  elif isinstance(param_name, six.string_types):
222  # Parameter name will be equal to the current Namescope, resolved
223  # with respect to the parameter sharing of the scopes.
224  param_name = parameter_sharing_context.get_parameter_name(
225  param_name)
226  else:
227  raise ValueError("Unsupported type for param_name")
228 
229  param_blob = core.BlobReference(param_name)
230 
231  if len(initializer) == 1:
232  init_op_args = {}
233  else:
234  assert len(initializer) == 2
235  init_op_args = copy.deepcopy(initializer[1])
236  if shape is not None:
237  assert 'shape' not in init_op_args
238  init_op_args.update({'shape': shape})
239 
240  initializer_op = None
241  if self._initialize_params:
242  initializer_op = core.CreateOperator(
243  initializer[0],
244  [],
245  param_blob,
246  **init_op_args
247  )
248 
249  param = layers.LayerParameter(
250  parameter=param_blob,
251  initializer=initializer_op,
252  optimizer=optimizer,
253  ps_param=ps_param,
254  regularizer=regularizer
255  )
256 
257  self._validate_param_shape(param_name, shape)
258 
259  self._param_to_shape[param_name] = shape
260 
261  return param
262 
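# Example (a sketch; 'XavierFill' and the shape are illustrative assumptions,
# but the (op_name, op_kwargs) initializer format matches the code above):
#
#   w = model.create_param(
#       param_name='w',
#       shape=[out_dims, in_dims],
#       initializer=('XavierFill', {}),
#       optimizer=model.default_optimizer,
#   )
#
# A one-element initializer such as ('ConstantFill',) is also accepted; in
# that case init_op_args carries only the shape.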
263  def next_layer_name(self, prefix):
264  base_name = core.ScopedName(prefix)
265  name = base_name
266  index = 0
267  while name in self._layer_names:
268  name = base_name + '_auto_' + str(index)
269  index += 1
270 
271  self._layer_names.add(name)
272  return name
273 
274  def add_layer(self, layer):
275  self._layers.append(layer)
276  for param in layer.get_parameters():
277  assert isinstance(param.parameter, core.BlobReference)
278 
279  self.param_to_optim[str(param.parameter)] = \
280  param.optimizer or self.default_optimizer
281 
282  self.params.append(param.parameter)
283  if isinstance(param, layers.LayerParameter):
284  self.param_to_reg[param.parameter] = param.regularizer
285  elif isinstance(param, ParameterInfo):
286  # TODO:
287  # Currently, LSTM and RNN cells, which use ModelHelper instead of
288  # LayerModelHelper as their super class, are called in pooling_methods.
289  # In ModelHelper, regularization is not supported in create_param.
290  # We will unify the way create_param works in ModelHelper and
291  # LayerModelHelper in the future.
292  logger.info('regularization is unsupported for ParameterInfo object')
293  else:
294  raise ValueError(
295  'unknown object type besides ParameterInfo and LayerParameter: {}'
296  .format(param)
297  )
298 
299  # The primary value of adding everything to self.net is generation of the
300  # operators right away, i.e. if an error happens it'll be detected
301  # immediately. Other than this, create_x_net should be called.
302  layer.add_operators(self.net, self.param_init_net)
303  return layer.output_schema
304 
305  def get_parameter_blobs(self):
306  param_blobs = []
307  for layer in self._layers:
308  for param in layer.get_parameters():
309  param_blobs.append(param.parameter)
310 
311  return param_blobs
312 
313  def add_post_grad_net_modifiers(self, modifier):
314  assert modifier not in self._post_grad_net_modifiers,\
315  "{0} is already in {1}".format(modifier, self._post_grad_net_modifiers)
316  assert isinstance(modifier, NetModifier),\
317  "{} has to be a NetModifier instance".format(modifier)
318  self._post_grad_net_modifiers.append(modifier)
319 
320  def add_final_net_modifiers(self, modifier):
321  assert modifier not in self._final_net_modifiers,\
322  "{0} is already in {1}".format(modifier, self._final_net_modifiers)
323  assert isinstance(modifier, NetModifier),\
324  "{} has to be a NetModifier instance".format(modifier)
325  self._final_net_modifiers.append(modifier)
326 
327  @property
328  def seed(self):
329  return self._seed
330 
331  @property
332  def sequence_seed(self):
333  return self._sequence_seed
334 
335  def store_seed(self, seed, sequence_seed=True):
336  # Store seed config that will be applied to each op in the net.
337  self._seed = seed
338  # If sequence_seed is True, the i-th op has rand_seed=`seed + i`
339  self._sequence_seed = sequence_seed
340 
341  def apply_seed(self, net):
342  if self._seed:
343  net.set_rand_seed(self._seed, self._sequence_seed)
344 
345  @property
346  def default_optimizer(self):
347  return self._default_optimizer
348 
349  @default_optimizer.setter
350  def default_optimizer(self, optimizer):
351  self._default_optimizer = optimizer
352 
353  @property
354  def input_feature_schema(self):
355  return self._input_feature_schema
356 
357  @property
358  def trainer_extra_schema(self):
359  return self._trainer_extra_schema
360 
361  @property
362  def metrics_schema(self):
363  """
364  Returns the schema that represents model output that should be used for
365  metric reporting.
366 
367  During the training/evaluation this schema will be appended to the
368  schema that represents model output.
369  """
370  return self._metrics_schema
371 
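# Example (a sketch; the field name and blob are illustrative assumptions):
#
#   model.add_metric_field('calibration', schema.Scalar(blob=calibration_blob))
#
# The added field travels with the metrics schema and is appended to the
# model output schema during training/evaluation.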
372  @property
373  def output_schema(self):
374  assert self._output_schema is not None
375  return self._output_schema
376 
377  @output_schema.setter
378  def output_schema(self, schema):
379  assert self._output_schema is None
380  self._output_schema = schema
381 
382  @property
383  def preproc_output_schema(self):
384  assert self._preproc_output_schema is not None
385  return self._preproc_output_schema
386 
387  @preproc_output_schema.setter
388  def preproc_output_schema(self, schema):
389  assert self._preproc_output_schema is None
390  self._preproc_output_schema = schema
391 
392  @property
393  def loss(self):
394  assert self._loss is not None
395  return self._loss
396 
397  @loss.setter
398  def loss(self, loss):
399  assert self._loss is None
400  self._loss = loss
401 
402  def has_loss(self):
403  return self._loss is not None
404 
405  def add_loss(self, loss, name='unnamed'):
406  assert loss is not None, "Added loss should not be None"
407  assert isinstance(loss, schema.Scalar) or isinstance(
408  loss, schema.Struct
409  ), "Added loss should be a scalar or a struct"
410  if self._loss is None:
411  self._loss = schema.Struct((name, loss))
412  else:
413  prefix_base = name + '_auto_'
414  index = 0
415  prefix = name
416  while prefix in self._loss:
417  prefix = prefix_base + str(index)
418  index += 1
419  loss_struct = schema.Struct((prefix, loss))
420  self._loss = self._loss + loss_struct
421 
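# Example (a sketch of the auto-naming logic above):
#
#   model.add_loss(schema.Scalar(blob=bce_blob), 'bce')
#   model.add_loss(schema.Scalar(blob=reg_blob), 'bce')
#
# yields a loss Struct with fields 'bce' and 'bce_auto_0', so repeated loss
# names never collide.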
422  def add_output_schema(self, name, value):
423  assert value is not None, \
424  'Added output schema {} should not be None'.format(name)
425  assert isinstance(value, schema.Scalar) or \
426  isinstance(value, schema.Struct), \
427  'Added output schema {} should be a scalar or a struct.\n\
428  Now it is {}.'.format(name, type(value))
429  if self._output_schema is None: # be the first field
430  self._output_schema = schema.Struct((name, value))
431  else: # merge with other fields
432  assert name not in self._output_schema.fields, \
433  'Output Schema Field {} already exists'.format(name)
434  self._output_schema = \
435  self._output_schema + schema.Struct((name, value))
436 
437  def add_trainer_extra_schema(self, trainer_extra_schema):
438  trainer_extra_record = schema.NewRecord(self.net, trainer_extra_schema)
439  self._trainer_extra_schema += trainer_extra_record
440 
441  def __getattr__(self, layer):
442  def is_functional_layer(layer):
443  if core.IsOperator(layer):
444  return True
445  elif layer.startswith('FunctionalLayer'):
446  return True
447  else:
448  return False
449 
450  def resolve_functional_layer(layer):
451  if core.IsOperator(layer):
452  return layer
453  elif layer.startswith('FunctionalLayer'):
454  return layer[len('FunctionalLayer'):]
455  else:
456  raise ValueError(
457  '%s cannot be resolved as functional layer' % layer
458  )
459 
460  if layer.startswith('__'):
461  raise AttributeError(layer)
462 
463  # TODO(amalevich): Add support for ifbpy inline documentation
464  if layers.layer_exists(layer):
465  def wrapper(*args, **kwargs):
466  new_layer = layers.create_layer(layer, self, *args, **kwargs)
467  if kwargs.get("output_to_metrics", False):
468  new_layer.export_output_for_metrics()
469  if kwargs.get("params_to_metrics", False):
470  new_layer.export_params_for_metrics()
471  return self.add_layer(new_layer)
472  return wrapper
473  elif is_functional_layer(layer):
474  # TODO(xlwang): Designated layer shadows the usage of an op as a
475  # single layer. To enforce using an op (e.g. Split) as a functional
476  # layer, one can call 'model.FunctionalLayerSplit'
477  layer = resolve_functional_layer(layer)
478  def wrapper(*args, **kwargs):
479  def apply_operator(net, in_record, out_record, **kwargs):
480  # TODO(amalevich): Switch to net.operator as soon as it gets
481  # landed
482  net.__getattr__(layer)(in_record.field_blobs(),
483  out_record.field_blobs(),
484  **kwargs)
485 
486  if 'name' not in kwargs:
487  kwargs['name'] = layer
488 
489  new_layer = layers.create_layer(
490  'Functional',
491  self, *args, function=apply_operator,
492  **kwargs
493  )
494 
495  if kwargs.get("output_to_metrics", False):
496  new_layer.export_output_for_metrics()
497  if kwargs.get("params_to_metrics", False):
498  new_layer.export_params_for_metrics()
499 
500  return self.add_layer(new_layer)
501  return wrapper
502  else:
503  raise ValueError(
504  "Trying to create non-registered layer: {}".format(layer))
505 
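# Example (a sketch; Relu is a Caffe2 operator used here through the
# functional-layer path described above):
#
#   relu_out = model.Relu(model.input_feature_schema.dense, 1)
#   # or, forcing the functional path explicitly, as the TODO notes:
#   relu_out = model.FunctionalLayerRelu(model.input_feature_schema.dense, 1)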
506  @property
507  def layers(self):
508  return self._layers
509 
510  def apply_regularizers_on_loss(
511  self,
512  train_net,
513  train_init_net,
514  blob_to_device=None,
515  ):
516  for param, regularizer in viewitems(self.param_to_reg):
517  if regularizer is None or regularizer.apply_after_optimizer:
518  continue
519  assert isinstance(regularizer, Regularizer)
520  added_loss_blob = regularizer(train_net, train_init_net, param)
521  self.add_loss(
522  schema.Scalar(blob=added_loss_blob),
523  str(added_loss_blob)
524  )
525 
526  def apply_regularizers_after_optimizer(
527  self,
528  train_net,
529  train_init_net,
530  grad_map,
531  blob_to_device=None,
532  ):
533  for param, regularizer in viewitems(self.param_to_reg):
534  if regularizer is None or not regularizer.apply_after_optimizer:
535  continue
536  assert isinstance(regularizer, Regularizer)
537  regularizer(
538  train_net, train_init_net, param, grad_map.get(str(param)))
539 
540  def apply_post_grad_net_modifiers(
541  self,
542  trainer_net,
543  trainer_init_net,
544  grad_map,
545  blob_to_device=None,
546  ):
547  param_grad_map = {param: grad_map[param]
548  for param in self.param_to_optim.keys() if param in grad_map}
549 
550  for modifier in self._post_grad_net_modifiers:
551  modifier(trainer_net, trainer_init_net, param_grad_map,
552  blob_to_device=blob_to_device)
553 
554  def apply_final_net_modifiers(
555  self,
556  trainer_net,
557  trainer_init_net,
558  grad_map,
559  blob_to_device=None,
560  ):
561  for modifier in self._final_net_modifiers:
562  modifier(trainer_net, trainer_init_net, grad_map,
563  blob_to_device=blob_to_device)
564 
565  def apply_optimizers(
566  self,
567  train_net,
568  train_init_net,
569  grad_map,
570  blob_to_device=None,
571  ):
572  CPU = core.DeviceOption(caffe2_pb2.CPU)
573  # if given, blob_to_device is a map from blob to device_option
574  blob_to_device = blob_to_device or {}
575  for param, optimizer in viewitems(self.param_to_optim):
576  assert optimizer is not None, \
577  "default optimizer must have been set in add_layer"
578  # note that not all params have gradients, and thus we send None if
579  # the gradient does not exist
580  device = get_param_device(
581  param,
582  grad_map.get(str(param)),
583  param_to_device=blob_to_device,
584  default_device=CPU,
585  )
586  with core.DeviceScope(device):
587  optimizer(
588  train_net, train_init_net, param, grad_map.get(str(param)))
589 
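# Example (a rough sketch of how a trainer typically wires this up; the exact
# flow lives in the net-instantiation code, not in this file):
#
#   grad_map = train_net.AddGradientOperators(model.loss.field_blobs())
#   model.apply_optimizers(train_net, train_init_net, grad_map)
#
# Params without a gradient still have their optimizer invoked, with None
# passed as the gradient.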
590  def _GetOne(self):
591  return self.global_constants['ONE']
592 
593  # An optimizer which allows us to do NO optimization
594  def NoOptim(self, *args, **kwargs):
595  pass
596 
597  @property
598  def breakdown_map(self):
599  return self._breakdown_map
600 
601  @breakdown_map.setter
602  def breakdown_map(self, breakdown_map):
603  # TODO(xlwang): provide more rich feature information in breakdown_map;
604  # and change the assertion accordingly
605  assert isinstance(breakdown_map, dict)
606  assert all(isinstance(k, six.string_types) for k in breakdown_map)
607  assert sorted(breakdown_map.values()) == list(range(len(breakdown_map)))
608  self._breakdown_map = breakdown_map
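# Example (a sketch of the invariant asserted above): breakdown_map maps each
# breakdown feature name to a dense position, and the positions must form a
# permutation of 0..N-1, e.g.
#
#   model.breakdown_map = {'ctr': 0, 'position': 1, 'device_type': 2}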