Caffe2 - Python API
A deep learning, cross platform ML framework
homotopy_weight.py
# @package homotopy_weight
# Module caffe2.fb.python.layers.homotopy_weight

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

from caffe2.python import core, schema
from caffe2.python.layers.layers import ModelLayer
import numpy as np
import logging
logger = logging.getLogger(__name__)
'''
Homotopy weighting between two inputs x, y computes alpha * x + beta * y,
where alpha is a decreasing scalar parameter ranging over [min, max]
(default [0, 1]) and beta = (max + min) - alpha, so with the defaults
beta = 1 - alpha.

Homotopy methods first solve an "easy" problem (one whose solution is well
known) and then gradually transform it into the target problem.
'''
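# Example: with the defaults min_weight=0, max_weight=1, half_life=1e6 and
# quad_life=3e6, alpha starts at 1.0, decays to 0.5 after 1e6 iterations and
# to roughly 0.25 after 3e6 iterations, so the output drifts from x toward y.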


class HomotopyWeight(ModelLayer):
    def __init__(
        self,
        model,
        input_record,
        name='homotopy_weight',
        min_weight=0.,
        max_weight=1.,
        half_life=1e6,
        quad_life=3e6,
        atomic_iter=None,
        **kwargs
    ):
        super(HomotopyWeight,
              self).__init__(model, name, input_record, **kwargs)
        self.output_schema = schema.Scalar(
            np.float32, self.get_next_blob_reference('homotopy_weight')
        )
        data = self.input_record.field_blobs()
        assert len(data) == 2
        self.x = data[0]
        self.y = data[1]
        # TODO: currently model building does not have access to the iter
        # counter or learning rate; they are added at optimization time
        self.use_external_iter = (atomic_iter is not None)
        self.atomic_iter = (
            atomic_iter if self.use_external_iter else self.create_atomic_iter()
        )
        # to map lr to [min, max]: alpha = scale * lr + offset
        assert max_weight > min_weight
        self.scale = float(max_weight - min_weight)
        self.offset = self.model.add_global_constant(
            '%s_offset_1dfloat' % self.name, float(min_weight)
        )
        self.gamma, self.power = self.solve_inv_lr_params(half_life, quad_life)

    def solve_inv_lr_params(self, half_life, quad_life):
        # ensure that gamma and power are solvable
        assert half_life > 0
        # convex, monotonically decreasing
        assert quad_life > 2 * half_life
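        # Fit the 'inv' policy lr(n) = (1 + gamma * n) ** (-power) so that
        # lr(half_life) = 1/2 exactly and lr(quad_life) comes out close to
        # 1/4; below, t = quad_life / half_life and x = 1 + gamma * half_life.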
        t = float(quad_life) / float(half_life)
        x = t * (1.0 + np.sqrt(2.0)) / 2.0 - np.sqrt(2.0)
        gamma = (x - 1.0) / float(half_life)
        power = np.log(2.0) / np.log(x)
        logger.info(
            'homotopy_weighting: found lr param: gamma=%g, power=%g' %
            (gamma, power)
        )
        return gamma, power

    def create_atomic_iter(self):
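        # Create a layer-private, mutex-guarded iteration counter so the
        # schedule can advance independently of the optimizer's global iter.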
        self.mutex = self.create_param(
            param_name=('%s_mutex' % self.name),
            shape=None,
            initializer=('CreateMutex', ),
            optimizer=self.model.NoOptim,
        )
        self.atomic_iter = self.create_param(
            param_name=('%s_atomic_iter' % self.name),
            shape=[1],
            initializer=(
                'ConstantFill', {
                    'value': 0,
                    'dtype': core.DataType.INT64
                }
            ),
            optimizer=self.model.NoOptim,
        )
        return self.atomic_iter

    def update_weight(self, net):
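        # Advance the iteration counter, evaluate the 'inv' learning-rate
        # policy to get lr in (0, 1], then map lr and (1 - lr) affinely into
        # [min_weight, max_weight] to obtain alpha and beta.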
        alpha = net.NextScopedBlob('alpha')
        beta = net.NextScopedBlob('beta')
        lr = net.NextScopedBlob('lr')
        comp_lr = net.NextScopedBlob('complementary_lr')
        scaled_lr = net.NextScopedBlob('scaled_lr')
        scaled_comp_lr = net.NextScopedBlob('scaled_complementary_lr')
        if not self.use_external_iter:
            net.AtomicIter([self.mutex, self.atomic_iter], [self.atomic_iter])
        net.LearningRate(
            [self.atomic_iter],
            [lr],
            policy='inv',
            gamma=self.gamma,
            power=self.power,
            base_lr=1.0,
        )
        net.Sub([self.model.global_constants['ONE'], lr], [comp_lr])
        net.Scale([lr], [scaled_lr], scale=self.scale)
        net.Scale([comp_lr], [scaled_comp_lr], scale=self.scale)
        net.Add([scaled_lr, self.offset], [alpha])
        net.Add([scaled_comp_lr, self.offset], [beta])
        return alpha, beta

    def add_ops(self, net):
        alpha, beta = self.update_weight(net)
        # alpha * x + beta * y
        net.WeightedSum([self.x, alpha, self.y, beta], self.output_schema())
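
A quick standalone check of the schedule these parameters produce (a minimal
sketch using numpy only; the closed form for x is copied from
solve_inv_lr_params above, and the 'inv' policy computes
lr = (1 + gamma * n) ** (-power)):

    import numpy as np

    half_life, quad_life = 1e6, 3e6  # the layer's defaults
    t = quad_life / half_life
    x = t * (1.0 + np.sqrt(2.0)) / 2.0 - np.sqrt(2.0)
    gamma = (x - 1.0) / half_life
    power = np.log(2.0) / np.log(x)
    for n in [0.0, half_life, quad_life]:
        print(n, (1.0 + gamma * n) ** -power)
    # n=0 gives 1.0, n=half_life gives exactly 0.5, n=quad_life gives ~0.26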