Caffe2 - Python API
A deep learning, cross-platform ML framework
crf.py
## @package crf
# Module caffe2.python.crf
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
from caffe2.python import core, recurrent, model_helper, brew
import numpy as np

'''
Due to a limitation in RecurrentNetworkOp, this layer only supports
batch_size=1. In order to support batch_size > 1, we will have to implement
the CRFUnit and its gradient in C++ and handle the different batches there.
'''

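# Reference sketch (illustration only; nothing in this module uses it):
# CRFWithLoss.crf_loss builds, as a Caffe2 graph, the negative log-likelihood
# of the gold tag path, i.e. the log-sum-exp score of all tag paths minus the
# unary + binary score of the gold path. The numpy helper below computes the
# same objective for a dense [T, num_tags] score matrix and a
# [num_tags, num_tags] transitions matrix (transitions[i, j] = score of moving
# from tag i to tag j); the graph version additionally pads both with BOS/EOS
# states. The helper name and signature are assumptions made for this sketch.
def _crf_nll_reference(unary_scores, transitions, labels):
    labels = np.asarray(labels)
    # Gold path score: unary scores of the labeled tags plus the pairwise
    # transition scores between consecutive labels.
    gold_score = unary_scores[np.arange(len(labels)), labels].sum()
    gold_score += transitions[labels[:-1], labels[1:]].sum()
    # Forward algorithm: alpha[j] is the log-sum-exp score of all paths that
    # end in tag j at the current timestep.
    alpha = unary_scores[0]
    for t in range(1, unary_scores.shape[0]):
        alpha = np.logaddexp.reduce(
            alpha[:, None] + transitions + unary_scores[t][None, :], axis=0
        )
    log_partition = np.logaddexp.reduce(alpha)
    return log_partition - gold_score

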
class CRFWithLoss(object):
    def __init__(self, model, num_classes, transitions_blob=None):
        self.model = model
        self.num_classes = num_classes
        self.num_classes_padded = num_classes + 2  # After adding BOS and EOS
        if not transitions_blob:
            transitions_blob = self.model.param_init_net.UniformFill(
                [],
                [core.ScopedBlobReference('crf_transitions')],
                shape=[self.num_classes_padded, self.num_classes_padded],
                min=-1.0,
                max=1.0
            )
        self.transitions = transitions_blob
        self.model.params.append(self.transitions)

    def crf_loss(self, predictions, labels, seq_lengths=None):
        # Since the transitions matrix is a shared parameter, we need to
        # take a snapshot of it at the beginning, since it can be updated
        # in between the operators that use it when doing parallel updates
        transitions_snapshot = self.model.net.Copy(
            self.transitions, core.ScopedBlobReference('transitions_snapshot')
        )
        # Compute best path unary score from the logits
        path_unary_score = self._gather_entries_sum(
            predictions, labels, self.num_classes
        )
        # Append BOS and EOS entries to the predictions and labels
        predictions = self._pad_predictions(predictions)
        labels = self._pad_labels(labels)
        # Compute best path binary scores from the transitions matrix
        path_binary_score = self._path_binary_scores(
            labels, transitions_snapshot, seq_lengths
        )
        path_total_score = self.model.net.Add(
            [path_binary_score, path_unary_score],
            core.ScopedBlobReference('path_total')
        )
        # Compute all paths score
        zero_index = self.model.param_init_net.ConstantFill(
            [], shape=[1], value=0
        )
        initial_state = self.model.net.Gather(
            [predictions, zero_index],
            core.ScopedBlobReference('rnn_initial'),
            dense_gradient=True
        )
        input_data, _ = self.model.net.RemovePadding(
            [predictions],
            padding_width=1,
            end_padding_width=0,
            outputs=2,
        )
        input_data = self.model.net.ExpandDims(
            [input_data],
            core.ScopedBlobReference('rnn_input_data'),
            dims=[1]
        )
        # Due to a bug in RecurrentNetworkGradientOp, we need to copy the
        # transitions blob before sending it to the recurrent network
        transitions_copy = self.model.net.Copy(
            transitions_snapshot, core.ScopedBlobReference('transitions_copy')
        )
        all_paths_scores = self._crf_forward(
            input_data, initial_state, transitions_copy
        )
        loss = self.model.net.Sub(
            [all_paths_scores, path_total_score],
            core.ScopedBlobReference('crf_loss')
        )
        return loss

    def _pad_predictions(self, predictions):
        # This function introduces two extra labels, for beginning of sequence
        # and end of sequence, and makes the necessary updates to
        # the predictions blob
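        # The BOS row scores 0 for the BOS tag and a very low score for every
        # other tag, and the EOS row does the same for the EOS tag, so the
        # padded sequence is forced to start at BOS and end at EOS. The two
        # low-score columns appended to the real predictions keep the original
        # positions from taking the BOS/EOS tags.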

        low_score = -1000.0  # An arbitrary, very low number
        b_scores = np.array(
            [[low_score] * self.num_classes + [0, low_score]]
        ).astype(np.float32)

        e_scores = np.array(
            [[low_score] * self.num_classes + [low_score, 0]]
        ).astype(np.float32)

        b_scores = self.model.param_init_net.GivenTensorFill(
            [], "b_scores", shape=[1, self.num_classes_padded], values=b_scores
        )
        e_scores = self.model.param_init_net.GivenTensorFill(
            [], "e_scores", shape=[1, self.num_classes_padded], values=e_scores
        )

        zero_index = self.model.net.ConstantFill(
            [], shape=[1, ], value=0
        )
        length = self.model.net.Gather(
            [self.model.net.Shape([predictions]), zero_index],
        )
        length = self.model.net.Cast(length, to='int32')
        t_range = self.model.net.LengthsRangeFill(length)
        padding = self.model.net.ConstantFill([t_range], value=low_score)
        padding = self.model.net.ExpandDims(padding, dims=[1])
        padded_predictions, _ = self.model.net.Concat(
            [predictions, padding, padding],
            outputs=2,
            axis=1
        )
        padded_predictions_concat, _ = self.model.net.Concat(
            [b_scores, padded_predictions, e_scores],
            outputs=2,
            axis=0
        )
        return padded_predictions_concat

    def _pad_labels(self, labels):
        bos_i = self.num_classes
        eos_i = self.num_classes + 1
        bos_i_b = self.model.param_init_net.ConstantFill(
            [], shape=[1], value=bos_i
        )
        eos_i_b = self.model.param_init_net.ConstantFill(
            [], shape=[1], value=eos_i
        )
        labels = self.model.net.Cast([labels], to='int64')
        padded_labels, _ = self.model.net.Concat(
            [bos_i_b, labels, eos_i_b],
            axis=0,
            outputs=2
        )
        return padded_labels

    def _path_binary_scores(self, labels, transitions, seq_lengths=None):
        column_ids, _ = self.model.net.RemovePadding(
            [labels],
            outputs=2,
            padding_width=1,
            end_padding_width=0
        )
        row_ids, _ = self.model.net.RemovePadding(
            [labels],
            outputs=2,
            padding_width=0,
            end_padding_width=1
        )
        # Since there is no multi-dimensional gather, we flatten the matrix to
        # a 1-d vector, transform the ids to (row_ids * num_columns +
        # column_ids), and do the gather in 1-d
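        # For example, with num_classes_padded = 5, the transition score at
        # (row=2, col=3) lives at flattened index 2 * 5 + 3 = 13.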
        num_columns_blob = self.model.net.ConstantFill(
            [row_ids],
            value=self.num_classes_padded,
        )
        flattened_ids = self.model.net.Mul([row_ids, num_columns_blob])
        flattened_ids = self.model.net.Add([flattened_ids, column_ids])
        flattened_transitions = self.model.net.FlattenToVec([transitions])
        entries = self.model.net.Gather(
            [flattened_transitions, flattened_ids],
            dense_gradient=True
        )
        return self.model.ReduceFrontSum(entries)

    def _gather_entries_sum(self, in_data, indices, index_size):
        indices = self.model.net.Cast([indices], to='int64')
        index_size_blob = self.model.param_init_net.ConstantFill(
            [],
            shape=[1],
            value=index_size,
        )
        query_one_hot = self.model.net.OneHot(
            [indices, index_size_blob]
        )
        flattend_query = self.model.net.FlattenToVec(query_one_hot)
        flattend_data = self.model.net.FlattenToVec(in_data)
        query_scores = self.model.net.DotProduct(
            [flattend_query, flattend_data]
        )
        final_sum = self.model.net.ReduceFrontSum([query_scores])
        return final_sum

    def _crf_forward(
        self,
        input_blob,
        initial_state,
        transitions_copy,
        seq_lengths=None
    ):
        # Build the RNN net and get the last timestep output
        out_last = self.build_crf_net(
            input_blob, initial_state, transitions_copy
        )
        out_last, _ = self.model.net.Reshape(
            [out_last],
            outputs=2,
            shape=(self.num_classes_padded,)
        )
        zero_segment_id = self.model.param_init_net.ConstantFill(
            [],
            value=0,
            shape=[self.num_classes_padded],
            dtype=core.DataType.INT32,
        )

        # Compute the accumulated total score of all the paths
        accum_score = self.model.net.SortedSegmentRangeLogSumExp(
            [out_last, zero_segment_id]
        )
        accum_score, _ = self.model.net.Reshape(
            accum_score,
            outputs=2,
            shape=()
        )
        return accum_score

    def build_crf_net(self, input_blob, initial_state, transitions):
        '''
        Adds the crf_net recurrent operator to the model.

        model: model_helper.ModelHelper object that the new operators
        will be added to

        input_blob: the input sequence in a format T x N x D
        where T is sequence size, N - batch size and D - input dimension
        ##Only supports batch-size 1##

        seq_lengths: blob containing sequence lengths (unused)
        '''
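        # At each timestep the step net computes, for every tag j,
        #   cell_t[j] = logsumexp_i(cell_t_prev[i] + input_t[j] + transitions[i, j])
        # which is the forward-algorithm recurrence over the padded tag set.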

        scope = 'crf_net'

        def s(name):
            ''
            # We have to manually scope due to our internal/external blob
            # relationships.
            return "{}/{}".format(str(scope), str(name))

        step_model = model_helper.ModelHelper(name='crf_step',
                                              param_model=self.model)
        input_t, cell_t_prev, _ = (
            step_model.net.AddExternalInputs(
                core.ScopedBlobReference('input_t'),
                core.ScopedBlobReference('cell_t_prev'),
                transitions
            )
        )
        zero_segment_id = step_model.param_init_net.ConstantFill(
            [],
            [s('zero_segment_id')],
            value=0,
            shape=[self.num_classes_padded],
            dtype=core.DataType.INT32,
        )

        # A hack to bypass model cloning for test
        step_model.param_init_net.AddExternalOutput(zero_segment_id)
        """ the CRF step """
        # Do tile
        prev_transpose = brew.transpose(
            step_model,
            cell_t_prev,
            [s('prev_transpose')],
            axes=(0, 2, 1),
        )
        prev_tiled = step_model.net.Tile(
            prev_transpose,
            [s('prev_tiled')],
            tiles=self.num_classes_padded,
            axis=2,
        )
        input_t_tiled = step_model.net.Tile(
            input_t,
            [s('input_t_tiled')],
            tiles=self.num_classes_padded,
            axis=1,
        )
        input_with_prev = step_model.net.Add(
            [prev_tiled, input_t_tiled],
            [s('input_with_prev')]
        )
        all_with_transitions = step_model.net.Add(
            [input_with_prev, transitions],
            [s('prev_with_transitions')],
            broadcast=1,
            use_grad_hack=1,
        )
        all_with_transitions_reshaped, _ = step_model.net.Reshape(
            all_with_transitions,
            [s('all_with_transitions_reshaped'), s('all_with_transitions_orig')],
            shape=(self.num_classes_padded, self.num_classes_padded)
        )
        cell_t = step_model.net.SortedSegmentRangeLogSumExp(
            [all_with_transitions_reshaped, zero_segment_id],
            [s('cell_t')],
        )
        step_model.net.AddExternalOutputs(cell_t)
        """ recurrent network """
        cell_input_blob = initial_state
        out_all, out_last = recurrent.recurrent_net(
            net=self.model.net,
            cell_net=step_model.net,
            inputs=[(input_t, input_blob)],
            initial_cell_inputs=[
                (cell_t_prev, cell_input_blob),
            ],
            links={
                cell_t_prev: cell_t,
            },
            scope=scope,
            outputs_with_grads=(1,)
        )
        return out_last

    def update_predictions(self, classes):

        def crf_update_predictions_op(inputs, outputs):
            # This operator computes the best path of classes by performing
            # Viterbi decoding and then updates the predictions so that the
            # tag on the best path has the highest score among the others
            predictions = inputs[0].data
            transitions = inputs[1].data
            predictions_shape = inputs[0].shape
            outputs[0].reshape(predictions_shape)

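            # trellis[t, j] holds the score of the best tag path that ends in
            # tag j at timestep t; backpointers[t, j] records the previous tag
            # on that path.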
            trellis = np.zeros(predictions_shape)
            backpointers = np.zeros(predictions_shape, dtype=np.int32)
            trellis[0] = predictions[0]

            for t in range(1, predictions_shape[0]):
                v = np.expand_dims(trellis[t - 1], 1) + transitions
                trellis[t] = predictions[t] + np.max(v, 0)
                backpointers[t] = np.argmax(v, 0)

            viterbi = [np.argmax(trellis[-1])]
            for bp in reversed(backpointers[1:]):
                viterbi.append(bp[viterbi[-1]])
            viterbi.reverse()

            new_predictions = np.zeros(predictions_shape)
            old_bests = []
            for i, w_predictions in enumerate(predictions):
                # Get the current tag with the maximum score
                new_predictions[i] = predictions[i]
                old_best = np.argmax(w_predictions)
                old_bests.append(old_best)
                # Swap the scores of the current best tag and the tag on the
                # Viterbi path
                w_predictions[viterbi[i]], w_predictions[old_best] = \
                    w_predictions[old_best], w_predictions[viterbi[i]]
                new_predictions[i] = w_predictions
            # Remove the BOS and EOS entries from the predictions matrix
            orig_predictions = new_predictions[1:-1, 0:-2]
            outputs[0].reshape(orig_predictions.shape)
            outputs[0].data[...] = orig_predictions

        padded_classes = self._pad_predictions(classes)
        new_classes = self.model.net.Python(crf_update_predictions_op)(
            [padded_classes, self.transitions],
            core.ScopedBlobReference('post_crf_classes')
        )
        return new_classes
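

# Illustrative usage sketch (not part of the CRFWithLoss API): it wires the
# layer into a ModelHelper and evaluates the loss once. The blob names
# 'scores' and 'labels', the toy shapes, and the helper name
# _example_crf_usage are assumptions made for this sketch; the layer itself
# expects batch_size=1, i.e. unary scores of shape [seq_length, num_classes]
# and integer labels of shape [seq_length].
def _example_crf_usage(num_classes=4, seq_length=5):
    from caffe2.python import workspace

    workspace.FeedBlob(
        'scores',
        np.random.randn(seq_length, num_classes).astype(np.float32)
    )
    workspace.FeedBlob(
        'labels',
        np.random.randint(num_classes, size=seq_length).astype(np.int64)
    )

    model = model_helper.ModelHelper(name='crf_example')
    crf_layer = CRFWithLoss(model, num_classes)
    loss = crf_layer.crf_loss('scores', 'labels')
    model.AddGradientOperators([loss])

    workspace.RunNetOnce(model.param_init_net)
    workspace.RunNetOnce(model.net)
    return workspace.FetchBlob(loss)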