Caffe2 - C++ API
A deep learning, cross platform ML framework
fused_rowwise_8bit_conversion_ops.h
1 #ifndef CAFFE2_OPERATORS_FUSED_ROWWISE_8BIT_CONVERSION_OPS_H_
2 #define CAFFE2_OPERATORS_FUSED_ROWWISE_8BIT_CONVERSION_OPS_H_
3 
4 #include "caffe2/core/context.h"
5 #include "caffe2/core/logging.h"
6 #include "caffe2/core/operator.h"
7 #include "caffe2/operators/reducer_functors.h"
8 #include "caffe2/utils/math.h"
9 
10 namespace caffe2 {
11 
// Expands to an immediately-invoked lambda that yields true when the first
// (lowest-addressed) byte of a 32-bit integer holding 1 is itself 1, i.e. the
// host stores the least-significant byte first.
#define IS_LITTLE_ENDIAN                                        \
  [] {                                                          \
    const int32_t probe = 1;                                    \
    return *reinterpret_cast<const uint8_t*>(&probe) == 1;      \
  }()
17 
18 template <class Context>
20  public:
21  static constexpr float kEqualityThreshold = 1e-7f;
22  static constexpr float kEpsilon = 1e-8f;
23 
24  USE_OPERATOR_CONTEXT_FUNCTIONS;
25  USE_SIMPLE_CTOR_DTOR(FloatToFused8BitRowwiseQuantizedOp)
26 
27  bool RunOnDevice() override {
28  CAFFE_ENFORCE(IS_LITTLE_ENDIAN, "Unsupported endianness");
29 
30  const auto& input = Input(DATA_FLOAT);
31  auto* output = Output(DATA_FUSED_SCALE_BIAS_INT8);
32 
33  const auto input_rows = input.dim(0);
34  const auto input_columns = input.dim(1);
35  CAFFE_ENFORCE_EQ(input.ndim(), 2, "Expect input to be a matrix");
36 
37  // The "fused" representation stores the scale and bias with the row-wise
38  // quantized data in one tensor. Since we quantize with 8 bits (1 byte) and
39  // represent the scale and bias with 32-bit floats, we'll use the last 8
40  // bytes of each row for scale (4 bytes) and bias (4 bytes).
41  // | ... int8 data ... | scale | bias |
42  // | number_of_columns | 4B | 4B |
43  const std::vector<TIndex> output_dimensions = {input_rows,
44  input_columns + 8};
45  output->Resize(output_dimensions);
46 
47  const auto* input_data = input.template data<float>();
48  auto* output_data = output->template mutable_data<uint8_t>();
49  const auto output_columns = output->dim(1);
50 
51  for (size_t row = 0; row < input_rows; ++row) {
52  ConstEigenVectorArrayMap<float> input_row(
53  input_data + row * input_columns, input_columns);
54 
55  uint8_t* output_row = output_data + row * output_columns;
56  EigenVectorArrayMap<uint8_t> output_row_values(output_row, input_columns);
57  EigenVectorArrayMap<float> output_row_scale_bias(
58  reinterpret_cast<float*>(output_row + input_columns), 2);
59 
60  const float minimum_element = input_row.minCoeff();
61  const float maximum_element = input_row.maxCoeff();
62  const float range = maximum_element - minimum_element;
63 
64  output_row_scale_bias(0) = range / 255.0f;
65  output_row_scale_bias(1) = minimum_element;
66  const auto inverse_scale = 255.0f / (range + kEpsilon);
67  output_row_values = ((input_row - minimum_element) * inverse_scale)
68  .round()
69  .cast<uint8_t>();
70  }
71 
72  return true;
73  }
74 
75  private:
76  INPUT_TAGS(DATA_FLOAT);
77  OUTPUT_TAGS(DATA_FUSED_SCALE_BIAS_INT8);
78 };
79 
80 template <class Context>
82  public:
83  USE_OPERATOR_CONTEXT_FUNCTIONS;
84  USE_SIMPLE_CTOR_DTOR(Fused8BitRowwiseQuantizedToFloatOp)
85 
86  bool RunOnDevice() override {
87  CAFFE_ENFORCE(IS_LITTLE_ENDIAN, "Unsupported endianness");
88 
89  const auto& input = Input(DATA_FUSED_SCALE_BIAS_INT8);
90  auto* output = Output(DATA_FLOAT);
91 
92  const auto input_rows = input.dim(0);
93  const auto input_columns = input.dim(1);
94  CAFFE_ENFORCE_EQ(input.ndim(), 2, "Expect input to be a matrix");
95 
96  // The last 8 bytes per row are the scale and the bias. The rest of
97  // input_columns is the number of values in the original row.
98  const std::vector<TIndex> output_dimensions = {input_rows,
99  input_columns - 8};
100  output->Resize(output_dimensions);
101  const auto output_columns = output->dim(1);
102 
103  const auto* input_data = input.template data<uint8_t>();
104  auto* output_data = output->template mutable_data<float>();
105 
106  for (size_t row = 0; row < input_rows; ++row) {
107  const uint8_t* input_row = input_data + row * input_columns;
108  ConstEigenVectorArrayMap<uint8_t> input_row_values(
109  input_row, output_columns);
110  ConstEigenVectorArrayMap<float> input_row_scale_bias(
111  reinterpret_cast<const float*>(input_row + output_columns), 2);
112 
113  EigenVectorArrayMap<float> output_row(
114  output_data + row * output_columns, output_columns);
115 
116  output_row = input_row_values.cast<float>() * input_row_scale_bias(0) +
117  input_row_scale_bias(1);
118  }
119  return true;
120  }
121 
122  private:
123  INPUT_TAGS(DATA_FUSED_SCALE_BIAS_INT8);
124  OUTPUT_TAGS(DATA_FLOAT);
125 };
126 
127 #undef IS_LITTLE_ENDIAN
128 
129 } // namespace caffe2
130 
131 #endif // CAFFE2_OPERATORS_FUSED_ROWWISE_8BIT_CONVERSION_OPS_H_
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...