2 #ifndef CAFFE2_OPERATORS_LENGTHS_REDUCER_ROWWISE_8bits_OP_H_ 3 #define CAFFE2_OPERATORS_LENGTHS_REDUCER_ROWWISE_8bits_OP_H_ 6 #include "caffe2/core/context.h" 7 #include "caffe2/core/logging.h" 8 #include "caffe2/core/operator.h" 9 #include "caffe2/operators/reducer_functors.h" 10 #include "caffe2/perfkernels/embedding_lookup.h" 11 #include "caffe2/utils/math.h" 16 const float kEqualityThreshold = 1e-10f;
// kEqualityThreshold (defined at the end of the line above): the float-to-uint8
// row quantization code later in this file compares a row's (max - min) against
// it; when the range is smaller, the row is stored as all zeros with scale 1.0
// and bias = min instead of deriving the scale from the range.
23 class OutDataT =
float>
26 USE_OPERATOR_CONTEXT_FUNCTIONS;
// Operator entry point (overrides the base-class RunOnDevice).
// NOTE(review): the beginning of the statement below (original line 30) is
// missing from this listing; only its trailing arguments -- `this` and the
// INDICES input -- are visible. Presumably it dispatches on the element type of
// the INDICES tensor to DoRunWithType<IndexType>() -- confirm against upstream.
29 bool RunOnDevice()
override {
31 this, Input(INDICES));
// Worker for the operator, templated on the element type of the INDICES tensor.
// It validates the input shapes, sizes the output, and gathers the raw pointers
// and sizes needed for the reduction.
// NOTE(review): gaps in the embedded line numbers (42, 44, 46-47, 50-51, 57, 63,
// 65, 69-70, 73-91) show that original lines are absent from this listing, so
// some statements below are only fragments.
34 template <
typename IndexType>
35 bool DoRunWithType() {
36 auto& dataInput = Input(DATA);
37 auto& lengthsInput = Input(LENGTHS);
38 auto* output = Output(0);
// Raw float view of the SCALE_BIAS input (shape is checked further down).
39 auto* scale_bias = Input(SCALE_BIAS).template data<float>();
// LENGTHS must be 1-D; its length becomes the first dimension of the output.
40 CAFFE_ENFORCE_EQ(1, lengthsInput.ndim(),
"LENGTHS must be a vector");
41 const TIndex outputSize = lengthsInput.dim(0);
43 auto& indicesInput = Input(INDICES);
// Fragments of three shape checks on SCALE_BIAS (the macro heads and left-hand
// operands are missing from this listing). Judging by the messages, SCALE_BIAS
// must be 2-D, share its first dimension with DATA, and have second dimension 2.
45 2, Input(SCALE_BIAS).ndim(),
"scale_bias has to be matrix");
48 Input(SCALE_BIAS).dim(0),
49 "scale_bias must have the same first dim as data");
52 Input(SCALE_BIAS).dim(1),
53 "the second dim of scale_bias has to be equal to 2");
// INDICES must be 1-D; its entries are read with the dispatched IndexType.
54 CAFFE_ENFORCE_EQ(1, indicesInput.ndim(),
"INDICES must be a vector");
55 const IndexType* indices = indicesInput.template data<IndexType>();
// Number of index entries (first and only dimension of INDICES).
56 TIndex dataToReduceSize = indicesInput.dim(0);
// LENGTHS is read as int regardless of IndexType.
58 const int* lengths = lengthsInput.template data<int>();
// Output shape = DATA's shape with the first dimension replaced by the number
// of LENGTHS entries.
59 vector<TIndex> shape = dataInput.dims();
60 shape[0] = outputSize;
61 output->Resize(shape);
// Weight pointer starts out null and is then pointed at the WEIGHTS input.
// NOTE(review): the guard around the assignment (original line 63) is not
// visible; presumably it is conditional on USE_WEIGHTS -- confirm.
62 const float* w =
nullptr;
64 w = Input(WEIGHTS).template data<float>();
// Size of DATA from dimension 1 onward (presumably the element count of one row).
66 TIndex in_block_size = dataInput.size_from_dim(1);
67 OutDataT* out = output->template mutable_data<OutDataT>();
// DATA is read as uint8_t.
68 const uint8_t* input_data = dataInput.template data<uint8_t>();
71 const TIndex indices_size = indicesInput.size();
// First dimension of DATA.
72 const TIndex N = dataInput.dim(0);
// NOTE(review): original lines 73-91 (the rest of this function, presumably the
// lookup/reduction call that consumes the pointers and sizes gathered above,
// and the return) are missing from this listing.
// Input slot numbers. Each is offset by USE_WEIGHTS, so when USE_WEIGHTS is 1
// slot 1 is left free (presumably for WEIGHTS) and these move to 2, 3 and 4.
// NOTE(review): the start of this enumerator list (presumably the enum head and
// the DATA / WEIGHTS entries) is missing from this listing -- confirm.
92 INDICES = 1 + USE_WEIGHTS,
93 LENGTHS = 2 + USE_WEIGHTS,
94 SCALE_BIAS = 3 + USE_WEIGHTS
98 template <
class Context>
101 USE_OPERATOR_CONTEXT_FUNCTIONS;
// Quantizes the float input row by row into uint8 values and writes a
// (scale, bias) pair per row to the SCALE_BIAS output.
// Row i is dequantized as q * scale_bias[2*i] + scale_bias[2*i + 1].
// NOTE(review): gaps in the embedded line numbers (126, 131, 133-138) show that
// some original lines (presumably the `else`, part of the quantization
// expression, the closing braces and the return) are missing from this listing.
103 bool RunOnDevice()
override {
104 auto& input = Input(DATA_FLOAT);
105 auto* output = Output(DATA_UINT8);
106 auto* scale_bias = Output(SCALE_BIAS);
107 auto* input_data = input.template data<float>();
// Quantized output has the same shape as the input; SCALE_BIAS is
// [input.dim(0), 2].
108 output->ResizeLike(input);
109 vector<TIndex> scale_bias_dims = {input.dim(0), 2};
110 scale_bias->Resize(scale_bias_dims);
111 auto* output_data = output->template mutable_data<uint8_t>();
112 float* scale_bias_data = scale_bias->template mutable_data<float>();
// One block per index along dimension 0; block_size is the input's size from
// dimension 1 onward, used as the stride between consecutive blocks.
113 size_t n_blocks = input.dim(0);
114 size_t block_size = input.size_from_dim(1);
115 for (
size_t i = 0; i < n_blocks; ++i) {
// Eigen views over the i-th input row and the matching output row (no copies).
116 ConstEigenVectorArrayMap<float> input_row(
117 input_data + i * block_size, block_size);
118 EigenVectorArrayMap<uint8_t> output_row(
119 output_data + i * block_size, block_size);
// NOTE(review): confirm block_size > 0 is guaranteed before taking
// minCoeff/maxCoeff of the row.
120 auto min_element = input_row.minCoeff();
121 auto max_element = input_row.maxCoeff();
// (Near-)constant row: store scale 1 and bias = min, and zero the quantized
// row, so dequantization yields min for every element.
122 if (max_element - min_element < kEqualityThreshold) {
123 scale_bias_data[2 * i] = 1.0f;
124 scale_bias_data[2 * i + 1] = min_element;
125 memset(output_data + i * block_size, 0, block_size);
// NOTE(review): original line 126 (presumably `} else {`) is missing here.
// General case: scale = (max - min) / 255 and bias = min, so that min maps to 0
// and max maps to 255.
127 scale_bias_data[2 * i] = (max_element - min_element) / 255.0f;
128 scale_bias_data[2 * i + 1] = min_element;
129 const float inv_scale = 1.0f / scale_bias_data[2 * i];
// q = (x - bias) * inv_scale, cast to uint8_t.
// NOTE(review): original line 131, between the two lines below, is missing
// (presumably a rounding step before the cast) -- confirm.
130 output_row = ((input_row - scale_bias_data[2 * i + 1]) * inv_scale)
132 .template cast<uint8_t>();
// Names for this operator's input and output slots as used by RunOnDevice
// above: one input (DATA_FLOAT) and two outputs (DATA_UINT8, SCALE_BIAS).
139 INPUT_TAGS(DATA_FLOAT);
140 OUTPUT_TAGS(DATA_UINT8, SCALE_BIAS);
143 template <
class Context>
146 USE_OPERATOR_CONTEXT_FUNCTIONS;
// Converts row-wise quantized uint8 data back to float:
// output[i][j] = input[i][j] * scale_bias[2*i] + scale_bias[2*i + 1].
// NOTE(review): gaps in the embedded line numbers (153-155, 157-159, 164, 168,
// 176-180) show that some original lines (the heads of two shape checks, the
// closing braces and the return) are missing from this listing.
148 bool RunOnDevice()
override {
149 auto& input = Input(DATA_UINT8);
150 auto& scale_bias = Input(SCALE_BIAS);
151 auto* output = Output(DATA_FLOAT);
// SCALE_BIAS must be 2-D.
152 CAFFE_ENFORCE_EQ(2, scale_bias.ndim(),
"scale_bias has to be matrix");
// Message fragments of two further checks whose macro heads are missing;
// judging by the text, SCALE_BIAS must share its first dimension with the data
// and have second dimension 2.
156 "scale_bias must have the same first dim as data");
160 "the second dim of scale_bias has to be equal to 2");
// Output has the same shape as the quantized input.
161 output->ResizeLike(input);
162 auto* input_data = input.template data<uint8_t>();
163 auto* scale_bias_data = scale_bias.template data<float>();
165 auto* output_data = output->template mutable_data<float>();
// One block per index along dimension 0; block_size is the input's size from
// dimension 1 onward, used as the stride between consecutive blocks.
166 size_t block_size = input.size_from_dim(1);
167 size_t n_blocks = input.dim(0);
169 for (
size_t i = 0; i < n_blocks; ++i) {
// Eigen views over the i-th quantized row and the matching float output row.
170 ConstEigenVectorArrayMap<uint8_t> input_row(
171 input_data + i * block_size, block_size);
172 EigenVectorArrayMap<float> output_row(
173 output_data + i * block_size, block_size);
// Element-wise q * scale + bias using row i's (scale, bias) pair.
174 output_row = input_row.template cast<float>() * scale_bias_data[2 * i] +
175 scale_bias_data[2 * i + 1];
// Names for this operator's input and output slots as used by RunOnDevice
// above: two inputs (DATA_UINT8, SCALE_BIAS) and one output (DATA_FLOAT).
181 INPUT_TAGS(DATA_UINT8, SCALE_BIAS);
182 OUTPUT_TAGS(DATA_FLOAT);
185 #endif // CAFFE2_OPERATORS_LENGTHS_REDUCER_ROWWISE_8bits_OP_H_
void EmbeddingLookup(const TIndex block_size, const TIndex output_size, const TIndex index_size, const TIndex data_size, const InType *input, const IndexType *indices, const int *lengths, const float *weights, const float *scale_bias, bool normalize_by_lengths, OutType *out)
Embedding lookup with reduction.
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...