1 #include "caffe2/operators/batch_box_cox_op.h" 3 #include "caffe2/core/operator.h" 4 #include "caffe2/core/tensor.h" 8 #endif // CAFFE2_USE_MKL 17 void TileArrayIntoVector(
const T* a,
int D,
int K, vector<T>* b) {
19 for (
int k = 0; k < K; k++) {
20 std::copy(a, a + D, b->begin() + k * D);
// Expands `v` (n indices into a single row) in place into K*n indices that
// address K consecutive rows of width D: copy k is offset by k*D.
// With D == 0 the original indices are simply repeated K times.
void TileIndicesInPlace(std::vector<int>* v, int D, int K) {
  // NOTE(review): the size capture and resize were lost in the mangled
  // paste; `n` was otherwise undeclared and the writes below out of bounds.
  int n = v->size();
  v->resize(K * n);
  for (int k = 1; k < K; k++) {
    for (int j = 0; j < n; j++) {
      (*v)[k * n + j] = (*v)[j] + k * D;
    }
  }
}
36 void PackV(
const int N,
const T* a,
const int* ia, T* y);
38 void UnpackV(
const int N,
const T* a, T* y,
const int* iy);
40 void Pow(
const int N,
const T* a,
const T* b, T* y);
42 #define DELEGATE_PACKV_FUNCTION(T, OriginalFunc) \ 44 void PackV<T>(const int N, const T* a, const int* ia, T* y) { \ 45 OriginalFunc(N, a, ia, y); \ 47 DELEGATE_PACKV_FUNCTION(
float, vsPackV)
48 DELEGATE_PACKV_FUNCTION(
double, vdPackV)
49 #undef DELEGATE_PACKV_FUNCTION 51 #define DELEGATE_UNPACKV_FUNCTION(T, OriginalFunc) \ 53 void UnpackV<T>(const int N, const T* a, T* y, const int* iy) { \ 54 OriginalFunc(N, a, y, iy); \ 56 DELEGATE_UNPACKV_FUNCTION(
float, vsUnpackV)
57 DELEGATE_UNPACKV_FUNCTION(
double, vdUnpackV)
58 #undef DELEGATE_UNPACKV_FUNCTION 60 #define DELEGATE_SIMPLE_BINARY_FUNCTION(T, Funcname, OriginalFunc) \ 62 void Funcname<T>(const int N, const T* a, const T* b, T* y) { \ 63 OriginalFunc(N, a, b, y); \ 65 DELEGATE_SIMPLE_BINARY_FUNCTION(
float, Pow, vsPow)
66 DELEGATE_SIMPLE_BINARY_FUNCTION(
double, Pow, vdPow)
67 #undef DELEGATE_SIMPLE_BINARY_FUNCTION 70 #endif // CAFFE2_USE_MKL 74 bool BatchBoxCoxOp<CPUContext>::DoRunWithType() {
75 auto& data = Input(DATA);
76 auto& lambda1 = Input(LAMBDA1);
77 auto& lambda2 = Input(LAMBDA2);
78 CAFFE_ENFORCE_GE(data.ndim(), 1);
80 auto D = data.size_from_dim(1);
82 auto* output = Output(0);
83 output->ResizeLike(Input(DATA));
84 auto* output_ptr = output->template mutable_data<T>();
86 if (data.size() <= 0) {
90 CAFFE_ENFORCE_EQ(lambda1.size(), D);
91 CAFFE_ENFORCE_EQ(lambda2.size(), D);
93 const auto* data_ptr = data.template data<T>();
94 const auto* lambda1_ptr = lambda1.template data<T>();
95 const auto* lambda2_ptr = lambda2.template data<T>();
97 const T k_eps =
static_cast<T
>(1e-6);
100 if (min_block_size_ < 1) {
101 BoxCoxNaive(N, D, data_ptr, lambda1_ptr, lambda2_ptr, k_eps, output_ptr);
106 nonzeros_.reserve(D);
108 for (TIndex j = 0; j < D; j++) {
109 if (lambda1_ptr[j] == 0) {
112 nonzeros_.push_back(j);
117 const int K = std::min(N, (min_block_size_ + D - 1) / D);
122 TypedCachedBuffers<T>& b = GetBuffers<T>();
123 if (nonzeros_.size() == D) {
126 TileArrayIntoVector(lambda1_ptr, D, K, &b.lambda1_);
127 TileArrayIntoVector(lambda2_ptr, D, K, &b.lambda2_);
128 DCHECK_EQ(K * D, b.lambda1_.size());
129 DCHECK_EQ(K * D, b.lambda2_.size());
130 for (; i < N - K + 1; i += K, data_ptr += K * D, output_ptr += K * D) {
140 for (; i < N; i++, data_ptr += D, output_ptr += D) {
142 D, data_ptr, lambda1_ptr, lambda2_ptr, k_eps, output_ptr);
144 }
else if (zeros_.size() == D) {
147 TileArrayIntoVector(lambda2_ptr, D, K, &b.lambda2_z_);
148 DCHECK_EQ(K * D, b.lambda2_z_.size());
149 for (; i < N - K + 1; i += K, data_ptr += K * D, output_ptr += K * D) {
151 K * D, data_ptr, b.lambda2_z_.data(), k_eps, output_ptr);
154 for (; i < N; i++, data_ptr += D, output_ptr += D) {
155 BoxCoxZeroLambda(D, data_ptr, lambda2_ptr, k_eps, output_ptr);
158 int n = nonzeros_.size();
160 TileIndicesInPlace(&nonzeros_, 0, K);
161 TileIndicesInPlace(&zeros_, 0, K);
165 b.lambda1_.resize(nonzeros_.size());
166 b.lambda2_.resize(nonzeros_.size());
167 b.lambda2_z_.resize(zeros_.size());
168 PackV(nonzeros_.size(), lambda1_ptr, nonzeros_.data(), b.lambda1_.data());
169 PackV(nonzeros_.size(), lambda2_ptr, nonzeros_.data(), b.lambda2_.data());
170 PackV(zeros_.size(), lambda2_ptr, zeros_.data(), b.lambda2_z_.data());
173 b.accumulator_.resize(std::max(nonzeros_.size(), zeros_.size()));
177 zeros_.resize(D - n);
178 TileIndicesInPlace(&nonzeros_, D, K);
179 TileIndicesInPlace(&zeros_, D, K);
180 DCHECK_EQ(nonzeros_.size(), b.lambda1_.size());
181 DCHECK_EQ(nonzeros_.size(), b.lambda2_.size());
182 DCHECK_EQ(zeros_.size(), b.lambda2_z_.size());
183 for (; i < N - K + 1; i += K, data_ptr += K * D, output_ptr += K * D) {
192 b.accumulator_.data(),
197 zeros_.resize(D - n);
199 for (; i < N; i++, data_ptr += D, output_ptr += D) {
208 b.accumulator_.data(),
213 #else // CAFFE2_USE_MKL 214 BoxCoxNaive(N, D, data_ptr, lambda1_ptr, lambda2_ptr, k_eps, output_ptr);
215 #endif // CAFFE2_USE_MKL 220 template <
typename T>
221 void BatchBoxCoxOp<CPUContext>::BoxCoxNaive(
225 const T* lambda1_ptr,
226 const T* lambda2_ptr,
229 for (TIndex i = 0; i < N; i++) {
230 for (TIndex j = 0; j < D; j++, data_ptr++, output_ptr++) {
231 T lambda1_v = lambda1_ptr[j];
232 T lambda2_v = lambda2_ptr[j];
233 T tmp = std::max(*data_ptr + lambda2_v, k_eps);
234 if (lambda1_v == 0) {
235 *output_ptr = std::log(tmp);
237 *output_ptr = (std::pow(tmp, lambda1_v) - 1) / lambda1_v;
243 #ifdef CAFFE2_USE_MKL 246 template <
typename T>
247 void BatchBoxCoxOp<CPUContext>::BoxCoxNonzeroLambda(
254 caffe2::math::Add(D, data_ptr, lambda2, out, &context_);
255 for (TIndex j = 0; j < D; j++) {
256 out[j] = std::max(out[j], k_eps);
258 Pow(D, out, lambda1, out);
259 for (TIndex j = 0; j < D; j++) {
262 caffe2::math::Div(D, out, lambda1, out, &context_);
266 template <
typename T>
267 void BatchBoxCoxOp<CPUContext>::BoxCoxZeroLambda(
273 caffe2::math::Add(D, data_ptr, lambda2, output_ptr, &context_);
274 for (TIndex j = 0; j < D; j++) {
275 output_ptr[j] = std::max(output_ptr[j], k_eps);
277 caffe2::math::Log(D, output_ptr, output_ptr, &context_);
281 template <
typename T>
282 void BatchBoxCoxOp<CPUContext>::BoxCoxMixedLambda(
284 const vector<int>& nonzeros,
285 const vector<int>& zeros,
292 PackV(nonzeros.size(), data_ptr, nonzeros.data(), buffer);
293 BoxCoxNonzeroLambda(nonzeros.size(), buffer, lambda1, lambda2, k_eps, buffer);
294 UnpackV(nonzeros.size(), buffer, output_ptr, nonzeros.data());
296 PackV(zeros.size(), data_ptr, zeros.data(), buffer);
297 BoxCoxZeroLambda(zeros.size(), buffer, lambda2_z, k_eps, buffer);
298 UnpackV(zeros.size(), buffer, output_ptr, zeros.data());
302 #define DEFINE_CACHED_BUFFERS(T, tag) \ 305 BatchBoxCoxOp<CPUContext>::TypedCachedBuffers<T>& \ 306 BatchBoxCoxOp<CPUContext>::GetBuffers<T>() { \ 307 if (!buffers_ || buffers_->type_ != tag) { \ 308 buffers_.reset(new BatchBoxCoxOp<CPUContext>::TypedCachedBuffers<T>()); \ 309 buffers_->type_ = tag; \ 311 return *static_cast<TypedCachedBuffers<T>*>(buffers_.get()); \ 313 DEFINE_CACHED_BUFFERS(
float, 1);
314 DEFINE_CACHED_BUFFERS(
double, 2);
315 #undef DEFINE_CACHED_BUFFERS 317 #endif // CAFFE2_USE_MKL 321 REGISTER_CPU_OPERATOR(BatchBoxCox, BatchBoxCoxOp<CPUContext>);
322 OPERATOR_SCHEMA(BatchBoxCox)
325 .IdenticalTypeAndShapeOfInput(0)
326 .AllowInplace({{0, 0}})
328 Input `data` is a N * D matrix. Apply box-cox transform for each column. 329 `lambda1` and `lambda2` is of size D that defines the hyper-parameters for 330 the transform of each column `x` of the input `data`: 332 ln(x + lambda2), if lambda1 == 0 333 ((x + lambda2)^lambda1 - 1)/lambda1, if lambda1 != 0 336 .Input(0, "data",
"input float or double N * D matrix")
337 .Input(1,
"lambda1",
"tensor of size D with the same type as data")
338 .Input(2,
"lambda2",
"tensor of size D with the same type as data")
339 .Output(0,
"output",
"output matrix that applied box-cox transform");
341 GRADIENT_NOT_IMPLEMENTED_YET(BatchBoxCox);
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...