1 #include "caffe2/operators/distance_op.h" 6 bool SquaredL2DistanceOp<float, CPUContext>::RunOnDevice() {
9 auto* distance = Output(0);
10 CAFFE_ENFORCE_EQ(X.ndim(), Y.ndim());
11 for (
int i = 0; i < X.ndim(); ++i) {
12 CAFFE_ENFORCE_EQ(X.dim32(i), Y.dim32(i));
14 int N = X.ndim() > 0 ? X.dim32(0) : 1;
16 int D = N > 0 ? X.size() / N : 0;
17 float* distance_data = distance->mutable_data<
float>();
18 const float* X_data = X.data<
float>();
19 const float* Y_data = Y.data<
float>();
20 for (
int i = 0; i < N; ++i) {
21 float Xscale, Yscale, cross;
22 math::Dot<float, CPUContext>(
23 D, X_data + i * D, X_data + i * D, &Xscale, &context_);
24 math::Dot<float, CPUContext>(
25 D, Y_data + i * D, Y_data + i * D, &Yscale, &context_);
26 math::Dot<float, CPUContext>(
27 D, X_data + i * D, Y_data + i * D, &cross, &context_);
28 distance_data[i] = (Xscale + Yscale) * 0.5 - cross;
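// Why (Xscale + Yscale) * 0.5 - cross is half the squared L2 distance: for
// each row, (x.x + y.y)/2 - x.y == ((x - y).(x - y))/2. The helper below is
// a hedged, illustrative reference (not part of the original file) that a
// unit test could compare the BLAS-based kernel above against.
namespace {
inline float SquaredL2RowReference(const float* x, const float* y, int D) {
  float acc = 0.f;
  for (int j = 0; j < D; ++j) {
    const float d = x[j] - y[j];
    acc += d * d;  // accumulate (x_j - y_j)^2
  }
  return 0.5f * acc;  // matches (||x||^2 + ||y||^2)/2 - x.y above
}
} // namespace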
template <>
bool L1DistanceOp<float, CPUContext>::RunOnDevice() {
  auto& X = Input(0);
  auto& Y = Input(1);
  auto* distance = Output(0);
  CAFFE_ENFORCE_EQ(X.ndim(), Y.ndim());
  for (int i = 0; i < X.ndim(); ++i) {
    CAFFE_ENFORCE_EQ(X.dim32(i), Y.dim32(i));
  }
  int N = X.ndim() > 0 ? X.dim32(0) : 1;
  distance->Resize(N);
  int D = N > 0 ? X.size() / N : 0;

  const float* X_data = X.data<float>();
  const float* Y_data = Y.data<float>();

  for (int i = 0; i < N; ++i) {
    (distance->mutable_data<float>())[i] =
        (ConstEigenVectorMap<float>(X_data + i * D, D).array() -
         ConstEigenVectorMap<float>(Y_data + i * D, D).array())
            .abs()
            .sum();
  }
  return true;
}
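// The Eigen expression above evaluates lazily: the subtraction, abs(), and
// sum() fuse into a single pass over the row, with no temporary vector
// allocated. Equivalent plain loop (illustrative sketch only):
//
//   float acc = 0.f;
//   for (int j = 0; j < D; ++j) {
//     acc += std::fabs(X_data[i * D + j] - Y_data[i * D + j]);
//   }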
template <>
bool L1DistanceGradientOp<float, CPUContext>::RunOnDevice() {
  auto& X = Input(0);
  auto& Y = Input(1);
  auto& dDistance = Input(2);
  auto* dX = Output(0);
  auto* dY = Output(1);
  CAFFE_ENFORCE_EQ(X.ndim(), Y.ndim());
  for (int i = 0; i < X.ndim(); ++i) {
    CAFFE_ENFORCE_EQ(X.dim32(i), Y.dim32(i));
  }
  int N = X.ndim() > 0 ? X.dim32(0) : 1;
  int D = N > 0 ? X.size() / N : 0;
  CAFFE_ENFORCE(dDistance.ndim() == 1);
  CAFFE_ENFORCE(dDistance.dim32(0) == N);
  dX->ResizeLike(X);
  dY->ResizeLike(Y);

  for (int i = 0; i < N; ++i) {
    auto offset = i * D;
    for (int j = 0; j < D; ++j) {
      const float temp =
          (X.data<float>())[offset + j] - (Y.data<float>())[offset + j];
      const float kEps = 1e-12f;
      if (temp < -kEps) {
        dX->mutable_data<float>()[offset + j] = -(dDistance.data<float>())[i];
        dY->mutable_data<float>()[offset + j] = (dDistance.data<float>())[i];
      } else if (temp > kEps) {
        dX->mutable_data<float>()[offset + j] = (dDistance.data<float>())[i];
        dY->mutable_data<float>()[offset + j] = -(dDistance.data<float>())[i];
      } else {
        dX->mutable_data<float>()[offset + j] = 0;
        dY->mutable_data<float>()[offset + j] = 0;
      }
    }
  }
  return true;
}
template <>
bool CosineSimilarityOp<float, CPUContext>::RunOnDevice() {
  auto& X = Input(X_IN);
  auto& Y = Input(Y_IN);
  auto* result = Output(COS_OUT);
  CAFFE_ENFORCE_EQ(X.ndim(), Y.ndim());
  for (int i = 0; i < X.ndim(); ++i) {
    CAFFE_ENFORCE_EQ(X.dim32(i), Y.dim32(i));
  }
  const int N = X.ndim() > 0 ? X.dim32(0) : 1;
  const int D = X.size_from_dim(1);
  result->Resize(N);
  float* result_data = result->mutable_data<float>();
  const float* X_data = X.data<float>();
  const float* Y_data = Y.data<float>();
  float X2, Y2;
  const float kEps = 1e-12f;
  for (int i = 0; i < N; ++i) {
    auto offset = i * D;
    math::Dot<float, CPUContext>(
        D, X_data + offset, X_data + offset, &X2, &context_);
    math::Dot<float, CPUContext>(
        D, Y_data + offset, Y_data + offset, &Y2, &context_);
    math::Dot<float, CPUContext>(
        D, X_data + offset, Y_data + offset, result_data + i, &context_);
    result_data[i] /= std::sqrt(std::max(X2, kEps) * std::max(Y2, kEps));
  }
  return true;
}
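// Row-wise cosine similarity: result_i = <x_i, y_i> / (||x_i|| * ||y_i||),
// with the squared norms clamped to kEps before the square root so an
// all-zero row yields 0 instead of NaN. Hedged numeric example (values are
// illustrative only): x = [1, 0], y = [1, 1] gives 1 / sqrt(1 * 2) ~ 0.7071.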
template <>
bool CosineSimilarityGradientOp<float, CPUContext>::RunOnDevice() {
  auto& X = Input(X_IN);
  auto& Y = Input(Y_IN);
  auto& dCos = Input(DER_COS_IN);
  auto* dX = Output(DER_X_OUT);
  auto* dY = Output(DER_Y_OUT);
  const int N = X.ndim() > 0 ? X.dim32(0) : 1;
  const int D = X.size_from_dim(1);
  CAFFE_ENFORCE(X.ndim() == Y.ndim());
  for (int i = 0; i < X.ndim(); ++i) {
    CAFFE_ENFORCE(X.dim32(i) == Y.dim32(i));
  }
  CAFFE_ENFORCE(dCos.ndim() == 1);
  CAFFE_ENFORCE(dCos.dim32(0) == N);
  dX->ResizeLike(X);
  dY->ResizeLike(Y);

  const auto* X_data = X.template data<float>();
  const auto* Y_data = Y.template data<float>();
  const auto* dCos_data = dCos.template data<float>();
  auto* dX_data = dX->template mutable_data<float>();
  auto* dY_data = dY->template mutable_data<float>();
  float XN, YN, XY;
  const float kEps = 1e-12f;
  for (int i = 0; i < N; ++i) {
    auto offset = i * D;

    // ||x||
    math::Dot<float, CPUContext>(
        D, X_data + offset, X_data + offset, &XN, &context_);
    XN = std::sqrt(std::max(XN, kEps));
    // ||y||
    math::Dot<float, CPUContext>(
        D, Y_data + offset, Y_data + offset, &YN, &context_);
    YN = std::sqrt(std::max(YN, kEps));
    // ||x|| * ||y||
    float XYN = XN * YN;
    // x^T y
    math::Dot<float, CPUContext>(
        D, X_data + offset, Y_data + offset, &XY, &context_);

    math::Scale<float, CPUContext>(
        D, dCos_data[i] / XYN, Y_data + offset, dX_data + offset, &context_);
    math::Axpy<float, CPUContext>(
        D,
        -dCos_data[i] * XY / (XN * XN * XYN),
        X_data + offset,
        dX_data + offset,
        &context_);
    math::Scale<float, CPUContext>(
        D, dCos_data[i] / XYN, X_data + offset, dY_data + offset, &context_);
    math::Axpy<float, CPUContext>(
        D,
        -dCos_data[i] * XY / (YN * YN * XYN),
        Y_data + offset,
        dY_data + offset,
        &context_);
  }
  return true;
}
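// Derivation behind each Scale + Axpy pair above: with c = x.y / (|x||y|),
//   dc/dx = y / (|x||y|) - (x.y) * x / (|x|^3 |y|),
// so dX = dCos * y / XYN (the Scale) minus dCos * XY / (XN^2 * XYN) * x
// (the Axpy), and symmetrically for dY. Axpy computes out += alpha * in,
// which is why the correction term accumulates into the scaled result.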
template <>
bool DotProductOp<float, CPUContext>::RunOnDevice() {
  auto& X = Input(X_IN);
  auto& Y = Input(Y_IN);
  auto* result = Output(DOT_OUT);
  CAFFE_ENFORCE_EQ(X.ndim(), Y.ndim());
  for (int i = 0; i < X.ndim(); ++i) {
    CAFFE_ENFORCE_EQ(X.dim32(i), Y.dim32(i), "dimension at ", i);
  }
  int N, D;
  if (X.size() > 0) {
    N = X.ndim() > 0 ? X.dim32(0) : 1;
    D = X.size() / N;
  } else {
    N = 0;
    D = 0;
  }
  result->Resize(N);

  float* result_data = result->template mutable_data<float>();
  const float* X_data = X.template data<float>();
  const float* Y_data = Y.template data<float>();
  for (int i = 0; i < N; ++i) {
    auto offset = i * D;
    math::Dot<float, CPUContext>(
        D, X_data + offset, Y_data + offset, result_data + i, &context_);
  }
  return true;
}
OpSchema::Cost CostInferenceForDotProduct(
    const OperatorDef& def,
    const vector<TensorShape>& in) {
  struct OpSchema::Cost c = PointwiseCostInference<1>(def, in);
  c.params_bytes = 0;
  return c;
}
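// Interpretation (hedged, inferred from the template argument rather than
// any original comment): PointwiseCostInference<1> models the op as roughly
// one arithmetic op per input element, and zeroing params_bytes records
// that the op reads no trainable parameters.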
template <>
bool DotProductGradientOp<float, CPUContext>::RunOnDevice() {
  auto& X = Input(X_IN);
  auto& Y = Input(Y_IN);
  auto& dDot = Input(DER_DOT_IN);
  auto* dX = Output(DER_X_OUT);
  auto* dY = Output(DER_Y_OUT);
  int N, D;
  if (X.size() > 0) {
    N = X.ndim() > 0 ? X.dim32(0) : 1;
    D = X.size() / N;
  } else {
    N = 0;
    D = 0;
  }
  CAFFE_ENFORCE(X.ndim() == Y.ndim());
  for (int i = 0; i < X.ndim(); ++i) {
    CAFFE_ENFORCE(X.dim32(i) == Y.dim32(i));
  }
  CAFFE_ENFORCE(dDot.ndim() == 1);
  CAFFE_ENFORCE(dDot.dim32(0) == N);
  dX->ResizeLike(X);
  dY->ResizeLike(Y);

  const auto* X_data = X.template data<float>();
  const auto* Y_data = Y.template data<float>();
  const auto* dDot_data = dDot.template data<float>();
  auto* dX_data = dX->template mutable_data<float>();
  auto* dY_data = dY->template mutable_data<float>();
  for (int i = 0; i < N; ++i) {
    auto offset = i * D;
    math::Scale<float, CPUContext>(
        D, dDot_data[i], X_data + offset, dY_data + offset, &context_);
    math::Scale<float, CPUContext>(
        D, dDot_data[i], Y_data + offset, dX_data + offset, &context_);
  }
  return true;
}
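// Since z_i = <x_i, y_i>, the gradients are dX_i = dz_i * y_i and
// dY_i = dz_i * x_i, which is exactly what the two Scale calls compute
// (note the deliberate swap: X scales into dY and Y scales into dX).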
template <>
bool DotProductWithPaddingOp<float, CPUContext>::RunOnDevice() {
  auto& X = Input(X_IN);
  auto& Y = Input(Y_IN);
  auto* result = Output(DOT_OUT);
  CAFFE_ENFORCE_EQ(X.ndim(), Y.ndim());
  CAFFE_ENFORCE_EQ(X.dim32(0), Y.dim32(0));

  int N, D, DX, DY, restD;
  if (X.size() > 0) {
    N = X.ndim() > 0 ? X.dim32(0) : 1;
    DX = X.size() / N;
    DY = Y.size() / N;
  } else {
    N = 0;
    DX = 0;
    DY = 0;
  }
  D = std::min(DX, DY);
  restD = std::max(DX, DY) - D;
  result->Resize(N);
  float* result_data = result->mutable_data<float>();
  const float* X_data = X.data<float>();
  const float* Y_data = Y.data<float>();
  for (int i = 0; i < N; ++i) {
    auto offsetX = i * DX, offsetY = i * DY;
    if (replicate_) {
      // L_data points at the longer row, S_data at the shorter one.
      const float *L_data, *S_data;
      int DL, DS;
      if (DX > DY) {
        L_data = X_data + offsetX;
        S_data = Y_data + offsetY;
        DL = DX;
        DS = DY;
      } else {
        L_data = Y_data + offsetY;
        S_data = X_data + offsetX;
        DL = DY;
        DS = DX;
      }
      float sum = 0.0;
      float tmp = 0.0;
      for (int j = 0; j < DL / DS; j++) {
        math::Dot<float, CPUContext>(
            DS, L_data + j * DS, S_data, &tmp, &context_);
        sum += tmp;
      }
      *(result_data + i) = sum;
    } else {
      math::Dot<float, CPUContext>(
          D, X_data + offsetX, Y_data + offsetY, result_data + i, &context_);
    }

    if (!replicate_ && DX != DY) {
      const float* rest_data;
      float rest_sum = 0;
      if (DX > DY) {
        rest_data = X_data + offsetX + D;
      } else {
        rest_data = Y_data + offsetY + D;
      }
      math::Sum<float, CPUContext>(restD, rest_data, &rest_sum, &context_);
      result_data[i] += rest_sum * pad_value_;
    }
  }
  return true;
}
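// Hedged worked example (values illustrative only). For rows
// x = [1, 2, 3, 4] (DX = 4) and y = [1, 2] (DY = 2):
//  - replicate = true:  dot([1,2],[1,2]) + dot([3,4],[1,2]) = 5 + 11 = 16,
//    i.e. the shorter row is tiled across the longer one.
//  - replicate = false: dot over the common prefix, 1*1 + 2*2 = 5, plus
//    pad_value_ * (3 + 4) for the leftover tail of the longer row.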
REGISTER_CPU_OPERATOR(SquaredL2Distance,
                      SquaredL2DistanceOp<float, CPUContext>);
REGISTER_CPU_OPERATOR(SquaredL2DistanceGradient,
                      SquaredL2DistanceGradientOp<float, CPUContext>);

OPERATOR_SCHEMA(SquaredL2Distance)
    .NumInputs(2)
    .NumOutputs(1)
    .IdenticalTypeAndShapeOfInputDim(0, 0)
    .SetDoc(R"DOC(
Given two input float tensors X and Y, produces one output float tensor
of the squared L2 distance between X and Y, computed row-wise as
||X - Y||^2 / 2.
)DOC")
    .Input(0, "X", "1D or 2D input tensor")
    .Input(1, "Y", "1D or 2D input tensor (must have the same shape as X)")
    .Output(0, "Z", "1D output tensor");

OPERATOR_SCHEMA(SquaredL2DistanceGradient).NumInputs(3).NumOutputs(2);

class GetSquaredL2DistanceGradient : public GradientMakerBase {
  using GradientMakerBase::GradientMakerBase;
  vector<OperatorDef> GetGradientDefs() override {
    return SingleGradientDef(
        "SquaredL2DistanceGradient",
        "",
        vector<string>{I(0), I(1), GO(0)},
        vector<string>{GI(0), GI(1)});
  }
};
REGISTER_GRADIENT(SquaredL2Distance, GetSquaredL2DistanceGradient);
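// The gradient makers in this file all follow the same wiring: the gradient
// op receives the forward inputs plus the output gradient, I(0)=X, I(1)=Y,
// GO(0)=dZ, and writes the input gradients GI(0)=dX and GI(1)=dY.
// Schematically (hedged sketch, not generated output):
//   SquaredL2DistanceGradient(["X", "Y", "dZ"]) -> ["dX", "dY"]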
REGISTER_CPU_OPERATOR(L1Distance, L1DistanceOp<float, CPUContext>);
REGISTER_CPU_OPERATOR(
    L1DistanceGradient,
    L1DistanceGradientOp<float, CPUContext>);

OPERATOR_SCHEMA(L1Distance)
    .NumInputs(2)
    .NumOutputs(1)
    .IdenticalTypeAndShapeOfInputDim(0, 0)
    .SetDoc(R"DOC(
Given two input float tensors X and Y, produces one output float tensor
of the L1 distance between X and Y, computed as L1(x, y) = sum_j |x_j - y_j|.
)DOC")
    .Input(0, "X", "1D or 2D input tensor")
    .Input(1, "Y", "1D or 2D input tensor (must have the same shape as X)")
    .Output(0, "Z", "1D output tensor");

OPERATOR_SCHEMA(L1DistanceGradient).NumInputs(3).NumOutputs(2);

class GetL1DistanceGradient : public GradientMakerBase {
  using GradientMakerBase::GradientMakerBase;
  vector<OperatorDef> GetGradientDefs() override {
    return SingleGradientDef(
        "L1DistanceGradient",
        "",
        vector<string>{I(0), I(1), GO(0)},
        vector<string>{GI(0), GI(1)});
  }
};
REGISTER_GRADIENT(L1Distance, GetL1DistanceGradient);
REGISTER_CPU_OPERATOR(DotProduct, DotProductOp<float, CPUContext>);
REGISTER_CPU_OPERATOR(
    DotProductGradient,
    DotProductGradientOp<float, CPUContext>);

OPERATOR_SCHEMA(DotProduct)
    .NumInputs(2)
    .NumOutputs(1)
    .IdenticalTypeAndShapeOfInputDim(0, 0)
    .SetDoc(R"DOC(
Given two input float tensors X and Y, produces one output float tensor
of the dot product between X and Y.
)DOC")
    .Input(0, "X", "1D or 2D input tensor")
    .Input(1, "Y", "1D or 2D input tensor (must have the same shape as X)")
    .Output(0, "Z", "1D output tensor")
    .CostInferenceFunction(
        OpSchema::CostInferenceFunctionType(CostInferenceForDotProduct));

OPERATOR_SCHEMA(DotProductGradient).NumInputs(3).NumOutputs(2);

class GetDotProductGradient : public GradientMakerBase {
  using GradientMakerBase::GradientMakerBase;
  vector<OperatorDef> GetGradientDefs() override {
    return SingleGradientDef(
        "DotProductGradient",
        "",
        vector<string>{I(0), I(1), GO(0)},
        vector<string>{GI(0), GI(1)});
  }
};
REGISTER_GRADIENT(DotProduct, GetDotProductGradient);
REGISTER_CPU_OPERATOR(
    CosineSimilarity,
    CosineSimilarityOp<float, CPUContext>);
REGISTER_CPU_OPERATOR(
    CosineSimilarityGradient,
    CosineSimilarityGradientOp<float, CPUContext>);

OPERATOR_SCHEMA(CosineSimilarity)
    .NumInputs(2)
    .NumOutputs(1)
    .IdenticalTypeAndShapeOfInputDim(0, 0)
    .SetDoc(R"DOC(
Given two input float tensors X and Y, produces one output float tensor
of the cosine similarity between X and Y.
)DOC")
    .Input(0, "X", "1D or 2D input tensor")
    .Input(1, "Y", "1D or 2D input tensor (must have the same shape as X)")
    .Output(0, "Z", "1D output tensor");

OPERATOR_SCHEMA(CosineSimilarityGradient).NumInputs(3).NumOutputs(2);

class GetCosineSimilarityGradient : public GradientMakerBase {
  using GradientMakerBase::GradientMakerBase;
  vector<OperatorDef> GetGradientDefs() override {
    return SingleGradientDef(
        "CosineSimilarityGradient",
        "",
        vector<string>{I(0), I(1), GO(0)},
        vector<string>{GI(0), GI(1)});
  }
};
REGISTER_GRADIENT(CosineSimilarity, GetCosineSimilarityGradient);
REGISTER_CPU_OPERATOR(
    DotProductWithPadding,
    DotProductWithPaddingOp<float, CPUContext>);
REGISTER_CPU_OPERATOR(
    DotProductWithPaddingGradient,
    DotProductWithPaddingGradientOp<float, CPUContext>);

OPERATOR_SCHEMA(DotProductWithPadding)
    .NumInputs(2)
    .NumOutputs(1)
    .SetDoc(R"DOC(
Given two input float tensors X and Y with different shapes, produces one
output float tensor of the dot product between X and Y. We currently support
two strategies to achieve this. Before computing the normal dot product,
either 1) pad the smaller tensor (using pad_value) to the same shape as the
other one, or 2) replicate the smaller tensor to the same shape as the other
one. Note that the first dimension of X and Y must be equal; only the second
dimension of X or Y may differ.
)DOC")
    .Input(0, "X", "1D or 2D input tensor")
    .Input(1, "Y", "1D or 2D input tensor")
    .Output(0, "Z", "1D output tensor")
    .IdenticalTypeAndShapeOfInputDim(0, 0)
    .Arg("pad_value", "the padding value for tensors with smaller dimension")
    .Arg("replicate", "whether to replicate the smaller tensor or not");

OPERATOR_SCHEMA(DotProductWithPaddingGradient).NumInputs(3).NumOutputs(2);

class GetDotProductWithPaddingGradient : public GradientMakerBase {
  using GradientMakerBase::GradientMakerBase;
  vector<OperatorDef> GetGradientDefs() override {
    float pad_value = 0;
    bool replicate = false;
    if (ArgumentHelper::HasArgument(Def(), "pad_value")) {
      pad_value = GetArgument(Def(), "pad_value").f();
    }
    if (ArgumentHelper::HasArgument(Def(), "replicate")) {
      replicate = GetArgument(Def(), "replicate").i();
    }

    const auto dot_arg =
        vector<Argument>{MakeArgument<float>("pad_value", pad_value),
                         MakeArgument<bool>("replicate", replicate)};

    return SingleGradientDef(
        "DotProductWithPaddingGradient",
        "",
        vector<string>{I(0), I(1), GO(0)},
        vector<string>{GI(0), GI(1)},
        dot_arg);
  }
};
REGISTER_GRADIENT(DotProductWithPadding, GetDotProductWithPaddingGradient);
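// Unlike the other makers in this file, this one must forward the op's
// arguments so the backward pass pads/replicates the same way the forward
// pass did. Schematically (hedged sketch, not generated output): a forward op
//   DotProductWithPadding(["X", "Y"] -> ["Z"], pad_value=0.5, replicate=false)
// yields the gradient def
//   DotProductWithPaddingGradient(["X", "Y", "dZ"] -> ["dX", "dY"],
//                                 pad_value=0.5, replicate=false)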
} // namespace caffe2