1 #include "caffe2/operators/rank_loss_op.h" 9 inline T logLogit(T x) {
10 static const auto kMinLogDiff = std::log(std::numeric_limits<T>::epsilon());
12 if (x < kMinLogDiff) {
15 if (x > -kMinLogDiff) {
18 return std::log(std::exp(x) + 1);
22 template <
typename T,
class Context>
23 bool PairWiseLossOp<T, Context>::RunOnDevice() {
24 auto& X = Input(XVALUE);
25 auto& label = Input(LABEL);
26 auto* Y = Output(YVALUE);
28 int N = X.ndim() > 0 ? X.dim32(0) : 0;
31 Y->template mutable_data<T>();
35 const int32_t* lengths_vec;
37 if (InputSize() > LENGTHS) {
38 auto& lengths = Input(LENGTHS);
39 CAFFE_ENFORCE_EQ(lengths.ndim(), 1);
40 len_size = lengths.size();
41 lengths_vec = lengths.template data<int32_t>();
44 math::Sum<int, Context>(len_size, lengths_vec, &len_sum, &context_);
46 CAFFE_ENFORCE_EQ(len_sum, N);
53 auto* Ydata = Y->template mutable_data<T>();
57 (label.ndim() == 1) || (label.ndim() == 2 && label.dim32(1) == 1));
58 CAFFE_ENFORCE_EQ(label.dim32(0), N);
59 CAFFE_ENFORCE_EQ(1, D);
61 const auto* Xdata = X.template data<T>();
62 const auto* labelData = label.template data<T>();
64 for (
int idx = 0; idx < len_size; ++idx) {
67 for (
int i = offset; i < offset + lengths_vec[idx]; ++i) {
68 for (
int j = offset; j < i; ++j) {
69 if (std::abs(labelData[i] - labelData[j]) <
70 std::numeric_limits<T>::epsilon()) {
75 auto sign = labelData[i] > labelData[j] ? 1 : -1;
76 Ydata[idx] += logLogit(sign * (Xdata[j] - Xdata[i]));
80 Ydata[idx] /= numPairs;
82 offset += lengths_vec[idx];
87 template <
class T,
class Context>
88 bool PairWiseLossGradientOp<T, Context>::RunOnDevice() {
89 auto& X = Input(XVALUE);
90 auto& label = Input(LABEL);
91 auto& dY = Input(DYVALUE);
92 auto* dX = Output(DXVALUE);
93 int N = X.ndim() > 0 ? X.dim32(0) : 0;
94 CAFFE_ENFORCE_EQ(N, X.size());
96 (label.ndim() == 1) || (label.ndim() == 2 && label.dim32(1) == 1));
97 CAFFE_ENFORCE_EQ(label.dim32(0), N);
99 math::Set<T, CPUContext>(
100 dX->size(), 0.f, dX->template mutable_data<T>(), &context_);
106 const int32_t* lengths_vec;
108 if (InputSize() > LENGTHS) {
109 auto& lengths = Input(LENGTHS);
110 CAFFE_ENFORCE_EQ(lengths.ndim(), 1);
111 len_size = lengths.size();
112 lengths_vec = lengths.template data<int32_t>();
115 math::Sum<int, Context>(len_size, lengths_vec, &len_sum, &context_);
117 CAFFE_ENFORCE_EQ(len_sum, N);
122 CAFFE_ENFORCE_EQ(dY.ndim(), 1);
123 CAFFE_ENFORCE_EQ(dY.dim32(0), len_size);
125 const T* Xdata = X.template data<T>();
126 const T* dYdata = dY.template data<T>();
127 const T* labelData = label.template data<T>();
128 T* dXdata = dX->template mutable_data<T>();
130 for (
int idx = 0; idx < len_size; ++idx) {
132 for (
int i = offset; i < offset + lengths_vec[idx]; ++i) {
133 for (
int j = offset; j < i; ++j) {
134 if (std::abs(labelData[i] - labelData[j]) <
135 std::numeric_limits<T>::epsilon()) {
140 auto sign = labelData[i] > labelData[j] ? 1 : -1;
142 sign * dYdata[idx] / (1 + exp(-sign * (Xdata[j] - Xdata[i])));
148 for (
int i = offset; i < offset + lengths_vec[idx]; ++i) {
149 dXdata[i] /= numPairs;
152 offset += lengths_vec[idx];
158 REGISTER_CPU_OPERATOR(PairWiseLoss, PairWiseLossOp<float, CPUContext>);
159 REGISTER_CPU_OPERATOR(
160 PairWiseLossGradient,
161 PairWiseLossGradientOp<float, CPUContext>);
163 OPERATOR_SCHEMA(PairWiseLoss)
167 Operator computes the pair wise loss between all pairs within a batch 168 using the logit loss function on the difference in scores between pairs 173 "Input blob from the previous layer, which is almost always " 174 "the result of a softmax operation; X is a 2D array of size N x 1" 175 "where N is the batch size. For more info: " 176 "D. Sculley, Large Scale Learning to Rank. " 177 "https://www.eecs.tufts.edu/~dsculley/papers/large-scale-rank.pdf")
178 .Input(1,
"label",
"Blob containing the labels used to compare the input")
182 "Optional input blob that contains the lengths" 183 "of multiple sessions. The summation of this blob must be equal" 184 "to the size of blob X. If lengths blob is provided, the output" 185 "blob has the same size as lengths blob, and the cross entropy" 186 "is computed within each session.")
187 .Output(0,
"Y",
"Output blob after the cross entropy computation");
188 OPERATOR_SCHEMA(PairWiseLossGradient).NumInputs(3, 4).NumOutputs(1);
190 class GetPairWiseLossGradient :
public GradientMakerBase {
191 using GradientMakerBase::GradientMakerBase;
192 vector<OperatorDef> GetGradientDefs()
override {
193 vector<string> blob_names{I(0), I(1), GO(0)};
196 if (def_.input_size() == 3) {
197 blob_names.push_back(I(2));
199 return SingleGradientDef(
200 "PairWiseLossGradient",
"", blob_names, vector<string>{GI(0)});
203 REGISTER_GRADIENT(PairWiseLoss, GetPairWiseLossGradient);
// NOTE(review): the following sentence appears to be residue pasted from the
// caffe2 module-registry documentation and is unrelated to this operator:
// "A global dictionary that holds information about what Caffe2 modules have
// been loaded in the current runtime." — confirm and remove if so.