TDigestTest.cpp
/*
 * Copyright 2013-present Facebook, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <folly/stats/TDigest.h>

#include <chrono>
#include <random>

#include <folly/portability/GTest.h>

using namespace folly;

/*
 * Tests run at a reasonable speed with these settings, but it is good to
 * occasionally test with kNumRandomRuns = 3000 and kNumSamples = 50000.
 */
const int32_t kNumSamples = 3000;
const int32_t kNumRandomRuns = 30;
const int32_t kSeed = 0;

TEST(TDigest, Basic) {
  TDigest digest(100);

  std::vector<double> values;
  for (int i = 1; i <= 100; ++i) {
    values.push_back(i);
  }

  digest = digest.merge(values);

  EXPECT_EQ(100, digest.count());
  EXPECT_EQ(5050, digest.sum());
  EXPECT_EQ(50.5, digest.mean());
  EXPECT_EQ(1, digest.min());
  EXPECT_EQ(100, digest.max());

  EXPECT_EQ(1, digest.estimateQuantile(0.001));
  EXPECT_EQ(2.0 - 0.5, digest.estimateQuantile(0.01));
  EXPECT_EQ(50.375, digest.estimateQuantile(0.5));
  EXPECT_EQ(100.0 - 0.5, digest.estimateQuantile(0.99));
  EXPECT_EQ(100, digest.estimateQuantile(0.999));
}
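
// Illustrative sketch (not part of the original test suite): the pattern the
// Basic test exercises is batch-and-query. An application would typically
// collect raw samples, fold them into a digest with merge(), and read a
// percentile back with estimateQuantile(). The helper name and its
// latenciesMs parameter are hypothetical.
double estimateP99(const std::vector<double>& latenciesMs) {
  TDigest digest(100);
  // merge() returns a new digest containing the samples, so the result is
  // assigned back, as in the tests above.
  digest = digest.merge(latenciesMs);
  // estimateQuantile() maps a quantile in [0, 1] to an approximate value.
  return digest.estimateQuantile(0.99);
}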

TEST(TDigest, Merge) {
  TDigest digest(100);

  std::vector<double> values;
  for (int i = 1; i <= 100; ++i) {
    values.push_back(i);
  }
  digest = digest.merge(values);

  values.clear();
  for (int i = 101; i <= 200; ++i) {
    values.push_back(i);
  }
  digest = digest.merge(values);

  EXPECT_EQ(200, digest.count());
  EXPECT_EQ(20100, digest.sum());
  EXPECT_EQ(100.5, digest.mean());
  EXPECT_EQ(1, digest.min());
  EXPECT_EQ(200, digest.max());

  EXPECT_EQ(1, digest.estimateQuantile(0.001));
  EXPECT_EQ(4.0 - 1.5, digest.estimateQuantile(0.01));
  EXPECT_EQ(100.25, digest.estimateQuantile(0.5));
  EXPECT_EQ(200.0 - 1.5, digest.estimateQuantile(0.99));
  EXPECT_EQ(200, digest.estimateQuantile(0.999));
}

TEST(TDigest, MergeSmall) {
  TDigest digest(100);

  std::vector<double> values;
  values.push_back(1);

  digest = digest.merge(values);

  EXPECT_EQ(1, digest.count());
  EXPECT_EQ(1, digest.sum());
  EXPECT_EQ(1, digest.mean());
  EXPECT_EQ(1, digest.min());
  EXPECT_EQ(1, digest.max());

  EXPECT_EQ(1.0, digest.estimateQuantile(0.001));
  EXPECT_EQ(1.0, digest.estimateQuantile(0.01));
  EXPECT_EQ(1.0, digest.estimateQuantile(0.5));
  EXPECT_EQ(1.0, digest.estimateQuantile(0.99));
  EXPECT_EQ(1.0, digest.estimateQuantile(0.999));
}

TEST(TDigest, MergeLarge) {
  TDigest digest(100);

  std::vector<double> values;
  for (int i = 1; i <= 1000; ++i) {
    values.push_back(i);
  }
  digest = digest.merge(values);

  EXPECT_EQ(1000, digest.count());
  EXPECT_EQ(500500, digest.sum());
  EXPECT_EQ(500.5, digest.mean());
  EXPECT_EQ(1, digest.min());
  EXPECT_EQ(1000, digest.max());

  EXPECT_EQ(1.5, digest.estimateQuantile(0.001));
  EXPECT_EQ(10.5, digest.estimateQuantile(0.01));
  EXPECT_EQ(500.25, digest.estimateQuantile(0.5));
  EXPECT_EQ(990.25, digest.estimateQuantile(0.99));
  EXPECT_EQ(999.5, digest.estimateQuantile(0.999));
}

TEST(TDigest, MergeLargeAsDigests) {
  std::vector<TDigest> digests;
  TDigest digest(100);

  std::vector<double> values;
  for (int i = 1; i <= 1000; ++i) {
    values.push_back(i);
  }
  // Ensure that the values do not monotonically increase across digests.
  std::shuffle(
      values.begin(), values.end(), std::mt19937(std::random_device()()));
  for (int i = 0; i < 10; ++i) {
    std::vector<double> unsorted_values(
        values.begin() + (i * 100), values.begin() + (i + 1) * 100);
    digests.push_back(digest.merge(unsorted_values));
  }

  digest = TDigest::merge(digests);

  EXPECT_EQ(1000, digest.count());
  EXPECT_EQ(500500, digest.sum());
  EXPECT_EQ(500.5, digest.mean());
  EXPECT_EQ(1, digest.min());
  EXPECT_EQ(1000, digest.max());

  EXPECT_EQ(1.5, digest.estimateQuantile(0.001));
  EXPECT_EQ(10.5, digest.estimateQuantile(0.01));
  EXPECT_EQ(990.25, digest.estimateQuantile(0.99));
  EXPECT_EQ(999.5, digest.estimateQuantile(0.999));
}
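
// Illustrative sketch (not part of the original test suite): the pattern in
// MergeLargeAsDigests is how independently built digests (for example one per
// shard or per thread) can be combined into a single summary with the static
// TDigest::merge(). The combineShards() helper and its shardSamples parameter
// are hypothetical.
TDigest combineShards(const std::vector<std::vector<double>>& shardSamples) {
  std::vector<TDigest> digests;
  for (const auto& samples : shardSamples) {
    // Build one digest per shard from that shard's (unsorted) samples.
    TDigest shardDigest(100);
    digests.push_back(shardDigest.merge(samples));
  }
  // The static merge() collapses a container of digests into one digest whose
  // count and sum cover all of the inputs.
  return TDigest::merge(digests);
}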

TEST(TDigest, NegativeValues) {
  std::vector<TDigest> digests;
  TDigest digest(100);

  std::vector<double> values;
  for (int i = 1; i <= 100; ++i) {
    values.push_back(i);
    values.push_back(-i);
  }

  digest = digest.merge(values);

  EXPECT_EQ(200, digest.count());
  EXPECT_EQ(0, digest.sum());
  EXPECT_EQ(0, digest.mean());
  EXPECT_EQ(-100, digest.min());
  EXPECT_EQ(100, digest.max());

  EXPECT_EQ(-100, digest.estimateQuantile(0.0));
  EXPECT_EQ(-100, digest.estimateQuantile(0.001));
  EXPECT_EQ(-98.5, digest.estimateQuantile(0.01));
  EXPECT_EQ(98.5, digest.estimateQuantile(0.99));
  EXPECT_EQ(100, digest.estimateQuantile(0.999));
  EXPECT_EQ(100, digest.estimateQuantile(1.0));
}

TEST(TDigest, NegativeValuesMergeDigests) {
  std::vector<TDigest> digests;
  TDigest digest(100);

  std::vector<double> values;
  std::vector<double> negativeValues;
  for (int i = 1; i <= 100; ++i) {
    values.push_back(i);
    negativeValues.push_back(-i);
  }

  auto digest1 = digest.merge(values);
  auto digest2 = digest.merge(negativeValues);

  std::array<TDigest, 2> a{{digest1, digest2}};

  digest = TDigest::merge(a);

  EXPECT_EQ(200, digest.count());
  EXPECT_EQ(0, digest.sum());
  EXPECT_EQ(0, digest.mean());
  EXPECT_EQ(-100, digest.min());
  EXPECT_EQ(100, digest.max());

  EXPECT_EQ(-100, digest.estimateQuantile(0.0));
  EXPECT_EQ(-100, digest.estimateQuantile(0.001));
  EXPECT_EQ(-98.5, digest.estimateQuantile(0.01));
  EXPECT_EQ(98.5, digest.estimateQuantile(0.99));
  EXPECT_EQ(100, digest.estimateQuantile(0.999));
  EXPECT_EQ(100, digest.estimateQuantile(1.0));
}

TEST(TDigest, ConstructFromCentroids) {
  std::vector<TDigest::Centroid> centroids{};

  TDigest digest(100);
  std::vector<double> values;
  for (int i = 1; i <= 100; ++i) {
    values.push_back(i);
  }
  auto digest1 = digest.merge(values);

  size_t centroid_count = digest1.getCentroids().size();

  TDigest digest2(
      digest1.getCentroids(),
      digest1.sum(),
      digest1.count(),
      digest1.max(),
      digest1.min(),
      100);

  EXPECT_EQ(digest1.sum(), digest2.sum());
  EXPECT_EQ(digest1.count(), digest2.count());
  EXPECT_EQ(digest1.min(), digest2.min());
  EXPECT_EQ(digest1.max(), digest2.max());
  EXPECT_EQ(digest1.getCentroids().size(), digest2.getCentroids().size());

  TDigest digest3(
      digest1.getCentroids(),
      digest1.sum(),
      digest1.count(),
      digest1.max(),
      digest1.min(),
      centroid_count - 1);
  EXPECT_EQ(digest1.sum(), digest3.sum());
  EXPECT_EQ(digest1.count(), digest3.count());
  EXPECT_EQ(digest1.min(), digest3.min());
  EXPECT_EQ(digest1.max(), digest3.max());
  EXPECT_NE(digest1.getCentroids().size(), digest3.getCentroids().size());
}
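
// Illustrative sketch (not part of the original test suite): the centroid
// constructor exercised in ConstructFromCentroids is what makes a digest easy
// to persist or ship between processes. Storing getCentroids() together with
// sum(), count(), max(), min(), and the digest size is enough to rebuild an
// equivalent digest later. The copyDigest() helper is hypothetical and simply
// round-trips through that constructor in-process.
TDigest copyDigest(const TDigest& original) {
  return TDigest(
      original.getCentroids(),
      original.sum(),
      original.count(),
      original.max(),
      original.min(),
      100); // maxSize, matching the digests built elsewhere in this file
}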

TEST(TDigest, LargeOutlierTest) {
  folly::TDigest digest(100);

  std::vector<double> values;
  for (double i = 0; i < 19; ++i) {
    values.push_back(i);
  }
  values.push_back(1000000);

  std::sort(values.begin(), values.end());
  digest = digest.merge(values);
  EXPECT_LT(
      (int64_t)digest.estimateQuantile(0.5),
      (int64_t)digest.estimateQuantile(0.90));
}

TEST(TDigest, FloatingPointSortedTest) {
  // When combining centroids, floating point accuracy can lead to us building
  // an unsorted digest if we are not careful. This tests that we are properly
  // sorting the digest.
  double val = 1.4;
  TDigest digest1(100);
  std::vector<double> values1;
  for (int i = 1; i <= 100; ++i) {
    values1.push_back(val);
  }
  digest1 = digest1.merge(values1);

  TDigest digest2(100);
  std::vector<double> values2;
  for (int i = 1; i <= 100; ++i) {
    values2.push_back(val);
  }
  digest2 = digest2.merge(values2);

  std::array<TDigest, 2> a{{digest1, digest2}};
  auto mergeDigest1 = TDigest::merge(a);

  TDigest digest3(100);
  std::vector<double> values3;
  for (int i = 1; i <= 100; ++i) {
    values3.push_back(val);
  }
  digest3 = digest2.merge(values3);
  std::array<TDigest, 2> b{{digest3, mergeDigest1}};
  auto mergeDigest2 = TDigest::merge(b);

  auto centroids = mergeDigest2.getCentroids();
  EXPECT_EQ(std::is_sorted(centroids.begin(), centroids.end()), true);
}

class DistributionTest
    : public ::testing::TestWithParam<
          std::tuple<std::pair<bool, size_t>, double, bool>> {};

TEST_P(DistributionTest, ReasonableError) {
  std::pair<bool, size_t> underlyingDistribution;
  bool logarithmic;
  size_t modes;
  double quantile;
  double reasonableError = 0;
  bool digestMerge;

  std::tie(underlyingDistribution, quantile, digestMerge) = GetParam();

  std::tie(logarithmic, modes) = underlyingDistribution;
  if (quantile == 0.001 || quantile == 0.999) {
    reasonableError = 0.001;
  } else if (quantile == 0.01 || quantile == 0.99) {
    reasonableError = 0.01;
  } else if (quantile == 0.25 || quantile == 0.5 || quantile == 0.75) {
    reasonableError = 0.04;
  }

  std::vector<double> errors;

  std::default_random_engine generator;
  generator.seed(kSeed);
  for (size_t iter = 0; iter < kNumRandomRuns; ++iter) {
    TDigest digest(100);

    std::vector<double> values;

    if (logarithmic) {
      std::lognormal_distribution<double> distribution(0.0, 1.0);

      for (size_t i = 0; i < kNumSamples; ++i) {
        auto mode = (int)distribution(generator) % modes;
        values.push_back(distribution(generator) + 100.0 * mode);
      }
    } else {
      std::uniform_int_distribution<int> distributionPicker(0, modes - 1);

      std::vector<std::normal_distribution<double>> distributions;
      for (size_t i = 0; i < modes; ++i) {
        distributions.emplace_back(100.0 * (i + 1), 25);
      }

      for (size_t i = 0; i < kNumSamples; ++i) {
        auto distributionIdx = distributionPicker(generator);
        values.push_back(distributions[distributionIdx](generator));
      }
    }

    std::vector<TDigest> digests;
    for (size_t i = 0; i < kNumSamples / 1000; ++i) {
      folly::Range<const double*> r(values, i * 1000, 1000);
      if (digestMerge) {
        digests.push_back(digest.merge(r));
      } else {
        digest = digest.merge(r);
      }
    }

    std::sort(values.begin(), values.end());

    if (digestMerge) {
      digest = TDigest::merge(digests);
    }

    double est = digest.estimateQuantile(quantile);
    auto it = std::lower_bound(values.begin(), values.end(), est);
    int32_t actualRank = std::distance(values.begin(), it);
    double actualQuantile = ((double)actualRank) / kNumSamples;
    errors.push_back(actualQuantile - quantile);
  }

  double sum = 0.0;

  for (auto error : errors) {
    sum += error;
  }

  double mean = sum / kNumRandomRuns;

  double numerator = 0.0;
  for (auto error : errors) {
    numerator += pow(error - mean, 2);
  }

  double stddev = std::sqrt(numerator / (kNumRandomRuns - 1));

  EXPECT_GE(reasonableError, stddev);
}

INSTANTIATE_TEST_CASE_P(
    ReasonableErrors,
    DistributionTest,
    ::testing::Combine(
        ::testing::Values(
            std::make_pair(true, 1),
            std::make_pair(true, 3),
            std::make_pair(false, 1),
            std::make_pair(false, 10)),
        ::testing::Values(0.001, 0.01, 0.25, 0.50, 0.75, 0.99, 0.999),
        ::testing::Bool()));
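
// Illustrative sketch (not part of the original test suite): the accuracy
// check in ReasonableError works by mapping a quantile estimate back to the
// quantile it actually lands at within the sorted sample set. The helper
// below isolates that step; sortedValues must already be sorted ascending.
double actualQuantileOf(
    const std::vector<double>& sortedValues, double estimate) {
  auto it = std::lower_bound(sortedValues.begin(), sortedValues.end(), estimate);
  auto rank = std::distance(sortedValues.begin(), it);
  return static_cast<double>(rank) / sortedValues.size();
}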