proxygen
TDigestTest.cpp File Reference
#include <folly/stats/TDigest.h>
#include <chrono>
#include <random>
#include <folly/portability/GTest.h>

Go to the source code of this file.

Classes

class  DistributionTest
 

Functions

 TEST (TDigest, Basic)
 
 TEST (TDigest, Merge)
 
 TEST (TDigest, MergeSmall)
 
 TEST (TDigest, MergeLarge)
 
 TEST (TDigest, MergeLargeAsDigests)
 
 TEST (TDigest, NegativeValues)
 
 TEST (TDigest, NegativeValuesMergeDigests)
 
 TEST (TDigest, ConstructFromCentroids)
 
 TEST (TDigest, LargeOutlierTest)
 
 TEST (TDigest, FloatingPointSortedTest)
 
 TEST_P (DistributionTest, ReasonableError)
 
 INSTANTIATE_TEST_CASE_P (ReasonableErrors, DistributionTest,::testing::Combine(::testing::Values(std::make_pair(true, 1), std::make_pair(true, 3), std::make_pair(false, 1), std::make_pair(false, 10)),::testing::Values(0.001, 0.01, 0.25, 0.50, 0.75, 0.99, 0.999),::testing::Bool()))
 

Variables

const int32_t kNumSamples = 3000
 
const int32_t kNumRandomRuns = 10
 
const int32_t kSeed = 0
 

Function Documentation

INSTANTIATE_TEST_CASE_P ( ReasonableErrors  ,
DistributionTest  ,
::testing::Combine(::testing::Values(std::make_pair(true, 1), std::make_pair(true, 3), std::make_pair(false, 1), std::make_pair(false, 10)),::testing::Values(0.001, 0.01, 0.25, 0.50, 0.75, 0.99, 0.999),::testing::Bool())   
)

Referenced by TEST_P().

TEST ( TDigest  ,
Basic   
)

Definition at line 34 of file TDigestTest.cpp.

References folly::TDigest::count(), folly::TDigest::estimateQuantile(), EXPECT_EQ, i, folly::TDigest::max(), folly::TDigest::mean(), folly::TDigest::merge(), folly::TDigest::min(), folly::TDigest::sum(), and values().

34  {
35  TDigest digest(100);
36 
37  std::vector<double> values;
38  for (int i = 1; i <= 100; ++i) {
39  values.push_back(i);
40  }
41 
42  digest = digest.merge(values);
43 
44  EXPECT_EQ(100, digest.count());
45  EXPECT_EQ(5050, digest.sum());
46  EXPECT_EQ(50.5, digest.mean());
47  EXPECT_EQ(1, digest.min());
48  EXPECT_EQ(100, digest.max());
49 
50  EXPECT_EQ(1, digest.estimateQuantile(0.001));
51  EXPECT_EQ(2.0 - 0.5, digest.estimateQuantile(0.01));
52  EXPECT_EQ(50.375, digest.estimateQuantile(0.5));
53  EXPECT_EQ(100.0 - 0.5, digest.estimateQuantile(0.99));
54  EXPECT_EQ(100, digest.estimateQuantile(0.999));
55 }
#define EXPECT_EQ(val1, val2)
Definition: gtest.h:1922
std::vector< int > values(1'000)
TEST ( TDigest  ,
Merge   
)

Definition at line 57 of file TDigestTest.cpp.

References folly::TDigest::count(), folly::TDigest::estimateQuantile(), EXPECT_EQ, i, folly::TDigest::max(), folly::TDigest::mean(), folly::TDigest::merge(), folly::TDigest::min(), folly::TDigest::sum(), and values().

57  {
58  TDigest digest(100);
59 
60  std::vector<double> values;
61  for (int i = 1; i <= 100; ++i) {
62  values.push_back(i);
63  }
64  digest = digest.merge(values);
65 
66  values.clear();
67  for (int i = 101; i <= 200; ++i) {
68  values.push_back(i);
69  }
70  digest = digest.merge(values);
71 
72  EXPECT_EQ(200, digest.count());
73  EXPECT_EQ(20100, digest.sum());
74  EXPECT_EQ(100.5, digest.mean());
75  EXPECT_EQ(1, digest.min());
76  EXPECT_EQ(200, digest.max());
77 
78  EXPECT_EQ(1, digest.estimateQuantile(0.001));
79  EXPECT_EQ(4.0 - 1.5, digest.estimateQuantile(0.01));
80  EXPECT_EQ(100.25, digest.estimateQuantile(0.5));
81  EXPECT_EQ(200.0 - 1.5, digest.estimateQuantile(0.99));
82  EXPECT_EQ(200, digest.estimateQuantile(0.999));
83 }
#define EXPECT_EQ(val1, val2)
Definition: gtest.h:1922
std::vector< int > values(1'000)
TEST ( TDigest  ,
MergeSmall   
)

Definition at line 85 of file TDigestTest.cpp.

References folly::TDigest::count(), folly::TDigest::estimateQuantile(), EXPECT_EQ, folly::TDigest::max(), folly::TDigest::mean(), folly::TDigest::merge(), folly::TDigest::min(), folly::TDigest::sum(), and values().

85  {
86  TDigest digest(100);
87 
88  std::vector<double> values;
89  values.push_back(1);
90 
91  digest = digest.merge(values);
92 
93  EXPECT_EQ(1, digest.count());
94  EXPECT_EQ(1, digest.sum());
95  EXPECT_EQ(1, digest.mean());
96  EXPECT_EQ(1, digest.min());
97  EXPECT_EQ(1, digest.max());
98 
99  EXPECT_EQ(1.0, digest.estimateQuantile(0.001));
100  EXPECT_EQ(1.0, digest.estimateQuantile(0.01));
101  EXPECT_EQ(1.0, digest.estimateQuantile(0.5));
102  EXPECT_EQ(1.0, digest.estimateQuantile(0.99));
103  EXPECT_EQ(1.0, digest.estimateQuantile(0.999));
104 }
#define EXPECT_EQ(val1, val2)
Definition: gtest.h:1922
std::vector< int > values(1'000)
TEST ( TDigest  ,
MergeLarge   
)

Definition at line 106 of file TDigestTest.cpp.

References folly::TDigest::count(), folly::TDigest::estimateQuantile(), EXPECT_EQ, i, folly::TDigest::max(), folly::TDigest::mean(), folly::TDigest::merge(), folly::TDigest::min(), folly::TDigest::sum(), and values().

106  {
107  TDigest digest(100);
108 
109  std::vector<double> values;
110  for (int i = 1; i <= 1000; ++i) {
111  values.push_back(i);
112  }
113  digest = digest.merge(values);
114 
115  EXPECT_EQ(1000, digest.count());
116  EXPECT_EQ(500500, digest.sum());
117  EXPECT_EQ(500.5, digest.mean());
118  EXPECT_EQ(1, digest.min());
119  EXPECT_EQ(1000, digest.max());
120 
121  EXPECT_EQ(1.5, digest.estimateQuantile(0.001));
122  EXPECT_EQ(10.5, digest.estimateQuantile(0.01));
123  EXPECT_EQ(500.25, digest.estimateQuantile(0.5));
124  EXPECT_EQ(990.25, digest.estimateQuantile(0.99));
125  EXPECT_EQ(999.5, digest.estimateQuantile(0.999));
126 }
#define EXPECT_EQ(val1, val2)
Definition: gtest.h:1922
std::vector< int > values(1'000)
TEST ( TDigest  ,
MergeLargeAsDigests   
)

Definition at line 128 of file TDigestTest.cpp.

References folly::TDigest::count(), folly::TDigest::estimateQuantile(), EXPECT_EQ, i, folly::TDigest::max(), folly::TDigest::mean(), folly::TDigest::merge(), folly::TDigest::min(), folly::TDigest::sum(), and values().

128  {
129  std::vector<TDigest> digests;
130  TDigest digest(100);
131 
132  std::vector<double> values;
133  for (int i = 1; i <= 1000; ++i) {
134  values.push_back(i);
135  }
136  // Ensure that the values do not monotonically increase across digests.
137  std::shuffle(
138  values.begin(), values.end(), std::mt19937(std::random_device()()));
139  for (int i = 0; i < 10; ++i) {
140  std::vector<double> unsorted_values(
141  values.begin() + (i * 100), values.begin() + (i + 1) * 100);
142  digests.push_back(digest.merge(unsorted_values));
143  }
144 
145  digest = TDigest::merge(digests);
146 
147  EXPECT_EQ(1000, digest.count());
148  EXPECT_EQ(500500, digest.sum());
149  EXPECT_EQ(500.5, digest.mean());
150  EXPECT_EQ(1, digest.min());
151  EXPECT_EQ(1000, digest.max());
152 
153  EXPECT_EQ(1.5, digest.estimateQuantile(0.001));
154  EXPECT_EQ(10.5, digest.estimateQuantile(0.01));
155  EXPECT_EQ(990.25, digest.estimateQuantile(0.99));
156  EXPECT_EQ(999.5, digest.estimateQuantile(0.999));
157 }
#define EXPECT_EQ(val1, val2)
Definition: gtest.h:1922
void merge(unsigned int iters, size_t maxSize, size_t bufSize)
std::vector< int > values(1'000)
TEST ( TDigest  ,
NegativeValues   
)

Definition at line 159 of file TDigestTest.cpp.

References folly::TDigest::count(), folly::TDigest::estimateQuantile(), EXPECT_EQ, i, folly::TDigest::max(), folly::TDigest::mean(), folly::TDigest::merge(), folly::TDigest::min(), folly::TDigest::sum(), and values().

159  {
160  std::vector<TDigest> digests;
161  TDigest digest(100);
162 
163  std::vector<double> values;
164  for (int i = 1; i <= 100; ++i) {
165  values.push_back(i);
166  values.push_back(-i);
167  }
168 
169  digest = digest.merge(values);
170 
171  EXPECT_EQ(200, digest.count());
172  EXPECT_EQ(0, digest.sum());
173  EXPECT_EQ(0, digest.mean());
174  EXPECT_EQ(-100, digest.min());
175  EXPECT_EQ(100, digest.max());
176 
177  EXPECT_EQ(-100, digest.estimateQuantile(0.0));
178  EXPECT_EQ(-100, digest.estimateQuantile(0.001));
179  EXPECT_EQ(-98.5, digest.estimateQuantile(0.01));
180  EXPECT_EQ(98.5, digest.estimateQuantile(0.99));
181  EXPECT_EQ(100, digest.estimateQuantile(0.999));
182  EXPECT_EQ(100, digest.estimateQuantile(1.0));
183 }
#define EXPECT_EQ(val1, val2)
Definition: gtest.h:1922
std::vector< int > values(1'000)
TEST ( TDigest  ,
NegativeValuesMergeDigests   
)

Definition at line 185 of file TDigestTest.cpp.

References a, folly::TDigest::count(), folly::TDigest::estimateQuantile(), EXPECT_EQ, i, folly::TDigest::max(), folly::TDigest::mean(), folly::TDigest::merge(), folly::TDigest::min(), folly::TDigest::sum(), and values().

185  {
186  std::vector<TDigest> digests;
187  TDigest digest(100);
188 
189  std::vector<double> values;
190  std::vector<double> negativeValues;
191  for (int i = 1; i <= 100; ++i) {
192  values.push_back(i);
193  negativeValues.push_back(-i);
194  }
195 
196  auto digest1 = digest.merge(values);
197  auto digest2 = digest.merge(negativeValues);
198 
199  std::array<TDigest, 2> a{{digest1, digest2}};
200 
201  digest = TDigest::merge(a);
202 
203  EXPECT_EQ(200, digest.count());
204  EXPECT_EQ(0, digest.sum());
205  EXPECT_EQ(0, digest.mean());
206  EXPECT_EQ(-100, digest.min());
207  EXPECT_EQ(100, digest.max());
208 
209  EXPECT_EQ(-100, digest.estimateQuantile(0.0));
210  EXPECT_EQ(-100, digest.estimateQuantile(0.001));
211  EXPECT_EQ(-98.5, digest.estimateQuantile(0.01));
212  EXPECT_EQ(98.5, digest.estimateQuantile(0.99));
213  EXPECT_EQ(100, digest.estimateQuantile(0.999));
214  EXPECT_EQ(100, digest.estimateQuantile(1.0));
215 }
#define EXPECT_EQ(val1, val2)
Definition: gtest.h:1922
char a
void merge(unsigned int iters, size_t maxSize, size_t bufSize)
std::vector< int > values(1'000)
TEST ( TDigest  ,
ConstructFromCentroids   
)

Definition at line 217 of file TDigestTest.cpp.

References EXPECT_EQ, EXPECT_NE, folly::TDigest::getCentroids(), i, folly::TDigest::merge(), and values().

217  {
218  std::vector<TDigest::Centroid> centroids{};
219 
220  TDigest digest(100);
221  std::vector<double> values;
222  for (int i = 1; i <= 100; ++i) {
223  values.push_back(i);
224  }
225  auto digest1 = digest.merge(values);
226 
227  size_t centroid_count = digest1.getCentroids().size();
228 
229  TDigest digest2(
230  digest1.getCentroids(),
231  digest1.sum(),
232  digest1.count(),
233  digest1.max(),
234  digest1.min(),
235  100);
236 
237  EXPECT_EQ(digest1.sum(), digest2.sum());
238  EXPECT_EQ(digest1.count(), digest2.count());
239  EXPECT_EQ(digest1.min(), digest2.min());
240  EXPECT_EQ(digest1.max(), digest2.max());
241  EXPECT_EQ(digest1.getCentroids().size(), digest2.getCentroids().size());
242 
243  TDigest digest3(
244  digest1.getCentroids(),
245  digest1.sum(),
246  digest1.count(),
247  digest1.max(),
248  digest1.min(),
249  centroid_count - 1);
250  EXPECT_EQ(digest1.sum(), digest3.sum());
251  EXPECT_EQ(digest1.count(), digest3.count());
252  EXPECT_EQ(digest1.min(), digest3.min());
253  EXPECT_EQ(digest1.max(), digest3.max());
254  EXPECT_NE(digest1.getCentroids().size(), digest3.getCentroids().size());
255 }
#define EXPECT_EQ(val1, val2)
Definition: gtest.h:1922
#define EXPECT_NE(val1, val2)
Definition: gtest.h:1926
std::vector< int > values(1'000)
TEST ( TDigest  ,
LargeOutlierTest   
)

Definition at line 257 of file TDigestTest.cpp.

References EXPECT_LT, i, int64_t, folly::TDigest::merge(), and values().

257  {
258  folly::TDigest digest(100);
259 
260  std::vector<double> values;
261  for (double i = 0; i < 19; ++i) {
262  values.push_back(i);
263  }
264  values.push_back(1000000);
265 
266  std::sort(values.begin(), values.end());
267  digest = digest.merge(values);
268  EXPECT_LT(
269  (int64_t)digest.estimateQuantile(0.5),
270  (int64_t)digest.estimateQuantile(0.90));
271 }
#define EXPECT_LT(val1, val2)
Definition: gtest.h:1930
std::vector< int > values(1'000)
TEST ( TDigest  ,
FloatingPointSortedTest   
)

Definition at line 273 of file TDigestTest.cpp.

References a, b, EXPECT_EQ, i, folly::TDigest::merge(), and val.

273  {
274  // When combining centroids, floating point accuracy can lead to us building
275  // and unsorted digest if we are not careful. This tests that we are properly
276  // sorting the digest.
277  double val = 1.4;
278  TDigest digest1(100);
279  std::vector<double> values1;
280  for (int i = 1; i <= 100; ++i) {
281  values1.push_back(val);
282  }
283  digest1 = digest1.merge(values1);
284 
285  TDigest digest2(100);
286  std::vector<double> values2;
287  for (int i = 1; i <= 100; ++i) {
288  values2.push_back(val);
289  }
290  digest2 = digest2.merge(values2);
291 
292  std::array<TDigest, 2> a{{digest1, digest2}};
293  auto mergeDigest1 = TDigest::merge(a);
294 
295  TDigest digest3(100);
296  std::vector<double> values3;
297  for (int i = 1; i <= 100; ++i) {
298  values3.push_back(val);
299  }
300  digest3 = digest2.merge(values3);
301  std::array<TDigest, 2> b{{digest3, mergeDigest1}};
302  auto mergeDigest2 = TDigest::merge(b);
303 
304  auto centroids = mergeDigest2.getCentroids();
305  EXPECT_EQ(std::is_sorted(centroids.begin(), centroids.end()), true);
306 }
char b
#define EXPECT_EQ(val1, val2)
Definition: gtest.h:1922
double val
Definition: String.cpp:273
char a
void merge(unsigned int iters, size_t maxSize, size_t bufSize)
TEST_P ( DistributionTest  ,
ReasonableError   
)

Definition at line 312 of file TDigestTest.cpp.

References folly::pushmi::operators::error(), folly::TDigest::estimateQuantile(), EXPECT_GE, generator, i, INSTANTIATE_TEST_CASE_P(), int32_t, kNumRandomRuns, kNumSamples, kSeed, folly::TDigest::merge(), mode, sum(), and values().

312  {
313  std::pair<bool, size_t> underlyingDistribution;
314  bool logarithmic;
315  size_t modes;
316  double quantile;
317  double reasonableError = 0;
318  bool digestMerge;
319 
320  std::tie(underlyingDistribution, quantile, digestMerge) = GetParam();
321 
322  std::tie(logarithmic, modes) = underlyingDistribution;
323  if (quantile == 0.001 || quantile == 0.999) {
324  reasonableError = 0.001;
325  } else if (quantile == 0.01 || quantile == 0.99) {
326  reasonableError = 0.01;
327  } else if (quantile == 0.25 || quantile == 0.5 || quantile == 0.75) {
328  reasonableError = 0.04;
329  }
330 
331  std::vector<double> errors;
332 
333  std::default_random_engine generator;
334  generator.seed(kSeed);
335  for (size_t iter = 0; iter < kNumRandomRuns; ++iter) {
336  TDigest digest(100);
337 
338  std::vector<double> values;
339 
340  if (logarithmic) {
341  std::lognormal_distribution<double> distribution(0.0, 1.0);
342 
343  for (size_t i = 0; i < kNumSamples; ++i) {
344  auto mode = (int)distribution(generator) % modes;
345  values.push_back(distribution(generator) + 100.0 * mode);
346  }
347  } else {
348  std::uniform_int_distribution<int> distributionPicker(0, modes - 1);
349 
350  std::vector<std::normal_distribution<double>> distributions;
351  for (size_t i = 0; i < modes; ++i) {
352  distributions.emplace_back(100.0 * (i + 1), 25);
353  }
354 
355  for (size_t i = 0; i < kNumSamples; ++i) {
356  auto distributionIdx = distributionPicker(generator);
357  values.push_back(distributions[distributionIdx](generator));
358  }
359  }
360 
361  std::vector<TDigest> digests;
362  for (size_t i = 0; i < kNumSamples / 1000; ++i) {
363  folly::Range<const double*> r(values, i * 1000, 1000);
364  if (digestMerge) {
365  digests.push_back(digest.merge(r));
366  } else {
367  digest = digest.merge(r);
368  }
369  }
370 
371  std::sort(values.begin(), values.end());
372 
373  if (digestMerge) {
374  digest = TDigest::merge(digests);
375  }
376 
377  double est = digest.estimateQuantile(quantile);
378  auto it = std::lower_bound(values.begin(), values.end(), est);
379  int32_t actualRank = std::distance(values.begin(), it);
380  double actualQuantile = ((double)actualRank) / kNumSamples;
381  errors.push_back(actualQuantile - quantile);
382  }
383 
384  double sum = 0.0;
385 
386  for (auto error : errors) {
387  sum += error;
388  }
389 
390  double mean = sum / kNumRandomRuns;
391 
392  double numerator = 0.0;
393  for (auto error : errors) {
394  numerator += pow(error - mean, 2);
395  }
396 
397  double stddev = std::sqrt(numerator / (kNumRandomRuns - 1));
398 
399  EXPECT_GE(reasonableError, stddev);
400 }
std::atomic< int64_t > sum(0)
const int32_t kNumRandomRuns
Definition: TDigestTest.cpp:31
std::default_random_engine generator
const int32_t kSeed
Definition: TDigestTest.cpp:32
const int32_t kNumSamples
Definition: TDigestTest.cpp:30
#define EXPECT_GE(val1, val2)
Definition: gtest.h:1932
requires And< SemiMovable< VN >... > &&SemiMovable< E > auto error(E e)
Definition: error.h:48
folly::Optional< PskKeyExchangeMode > mode
void merge(unsigned int iters, size_t maxSize, size_t bufSize)
std::vector< int > values(1'000)

Variable Documentation

const int32_t kNumRandomRuns = 10

Definition at line 31 of file TDigestTest.cpp.

Referenced by TEST_P().

const int32_t kNumSamples = 3000

Definition at line 30 of file TDigestTest.cpp.

Referenced by TEST_P().

const int32_t kSeed = 0

Definition at line 32 of file TDigestTest.cpp.

Referenced by operator<<(), and TEST_P().