proxygen
Histogram.h
Go to the documentation of this file.
1 /*
2  * Copyright 2013-present Facebook, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include <cstddef>
20 #include <cstdint>
21 #include <limits>
22 #include <ostream>
23 #include <stdexcept>
24 #include <string>
25 #include <vector>
26 
27 #include <folly/CPortability.h>
28 #include <folly/Traits.h>
30 
31 namespace folly {
32 
33 namespace detail {
34 
35 /*
36  * A helper class to manage a set of histogram buckets.
37  */
38 template <typename T, typename BucketT>
40  public:
41  typedef T ValueType;
42  typedef BucketT BucketType;
43 
44  /*
45  * Create a set of histogram buckets.
46  *
47  * One bucket will be created for each bucketSize interval of values within
48  * the specified range. Additionally, one bucket will be created to track
49  * all values that fall below the specified minimum, and one bucket will be
50  * created for all values above the specified maximum.
51  *
52  * If (max - min) is not a multiple of bucketSize, the last bucket will cover
53  * a smaller range of values than the other buckets.
54  *
55  * (max - min) must be larger than or equal to bucketSize.
56  */
58  ValueType bucketSize,
59  ValueType min,
60  ValueType max,
61  const BucketType& defaultBucket);
62 
63  /* Returns the bucket size of each bucket in the histogram. */
64  ValueType getBucketSize() const {
65  return bucketSize_;
66  }
67 
68  /* Returns the min value at which bucketing begins. */
69  ValueType getMin() const {
70  return min_;
71  }
72 
73  /* Returns the max value at which bucketing ends. */
74  ValueType getMax() const {
75  return max_;
76  }
77 
78  /*
79  * Returns the number of buckets.
80  *
81  * This includes the total number of buckets for the [min, max) range,
82  * plus 2 extra buckets, one for handling values less than min, and one for
83  * values greater than max.
84  */
85  size_t getNumBuckets() const {
86  return buckets_.size();
87  }
88 
89  /* Returns the bucket index into which the given value would fall. */
90  size_t getBucketIdx(ValueType value) const;
91 
92  /* Returns the bucket for the specified value */
93  BucketType& getByValue(ValueType value) {
94  return buckets_[getBucketIdx(value)];
95  }
96 
97  /* Returns the bucket for the specified value */
98  const BucketType& getByValue(ValueType value) const {
99  return buckets_[getBucketIdx(value)];
100  }
101 
102  /*
103  * Returns the bucket at the specified index.
104  *
105  * Note that index 0 is the bucket for all values less than the specified
106  * minimum. Index 1 is the first bucket in the specified bucket range.
107  */
108  BucketType& getByIndex(size_t idx) {
109  return buckets_[idx];
110  }
111 
112  /* Returns the bucket at the specified index. */
113  const BucketType& getByIndex(size_t idx) const {
114  return buckets_[idx];
115  }
116 
117  /*
118  * Returns the minimum threshold for the bucket at the given index.
119  *
120  * The bucket at the specified index will store values in the range
121  * [bucketMin, bucketMin + bucketSize), or [bucketMin, max), if the overall
122  * max is smaller than bucketMin + bucketSize.
123  */
124  ValueType getBucketMin(size_t idx) const {
125  if (idx == 0) {
127  }
128  if (idx == buckets_.size() - 1) {
129  return max_;
130  }
131 
132  return ValueType(min_ + ((idx - 1) * bucketSize_));
133  }
134 
135  /*
136  * Returns the maximum threshold for the bucket at the given index.
137  *
138  * The bucket at the specified index will store values in the range
139  * [bucketMin, bucketMin + bucketSize), or [bucketMin, max), if the overall
140  * max is smaller than bucketMin + bucketSize.
141  */
142  ValueType getBucketMax(size_t idx) const {
143  if (idx == buckets_.size() - 1) {
145  }
146 
147  return ValueType(min_ + (idx * bucketSize_));
148  }
149 
159  template <typename CountFn>
160  uint64_t computeTotalCount(CountFn countFromBucket) const;
161 
178  template <typename CountFn>
179  size_t getPercentileBucketIdx(
180  double pct,
181  CountFn countFromBucket,
182  double* lowPct = nullptr,
183  double* highPct = nullptr) const;
184 
197  template <typename CountFn, typename AvgFn>
198  ValueType getPercentileEstimate(
199  double pct,
200  CountFn countFromBucket,
201  AvgFn avgFromBucket) const;
202 
203  /*
204  * Iterator access to the buckets.
205  *
206  * Note that the first bucket is for all values less than min, and the last
207  * bucket is for all values greater than max. The buckets tracking values in
208  * the [min, max) actually start at the second bucket.
209  */
210  typename std::vector<BucketType>::const_iterator begin() const {
211  return buckets_.begin();
212  }
213  typename std::vector<BucketType>::iterator begin() {
214  return buckets_.begin();
215  }
216  typename std::vector<BucketType>::const_iterator end() const {
217  return buckets_.end();
218  }
219  typename std::vector<BucketType>::iterator end() {
220  return buckets_.end();
221  }
222 
223  private:
224  ValueType bucketSize_;
225  ValueType min_;
226  ValueType max_;
227  std::vector<BucketType> buckets_;
228 };
229 
230 } // namespace detail
231 
232 /*
233  * A basic histogram class.
234  *
235  * Groups data points into equally-sized buckets, and stores the overall sum of
236  * the data points in each bucket, as well as the number of data points in the
237  * bucket.
238  *
239  * The caller must specify the minimum and maximum data points to expect ahead
240  * of time, as well as the bucket width.
241  */
242 template <typename T>
243 class Histogram {
244  public:
245  typedef T ValueType;
247 
248  Histogram(ValueType bucketSize, ValueType min, ValueType max)
249  : buckets_(bucketSize, min, max, Bucket()) {}
250 
251  /* Add a data point to the histogram */
252  void addValue(ValueType value) {
253  Bucket& bucket = buckets_.getByValue(value);
254  // NOTE: Overflow is handled elsewhere and tests check this
255  // behavior (see HistogramTest.cpp TestOverflow* tests).
256  // TODO: It would be nice to handle overflow here and redesign this class.
257  auto const addend = to_unsigned(value);
258  bucket.sum = static_cast<ValueType>(to_unsigned(bucket.sum) + addend);
259  bucket.count += 1;
260  }
261 
262  /* Add multiple same data points to the histogram */
263  void addRepeatedValue(ValueType value, uint64_t nSamples) {
264  Bucket& bucket = buckets_.getByValue(value);
265  // NOTE: Overflow is handled elsewhere and tests check this
266  // behavior (see HistogramTest.cpp TestOverflow* tests).
267  // TODO: It would be nice to handle overflow here and redesign this class.
268  auto const addend = to_unsigned(value) * nSamples;
269  bucket.sum = static_cast<ValueType>(to_unsigned(bucket.sum) + addend);
270  bucket.count += nSamples;
271  }
272 
273  /*
274  * Remove a data point to the histogram
275  *
276  * Note that this method does not actually verify that this exact data point
277  * had previously been added to the histogram; it merely subtracts the
278  * requested value from the appropriate bucket's sum.
279  */
280  void removeValue(ValueType value) {
281  Bucket& bucket = buckets_.getByValue(value);
282  // NOTE: Overflow is handled elsewhere and tests check this
283  // behavior (see HistogramTest.cpp TestOverflow* tests).
284  // TODO: It would be nice to handle overflow here and redesign this class.
285  if (bucket.count > 0) {
286  auto const subtrahend = to_unsigned(value);
287  bucket.sum = static_cast<ValueType>(to_unsigned(bucket.sum) - subtrahend);
288  bucket.count -= 1;
289  } else {
290  bucket.sum = ValueType();
291  bucket.count = 0;
292  }
293  }
294 
295  /* Remove multiple same data points from the histogram */
296  void removeRepeatedValue(ValueType value, uint64_t nSamples) {
297  Bucket& bucket = buckets_.getByValue(value);
298  // TODO: It would be nice to handle overflow here.
299  if (bucket.count >= nSamples) {
300  bucket.sum -= value * nSamples;
301  bucket.count -= nSamples;
302  } else {
303  bucket.sum = ValueType();
304  bucket.count = 0;
305  }
306  }
307 
308  /* Remove all data points from the histogram */
309  void clear() {
310  for (size_t i = 0; i < buckets_.getNumBuckets(); i++) {
311  buckets_.getByIndex(i).clear();
312  }
313  }
314 
315  /* Subtract another histogram data from the histogram */
316  void subtract(const Histogram& hist) {
317  // the two histogram bucket definitions must match to support
318  // subtract.
319  if (getBucketSize() != hist.getBucketSize() || getMin() != hist.getMin() ||
320  getMax() != hist.getMax() || getNumBuckets() != hist.getNumBuckets()) {
321  throw std::invalid_argument("Cannot subtract input histogram.");
322  }
323 
324  for (size_t i = 0; i < buckets_.getNumBuckets(); i++) {
325  buckets_.getByIndex(i) -= hist.buckets_.getByIndex(i);
326  }
327  }
328 
329  /* Merge two histogram data together */
330  void merge(const Histogram& hist) {
331  // the two histogram bucket definitions must match to support
332  // a merge.
333  if (getBucketSize() != hist.getBucketSize() || getMin() != hist.getMin() ||
334  getMax() != hist.getMax() || getNumBuckets() != hist.getNumBuckets()) {
335  throw std::invalid_argument("Cannot merge from input histogram.");
336  }
337 
338  for (size_t i = 0; i < buckets_.getNumBuckets(); i++) {
339  buckets_.getByIndex(i) += hist.buckets_.getByIndex(i);
340  }
341  }
342 
343  /* Copy bucket values from another histogram */
344  void copy(const Histogram& hist) {
345  // the two histogram bucket definition must match
346  if (getBucketSize() != hist.getBucketSize() || getMin() != hist.getMin() ||
347  getMax() != hist.getMax() || getNumBuckets() != hist.getNumBuckets()) {
348  throw std::invalid_argument("Cannot copy from input histogram.");
349  }
350 
351  for (size_t i = 0; i < buckets_.getNumBuckets(); i++) {
352  buckets_.getByIndex(i) = hist.buckets_.getByIndex(i);
353  }
354  }
355 
356  /* Returns the bucket size of each bucket in the histogram. */
357  ValueType getBucketSize() const {
358  return buckets_.getBucketSize();
359  }
360  /* Returns the min value at which bucketing begins. */
361  ValueType getMin() const {
362  return buckets_.getMin();
363  }
364  /* Returns the max value at which bucketing ends. */
365  ValueType getMax() const {
366  return buckets_.getMax();
367  }
368  /* Returns the number of buckets */
369  size_t getNumBuckets() const {
370  return buckets_.getNumBuckets();
371  }
372 
373  /* Returns the specified bucket (for reading only!) */
374  const Bucket& getBucketByIndex(size_t idx) const {
375  return buckets_.getByIndex(idx);
376  }
377 
378  /*
379  * Returns the minimum threshold for the bucket at the given index.
380  *
381  * The bucket at the specified index will store values in the range
382  * [bucketMin, bucketMin + bucketSize), or [bucketMin, max), if the overall
383  * max is smaller than bucketMin + bucketSize.
384  */
385  ValueType getBucketMin(size_t idx) const {
386  return buckets_.getBucketMin(idx);
387  }
388 
389  /*
390  * Returns the maximum threshold for the bucket at the given index.
391  *
392  * The bucket at the specified index will store values in the range
393  * [bucketMin, bucketMin + bucketSize), or [bucketMin, max), if the overall
394  * max is smaller than bucketMin + bucketSize.
395  */
396  ValueType getBucketMax(size_t idx) const {
397  return buckets_.getBucketMax(idx);
398  }
399 
406  CountFromBucket countFn;
407  return buckets_.computeTotalCount(countFn);
408  }
409 
410  /*
411  * Get the bucket that the specified percentile falls into
412  *
413  * The lowest and highest percentile data points in returned bucket will be
414  * returned in the lowPct and highPct arguments, if they are not nullptr.
415  */
417  double pct,
418  double* lowPct = nullptr,
419  double* highPct = nullptr) const {
420  // We unfortunately can't use lambdas here yet;
421  // Some users of this code are still built with gcc-4.4.
422  CountFromBucket countFn;
423  return buckets_.getPercentileBucketIdx(pct, countFn, lowPct, highPct);
424  }
425 
434  ValueType getPercentileEstimate(double pct) const {
435  CountFromBucket countFn;
436  AvgFromBucket avgFn;
437  return buckets_.getPercentileEstimate(pct, countFn, avgFn);
438  }
439 
440  /*
441  * Get a human-readable string describing the histogram contents
442  */
443  std::string debugString() const;
444 
445  /*
446  * Write the histogram contents in tab-separated values (TSV) format.
447  * Format is "min max count sum".
448  */
449  void toTSV(std::ostream& out, bool skipEmptyBuckets = true) const;
450 
452  uint64_t operator()(const Bucket& bucket) const {
453  return bucket.count;
454  }
455  };
456  struct AvgFromBucket {
457  ValueType operator()(const Bucket& bucket) const {
458  if (bucket.count == 0) {
459  return ValueType(0);
460  }
461  // Cast bucket.count to a signed integer type. This ensures that we
462  // perform division properly here: If bucket.sum is a signed integer
463  // type but we divide by an unsigned number, unsigned division will be
464  // performed and bucket.sum will be converted to unsigned first.
465  // If bucket.sum is unsigned, the code will still do unsigned division
466  // correctly.
467  //
468  // The only downside is if bucket.count is large enough to be negative
469  // when treated as signed. That should be extremely unlikely, though.
470  return bucket.sum / static_cast<int64_t>(bucket.count);
471  }
472  };
473 
474  private:
475  template <
476  typename S,
478  static constexpr _t<std::make_unsigned<S>> to_unsigned(S s) {
479  return static_cast<_t<std::make_unsigned<S>>>(s);
480  }
481  template <
482  typename S,
484  static constexpr S to_unsigned(S s) {
485  return s;
486  }
487 
489 };
490 
491 } // namespace folly
492 
493 // MSVC 2017 Update 3/4 has an issue with explicitly instantiating templated
494 // functions with default arguments inside templated classes when compiled
495 // with /permissive- (the default for the CMake build), so we directly include
496 // the -defs as if it were -inl, and don't provide the explicit instantiations.
497 // https://developercommunity.visualstudio.com/content/problem/81223/incorrect-error-c5037-with-permissive.html
498 #if defined(_MSC_VER) && _MSC_FULL_VER >= 191125506 && \
499  _MSC_FULL_VER <= 191125547
500 #define FOLLY_MSVC_USE_WORKAROUND_FOR_C5037 1
501 #else
502 #define FOLLY_MSVC_USE_WORKAROUND_FOR_C5037 0
503 #endif
504 
505 #if FOLLY_MSVC_USE_WORKAROUND_FOR_C5037
507 #endif
ValueType getPercentileEstimate(double pct) const
Definition: Histogram.h:434
ValueType getBucketSize() const
Definition: Histogram.h:357
const BucketType & getByValue(ValueType value) const
Definition: Histogram.h:98
void removeRepeatedValue(ValueType value, uint64_t nSamples)
Definition: Histogram.h:296
ValueType getPercentileEstimate(double pct, CountFn countFromBucket, AvgFn avgFromBucket) const
ValueType getBucketMax(size_t idx) const
Definition: Histogram.h:142
void addValue(ValueType value)
Definition: Histogram.h:252
size_t getPercentileBucketIdx(double pct, double *lowPct=nullptr, double *highPct=nullptr) const
Definition: Histogram.h:416
LogLevel max
Definition: LogLevel.cpp:31
void removeValue(ValueType value)
Definition: Histogram.h:280
ValueType getBucketMax(size_t idx) const
Definition: Histogram.h:396
const Bucket & getBucketByIndex(size_t idx) const
Definition: Histogram.h:374
static constexpr _t< std::make_unsigned< S > > to_unsigned(S s)
Definition: Histogram.h:478
ValueType getBucketSize() const
Definition: Histogram.h:64
size_t getBucketIdx(ValueType value) const
Histogram(ValueType bucketSize, ValueType min, ValueType max)
Definition: Histogram.h:248
ValueType getMin() const
Definition: Histogram.h:361
folly::std T
std::vector< BucketType >::iterator end()
Definition: Histogram.h:219
—— Concurrent Priority Queue Implementation ——
Definition: AtomicBitSet.h:29
uint64_t computeTotalCount(CountFn countFromBucket) const
uint64_t computeTotalCount() const
Definition: Histogram.h:405
ValueType operator()(const Bucket &bucket) const
Definition: Histogram.h:457
void addRepeatedValue(ValueType value, uint64_t nSamples)
Definition: Histogram.h:263
std::vector< BucketType >::const_iterator end() const
Definition: Histogram.h:216
uint64_t operator()(const Bucket &bucket) const
Definition: Histogram.h:452
constexpr auto to_unsigned(T const &t) -> typename std::make_unsigned< T >::type
Definition: Utility.h:399
BucketType & getByValue(ValueType value)
Definition: Histogram.h:93
LogLevel min
Definition: LogLevel.cpp:30
size_t getNumBuckets() const
Definition: Histogram.h:85
ValueType getBucketMin(size_t idx) const
Definition: Histogram.h:124
ValueType getMax() const
Definition: Histogram.h:74
typename T::type _t
Definition: Traits.h:171
const BucketType & getByIndex(size_t idx) const
Definition: Histogram.h:113
size_t getNumBuckets() const
Definition: Histogram.h:369
std::vector< BucketType >::const_iterator begin() const
Definition: Histogram.h:210
std::vector< BucketType > buckets_
Definition: Histogram.h:227
BucketType & getByIndex(size_t idx)
Definition: Histogram.h:108
ValueType getBucketMin(size_t idx) const
Definition: Histogram.h:385
static constexpr S to_unsigned(S s)
Definition: Histogram.h:484
void merge(const Histogram &hist)
Definition: Histogram.h:330
void subtract(const Histogram &hist)
Definition: Histogram.h:316
size_t getPercentileBucketIdx(double pct, CountFn countFromBucket, double *lowPct=nullptr, double *highPct=nullptr) const
ValueType getMin() const
Definition: Histogram.h:69
ValueType getMax() const
Definition: Histogram.h:365
const char * string
Definition: Conv.cpp:212
static set< string > s
uint64_t value(const typename LockFreeRingBuffer< T, Atom >::Cursor &rbcursor)
detail::Bucket< T > Bucket
Definition: Histogram.h:246
void copy(const Histogram &hist)
Definition: Histogram.h:344
std::vector< BucketType >::iterator begin()
Definition: Histogram.h:213
detail::HistogramBuckets< ValueType, Bucket > buckets_
Definition: Histogram.h:488
HistogramBuckets(ValueType bucketSize, ValueType min, ValueType max, const BucketType &defaultBucket)