proxygen
Histogram-defs.h
Go to the documentation of this file.
1 /*
2  * Copyright 2013-present Facebook, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include <folly/Conv.h>
20 #include <folly/stats/Histogram.h>
21 
22 #include <glog/logging.h>
23 
24 namespace folly {
25 
26 namespace detail {
27 
28 template <typename T, typename BucketT>
30  ValueType bucketSize,
31  ValueType min,
32  ValueType max,
33  const BucketType& defaultBucket)
34  : bucketSize_(bucketSize), min_(min), max_(max) {
35  CHECK_GT(bucketSize_, ValueType(0));
36  CHECK_LT(min_, max_);
37 
38  // Deliberately make this a signed type, because we're about
39  // to compare it against max-min, which is nominally signed, too.
40  int64_t numBuckets = int64_t((max - min) / bucketSize);
41  // Round up if the bucket size does not fit evenly
42  if (numBuckets * bucketSize < max - min) {
43  ++numBuckets;
44  }
45  // Add 2 for the extra 'below min' and 'above max' buckets
46  numBuckets += 2;
47  buckets_.assign(size_t(numBuckets), defaultBucket);
48 }
49 
50 template <typename T, typename BucketType>
52  if (value < min_) {
53  return 0;
54  } else if (value >= max_) {
55  return buckets_.size() - 1;
56  } else {
57  // the 1 is the below_min bucket
58  return size_t(((value - min_) / bucketSize_) + 1);
59  }
60 }
61 
62 template <typename T, typename BucketType>
63 template <typename CountFn>
65  CountFn countFromBucket) const {
66  uint64_t count = 0;
67  for (size_t n = 0; n < buckets_.size(); ++n) {
68  count += countFromBucket(const_cast<const BucketType&>(buckets_[n]));
69  }
70  return count;
71 }
72 
73 template <typename T, typename BucketType>
74 template <typename CountFn>
76  double pct,
77  CountFn countFromBucket,
78  double* lowPct,
79  double* highPct) const {
80  CHECK_GE(pct, 0.0);
81  CHECK_LE(pct, 1.0);
82 
83  auto numBuckets = buckets_.size();
84 
85  // Compute the counts in each bucket
86  std::vector<uint64_t> counts(numBuckets);
87  uint64_t totalCount = 0;
88  for (size_t n = 0; n < numBuckets; ++n) {
89  uint64_t bucketCount =
90  countFromBucket(const_cast<const BucketType&>(buckets_[n]));
91  counts[n] = bucketCount;
92  totalCount += bucketCount;
93  }
94 
95  // If there are no elements, just return the lowest bucket.
96  // Note that we return bucket 1, which is the first bucket in the
97  // histogram range; bucket 0 is for all values below min_.
98  if (totalCount == 0) {
99  // Set lowPct and highPct both to 0.
100  // getPercentileEstimate() will recognize this to mean that the histogram
101  // is empty.
102  if (lowPct) {
103  *lowPct = 0.0;
104  }
105  if (highPct) {
106  *highPct = 0.0;
107  }
108  return 1;
109  }
110 
111  // Loop through all the buckets, keeping track of each bucket's
112  // percentile range: [0,10], [10,17], [17,45], etc. When we find a range
113  // that includes our desired percentile, we return that bucket index.
114  double prevPct = 0.0;
115  double curPct = 0.0;
116  uint64_t curCount = 0;
117  size_t idx;
118  for (idx = 0; idx < numBuckets; ++idx) {
119  if (counts[idx] == 0) {
120  // skip empty buckets
121  continue;
122  }
123 
124  prevPct = curPct;
125  curCount += counts[idx];
126  curPct = static_cast<double>(curCount) / totalCount;
127  if (pct <= curPct) {
128  // This is the desired bucket
129  break;
130  }
131  }
132 
133  if (lowPct) {
134  *lowPct = prevPct;
135  }
136  if (highPct) {
137  *highPct = curPct;
138  }
139  return idx;
140 }
141 
142 template <typename T, typename BucketType>
143 template <typename CountFn, typename AvgFn>
145  double pct,
146  CountFn countFromBucket,
147  AvgFn avgFromBucket) const {
148  // Find the bucket where this percentile falls
149  double lowPct;
150  double highPct;
151  size_t bucketIdx =
152  getPercentileBucketIdx(pct, countFromBucket, &lowPct, &highPct);
153  if (lowPct == 0.0 && highPct == 0.0) {
154  // Invalid range -- the buckets must all be empty
155  // Return the default value for ValueType.
156  return ValueType();
157  }
158  if (lowPct == highPct) {
159  // Unlikely to have exact equality,
160  // but just return the bucket average in this case.
161  // We handle this here to avoid division by 0 below.
162  return avgFromBucket(buckets_[bucketIdx]);
163  }
164 
165  CHECK_GE(pct, lowPct);
166  CHECK_LE(pct, highPct);
167  CHECK_LT(lowPct, highPct);
168 
169  // Compute information about this bucket
170  ValueType avg = avgFromBucket(buckets_[bucketIdx]);
171  ValueType low;
172  ValueType high;
173  if (bucketIdx == 0) {
174  if (avg > min_) {
175  // This normally shouldn't happen. This bucket is only supposed to track
176  // values less than min_. Most likely this means that integer overflow
177  // occurred, and the code in avgFromBucket() returned a huge value
178  // instead of a small one. Just return the minimum possible value for
179  // now.
180  //
181  // (Note that if the counter keeps being decremented, eventually it will
182  // wrap and become small enough that we won't detect this any more, and
183  // we will return bogus information.)
184  LOG(ERROR) << "invalid average value in histogram minimum bucket: " << avg
185  << " > " << min_ << ": possible integer overflow?";
186  return getBucketMin(bucketIdx);
187  }
188  // For the below-min bucket, just assume the lowest value ever seen is
189  // twice as far away from min_ as avg.
190  high = min_;
191  low = high - (2 * (high - avg));
192  // Adjust low in case it wrapped
193  if (low > avg) {
195  }
196  } else if (bucketIdx == buckets_.size() - 1) {
197  if (avg < max_) {
198  // Most likely this means integer overflow occurred. See the comments
199  // above in the minimum case.
200  LOG(ERROR) << "invalid average value in histogram maximum bucket: " << avg
201  << " < " << max_ << ": possible integer overflow?";
202  return getBucketMax(bucketIdx);
203  }
204  // Similarly for the above-max bucket, assume the highest value ever seen
205  // is twice as far away from max_ as avg.
206  low = max_;
207  high = low + (2 * (avg - low));
208  // Adjust high in case it wrapped
209  if (high < avg) {
211  }
212  } else {
213  low = getBucketMin(bucketIdx);
214  high = getBucketMax(bucketIdx);
215  if (avg < low || avg > high) {
216  // Most likely this means an integer overflow occurred.
217  // See the comments above. Return the midpoint between low and high
218  // as a best guess, since avg is meaningless.
219  LOG(ERROR) << "invalid average value in histogram bucket: " << avg
220  << " not in range [" << low << ", " << high
221  << "]: possible integer overflow?";
222  return (low + high) / 2;
223  }
224  }
225 
226  // Since we know the average value in this bucket, we can do slightly better
227  // than just assuming the data points in this bucket are uniformly
228  // distributed between low and high.
229  //
230  // Assume that the median value in this bucket is the same as the average
231  // value.
232  double medianPct = (lowPct + highPct) / 2.0;
233  if (pct < medianPct) {
234  // Assume that the data points lower than the median of this bucket
235  // are uniformly distributed between low and avg
236  double pctThroughSection = (pct - lowPct) / (medianPct - lowPct);
237  return T(low + ((avg - low) * pctThroughSection));
238  } else {
239  // Assume that the data points greater than the median of this bucket
240  // are uniformly distributed between avg and high
241  double pctThroughSection = (pct - medianPct) / (highPct - medianPct);
242  return T(avg + ((high - avg) * pctThroughSection));
243  }
244 }
245 
246 } // namespace detail
247 
248 template <typename T>
250  std::string ret = folly::to<std::string>(
251  "num buckets: ",
252  buckets_.getNumBuckets(),
253  ", bucketSize: ",
254  buckets_.getBucketSize(),
255  ", min: ",
256  buckets_.getMin(),
257  ", max: ",
258  buckets_.getMax(),
259  "\n");
260 
261  for (size_t i = 0; i < buckets_.getNumBuckets(); ++i) {
263  " ",
264  buckets_.getBucketMin(i),
265  ": ",
266  buckets_.getByIndex(i).count,
267  "\n",
268  &ret);
269  }
270 
271  return ret;
272 }
273 
274 template <typename T>
275 void Histogram<T>::toTSV(std::ostream& out, bool skipEmptyBuckets) const {
276  for (size_t i = 0; i < buckets_.getNumBuckets(); ++i) {
277  // Do not output empty buckets in order to reduce data file size.
278  if (skipEmptyBuckets && getBucketByIndex(i).count == 0) {
279  continue;
280  }
281  const auto& bucket = getBucketByIndex(i);
282  out << getBucketMin(i) << '\t' << getBucketMax(i) << '\t' << bucket.count
283  << '\t' << bucket.sum << '\n';
284  }
285 }
286 
287 } // namespace folly
ValueType getPercentileEstimate(double pct, CountFn countFromBucket, AvgFn avgFromBucket) const
ValueType getBucketMax(size_t idx) const
Definition: Histogram.h:142
LogLevel max
Definition: LogLevel.cpp:31
void toTSV(std::ostream &out, bool skipEmptyBuckets=true) const
size_t getBucketIdx(ValueType value) const
folly::std T
—— Concurrent Priority Queue Implementation ——
Definition: AtomicBitSet.h:29
uint64_t computeTotalCount(CountFn countFromBucket) const
const int min_
const int max_
LogLevel min
Definition: LogLevel.cpp:30
ValueType getBucketMin(size_t idx) const
Definition: Histogram.h:124
std::string debugString() const
std::vector< BucketType > buckets_
Definition: Histogram.h:227
void toAppend(char value, Tgt *result)
Definition: Conv.h:406
size_t getPercentileBucketIdx(double pct, CountFn countFromBucket, double *lowPct=nullptr, double *highPct=nullptr) const
int * count
const char * string
Definition: Conv.cpp:212
uint64_t value(const typename LockFreeRingBuffer< T, Atom >::Cursor &rbcursor)
HistogramBuckets(ValueType bucketSize, ValueType min, ValueType max, const BucketType &defaultBucket)