proxygen
folly::TDigest Class Reference

#include <TDigest.h>

Classes

class  Centroid
 

Public Member Functions

 TDigest (size_t maxSize=100)
 
 TDigest (std::vector< Centroid > centroids, double sum, double count, double max_val, double min_val, size_t maxSize=100)
 
TDigest merge (presorted_t, Range< const double * > sortedValues) const
 
TDigest merge (Range< const double * > unsortedValues) const
 
double estimateQuantile (double q) const
 
double mean () const
 
double sum () const
 
double count () const
 
double min () const
 
double max () const
 
bool empty () const
 
const std::vector< Centroid > & getCentroids () const
 
size_t maxSize () const
 

Static Public Member Functions

static TDigest merge (Range< const TDigest * > digests)
 

Private Attributes

std::vector< Centroidcentroids_
 
size_t maxSize_
 
double sum_
 
double count_
 
double max_
 
double min_
 

Detailed Description

Definition at line 50 of file TDigest.h.

Constructor & Destructor Documentation

folly::TDigest::TDigest ( size_t  maxSize = 100)
inlineexplicit

Definition at line 81 of file TDigest.h.

References count(), estimateQuantile(), maxSize(), merge(), and sum().

Referenced by merge(), and TDigest().

82  : maxSize_(maxSize), sum_(0.0), count_(0.0), max_(NAN), min_(NAN) {}
double sum_
Definition: TDigest.h:145
size_t maxSize() const
Definition: TDigest.h:138
size_t maxSize_
Definition: TDigest.h:144
double min_
Definition: TDigest.h:148
double count_
Definition: TDigest.h:146
double max_
Definition: TDigest.h:147
folly::TDigest::TDigest ( std::vector< Centroid centroids,
double  sum,
double  count,
double  max_val,
double  min_val,
size_t  maxSize = 100 
)
explicit

Definition at line 79 of file TDigest.cpp.

References centroids_, count_, max_, maxSize_, merge(), min_, folly::gen::move, sum_, and TDigest().

86  : maxSize_(maxSize),
87  sum_(sum),
88  count_(count),
89  max_(max_val),
90  min_(min_val) {
91  if (centroids.size() <= maxSize_) {
92  centroids_ = std::move(centroids);
93  } else {
94  // Number of centroids is greater than maxSize, we need to compress them
95  // When merging, resulting digest takes the maxSize of the first digest
96  auto sz = centroids.size();
97  std::array<TDigest, 2> digests{{
99  TDigest(std::move(centroids), sum_, count_, max_, min_, sz),
100  }};
101  *this = this->merge(digests);
102  }
103 }
double sum_
Definition: TDigest.h:145
constexpr detail::Map< Move > move
Definition: Base-inl.h:2567
double sum() const
Definition: TDigest.h:114
double count() const
Definition: TDigest.h:118
size_t maxSize() const
Definition: TDigest.h:138
size_t maxSize_
Definition: TDigest.h:144
std::vector< Centroid > centroids_
Definition: TDigest.h:143
TDigest(size_t maxSize=100)
Definition: TDigest.h:81
TDigest merge(presorted_t, Range< const double * > sortedValues) const
Definition: TDigest.cpp:126
double min_
Definition: TDigest.h:148
double count_
Definition: TDigest.h:146
double max_
Definition: TDigest.h:147

Member Function Documentation

double folly::TDigest::count ( ) const
inline

Definition at line 118 of file TDigest.h.

References count_.

Referenced by folly::detail::estimatesFromDigest(), merge(), TDigest(), and TEST().

118  {
119  return count_;
120  }
double count_
Definition: TDigest.h:146
bool folly::TDigest::empty ( ) const
inline

Definition at line 130 of file TDigest.h.

References centroids_.

130  {
131  return centroids_.empty();
132  }
std::vector< Centroid > centroids_
Definition: TDigest.h:143
double folly::TDigest::estimateQuantile ( double  q) const

Definition at line 312 of file TDigest.cpp.

References centroids_, folly::clamp(), count_, max(), max_, min(), min_, folly::pushmi::detail::t, and folly::value().

Referenced by estimateQuantile(), folly::detail::estimatesFromDigest(), TDigest(), TEST(), and TEST_P().

312  {
313  if (centroids_.empty()) {
314  return 0.0;
315  }
316  double rank = q * count_;
317 
318  size_t pos;
319  double t;
320  if (q > 0.5) {
321  if (q >= 1.0) {
322  return max_;
323  }
324  pos = 0;
325  t = count_;
326  for (auto rit = centroids_.rbegin(); rit != centroids_.rend(); ++rit) {
327  t -= rit->weight();
328  if (rank >= t) {
329  pos = std::distance(rit, centroids_.rend()) - 1;
330  break;
331  }
332  }
333  } else {
334  if (q <= 0.0) {
335  return min_;
336  }
337  pos = centroids_.size() - 1;
338  t = 0;
339  for (auto it = centroids_.begin(); it != centroids_.end(); ++it) {
340  if (rank < t + it->weight()) {
341  pos = std::distance(centroids_.begin(), it);
342  break;
343  }
344  t += it->weight();
345  }
346  }
347 
348  double delta = 0;
349  double min = min_;
350  double max = max_;
351  if (centroids_.size() > 1) {
352  if (pos == 0) {
353  delta = centroids_[pos + 1].mean() - centroids_[pos].mean();
354  max = centroids_[pos + 1].mean();
355  } else if (pos == centroids_.size() - 1) {
356  delta = centroids_[pos].mean() - centroids_[pos - 1].mean();
357  min = centroids_[pos - 1].mean();
358  } else {
359  delta = (centroids_[pos + 1].mean() - centroids_[pos - 1].mean()) / 2;
360  min = centroids_[pos - 1].mean();
361  max = centroids_[pos + 1].mean();
362  }
363  }
364  auto value = centroids_[pos].mean() +
365  ((rank - t) / centroids_[pos].weight() - 0.5) * delta;
366  return clamp(value, min, max);
367 }
std::vector< Centroid > centroids_
Definition: TDigest.h:143
static double clamp(double v, double lo, double hi)
Definition: TDigest.cpp:70
double min_
Definition: TDigest.h:148
uint64_t value(const typename LockFreeRingBuffer< T, Atom >::Cursor &rbcursor)
double count_
Definition: TDigest.h:146
double max() const
Definition: TDigest.h:126
double min() const
Definition: TDigest.h:122
double max_
Definition: TDigest.h:147
const std::vector<Centroid>& folly::TDigest::getCentroids ( ) const
inline

Definition at line 134 of file TDigest.h.

References centroids_.

Referenced by TEST().

134  {
135  return centroids_;
136  }
std::vector< Centroid > centroids_
Definition: TDigest.h:143
double folly::TDigest::max ( ) const
inline

Definition at line 126 of file TDigest.h.

References max_.

Referenced by estimateQuantile(), merge(), and TEST().

126  {
127  return max_;
128  }
double max_
Definition: TDigest.h:147
size_t folly::TDigest::maxSize ( ) const
inline

Definition at line 138 of file TDigest.h.

References maxSize_.

Referenced by merge(), and TDigest().

138  {
139  return maxSize_;
140  }
size_t maxSize_
Definition: TDigest.h:144
double folly::TDigest::mean ( ) const
inline

Definition at line 110 of file TDigest.h.

References count_, and sum_.

Referenced by TEST().

110  {
111  return count_ ? sum_ / count_ : 0;
112  }
double sum_
Definition: TDigest.h:145
double count_
Definition: TDigest.h:146
TDigest folly::TDigest::merge ( presorted_t  ,
Range< const double * >  sortedValues 
) const

Definition at line 126 of file TDigest.cpp.

References folly::TDigest::Centroid::add(), folly::Range< Iter >::begin(), centroids_, count_, folly::Range< Iter >::empty(), folly::Range< Iter >::end(), folly::k_to_q(), max, max_, maxSize_, folly::TDigest::Centroid::mean(), min, min_, folly::gen::move, cpp.ast::next(), folly::Range< Iter >::size(), sum_, and folly::TDigest::Centroid::weight().

Referenced by estimateQuantile(), folly::SlidingWindowQuantileEstimator< ClockT >::estimateQuantiles(), merge(), merge(), mergeDigests(), TDigest(), TEST(), and TEST_P().

126  {
127  if (sortedValues.empty()) {
128  return *this;
129  }
130 
131  TDigest result(maxSize_);
132 
133  result.count_ = count_ + sortedValues.size();
134 
135  double maybeMin = *sortedValues.begin();
136  double maybeMax = *(sortedValues.end() - 1);
137  if (count_ > 0) {
138  // We know that min_ and max_ are numbers
139  result.min_ = std::min(min_, maybeMin);
140  result.max_ = std::max(max_, maybeMax);
141  } else {
142  // We know that min_ and max_ are NaN.
143  result.min_ = maybeMin;
144  result.max_ = maybeMax;
145  }
146 
147  std::vector<Centroid> compressed;
148  compressed.reserve(maxSize_);
149 
150  double k_limit = 1;
151  double q_limit_times_count = k_to_q(k_limit++, maxSize_) * result.count_;
152 
153  auto it_centroids = centroids_.begin();
154  auto it_sortedValues = sortedValues.begin();
155 
156  Centroid cur;
157  if (it_centroids != centroids_.end() &&
158  it_centroids->mean() < *it_sortedValues) {
159  cur = *it_centroids++;
160  } else {
161  cur = Centroid(*it_sortedValues++, 1.0);
162  }
163 
164  double weightSoFar = cur.weight();
165 
166  // Keep track of sums along the way to reduce expensive floating points
167  double sumsToMerge = 0;
168  double weightsToMerge = 0;
169 
170  while (it_centroids != centroids_.end() ||
171  it_sortedValues != sortedValues.end()) {
172  Centroid next;
173 
174  if (it_centroids != centroids_.end() &&
175  (it_sortedValues == sortedValues.end() ||
176  it_centroids->mean() < *it_sortedValues)) {
177  next = *it_centroids++;
178  } else {
179  next = Centroid(*it_sortedValues++, 1.0);
180  }
181 
182  double nextSum = next.mean() * next.weight();
183  weightSoFar += next.weight();
184 
185  if (weightSoFar <= q_limit_times_count) {
186  sumsToMerge += nextSum;
187  weightsToMerge += next.weight();
188  } else {
189  result.sum_ += cur.add(sumsToMerge, weightsToMerge);
190  sumsToMerge = 0;
191  weightsToMerge = 0;
192  compressed.push_back(cur);
193  q_limit_times_count = k_to_q(k_limit++, maxSize_) * result.count_;
194  cur = next;
195  }
196  }
197  result.sum_ += cur.add(sumsToMerge, weightsToMerge);
198  compressed.push_back(cur);
199  compressed.shrink_to_fit();
200 
201  // Deal with floating point precision
202  std::sort(compressed.begin(), compressed.end());
203 
204  result.centroids_ = std::move(compressed);
205  return result;
206 }
LogLevel max
Definition: LogLevel.cpp:31
static double k_to_q(double k, double d)
Definition: TDigest.cpp:60
constexpr detail::Map< Move > move
Definition: Base-inl.h:2567
LogLevel min
Definition: LogLevel.cpp:30
size_t maxSize_
Definition: TDigest.h:144
std::vector< Centroid > centroids_
Definition: TDigest.h:143
TDigest(size_t maxSize=100)
Definition: TDigest.h:81
double min_
Definition: TDigest.h:148
double count_
Definition: TDigest.h:146
double max_
Definition: TDigest.h:147
def next(obj)
Definition: ast.py:58
TDigest folly::TDigest::merge ( Range< const double * >  unsortedValues) const

Definition at line 109 of file TDigest.cpp.

References folly::Range< Iter >::begin(), folly::copy(), folly::detail::double_radix_sort(), folly::Range< Iter >::end(), merge(), folly::presorted, folly::Range< Iter >::size(), and uint64_t.

109  {
110  auto n = unsortedValues.size();
111 
112  // We require 256 buckets per byte level, plus one count array we can reuse.
113  std::unique_ptr<uint64_t[]> buckets{new uint64_t[256 * 9]};
114  // Allocate input and tmp array
115  std::unique_ptr<double[]> tmp{new double[n * 2]};
116  auto out = tmp.get() + n;
117  auto in = tmp.get();
118  std::copy(unsortedValues.begin(), unsortedValues.end(), in);
119 
120  detail::double_radix_sort(n, buckets.get(), in, out);
121  DCHECK(std::is_sorted(in, in + n));
122 
123  return merge(presorted, Range<const double*>(in, in + n));
124 }
void double_radix_sort(uint64_t n, uint64_t *buckets, double *in, double *tmp)
constexpr std::decay< T >::type copy(T &&value) noexcept(noexcept(typename std::decay< T >::type(std::forward< T >(value))))
Definition: Utility.h:72
TDigest merge(presorted_t, Range< const double * > sortedValues) const
Definition: TDigest.cpp:126
constexpr presorted_t presorted
Definition: Utility.h:311
TDigest folly::TDigest::merge ( Range< const TDigest * >  digests)
static

Definition at line 208 of file TDigest.cpp.

References folly::TDigest::Centroid::add(), folly::Range< Iter >::begin(), centroids_, count(), count_, folly::Range< Iter >::end(), folly::gen::first, i, folly::k_to_q(), max, max(), max_, maxSize(), min, min(), min_, folly::gen::move, folly::Range< Iter >::size(), sum_, TDigest(), and folly::TDigest::Centroid::weight().

208  {
209  size_t nCentroids = 0;
210  for (auto it = digests.begin(); it != digests.end(); it++) {
211  nCentroids += it->centroids_.size();
212  }
213 
214  if (nCentroids == 0) {
215  return TDigest();
216  }
217 
218  std::vector<Centroid> centroids;
219  centroids.reserve(nCentroids);
220 
221  std::vector<std::vector<Centroid>::iterator> starts;
222  starts.reserve(digests.size());
223 
224  double count = 0;
225 
226  // We can safely use these limits to avoid isnan checks below because we know
227  // nCentroids > 0, so at least one TDigest has a min and max.
228  double min = std::numeric_limits<double>::infinity();
229  double max = -std::numeric_limits<double>::infinity();
230 
231  for (auto it = digests.begin(); it != digests.end(); it++) {
232  starts.push_back(centroids.end());
233  double curCount = it->count();
234  if (curCount > 0) {
235  DCHECK(!std::isnan(it->min_));
236  DCHECK(!std::isnan(it->max_));
237  min = std::min(min, it->min_);
238  max = std::max(max, it->max_);
239  count += curCount;
240  for (const auto& centroid : it->centroids_) {
241  centroids.push_back(centroid);
242  }
243  }
244  }
245 
246  for (size_t digestsPerBlock = 1; digestsPerBlock < starts.size();
247  digestsPerBlock *= 2) {
248  // Each sorted block is digestPerBlock digests big. For each step, try to
249  // merge two blocks together.
250  for (size_t i = 0; i < starts.size(); i += (digestsPerBlock * 2)) {
251  // It is possible that this block is incomplete (less than digestsPerBlock
252  // big). In that case, the rest of the block is sorted and leave it alone
253  if (i + digestsPerBlock < starts.size()) {
254  auto first = starts[i];
255  auto middle = starts[i + digestsPerBlock];
256 
257  // It is possible that the next block is incomplete (less than
258  // digestsPerBlock big). In that case, merge to end. Otherwise, merge to
259  // the end of that block.
260  std::vector<Centroid>::iterator last =
261  (i + (digestsPerBlock * 2) < starts.size())
262  ? *(starts.begin() + i + 2 * digestsPerBlock)
263  : centroids.end();
264  std::inplace_merge(first, middle, last);
265  }
266  }
267  }
268 
269  DCHECK(std::is_sorted(centroids.begin(), centroids.end()));
270 
271  size_t maxSize = digests.begin()->maxSize_;
272  TDigest result(maxSize);
273 
274  std::vector<Centroid> compressed;
275  compressed.reserve(maxSize);
276 
277  double k_limit = 1;
278  double q_limit_times_count = k_to_q(k_limit, maxSize) * count;
279 
280  Centroid cur = centroids.front();
281  double weightSoFar = cur.weight();
282  double sumsToMerge = 0;
283  double weightsToMerge = 0;
284  for (auto it = centroids.begin() + 1; it != centroids.end(); ++it) {
285  weightSoFar += it->weight();
286  if (weightSoFar <= q_limit_times_count) {
287  sumsToMerge += it->mean() * it->weight();
288  weightsToMerge += it->weight();
289  } else {
290  result.sum_ += cur.add(sumsToMerge, weightsToMerge);
291  sumsToMerge = 0;
292  weightsToMerge = 0;
293  compressed.push_back(cur);
294  q_limit_times_count = k_to_q(k_limit++, maxSize) * count;
295  cur = *it;
296  }
297  }
298  result.sum_ += cur.add(sumsToMerge, weightsToMerge);
299  compressed.push_back(cur);
300  compressed.shrink_to_fit();
301 
302  // Deal with floating point precision
303  std::sort(compressed.begin(), compressed.end());
304 
305  result.count_ = count;
306  result.min_ = min;
307  result.max_ = max;
308  result.centroids_ = std::move(compressed);
309  return result;
310 }
LogLevel max
Definition: LogLevel.cpp:31
static double k_to_q(double k, double d)
Definition: TDigest.cpp:60
constexpr detail::Map< Move > move
Definition: Base-inl.h:2567
double count() const
Definition: TDigest.h:118
LogLevel min
Definition: LogLevel.cpp:30
size_t maxSize() const
Definition: TDigest.h:138
TDigest(size_t maxSize=100)
Definition: TDigest.h:81
double max() const
Definition: TDigest.h:126
double min() const
Definition: TDigest.h:122
constexpr detail::First first
Definition: Base-inl.h:2553
double folly::TDigest::min ( ) const
inline

Definition at line 122 of file TDigest.h.

References min_.

Referenced by estimateQuantile(), merge(), and TEST().

122  {
123  return min_;
124  }
double min_
Definition: TDigest.h:148
double folly::TDigest::sum ( ) const
inline

Definition at line 114 of file TDigest.h.

References sum_.

Referenced by folly::TDigest::Centroid::add(), folly::detail::estimatesFromDigest(), TDigest(), TEST(), and folly::TDigest::Centroid::weight().

114  {
115  return sum_;
116  }
double sum_
Definition: TDigest.h:145

Member Data Documentation

std::vector<Centroid> folly::TDigest::centroids_
private

Definition at line 143 of file TDigest.h.

Referenced by empty(), estimateQuantile(), getCentroids(), merge(), and TDigest().

double folly::TDigest::count_
private

Definition at line 146 of file TDigest.h.

Referenced by count(), estimateQuantile(), mean(), merge(), and TDigest().

double folly::TDigest::max_
private

Definition at line 147 of file TDigest.h.

Referenced by estimateQuantile(), max(), merge(), and TDigest().

size_t folly::TDigest::maxSize_
private

Definition at line 144 of file TDigest.h.

Referenced by maxSize(), merge(), and TDigest().

double folly::TDigest::min_
private

Definition at line 148 of file TDigest.h.

Referenced by estimateQuantile(), merge(), min(), and TDigest().

double folly::TDigest::sum_
private

Definition at line 145 of file TDigest.h.

Referenced by mean(), merge(), sum(), and TDigest().


The documentation for this class was generated from the following files: