proxygen
BucketedTimeSeries.h
Go to the documentation of this file.
1 /*
2  * Copyright 2012-present Facebook, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include <chrono>
20 #include <vector>
21 
23 
24 namespace folly {
25 
26 /*
27  * A helper clock type to helper older code using BucketedTimeSeries with
28  * std::chrono::seconds transition to properly using clock types and time_point
29  * objects.
30  */
31 template <typename TT = std::chrono::seconds>
33  public:
34  using duration = TT;
35  using time_point = std::chrono::time_point<LegacyStatsClock, TT>;
36 
37  // This clock does not actually implement now(), since the older API
38  // did not really specify what clock should be used. (In practice most
39  // callers unfortuantely used wall clock time rather than a monotonic clock.)
40 };
41 
42 /*
43  * This class represents a bucketed time series which keeps track of values
44  * added in the recent past, and merges these values together into a fixed
45  * number of buckets to keep a lid on memory use if the number of values
46  * added is very large.
47  *
48  * For example, a BucketedTimeSeries() with duration == 60s and 10 buckets
49  * will keep track of 10 6-second buckets, and discard all data added more
50  * than 1 minute ago. As time ticks by, a 6-second bucket at a time will
51  * be discarded and new data will go into the newly opened bucket. Internally,
52  * it uses a circular array of buckets that it reuses as time advances.
53  *
54  * This class assumes that time advances forwards. The window of time tracked
55  * by the timeseries will advance forwards whenever a more recent timestamp is
56  * passed to addValue(). While it is possible to pass old time values to
57  * addValue(), this will never move the time window backwards. If the old time
58  * value falls outside the tracked window of time, the data point will be
59  * ignored.
60  *
61  * This class is not thread-safe -- use your own synchronization!
62  */
63 template <typename VT, typename CT = LegacyStatsClock<std::chrono::seconds>>
65  public:
66  using ValueType = VT;
67  using Clock = CT;
68  using Duration = typename Clock::duration;
69  using TimePoint = typename Clock::time_point;
71 
72  /*
73  * Create a new BucketedTimeSeries.
74  *
75  * This creates a new BucketedTimeSeries with the specified number of
76  * buckets, storing data for the specified amount of time.
77  *
78  * If the duration is 0, the BucketedTimeSeries will track data forever,
79  * and does not need the rolling buckets. The numBuckets parameter is
80  * ignored when duration is 0.
81  */
82  BucketedTimeSeries(size_t numBuckets, Duration duration);
83 
84  /*
85  * Create a new BucketedTimeSeries.
86  *
87  * This constructor is used to reconstruct a timeseries using
88  * previously saved data
89  */
91  TimePoint theFirstTime,
92  TimePoint theLatestTime,
93  Duration maxDuration,
94  const std::vector<Bucket>& bucketsList);
95 
96  /*
97  * Adds the value 'val' at time 'now'
98  *
99  * This function expects time to generally move forwards. The window of time
100  * tracked by this time series will move forwards with time. If 'now' is
101  * more recent than any time previously seen, addValue() will automatically
102  * call update(now) to advance the time window tracked by this data
103  * structure.
104  *
105  * Values in the recent past may be added to the data structure by passing in
106  * a slightly older value of 'now', as long as this time point still falls
107  * within the tracked duration. If 'now' is older than the tracked duration
108  * of time, the data point value will be ignored, and addValue() will return
109  * false without doing anything else.
110  *
111  * Returns true on success, or false if now was older than the tracked time
112  * window.
113  */
114  bool addValue(TimePoint now, const ValueType& val);
115 
116  /*
117  * Adds the value 'val' the given number of 'times' at time 'now'
118  */
120 
121  /*
122  * Adds the value 'total' as the sum of 'nsamples' samples
123  */
124  bool
125  addValueAggregated(TimePoint now, const ValueType& total, uint64_t nsamples);
126 
127  /*
128  * Updates the container to the specified time, doing all the necessary
129  * work to rotate the buckets and remove any stale data points.
130  *
131  * The addValue() methods automatically call update() when adding new data
132  * points. However, when reading data from the timeseries, you should make
133  * sure to manually call update() before accessing the data. Otherwise you
134  * may be reading stale data if update() has not been called recently.
135  *
136  * Returns the current bucket index after the update.
137  */
138  size_t update(TimePoint now);
139 
140  /*
141  * Reset the timeseries to an empty state,
142  * as if no data points have ever been added to it.
143  */
144  void clear();
145 
146  /*
147  * Get the latest time that has ever been passed to update() or addValue().
148  *
149  * If no data has ever been added to this timeseries, 0 will be returned.
150  */
152  return latestTime_;
153  }
154 
155  /*
156  * Get the time of the earliest data point stored in this timeseries.
157  *
158  * If no data has ever been added to this timeseries, 0 will be returned.
159  *
160  * If isAllTime() is true, this is simply the time when the first data point
161  * was recorded.
162  *
163  * For non-all-time data, the timestamp reflects the first data point still
164  * remembered. As new data points are added, old data will be expired.
165  * getEarliestTime() returns the timestamp of the oldest bucket still present
166  * in the timeseries. This will never be older than (getLatestTime() -
167  * duration()).
168  */
169  TimePoint getEarliestTime() const;
170 
171  /*
172  * Return the number of buckets.
173  */
174  size_t numBuckets() const {
175  return buckets_.size();
176  }
177 
178  /*
179  * Return the maximum duration of data that can be tracked by this
180  * BucketedTimeSeries.
181  */
182  Duration duration() const {
183  return duration_;
184  }
185 
186  /*
187  * Returns true if this BucketedTimeSeries stores data for all-time, without
188  * ever rolling over into new buckets.
189  */
190  bool isAllTime() const {
191  return (duration_ == Duration(0));
192  }
193 
194  /*
195  * Returns true if no calls to update() have been made since the last call to
196  * clear().
197  */
198  bool empty() const {
199  // We set firstTime_ greater than latestTime_ in the constructor and in
200  // clear, so we use this to distinguish if the timeseries is empty.
201  //
202  // Once a data point has been added, latestTime_ will always be greater
203  // than or equal to firstTime_.
204  return firstTime_ > latestTime_;
205  }
206 
207  /*
208  * Returns time of first update() since clear()/constructor.
209  * Note that the returned value is only meaningful when empty() is false.
210  */
212  return firstTime_;
213  }
214 
215  /*
216  * Returns time of last update().
217  * Note that the returned value is only meaningful when empty() is false.
218  */
220  return latestTime_;
221  }
222 
223  /*
224  * Returns actual buckets of values
225  */
226  const std::vector<Bucket>& buckets() const {
227  return buckets_;
228  }
229 
230  /*
231  * Get the amount of time tracked by this timeseries.
232  *
233  * For an all-time timeseries, this returns the length of time since the
234  * first data point was added to the time series.
235  *
236  * Otherwise, this never returns a value greater than the overall timeseries
237  * duration. If the first data point was recorded less than a full duration
238  * ago, the time since the first data point is returned. If a full duration
239  * has elapsed, and we have already thrown away some data, the time since the
240  * oldest bucket is returned.
241  *
242  * For example, say we are tracking 600 seconds worth of data, in 60 buckets.
243  * - If less than 600 seconds have elapsed since the first data point,
244  * elapsed() returns the total elapsed time so far.
245  * - If more than 600 seconds have elapsed, we have already thrown away some
246  * data. However, we throw away a full bucket (10 seconds worth) at once,
247  * so at any point in time we have from 590 to 600 seconds worth of data.
248  * elapsed() will therefore return a value between 590 and 600.
249  *
250  * Note that you generally should call update() before calling elapsed(), to
251  * make sure you are not reading stale data.
252  */
253  Duration elapsed() const;
254 
255  /*
256  * Get the amount of time tracked by this timeseries, between the specified
257  * start and end times.
258  *
259  * If the timeseries contains data for the entire time range specified, this
260  * simply returns (end - start). However, if start is earlier than
261  * getEarliestTime(), this returns (end - getEarliestTime()).
262  */
263  Duration elapsed(TimePoint start, TimePoint end) const;
264 
265  /*
266  * Return the sum of all the data points currently tracked by this
267  * BucketedTimeSeries.
268  *
269  * Note that you generally should call update() before calling sum(), to
270  * make sure you are not reading stale data.
271  */
272  const ValueType& sum() const {
273  return total_.sum;
274  }
275 
276  /*
277  * Return the number of data points currently tracked by this
278  * BucketedTimeSeries.
279  *
280  * Note that you generally should call update() before calling count(), to
281  * make sure you are not reading stale data.
282  */
283  uint64_t count() const {
284  return total_.count;
285  }
286 
287  /*
288  * Return the average value (sum / count).
289  *
290  * The return type may be specified to control whether floating-point or
291  * integer division should be performed.
292  *
293  * Note that you generally should call update() before calling avg(), to
294  * make sure you are not reading stale data.
295  */
296  template <typename ReturnType = double>
297  ReturnType avg() const {
298  return total_.template avg<ReturnType>();
299  }
300 
301  /*
302  * Return the sum divided by the elapsed time.
303  *
304  * Note that you generally should call update() before calling rate(), to
305  * make sure you are not reading stale data.
306  */
307  template <typename ReturnType = double, typename Interval = Duration>
308  ReturnType rate() const {
309  return rateHelper<ReturnType, Interval>(ReturnType(total_.sum), elapsed());
310  }
311 
312  /*
313  * Return the count divided by the elapsed time.
314  *
315  * The Interval template parameter causes the elapsed time to be converted to
316  * the Interval type before using it. For example, if Interval is
317  * std::chrono::seconds, the return value will be the count per second.
318  * If Interval is std::chrono::hours, the return value will be the count per
319  * hour.
320  *
321  * Note that you generally should call update() before calling countRate(),
322  * to make sure you are not reading stale data.
323  */
324  template <typename ReturnType = double, typename Interval = Duration>
325  ReturnType countRate() const {
326  return rateHelper<ReturnType, Interval>(
327  ReturnType(total_.count), elapsed());
328  }
329 
330  /*
331  * Estimate the sum of the data points that occurred in the specified time
332  * period.
333  *
334  * The range queried is [start, end).
335  * That is, start is inclusive, and end is exclusive.
336  *
337  * Note that data outside of the timeseries duration will no longer be
338  * available for use in the estimation. Specifying a start time earlier than
339  * getEarliestTime() will not have much effect, since only data points after
340  * that point in time will be counted.
341  *
342  * Note that the value returned is an estimate, and may not be precise.
343  */
344  ValueType sum(TimePoint start, TimePoint end) const;
345 
346  /*
347  * Estimate the number of data points that occurred in the specified time
348  * period.
349  *
350  * The same caveats documented in the sum(TimePoint start, TimePoint end)
351  * comments apply here as well.
352  */
353  uint64_t count(TimePoint start, TimePoint end) const;
354 
355  /*
356  * Estimate the average value during the specified time period.
357  *
358  * The same caveats documented in the sum(TimePoint start, TimePoint end)
359  * comments apply here as well.
360  */
361  template <typename ReturnType = double>
362  ReturnType avg(TimePoint start, TimePoint end) const;
363 
364  /*
365  * Estimate the rate during the specified time period.
366  *
367  * The same caveats documented in the sum(TimePoint start, TimePoint end)
368  * comments apply here as well.
369  */
370  template <typename ReturnType = double, typename Interval = Duration>
371  ReturnType rate(TimePoint start, TimePoint end) const {
372  ValueType intervalSum = sum(start, end);
373  Duration interval = elapsed(start, end);
374  return rateHelper<ReturnType, Interval>(intervalSum, interval);
375  }
376 
377  /*
378  * Estimate the rate of data points being added during the specified time
379  * period.
380  *
381  * The same caveats documented in the sum(TimePoint start, TimePoint end)
382  * comments apply here as well.
383  */
384  template <typename ReturnType = double, typename Interval = Duration>
385  ReturnType countRate(TimePoint start, TimePoint end) const {
386  uint64_t intervalCount = count(start, end);
387  Duration interval = elapsed(start, end);
388  return rateHelper<ReturnType, Interval>(
389  ReturnType(intervalCount), interval);
390  }
391 
392  /*
393  * Invoke a function for each bucket.
394  *
395  * The function will take as arguments the bucket index,
396  * the bucket start time, and the start time of the subsequent bucket.
397  *
398  * It should return true to continue iterating through the buckets, and false
399  * to break out of the loop and stop, without calling the function on any
400  * more buckets.
401  *
402  * bool function(const Bucket& bucket, TimePoint bucketStart,
403  * TimePoint nextBucketStart)
404  */
405  template <typename Function>
406  void forEachBucket(Function fn) const;
407 
408  /*
409  * Get the index for the bucket containing the specified time.
410  *
411  * Note that the index is only valid if this time actually falls within one
412  * of the current buckets. If you pass in a value more recent than
413  * getLatestTime() or older than (getLatestTime() - elapsed()), the index
414  * returned will not be valid.
415  *
416  * This method may not be called for all-time data.
417  */
418  size_t getBucketIdx(TimePoint time) const;
419 
420  /*
421  * Get the bucket at the specified index.
422  *
423  * This method may not be called for all-time data.
424  */
425  const Bucket& getBucketByIndex(size_t idx) const {
426  return buckets_[idx];
427  }
428 
429  /*
430  * Compute the bucket index that the specified time falls into,
431  * as well as the bucket start time and the next bucket's start time.
432  *
433  * This method may not be called for all-time data.
434  */
435  void getBucketInfo(
436  TimePoint time,
437  size_t* bucketIdx,
438  TimePoint* bucketStart,
439  TimePoint* nextBucketStart) const;
440 
441  /*
442  * Legacy APIs that accept a Duration parameters rather than TimePoint.
443  *
444  * These treat the Duration as relative to the clock epoch.
445  * Prefer using the correct TimePoint-based APIs instead. These APIs will
446  * eventually be deprecated and removed.
447  */
449  return addValueAggregated(TimePoint(now), val, 1);
450  }
452  return addValueAggregated(TimePoint(now), val * ValueType(times), times);
453  }
454  bool
455  addValueAggregated(Duration now, const ValueType& total, uint64_t nsamples) {
456  return addValueAggregated(TimePoint(now), total, nsamples);
457  }
458  size_t update(Duration now) {
459  return update(TimePoint(now));
460  }
461 
462  private:
463  template <typename ReturnType = double, typename Interval = Duration>
464  ReturnType rateHelper(ReturnType numerator, Duration elapsedTime) const {
465  return detail::rateHelper<ReturnType, Duration, Interval>(
466  numerator, elapsedTime);
467  }
468 
469  TimePoint getEarliestTimeNonEmpty() const;
470  size_t updateBuckets(TimePoint now);
471 
472  ValueType rangeAdjust(
473  TimePoint bucketStart,
474  TimePoint nextBucketStart,
475  TimePoint start,
476  TimePoint end,
477  ValueType input) const;
478 
479  template <typename Function>
480  void forEachBucket(TimePoint start, TimePoint end, Function fn) const;
481 
482  TimePoint firstTime_; // time of first update() since clear()/constructor
483  TimePoint latestTime_; // time of last update()
484  Duration duration_; // total duration ("window length") of the time series
485 
486  Bucket total_; // sum and count of everything in time series
487  std::vector<Bucket> buckets_; // actual buckets of values
488 };
489 
490 } // namespace folly
bool addValueAggregated(Duration now, const ValueType &total, uint64_t nsamples)
std::atomic< int64_t > sum(0)
std::chrono::steady_clock::time_point now()
double val
Definition: String.cpp:273
const Bucket & getBucketByIndex(size_t idx) const
ReturnType countRate() const
TimePoint getLatestTime() const
—— Concurrent Priority Queue Implementation ——
Definition: AtomicBitSet.h:29
std::chrono::milliseconds Duration
Definition: Types.h:36
std::vector< Bucket > buckets_
TimePoint latestTime() const
auto end(TestAdlIterable &instance)
Definition: ForeachTest.cpp:62
bool addValue(Duration now, const ValueType &val, uint64_t times)
const ValueType & sum() const
size_t update(Duration now)
ReturnType rate(TimePoint start, TimePoint end) const
ReturnType rateHelper(ReturnType numerator, Duration elapsedTime) const
auto start
bool addValue(Duration now, const ValueType &val)
int * count
TimePoint firstTime() const
void addValue(unsigned int iters, seconds duration, size_t numBuckets, size_t callsPerSecond)
const std::vector< Bucket > & buckets() const
Future< Unit > times(const int n, F &&thunk)
Definition: Future-inl.h:2348
typename Clock::time_point TimePoint
StatsClock::time_point TimePoint
ReturnType countRate(TimePoint start, TimePoint end) const
std::chrono::nanoseconds time()
typename Clock::duration Duration
std::chrono::time_point< LegacyStatsClock, TT > time_point