IndexedMemPool.h
/*
 * Copyright 2014-present Facebook, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#pragma once

#include <assert.h>
#include <errno.h>
#include <stdint.h>

#include <algorithm>
#include <atomic>
#include <cstddef>
#include <limits>
#include <memory>
#include <new>
#include <type_traits>
#include <utility>

#include <boost/noncopyable.hpp>
#include <folly/Portability.h>
#include <folly/concurrency/CacheLocality.h>
#include <folly/portability/SysMman.h>
#include <folly/portability/Unistd.h>
#include <folly/synchronization/AtomicStruct.h>

// Ignore shadowing warnings within this file, so includers can use -Wshadow.
FOLLY_PUSH_WARNING
FOLLY_GNU_DISABLE_WARNING("-Wshadow")

namespace folly {

namespace detail {
template <typename Pool>
struct IndexedMemPoolRecycler;
} // namespace detail

/// Lifecycle traits for IndexedMemPool.  By default, trivial types are
/// recycled lazily (default-constructed once, destroyed with the pool) and
/// non-trivial types are recycled eagerly (constructed on allocation,
/// destroyed on recycle).
template <
    typename T,
    bool EagerRecycleWhenTrivial = false,
    bool EagerRecycleWhenNotTrivial = true>
struct IndexedMemPoolTraits {
  static constexpr bool eagerRecycle() {
    return std::is_trivial<T>::value ? EagerRecycleWhenTrivial
                                     : EagerRecycleWhenNotTrivial;
  }

  /// Called when the element pool is being constructed, to initialize
  /// the element if it is not eagerly recycled.
  static void initialize(T* ptr) {
    if (!eagerRecycle()) {
      new (ptr) T();
    }
  }

  /// Called when the element pool is being destroyed, to destroy the
  /// element if it is not eagerly recycled.
  static void cleanup(T* ptr) {
    if (!eagerRecycle()) {
      ptr->~T();
    }
  }

  /// Called when the element is allocated, with the arguments forwarded
  /// from the allocation call.
  template <typename... Args>
  static void onAllocate(T* ptr, Args&&... args) {
    static_assert(
        sizeof...(Args) == 0 || eagerRecycle(),
        "emplace-style allocation requires eager recycle, "
        "which is defaulted only for non-trivial types");
    if (eagerRecycle()) {
      new (ptr) T(std::forward<Args>(args)...);
    }
  }

  /// Called when the element is recycled.
  static void onRecycle(T* ptr) {
    if (eagerRecycle()) {
      ptr->~T();
    }
  }
};

/// IndexedMemPool traits that implement the lazy lifecycle strategy: elements
/// are default-constructed the first time they are allocated and destroyed
/// only when the pool itself is destroyed.
template <typename T>
using IndexedMemPoolTraitsLazyRecycle = IndexedMemPoolTraits<T, false, false>;

/// IndexedMemPool traits that implement the eager lifecycle strategy: elements
/// are constructed when they are allocated from the pool and destroyed when
/// they are recycled.
template <typename T>
using IndexedMemPoolTraitsEagerRecycle = IndexedMemPoolTraits<T, true, true>;

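// Editor's example (not part of the original header): a sketch of selecting a
// lifecycle strategy for a hypothetical element type `Connection`.  With the
// eager traits, allocation constructs the element in place and recycling
// destroys it; with the lazy traits, elements are default-constructed once
// and live until the pool itself is destroyed.
//
//   using EagerPool = folly::IndexedMemPool<
//       Connection, 32, 200, std::atomic,
//       folly::IndexedMemPoolTraitsEagerRecycle<Connection>>;
//
//   using LazyPool = folly::IndexedMemPool<
//       Connection, 32, 200, std::atomic,
//       folly::IndexedMemPoolTraitsLazyRecycle<Connection>>;
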
/// Instances of IndexedMemPool dynamically allocate and then pool their
/// element type (T), returning 4-byte integer indices that can be passed
/// to the pool's operator[] to access or obtain pointers to the elements.
/// The memory backing elements returned from the pool remains mapped (and
/// readable) even after the elements are recycled, which is useful for
/// lock-free algorithms that perform optimistic reads and need compact
/// 32-bit references.  Storage for the maximum capacity is reserved up
/// front with mmap, but physical pages are only touched as slots are
/// actually constructed.
template <
    typename T,
    uint32_t NumLocalLists_ = 32,
    uint32_t LocalListLimit_ = 200,
    template <typename> class Atom = std::atomic,
    typename Traits = IndexedMemPoolTraits<T>>
struct IndexedMemPool : boost::noncopyable {
  typedef T value_type;

  typedef std::unique_ptr<T, detail::IndexedMemPoolRecycler<IndexedMemPool>>
      UniquePtr;

  static_assert(LocalListLimit_ <= 255, "LocalListLimit must fit in 8 bits");
  enum {
    NumLocalLists = NumLocalLists_,
    LocalListLimit = LocalListLimit_,
  };

  static_assert(
      std::is_nothrow_default_constructible<Atom<uint32_t>>::value,
      "Atom must be nothrow default constructible");

  // these are public because clients may need to reason about the number
  // of bits required to hold indices from a pool, given its capacity

  static constexpr uint32_t maxIndexForCapacity(uint32_t capacity) {
    // index of std::numeric_limits<uint32_t>::max() is reserved for
    // isAllocated tracking
    return uint32_t(std::min(
        uint64_t(capacity) + (NumLocalLists - 1) * LocalListLimit,
        uint64_t(std::numeric_limits<uint32_t>::max() - 1)));
  }

  static constexpr uint32_t capacityForMaxIndex(uint32_t maxIndex) {
    return maxIndex - (NumLocalLists - 1) * LocalListLimit;
  }

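  // Editor's note (illustrative arithmetic, not part of the original header):
  // with the default NumLocalLists = 32 and LocalListLimit = 200, the pool
  // over-provisions (32 - 1) * 200 = 6200 extra slots so that the requested
  // capacity remains allocatable even when every local free list is full.
  // For example, maxIndexForCapacity(10000) == 16200, so indices from such a
  // pool always fit in 14 bits (2^14 - 1 == 16383).
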
  /// constructs a pool that can hold at least `capacity` elements, even
  /// if all the local lists are full
  explicit IndexedMemPool(uint32_t capacity)
      : actualCapacity_(maxIndexForCapacity(capacity)),
        size_(0),
        globalHead_(TaggedPtr{}) {
    const size_t needed = sizeof(Slot) * (actualCapacity_ + 1);
    size_t pagesize = size_t(sysconf(_SC_PAGESIZE));
    mmapLength_ = ((needed - 1) & ~(pagesize - 1)) + pagesize;
    assert(needed <= mmapLength_ && mmapLength_ < needed + pagesize);
    assert((mmapLength_ % pagesize) == 0);

    slots_ = static_cast<Slot*>(mmap(
        nullptr,
        mmapLength_,
        PROT_READ | PROT_WRITE,
        MAP_PRIVATE | MAP_ANONYMOUS,
        -1,
        0));
    if (slots_ == MAP_FAILED) {
      assert(errno == ENOMEM);
      throw std::bad_alloc();
    }
    // Atom is expected to be std::atomic, which is trivially
    // constructible, so we can avoid explicitly initializing the
    // slots, which would cause the whole mapped area to be paged in.
    //
    // TODO: Switch to is_trivially_constructible once support
    // for GCC 4.9 is dropped for this file.
    if /* constexpr */ (!std::is_trivial<Atom<uint32_t>>::value) {
      for (size_t i = 1; i < actualCapacity_ + 1; i++) {
        // Atom is enforced above to be nothrow-default-constructible
        new (&slots_[i].localNext) Atom<uint32_t>;
        new (&slots_[i].globalNext) Atom<uint32_t>;
      }
    }
  }

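  // Editor's note (illustrative arithmetic, not part of the original header):
  // the expression ((needed - 1) & ~(pagesize - 1)) + pagesize rounds `needed`
  // up to a whole number of pages.  For instance, if sizeof(Slot) were 16 and
  // actualCapacity_ were 1000, needed would be 16 * 1001 = 16016 bytes and,
  // with 4096-byte pages, mmapLength_ would be 16384 (four pages).  Because
  // the mapping is MAP_ANONYMOUS, pages are zero-filled lazily and are only
  // backed by physical memory once slots are actually constructed.
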
  /// destroys all of the contained elements
  ~IndexedMemPool() {
    using A = Atom<uint32_t>;
    for (uint32_t i = maxAllocatedIndex(); i > 0; --i) {
      Traits::cleanup(&slots_[i].elem);
    }
    for (size_t i = 1; i < actualCapacity_ + 1; i++) {
      slots_[i].localNext.~A();
      slots_[i].globalNext.~A();
    }
    munmap(slots_, mmapLength_);
  }

  /// Returns a lower bound on the number of elements that may be
  /// simultaneously allocated and not yet recycled.  Because of the
  /// local lists it is possible that more elements than this are
  /// returned successfully.
  uint32_t capacity() {
    return capacityForMaxIndex(actualCapacity_);
  }

  /// Returns the maximum index of elements ever allocated in this pool,
  /// including elements that have been recycled.
  uint32_t maxAllocatedIndex() const {
    // Take the minimum since it is possible that size_ > actualCapacity_.
    // This can happen if there are multiple concurrent requests
    // when size_ == actualCapacity_ - 1.
    return std::min(uint32_t(size_), uint32_t(actualCapacity_));
  }

  /// Finds a slot with a non-zero index, emplaces a T there if we're
  /// using the eager recycle lifecycle mode, and returns the index,
  /// or returns 0 if no elements are available.  Passes a pointer to
  /// the element to Traits::onAllocate before the slot is marked as
  /// allocated.
  template <typename... Args>
  uint32_t allocIndex(Args&&... args) {
    auto idx = localPop(localHead());
    if (idx != 0) {
      Slot& s = slot(idx);
      Traits::onAllocate(&s.elem, std::forward<Args>(args)...);
      markAllocated(s);
    }
    return idx;
  }

  /// If an element is available, returns a std::unique_ptr to it that will
  /// recycle the element to the pool when it is reclaimed, otherwise returns
  /// a null (falsy) std::unique_ptr.
  template <typename... Args>
  UniquePtr allocElem(Args&&... args) {
    auto idx = allocIndex(std::forward<Args>(args)...);
    T* ptr = idx == 0 ? nullptr : &slot(idx).elem;
    return UniquePtr(ptr, typename UniquePtr::deleter_type(this));
  }

  /// Gives up ownership previously granted by alloc()
  void recycleIndex(uint32_t idx) {
    assert(isAllocated(idx));
    localPush(localHead(), idx);
  }

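  // Editor's example (not part of the original header): a minimal sketch of
  // the allocation round trip, assuming a pool of std::string with the
  // default (eager-for-non-trivial) traits.
  //
  //   folly::IndexedMemPool<std::string> pool(1024);
  //
  //   uint32_t idx = pool.allocIndex("hello");  // 0 means the pool is full
  //   if (idx != 0) {
  //     pool[idx] += " world";                  // access by index
  //     pool.recycleIndex(idx);                 // eager traits destroy here
  //   }
  //
  //   auto ptr = pool.allocElem("bye");         // unique_ptr recycles for us
  //   if (ptr) {
  //     ptr->append("!");
  //   }
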
  /// Provides access to the pooled element referenced by idx
  T& operator[](uint32_t idx) {
    return slot(idx).elem;
  }

  /// Provides access to the pooled element referenced by idx
  const T& operator[](uint32_t idx) const {
    return slot(idx).elem;
  }

  /// If elem == &pool[idx], then pool.locateElem(elem) == idx.  Also,
  /// pool.locateElem(nullptr) == 0
  uint32_t locateElem(const T* elem) const {
    if (!elem) {
      return 0;
    }

    static_assert(std::is_standard_layout<Slot>::value, "offsetof needs POD");

    auto slot = reinterpret_cast<const Slot*>(
        reinterpret_cast<const char*>(elem) - offsetof(Slot, elem));
    auto rv = uint32_t(slot - slots_);

    // this assert also tests that rv is in range
    assert(elem == &(*this)[rv]);
    return rv;
  }

  /// Returns true iff idx has been alloc()ed and not recycleIndex()ed
  bool isAllocated(uint32_t idx) const {
    return slot(idx).localNext.load(std::memory_order_acquire) == uint32_t(-1);
  }

 private:
  ///////////// types

  struct Slot {
    T elem;
    Atom<uint32_t> localNext;
    Atom<uint32_t> globalNext;

    Slot() : localNext{}, globalNext{} {}
  };

  struct TaggedPtr {
    uint32_t idx;

    // size is bottom 8 bits, tag in top 24.  g++'s code generation for
    // bitfields seems to depend on the phase of the moon, plus we can
    // do better because we can rely on other checks to avoid masking
    uint32_t tagAndSize;

    enum : uint32_t {
      SizeBits = 8,
      SizeMask = (1U << SizeBits) - 1,
      TagIncr = 1U << SizeBits,
    };

    uint32_t size() const {
      return tagAndSize & SizeMask;
    }

    TaggedPtr withSize(uint32_t repl) const {
      assert(repl <= LocalListLimit);
      return TaggedPtr{idx, (tagAndSize & ~SizeMask) | repl};
    }

    TaggedPtr withSizeIncr() const {
      assert(size() < LocalListLimit);
      return TaggedPtr{idx, tagAndSize + 1};
    }

    TaggedPtr withSizeDecr() const {
      assert(size() > 0);
      return TaggedPtr{idx, tagAndSize - 1};
    }

    TaggedPtr withIdx(uint32_t repl) const {
      return TaggedPtr{repl, tagAndSize + TagIncr};
    }

    TaggedPtr withEmpty() const {
      return withIdx(0).withSize(0);
    }
  };

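  // Editor's note (not part of the original header): every withIdx() call
  // advances the 24-bit tag by TagIncr, so a compare_exchange on a list head
  // fails if the list was popped and re-pushed in between, even when the
  // same index ends up back at the head (the classic ABA hazard).  For
  // example, if a thread reads head {idx = 5, tag = 3} while another thread
  // pops index 5 and then pushes it again, the head becomes {idx = 5,
  // tag = 5}, so the first thread's CAS is rejected and it reloads.  The list
  // size fits in the low 8 bits because LocalListLimit is limited to 255.
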
  struct alignas(hardware_destructive_interference_size) LocalList {
    AtomicStruct<TaggedPtr, Atom> head;

    LocalList() : head(TaggedPtr{}) {}
  };

  ////////// fields

  /// the number of bytes allocated from mmap, which is a multiple of
  /// the page size of the machine
  size_t mmapLength_;

  /// the actual number of slots that we will allocate, to guarantee that
  /// we will satisfy the capacity requested at construction time.  They
  /// will be numbered 1..actualCapacity_ (note the 1-based counting),
  /// and occupy slots_[1..actualCapacity_].
  uint32_t actualCapacity_;

  /// this records the number of slots that have actually been constructed.
  /// To allow use of atomic ++ instead of CAS, we let this overflow.
  /// The actual number of constructed elements is
  /// min(actualCapacity_, size_)
  Atom<uint32_t> size_;

  /// raw storage, only 1..min(size_, actualCapacity_) (inclusive) are
  /// actually constructed.  Note that slots_[0] is not constructed or used
  alignas(hardware_destructive_interference_size) Slot* slots_;

  /// use AccessSpreader to find your list.  We use stripes instead of
  /// thread-local storage to avoid the need to grow or shrink on thread
  /// start or join.  These are heads of lists chained with localNext
  LocalList local_[NumLocalLists];

  /// this is the head of a list of nodes chained by globalNext, that are
  /// themselves each the head of a list chained by localNext
  alignas(hardware_destructive_interference_size)
      AtomicStruct<TaggedPtr, Atom> globalHead_;

  ///////////// private methods

  uint32_t slotIndex(uint32_t idx) const {
    assert(
        0 < idx && idx <= actualCapacity_ &&
        idx <= size_.load(std::memory_order_acquire));
    return idx;
  }

  Slot& slot(uint32_t idx) {
    return slots_[slotIndex(idx)];
  }

  const Slot& slot(uint32_t idx) const {
    return slots_[slotIndex(idx)];
  }

  // localHead references a full list chained by localNext.  s should
  // reference slot(localHead), it is passed as a micro-optimization
  void globalPush(Slot& s, uint32_t localHead) {
    while (true) {
      TaggedPtr gh = globalHead_.load(std::memory_order_acquire);
      s.globalNext.store(gh.idx, std::memory_order_relaxed);
      if (globalHead_.compare_exchange_strong(gh, gh.withIdx(localHead))) {
        // success
        return;
      }
    }
  }

  // idx references a single node
  void localPush(AtomicStruct<TaggedPtr, Atom>& head, uint32_t idx) {
    Slot& s = slot(idx);
    TaggedPtr h = head.load(std::memory_order_acquire);
    while (true) {
      s.localNext.store(h.idx, std::memory_order_release);
      Traits::onRecycle(&slot(idx).elem);

      if (h.size() == LocalListLimit) {
        // push will overflow local list, steal it instead
        if (head.compare_exchange_strong(h, h.withEmpty())) {
          // steal was successful, put everything in the global list
          globalPush(s, idx);
          return;
        }
      } else {
        // local list has space
        if (head.compare_exchange_strong(h, h.withIdx(idx).withSizeIncr())) {
          // success
          return;
        }
      }
      // h was updated by failing CAS
    }
  }

  // returns 0 if empty
  uint32_t globalPop() {
    while (true) {
      TaggedPtr gh = globalHead_.load(std::memory_order_acquire);
      if (gh.idx == 0 ||
          globalHead_.compare_exchange_strong(
              gh,
              gh.withIdx(
                  slot(gh.idx).globalNext.load(std::memory_order_relaxed)))) {
        // global list is empty, or pop was successful
        return gh.idx;
      }
    }
  }

  // returns 0 if allocation failed
  uint32_t localPop(AtomicStruct<TaggedPtr, Atom>& head) {
    while (true) {
      TaggedPtr h = head.load(std::memory_order_acquire);
      if (h.idx != 0) {
        // local list is non-empty, try to pop
        Slot& s = slot(h.idx);
        auto next = s.localNext.load(std::memory_order_relaxed);
        if (head.compare_exchange_strong(h, h.withIdx(next).withSizeDecr())) {
          // success
          return h.idx;
        }
        continue;
      }

      uint32_t idx = globalPop();
      if (idx == 0) {
        // global list is empty, allocate and construct new slot
        if (size_.load(std::memory_order_relaxed) >= actualCapacity_ ||
            (idx = ++size_) > actualCapacity_) {
          // allocation failed
          return 0;
        }
        Traits::initialize(&slot(idx).elem);
        return idx;
      }

      Slot& s = slot(idx);
      auto next = s.localNext.load(std::memory_order_relaxed);
      if (head.compare_exchange_strong(
              h, h.withIdx(next).withSize(LocalListLimit))) {
        // global list moved to local list, keep head for us
        return idx;
      }
      // local bulk push failed, return idx to the global list and try again
      globalPush(s, idx);
    }
  }
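
  // Editor's note (not part of the original header): allocation therefore
  // prefers the caller's stripe-local free list, then tries to claim an
  // entire chain from the global list (installing the remainder as the new
  // local list), and only constructs a brand-new slot by bumping size_ when
  // both lists are empty and actualCapacity_ has not been reached.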

  AtomicStruct<TaggedPtr, Atom>& localHead() {
    auto stripe = AccessSpreader<Atom>::current(NumLocalLists);
    return local_[stripe].head;
  }

  void markAllocated(Slot& slot) {
    slot.localNext.store(uint32_t(-1), std::memory_order_release);
  }

 public:
  static constexpr std::size_t kSlotSize = sizeof(Slot);
};

namespace detail {

/// This is a stateful Deleter functor, which allows std::unique_ptr
/// to recycle elements back to the IndexedMemPool from which they
/// were allocated.
template <typename Pool>
struct IndexedMemPoolRecycler {
  Pool* pool;

  explicit IndexedMemPoolRecycler(Pool* pool) : pool(pool) {}

  IndexedMemPoolRecycler(const IndexedMemPoolRecycler<Pool>& other) = default;
  IndexedMemPoolRecycler& operator=(const IndexedMemPoolRecycler<Pool>& other) =
      default;

  void operator()(typename Pool::value_type* elem) const {
    pool->recycleIndex(pool->locateElem(elem));
  }
};

} // namespace detail

} // namespace folly

FOLLY_POP_WARNING