proxygen
folly::detail::MemoryIdler Struct Reference

#include <MemoryIdler.h>

Public Types

enum  { kDefaultStackToRetain = 1024 }
 

Static Public Member Functions

static void flushLocalMallocCaches ()
 
static void unmapUnusedStack (size_t retain=kDefaultStackToRetain)
 
template<typename IdleTime = std::chrono::steady_clock::duration>
static IdleTime getVariationTimeout (IdleTime const &idleTimeout=defaultIdleTimeout.load(std::memory_order_acquire), float timeoutVariationFrac=0.5)
 
template<typename Futex , typename IdleTime = std::chrono::steady_clock::duration>
static FutexResult futexWait (Futex &fut, uint32_t expected, uint32_t waitMask=-1, IdleTime const &idleTimeout=defaultIdleTimeout.load(std::memory_order_acquire), size_t stackToRetain=kDefaultStackToRetain, float timeoutVariationFrac=0.5)
 
template<typename Futex , typename Deadline , typename IdleTime = std::chrono::steady_clock::duration>
static FutexResult futexWaitUntil (Futex &fut, uint32_t expected, Deadline const &deadline, uint32_t waitMask=-1, IdleTime const &idleTimeout=defaultIdleTimeout.load(std::memory_order_acquire), size_t stackToRetain=kDefaultStackToRetain, float timeoutVariationFrac=0.5)
 

Static Public Attributes

static AtomicStruct< std::chrono::steady_clock::duration > defaultIdleTimeout
 

Static Private Member Functions

template<typename Futex , typename Deadline , typename IdleTime >
static bool futexWaitPreIdle (FutexResult &_ret, Futex &fut, uint32_t expected, Deadline const &deadline, uint32_t waitMask, IdleTime idleTimeout, size_t stackToRetain, float timeoutVariationFrac)
 

Detailed Description

MemoryIdler provides helper routines that allow a thread to return some of its assigned memory resources back to the system. The intended use is that when a thread has been waiting for a long time (perhaps it is in a LIFO thread pool and hasn't been needed for a while), it should release its thread-local malloc caches (both jemalloc and tcmalloc use these for better performance) and unmap the stack pages that contain no useful data.

Definition at line 38 of file MemoryIdler.h.

Member Enumeration Documentation

anonymous enum
Enumerator
kDefaultStackToRetain 

This value is a tradeoff between reclaiming memory and triggering a page fault immediately on wakeup. Note that the actual unit of idling for the stack is pages, so the actual stack that will be available on wakeup without a page fault is between kDefaultStackToRetain and kDefaultStackToRetain + PageSize - 1 bytes.

Definition at line 44 of file MemoryIdler.h.

Member Function Documentation

void folly::detail::MemoryIdler::flushLocalMallocCaches ( )
static

Returns memory from thread-local allocation pools to the global pool, if we know how to do so for the current malloc implementation. Currently only jemalloc is supported; when jemalloc is not in use this function returns without doing anything.

Definition at line 41 of file MemoryIdler.cpp.

References addr, folly::call_once(), folly::test::end(), FB_LOG_EVERY_MS, flag, FOLLY_NOINLINE, folly::kIsDebug, mallctl, mallctlbymib, mallctlnametomib, folly::mallctlRead(), max, SCOPE_EXIT, folly::CacheLocality::system(), unmapUnusedStack(), folly::usingJEMalloc(), and folly::WARNING.

Referenced by BENCHMARK(), futexWaitPreIdle(), folly::MemoryIdlerTimeout::runLoopCallback(), and TEST().

41  {
42  if (!usingJEMalloc()) {
43  return;
44  }
45  if (!mallctl || !mallctlnametomib || !mallctlbymib) {
46  FB_LOG_EVERY_MS(ERROR, 10000) << "mallctl* weak link failed";
47  return;
48  }
49 
50  try {
51  // Not using mallctlCall as this will fail if tcache is disabled.
52  mallctl("thread.tcache.flush", nullptr, nullptr, nullptr, 0);
53 
54  // By default jemalloc has 4 arenas per cpu, and then assigns each
55  // thread to one of those arenas. This means that in any service
56  // that doesn't perform a lot of context switching, the chances that
57  // another thread will be using the current thread's arena (and hence
58  // doing the appropriate dirty-page purging) are low. Some good
59  // tuned configurations (such as that used by hhvm) use fewer arenas
60  // and then pin threads to avoid contended access. In that case,
61  // purging the arenas is counter-productive. We use the heuristic
62  // that if narenas <= 2 * num_cpus then we shouldn't do anything here,
63  // which detects when the narenas has been reduced from the default
64  unsigned narenas;
65  unsigned arenaForCurrent;
66  size_t mib[3];
67  size_t miblen = 3;
68 
69  mallctlRead("opt.narenas", &narenas);
70  mallctlRead("thread.arena", &arenaForCurrent);
71  if (narenas > 2 * CacheLocality::system().numCpus &&
72  mallctlnametomib("arena.0.purge", mib, &miblen) == 0) {
73  mib[1] = static_cast<size_t>(arenaForCurrent);
74  mallctlbymib(mib, miblen, nullptr, nullptr, nullptr, 0);
75  }
76  } catch (const std::runtime_error& ex) {
77  FB_LOG_EVERY_MS(WARNING, 10000) << ex.what();
78  }
79 }
static const CacheLocality & system()
bool usingJEMalloc() noexcept
Definition: Malloc.h:147
void mallctlRead(const char *cmd, T *out)
Definition: MallctlHelper.h:48
int(* mallctlbymib)(const size_t *, size_t, void *, size_t *, void *, size_t)
Definition: MallocImpl.cpp:44
#define FB_LOG_EVERY_MS(severity, milli_interval)
Definition: GLog.h:36
int(* mallctl)(const char *, void *, size_t *, void *, size_t)
Definition: MallocImpl.cpp:42
int(* mallctlnametomib)(const char *, size_t *, size_t *)
Definition: MallocImpl.cpp:43
template<typename Futex , typename IdleTime = std::chrono::steady_clock::duration>
static FutexResult folly::detail::MemoryIdler::futexWait ( Futex & fut,
uint32_t  expected,
uint32_t  waitMask = -1,
IdleTime const &  idleTimeout = defaultIdleTimeout.load(std::memory_order_acquire),
size_t  stackToRetain = kDefaultStackToRetain,
float  timeoutVariationFrac = 0.5 
)
inlinestatic

Equivalent to fut.futexWait(expected, waitMask), but calls flushLocalMallocCaches() and unmapUnusedStack(stackToRetain) after idleTimeout has passed (if it has passed). Internally uses fut.futexWait and fut.futexWaitUntil. The actual timeout will be pseudo-randomly chosen to be between idleTimeout and idleTimeout * (1 + timeoutVariationFrac), to smooth out the behavior in a system with bursty requests. The default is to wait up to 50% extra, so on average 25% extra.

Definition at line 102 of file MemoryIdler.h.

References folly::detail::futexWait(), futexWaitPreIdle(), and max.

Referenced by folly::fibers::Baton::waitThread().

109  {
110  FutexResult pre;
111  if (futexWaitPreIdle(
112  pre,
113  fut,
114  expected,
116  waitMask,
117  idleTimeout,
118  stackToRetain,
119  timeoutVariationFrac)) {
120  return pre;
121  }
122 
124  return futexWait(&fut, expected, waitMask);
125  }
LogLevel max
Definition: LogLevel.cpp:31
static FutexResult futexWait(Futex &fut, uint32_t expected, uint32_t waitMask=-1, IdleTime const &idleTimeout=defaultIdleTimeout.load(std::memory_order_acquire), size_t stackToRetain=kDefaultStackToRetain, float timeoutVariationFrac=0.5)
Definition: MemoryIdler.h:102
FutexResult futexWait(const Futex *futex, uint32_t expected, uint32_t waitMask)
Definition: Futex-inl.h:100
static bool futexWaitPreIdle(FutexResult &_ret, Futex &fut, uint32_t expected, Deadline const &deadline, uint32_t waitMask, IdleTime idleTimeout, size_t stackToRetain, float timeoutVariationFrac)
Definition: MemoryIdler.h:167
template<typename Futex , typename Deadline , typename IdleTime >
static bool folly::detail::MemoryIdler::futexWaitPreIdle ( FutexResult & _ret,
Futex & fut,
uint32_t  expected,
Deadline const &  deadline,
uint32_t  waitMask,
IdleTime  idleTimeout,
size_t  stackToRetain,
float  timeoutVariationFrac 
)
inlinestaticprivate

Definition at line 167 of file MemoryIdler.h.

References flushLocalMallocCaches(), folly::detail::futexWaitUntil(), futexWaitUntil(), getVariationTimeout(), max, now(), folly::detail::TIMEDOUT, and unmapUnusedStack().

Referenced by futexWait(), and futexWaitUntil().

175  {
176  // idleTimeout < 0 means no flush behavior
177  if (idleTimeout < IdleTime::zero()) {
178  return false;
179  }
180 
181  // idleTimeout == 0 means flush immediately, without variation
182  // idleTimeout > 0 means flush after delay, with variation
183  if (idleTimeout > IdleTime::zero()) {
184  idleTimeout = std::max(
185  IdleTime::zero(),
186  getVariationTimeout(idleTimeout, timeoutVariationFrac));
187  }
188  if (idleTimeout > IdleTime::zero()) {
189  auto idleDeadline = Deadline::clock::now() + idleTimeout;
190  if (idleDeadline < deadline) {
192  auto rv = futexWaitUntil(&fut, expected, idleDeadline, waitMask);
193  if (rv != FutexResult::TIMEDOUT) {
194  // finished before timeout hit, no flush
195  _ret = rv;
196  return true;
197  }
198  }
199  }
200 
201  // flush, then wait
203  unmapUnusedStack(stackToRetain);
204  return false;
205  }
static FutexResult futexWaitUntil(Futex &fut, uint32_t expected, Deadline const &deadline, uint32_t waitMask=-1, IdleTime const &idleTimeout=defaultIdleTimeout.load(std::memory_order_acquire), size_t stackToRetain=kDefaultStackToRetain, float timeoutVariationFrac=0.5)
Definition: MemoryIdler.h:139
static void unmapUnusedStack(size_t retain=kDefaultStackToRetain)
LogLevel max
Definition: LogLevel.cpp:31
static IdleTime getVariationTimeout(IdleTime const &idleTimeout=defaultIdleTimeout.load(std::memory_order_acquire), float timeoutVariationFrac=0.5)
Definition: MemoryIdler.h:70
std::chrono::steady_clock::time_point now()
FutexResult futexWaitUntil(const Futex *futex, uint32_t expected, std::chrono::time_point< Clock, Duration > const &deadline, uint32_t waitMask)
Definition: Futex-inl.h:112
static void flushLocalMallocCaches()
Definition: MemoryIdler.cpp:41
template<typename Futex , typename Deadline , typename IdleTime = std::chrono::steady_clock::duration>
static FutexResult folly::detail::MemoryIdler::futexWaitUntil ( Futex & fut,
uint32_t  expected,
Deadline const &  deadline,
uint32_t  waitMask = -1,
IdleTime const &  idleTimeout = defaultIdleTimeout.load(std::memory_order_acquire),
size_t  stackToRetain = kDefaultStackToRetain,
float  timeoutVariationFrac = 0.5 
)
inlinestatic

Equivalent to fut.futexWaitUntil(expected, deadline, waitMask), but calls flushLocalMallocCaches() and unmapUnusedStack(stackToRetain) after idleTimeout has passed (if it has passed). Internally uses fut.futexWaitUntil. The actual timeout will be pseudo-randomly chosen to be between idleTimeout and idleTimeout * (1 + timeoutVariationFrac), to smooth out the behavior in a system with bursty requests. The default is to wait up to 50% extra, so on average 25% extra.

Definition at line 139 of file MemoryIdler.h.

References futexWaitPreIdle(), and folly::detail::futexWaitUntil().

Referenced by futexWaitPreIdle(), TEST(), folly::Baton< MayBlock, Atom >::tryWaitSlow(), and folly::SaturatingSemaphore< MayBlock, Atom >::tryWaitSlow().

147  {
148  FutexResult pre;
149  if (futexWaitPreIdle(
150  pre,
151  fut,
152  expected,
153  deadline,
154  waitMask,
155  idleTimeout,
156  stackToRetain,
157  timeoutVariationFrac)) {
158  return pre;
159  }
160 
162  return futexWaitUntil(&fut, expected, deadline, waitMask);
163  }
static FutexResult futexWaitUntil(Futex &fut, uint32_t expected, Deadline const &deadline, uint32_t waitMask=-1, IdleTime const &idleTimeout=defaultIdleTimeout.load(std::memory_order_acquire), size_t stackToRetain=kDefaultStackToRetain, float timeoutVariationFrac=0.5)
Definition: MemoryIdler.h:139
static bool futexWaitPreIdle(FutexResult &_ret, Futex &fut, uint32_t expected, Deadline const &deadline, uint32_t waitMask, IdleTime idleTimeout, size_t stackToRetain, float timeoutVariationFrac)
Definition: MemoryIdler.h:167
FutexResult futexWaitUntil(const Futex *futex, uint32_t expected, std::chrono::time_point< Clock, Duration > const &deadline, uint32_t waitMask)
Definition: Futex-inl.h:112
template<typename IdleTime = std::chrono::steady_clock::duration>
static IdleTime folly::detail::MemoryIdler::getVariationTimeout ( IdleTime const &  idleTimeout = defaultIdleTimeout.load(std::memory_order_acquire),
float  timeoutVariationFrac = 0.5 
)
inlinestatic

Selects a timeout pseudo-randomly chosen to be between idleTimeout and idleTimeout * (1 + timeoutVariationFrac), to smooth out the behavior in a bursty system. If idleTimeout or timeoutVariationFrac is not positive, idleTimeout is returned unchanged.

Definition at line 70 of file MemoryIdler.h.

References count, folly::getCurrentThreadID(), h, folly::hash::hash_combine(), max, now(), folly::hash::twang_mix64(), and uint64_t.

Referenced by futexWaitPreIdle(), and folly::MemoryIdlerTimeout::runLoopCallback().

73  {
74  if (idleTimeout <= IdleTime::zero() || timeoutVariationFrac <= 0) {
75  return idleTimeout;
76  }
77 
78  // hash the pthread_t and the time to get the adjustment
79  // Standard hash func isn't very good, so bit mix the result
82  std::chrono::system_clock::now().time_since_epoch().count()));
83 
84  // multiplying the duration by a floating point doesn't work, grr
85  auto extraFrac = timeoutVariationFrac /
86  static_cast<float>(std::numeric_limits<uint64_t>::max()) * h;
87  auto tics = uint64_t(idleTimeout.count() * (1 + extraFrac));
88  return IdleTime(tics);
89  }
*than *hazptr_holder h
Definition: Hazptr.h:116
LogLevel max
Definition: LogLevel.cpp:31
std::chrono::steady_clock::time_point now()
size_t hash_combine(const T &t, const Ts &...ts) noexcept(noexcept(hash_combine_generic(StdHasher{}, t, ts...)))
Definition: Hash.h:669
int * count
uint64_t twang_mix64(uint64_t key) noexcept
Definition: Hash.h:49
uint64_t getCurrentThreadID()
Definition: ThreadId.h:42
void folly::detail::MemoryIdler::unmapUnusedStack ( size_t  retain = kDefaultStackToRetain)
static

Uses madvise to discard the portion of the thread's stack that currently doesn't hold any data, trying to ensure that no page faults will occur during the next retain bytes of stack allocation.

Definition at line 199 of file MemoryIdler.cpp.

Referenced by BENCHMARK(), flushLocalMallocCaches(), futexWaitPreIdle(), folly::MemoryIdlerTimeout::runLoopCallback(), and TEST().

199 {}

Member Data Documentation

AtomicStruct< std::chrono::steady_clock::duration > folly::detail::MemoryIdler::defaultIdleTimeout
static

The system-wide default for the amount of time a blocking thread should wait before reclaiming idle memory. Set this to Duration::max() to never wait. The default value is 5 seconds. Endpoints using this idle timeout might randomly wait longer to avoid synchronizing their flushes.

Definition at line 64 of file MemoryIdler.h.

Referenced by folly::MemoryIdlerTimeout::runLoopCallback(), and TEST().


The documentation for this struct was generated from the following files: