SharedMutex.h
1 /*
2  * Copyright 2015-present Facebook, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 // @author Nathan Bronson (ngbronson@fb.com)
18 
19 #pragma once
20 
21 #include <stdint.h>
22 
23 #include <atomic>
24 #include <thread>
25 #include <type_traits>
26 
27 #include <folly/CPortability.h>
28 #include <folly/Likely.h>
30 #include <folly/detail/Futex.h>
31 #include <folly/portability/Asm.h>
34 
35 // SharedMutex is a reader-writer lock. It is small, very fast, scalable
36 // on multi-core, and suitable for use when readers or writers may block.
37 // Unlike most other reader-writer locks, its throughput with concurrent
38 // readers scales linearly; it is able to acquire and release the lock
39 // in shared mode without cache line ping-ponging. It is suitable for
40 // a wide range of lock hold times because it starts with spinning,
41 // proceeds to using sched_yield with a preemption heuristic, and then
42 // waits using futex and precise wakeups.
43 //
44 // SharedMutex provides all of the methods of folly::RWSpinLock,
45 // boost::shared_mutex, boost::upgrade_mutex, and C++14's
46 // std::shared_timed_mutex. All operations that can block are available
47 // in try, try-for, and try-until (system_clock or steady_clock) versions.
48 //
49 // SharedMutexReadPriority gives priority to readers,
50 // SharedMutexWritePriority gives priority to writers. SharedMutex is an
51 // alias for SharedMutexWritePriority, because writer starvation is more
52 // likely than reader starvation for the read-heavy workloads targeted
53 // by SharedMutex.
54 //
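// A minimal usage sketch (variable names are illustrative): SharedMutex
// provides the standard shared-mutex operations, so it can be used with the
// standard guards as well as with the holder classes defined below.
//
//   folly::SharedMutex mu; // alias for SharedMutexWritePriority
//   {
//     std::shared_lock<folly::SharedMutex> g(mu); // shared (reader) access
//     // ... read the protected data ...
//   }
//   {
//     std::unique_lock<folly::SharedMutex> g(mu); // exclusive (writer) access
//     // ... mutate the protected data ...
//   }
//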
55 // In my tests SharedMutex is as good or better than the other
56 // reader-writer locks in use at Facebook for almost all use cases,
57 // sometimes by a wide margin. (If it is rare that there are actually
58 // concurrent readers then RWSpinLock can be a few nanoseconds faster.)
59 // I compared it to folly::RWSpinLock, folly::RWTicketSpinLock64,
60 // boost::shared_mutex, pthread_rwlock_t, and a RWLock that internally uses
61 // spinlocks to guard state and pthread_mutex_t+pthread_cond_t to block.
62 // (Thrift's ReadWriteMutex is based underneath on pthread_rwlock_t.)
63 // It is generally as good or better than the rest when evaluating size,
64 // speed, scalability, or latency outliers. In the corner cases where
65 // it is not the fastest (such as single-threaded use or heavy write
66 // contention) it is never very much worse than the best. See the bottom
67 // of folly/test/SharedMutexTest.cpp for lots of microbenchmark results.
68 //
69 // Comparison to folly::RWSpinLock:
70 //
71 // * SharedMutex is faster than RWSpinLock when there are actually
72 // concurrent read accesses (sometimes much faster), and ~5 nanoseconds
73 // slower when there is not actually any contention. SharedMutex is
74 // faster in every (benchmarked) scenario where the shared mode of
75 // the lock is actually useful.
76 //
77 // * Concurrent shared access to SharedMutex scales linearly, while total
78 // RWSpinLock throughput drops as more threads try to access the lock
79 // in shared mode. Under very heavy read contention SharedMutex can
80 // be two orders of magnitude faster than RWSpinLock (or any reader
81 // writer lock that doesn't use striping or deferral).
82 //
83 // * SharedMutex can safely protect blocking calls, because after an
84 // initial period of spinning it waits using futex().
85 //
86 // * RWSpinLock prioritizes readers, SharedMutex has both reader- and
87 // writer-priority variants, but defaults to write priority.
88 //
89 // * RWSpinLock's upgradeable mode blocks new readers, while SharedMutex's
90 // doesn't. Both semantics are reasonable. The boost documentation
91 // doesn't explicitly talk about this behavior (except by omitting
92 // any statement that those lock modes conflict), but the boost
93 // implementations do allow new readers while the upgradeable mode
94 // is held. See https://github.com/boostorg/thread/blob/master/
95 // include/boost/thread/pthread/shared_mutex.hpp
96 //
97 // * RWSpinLock::UpgradedHolder maps to SharedMutex::UpgradeHolder
98 // (UpgradeableHolder would be even more pedantically correct).
99 // SharedMutex's holders have fewer methods (no reset) and are less
100 // tolerant (promotion and downgrade crash if the donor doesn't own
101 // the lock, and you must use the default constructor rather than
102 // passing a nullptr to the pointer constructor).
103 //
104 // Both SharedMutex and RWSpinLock provide "exclusive", "upgrade",
105 // and "shared" modes. At all times num_threads_holding_exclusive +
106 // num_threads_holding_upgrade <= 1, and num_threads_holding_exclusive ==
107 // 0 || num_threads_holding_shared == 0. RWSpinLock has the additional
108 // constraint that num_threads_holding_shared cannot increase while
109 // num_threads_holding_upgrade is non-zero.
110 //
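// For example, a single exclusive holder, or one upgrade holder together
// with any number of shared holders, are states permitted by these
// invariants; an exclusive holder concurrent with either a shared or an
// upgrade holder is not.
//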
111 // Comparison to the internal RWLock:
112 //
113 // * SharedMutex doesn't allow a maximum reader count to be configured,
114 // so it can't be used as a semaphore in the same way as RWLock.
115 //
116 // * SharedMutex is 4 bytes, RWLock is 256.
117 //
118 // * SharedMutex is as fast or faster than RWLock in all of my
119 // microbenchmarks, and has positive rather than negative scalability.
120 //
121 // * RWLock and SharedMutex are both writer priority locks.
122 //
123 // * SharedMutex avoids latency outliers as well as RWLock.
124 //
125 // * SharedMutex uses different names (t != 0 below):
126 //
127 // RWLock::lock(0) => SharedMutex::lock()
128 //
129 // RWLock::lock(t) => SharedMutex::try_lock_for(milliseconds(t))
130 //
131 // RWLock::tryLock() => SharedMutex::try_lock()
132 //
133 // RWLock::unlock() => SharedMutex::unlock()
134 //
135 // RWLock::enter(0) => SharedMutex::lock_shared()
136 //
137 // RWLock::enter(t) =>
138 // SharedMutex::try_lock_shared_for(milliseconds(t))
139 //
140 // RWLock::tryEnter() => SharedMutex::try_lock_shared()
141 //
142 // RWLock::leave() => SharedMutex::unlock_shared()
143 //
144 // * RWLock allows the reader count to be adjusted by a value other
145 // than 1 during enter() or leave(). SharedMutex doesn't currently
146 // implement this feature.
147 //
148 // * RWLock's methods are marked const, SharedMutex's aren't.
149 //
150 // Reader-writer locks have the potential to allow concurrent access
151 // to shared read-mostly data, but in practice they often provide no
152 // improvement over a mutex. The problem is the cache coherence protocol
153 // of modern CPUs. Coherence is provided by making sure that when a cache
154 // line is written it is present in only one core's cache. Since a memory
155 // write is required to acquire a reader-writer lock in shared mode, the
156 // cache line holding the lock is invalidated in all of the other caches.
157 // This leads to cache misses when another thread wants to acquire or
158 // release the lock concurrently. When the RWLock is colocated with the
159 // data it protects (common), cache misses can also continue to occur when
160 // a thread that already holds the lock tries to read the protected data.
161 //
162 // Ideally, a reader-writer lock would allow multiple cores to acquire
163 // and release the lock in shared mode without incurring any cache misses.
164 // This requires that each core records its shared access in a cache line
165 // that isn't read or written by other read-locking cores. (Writers will
166 // have to check all of the cache lines.) Typical server hardware when
167 // this comment was written has 16 L1 caches and cache lines of 64 bytes,
168 // so a lock striped over all L1 caches would occupy a prohibitive 1024
169 // bytes. Nothing says that we need a separate set of per-core memory
170 // locations for each lock, however. Each SharedMutex instance is only
171 // 4 bytes, but all locks together share a 2K area in which they make a
172 // core-local record of lock acquisitions.
173 //
174 // SharedMutex's strategy of using a shared set of core-local stripes has
175 // a potential downside, because it means that acquisition of any lock in
176 // write mode can conflict with acquisition of any lock in shared mode.
177 // If a lock instance doesn't actually experience concurrency then this
178 // downside will outweigh the upside of improved scalability for readers.
179 // To avoid this problem we dynamically detect concurrent accesses to
180 // SharedMutex, and don't start using the deferred mode unless we actually
181 // observe concurrency. See kNumSharedToStartDeferring.
182 //
183 // It is explicitly allowed to call unlock_shared() from a different
184 // thread than lock_shared(), so long as they are properly paired.
185 // unlock_shared() needs to find the location at which lock_shared()
186 // recorded the lock, which might be in the lock itself or in any of
187 // the shared slots. If you can conveniently pass state from lock
188 // acquisition to release then the fastest mechanism is to std::move
189 // the SharedMutex::ReadHolder instance or a SharedMutex::Token (using
190 // lock_shared(Token&) and unlock_shared(Token&)). The guard or token
191 // will tell unlock_shared where in deferredReaders[] to look for the
192 // deferred lock. The Token-less version of unlock_shared() works in all
193 // cases, but is optimized for the common (no inter-thread handoff) case.
194 //
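// A sketch of a cross-thread handoff using a token (names illustrative):
//
//   folly::SharedMutex mu;
//   folly::SharedMutex::Token token;
//   mu.lock_shared(token);    // thread A; the token records the slot used
//   // ... transfer `token` (and ownership of the shared lock) to thread B ...
//   mu.unlock_shared(token);  // thread B; the token says where to look
//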
195 // In both read- and write-priority mode, a waiting lock() (exclusive mode)
196 // only blocks readers after it has waited for an active upgrade lock to be
197 // released; until the upgrade lock is released (or upgraded or downgraded)
198 // readers will still be able to enter. Preferences about lock acquisition
199 // are not guaranteed to be enforced perfectly (even if they were, there
200 // is theoretically the chance that a thread could be arbitrarily suspended
201 // between calling lock() and SharedMutex code actually getting executed).
202 //
203 // try_*_for methods always try at least once, even if the duration
204 // is zero or negative. The duration type must be compatible with
205 // std::chrono::steady_clock. try_*_until methods also always try at
206 // least once. std::chrono::system_clock and std::chrono::steady_clock
207 // are supported.
208 //
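// For example (the durations and deadline below are arbitrary):
//
//   folly::SharedMutex mu;
//   if (mu.try_lock_for(std::chrono::milliseconds(10))) {
//     // ... exclusive access ...
//     mu.unlock();
//   }
//   auto deadline = std::chrono::steady_clock::now() + std::chrono::seconds(1);
//   if (mu.try_lock_shared_until(deadline)) {
//     // ... shared access ...
//     mu.unlock_shared();
//   }
//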
209 // If you have observed by profiling that your SharedMutex-s are getting
210 // cache misses on deferredReaders[] due to another SharedMutex user, then
211 // you can use the tag type to create your own instantiation of the type.
212 // The contention threshold (see kNumSharedToStartDeferring) should make
213 // this unnecessary in all but the most extreme cases. Make sure to check
214 // that the increased icache and dcache footprint of the tagged result is
215 // worth it.
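//
// A tagged instantiation might look like the following sketch; MyTag is a
// placeholder type whose only purpose is to select a separate static
// deferredReaders[] array:
//
//   struct MyTag {};
//   typedef folly::SharedMutexImpl<false, MyTag> MySharedMutex;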
216 
217 // SharedMutex's use of thread local storage is an optimization, so
218 // for the case where thread local storage is not supported, define it
219 // away.
220 
221 // Note about TSAN (ThreadSanitizer): the SharedMutexWritePriority version
222 // (the default) of this mutex is annotated appropriately so that TSAN can
223 // perform lock inversion analysis. However, the SharedMutexReadPriority version
224 // is not annotated. This is because TSAN's lock order heuristic
225 // assumes that two calls to lock_shared must be ordered, which leads
226 // to too many false positives for the reader-priority case.
227 //
228 // Suppose thread A holds a SharedMutexWritePriority lock in shared mode and an
229 // independent thread B is waiting for exclusive access. Then a thread C's
230 // lock_shared can't proceed until A has released the lock. Discounting
231 // situations that never use exclusive mode (so no lock is necessary at all)
232 // this means that without higher-level reasoning it is not safe to ignore
233 // reader <-> reader interactions.
234 //
235 // This reasoning does not apply to SharedMutexReadPriority, because there are
236 // no actions by a thread B that can make C need to wait for A. Since the
237 // overwhelming majority of SharedMutex instances use write priority, we
238 // restrict the TSAN annotations to only SharedMutexWritePriority.
239 
240 #ifndef FOLLY_SHAREDMUTEX_TLS
241 #if !FOLLY_MOBILE
242 #define FOLLY_SHAREDMUTEX_TLS FOLLY_TLS
243 #else
244 #define FOLLY_SHAREDMUTEX_TLS
245 #endif
246 #endif
247 
248 namespace folly {
249 
250 struct SharedMutexToken {
251  enum class Type : uint16_t {
252  INVALID = 0,
253  INLINE_SHARED,
254  DEFERRED_SHARED,
255  };
256 
257  Type type_;
258  uint16_t slot_;
259 };
260 
261 namespace detail {
262 // Returns a guard that gives permission for the current thread to
263 // annotate, and adjust the annotation bits in, the SharedMutex at ptr.
264 std::unique_lock<std::mutex> sharedMutexAnnotationGuard(void* ptr);
265 } // namespace detail
266 
267 template <
268  bool ReaderPriority,
269  typename Tag_ = void,
270  template <typename> class Atom = std::atomic,
271  bool BlockImmediately = false,
272  bool AnnotateForThreadSanitizer = kIsSanitizeThread && !ReaderPriority>
273 class SharedMutexImpl {
274  public:
275  static constexpr bool kReaderPriority = ReaderPriority;
276 
277  typedef Tag_ Tag;
278 
279  typedef SharedMutexToken Token;
280 
281  class ReadHolder;
282  class UpgradeHolder;
283  class WriteHolder;
284 
285  constexpr SharedMutexImpl() noexcept : state_(0) {}
286 
287  SharedMutexImpl(const SharedMutexImpl&) = delete;
288  SharedMutexImpl(SharedMutexImpl&&) = delete;
289  SharedMutexImpl& operator=(const SharedMutexImpl&) = delete;
290  SharedMutexImpl& operator=(SharedMutexImpl&&) = delete;
291 
292  // It is an error to destroy a SharedMutex that still has
293  // any outstanding locks. This is checked if NDEBUG isn't defined.
294  // SharedMutex's exclusive mode can be safely used to guard the lock's
295  // own destruction. If, for example, you acquire the lock in exclusive
296  // mode and then observe that the object containing the lock is no longer
297  // needed, you can unlock() and then immediately destroy the lock.
298  // See https://sourceware.org/bugzilla/show_bug.cgi?id=13690 for a
299  // description about why this property needs to be explicitly mentioned.
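  // A sketch of that pattern (the object layout and names are illustrative):
  //
  //   obj->mutex_.lock();
  //   bool lastRef = --obj->refCount_ == 0;
  //   obj->mutex_.unlock();
  //   if (lastRef) {
  //     delete obj; // mutex_ is destroyed immediately after unlock()
  //   }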
300  ~SharedMutexImpl() {
301  auto state = state_.load(std::memory_order_relaxed);
302  if (UNLIKELY((state & kHasS) != 0)) {
303  cleanupTokenlessSharedDeferred(state);
304  }
305 
306 #ifndef NDEBUG
307  // These asserts check that everybody has released the lock before it
308  // is destroyed. If you arrive here while debugging that is likely
309  // the problem. (You could also have general heap corruption.)
310 
311  // if a futexWait fails to go to sleep because the value has been
312  // changed, we don't necessarily clean up the wait bits, so it is
313  // possible they will be set here in a correct system
314  assert((state & ~(kWaitingAny | kMayDefer | kAnnotationCreated)) == 0);
315  if ((state & kMayDefer) != 0) {
316  for (uint32_t slot = 0; slot < kMaxDeferredReaders; ++slot) {
317  auto slotValue = deferredReader(slot)->load(std::memory_order_relaxed);
318  assert(!slotValueIsThis(slotValue));
319  }
320  }
321 #endif
322  annotateDestroy();
323  }
324 
325  void lock() {
326  WaitForever ctx;
327  (void)lockExclusiveImpl(kHasSolo, ctx);
328  annotateAcquired(annotate_rwlock_level::wrlock);
329  }
330 
331  bool try_lock() {
332  WaitNever ctx;
333  auto result = lockExclusiveImpl(kHasSolo, ctx);
334  annotateTryAcquired(result, annotate_rwlock_level::wrlock);
335  return result;
336  }
337 
338  template <class Rep, class Period>
339  bool try_lock_for(const std::chrono::duration<Rep, Period>& duration) {
340  WaitForDuration<Rep, Period> ctx(duration);
341  auto result = lockExclusiveImpl(kHasSolo, ctx);
342  annotateTryAcquired(result, annotate_rwlock_level::wrlock);
343  return result;
344  }
345 
346  template <class Clock, class Duration>
347  bool try_lock_until(
348  const std::chrono::time_point<Clock, Duration>& absDeadline) {
349  WaitUntilDeadline<Clock, Duration> ctx{absDeadline};
350  auto result = lockExclusiveImpl(kHasSolo, ctx);
351  annotateTryAcquired(result, annotate_rwlock_level::wrlock);
352  return result;
353  }
354 
355  void unlock() {
356  annotateReleased(annotate_rwlock_level::wrlock);
357  // It is possible that we have a left-over kWaitingNotS if the last
358  // unlock_shared() that let our matching lock() complete finished
359  // releasing before lock()'s futexWait went to sleep. Clean it up now
360  auto state = (state_ &= ~(kWaitingNotS | kPrevDefer | kHasE));
361  assert((state & ~(kWaitingAny | kAnnotationCreated)) == 0);
362  wakeRegisteredWaiters(state, kWaitingE | kWaitingU | kWaitingS);
363  }
364 
365  // Managing the token yourself makes unlock_shared a bit faster
366 
367  void lock_shared() {
368  WaitForever ctx;
369  (void)lockSharedImpl(nullptr, ctx);
370  annotateAcquired(annotate_rwlock_level::rdlock);
371  }
372 
373  void lock_shared(Token& token) {
374  WaitForever ctx;
375  (void)lockSharedImpl(&token, ctx);
376  annotateAcquired(annotate_rwlock_level::rdlock);
377  }
378 
379  bool try_lock_shared() {
380  WaitNever ctx;
381  auto result = lockSharedImpl(nullptr, ctx);
382  annotateTryAcquired(result, annotate_rwlock_level::rdlock);
383  return result;
384  }
385 
386  bool try_lock_shared(Token& token) {
387  WaitNever ctx;
388  auto result = lockSharedImpl(&token, ctx);
389  annotateTryAcquired(result, annotate_rwlock_level::rdlock);
390  return result;
391  }
392 
393  template <class Rep, class Period>
394  bool try_lock_shared_for(const std::chrono::duration<Rep, Period>& duration) {
395  WaitForDuration<Rep, Period> ctx(duration);
396  auto result = lockSharedImpl(nullptr, ctx);
397  annotateTryAcquired(result, annotate_rwlock_level::rdlock);
398  return result;
399  }
400 
401  template <class Rep, class Period>
402  bool try_lock_shared_for(
403  const std::chrono::duration<Rep, Period>& duration,
404  Token& token) {
405  WaitForDuration<Rep, Period> ctx(duration);
406  auto result = lockSharedImpl(&token, ctx);
407  annotateTryAcquired(result, annotate_rwlock_level::rdlock);
408  return result;
409  }
410 
411  template <class Clock, class Duration>
412  bool try_lock_shared_until(
413  const std::chrono::time_point<Clock, Duration>& absDeadline) {
414  WaitUntilDeadline<Clock, Duration> ctx{absDeadline};
415  auto result = lockSharedImpl(nullptr, ctx);
416  annotateTryAcquired(result, annotate_rwlock_level::rdlock);
417  return result;
418  }
419 
420  template <class Clock, class Duration>
421  bool try_lock_shared_until(
422  const std::chrono::time_point<Clock, Duration>& absDeadline,
423  Token& token) {
424  WaitUntilDeadline<Clock, Duration> ctx{absDeadline};
425  auto result = lockSharedImpl(&token, ctx);
426  annotateTryAcquired(result, annotate_rwlock_level::rdlock);
427  return result;
428  }
429 
430  void unlock_shared() {
431  annotateReleased(annotate_rwlock_level::rdlock);
432 
433  auto state = state_.load(std::memory_order_acquire);
434 
435  // kPrevDefer can only be set if HasE or BegunE is set
436  assert((state & (kPrevDefer | kHasE | kBegunE)) != kPrevDefer);
437 
438  // lock() strips kMayDefer immediately, but then copies it to
439  // kPrevDefer so we can tell if the pre-lock() lock_shared() might
440  // have deferred
441  if ((state & (kMayDefer | kPrevDefer)) == 0 ||
442  !tryUnlockTokenlessSharedDeferred()) {
443  // Matching lock_shared() couldn't have deferred, or the deferred
444  // lock has already been inlined by applyDeferredReaders()
445  unlockSharedInline();
446  }
447  }
448 
449  void unlock_shared(Token& token) {
450  annotateReleased(annotate_rwlock_level::rdlock);
451 
452  assert(
453  token.type_ == Token::Type::INLINE_SHARED ||
454  token.type_ == Token::Type::DEFERRED_SHARED);
455 
456  if (token.type_ != Token::Type::DEFERRED_SHARED ||
457  !tryUnlockSharedDeferred(token.slot_)) {
458  unlockSharedInline();
459  }
460 #ifndef NDEBUG
461  token.type_ = Token::Type::INVALID;
462 #endif
463  }
464 
465  void unlock_and_lock_shared() {
466  annotateReleased(annotate_rwlock_level::wrlock);
467  annotateAcquired(annotate_rwlock_level::rdlock);
468  // We can't use state_ -=, because we need to clear 2 bits (1 of which
469  // has an uncertain initial state) and set 1 other. We might as well
470  // clear the relevant wake bits at the same time. Note that since S
471  // doesn't block the beginning of a transition to E (writer priority
472  // can cut off new S, reader priority grabs BegunE and blocks deferred
473  // S) we need to wake E as well.
474  auto state = state_.load(std::memory_order_acquire);
475  do {
476  assert(
477  (state & ~(kWaitingAny | kPrevDefer | kAnnotationCreated)) == kHasE);
478  } while (!state_.compare_exchange_strong(
479  state, (state & ~(kWaitingAny | kPrevDefer | kHasE)) + kIncrHasS));
480  if ((state & (kWaitingE | kWaitingU | kWaitingS)) != 0) {
481  futexWakeAll(kWaitingE | kWaitingU | kWaitingS);
482  }
483  }
484 
485  void unlock_and_lock_shared(Token& token) {
486  unlock_and_lock_shared();
487  token.type_ = Token::Type::INLINE_SHARED;
488  }
489 
490  void lock_upgrade() {
491  WaitForever ctx;
492  (void)lockUpgradeImpl(ctx);
493  // For TSAN: treat upgrade locks as equivalent to read locks
494  annotateAcquired(annotate_rwlock_level::rdlock);
495  }
496 
497  bool try_lock_upgrade() {
498  WaitNever ctx;
499  auto result = lockUpgradeImpl(ctx);
500  annotateTryAcquired(result, annotate_rwlock_level::rdlock);
501  return result;
502  }
503 
504  template <class Rep, class Period>
505  bool try_lock_upgrade_for(
506  const std::chrono::duration<Rep, Period>& duration) {
507  WaitForDuration<Rep, Period> ctx(duration);
508  auto result = lockUpgradeImpl(ctx);
509  annotateTryAcquired(result, annotate_rwlock_level::rdlock);
510  return result;
511  }
512 
513  template <class Clock, class Duration>
514  bool try_lock_upgrade_until(
515  const std::chrono::time_point<Clock, Duration>& absDeadline) {
516  WaitUntilDeadline<Clock, Duration> ctx{absDeadline};
517  auto result = lockUpgradeImpl(ctx);
518  annotateTryAcquired(result, annotate_rwlock_level::rdlock);
519  return result;
520  }
521 
522  void unlock_upgrade() {
523  annotateReleased(annotate_rwlock_level::rdlock);
524  auto state = (state_ -= kHasU);
525  assert((state & (kWaitingNotS | kHasSolo)) == 0);
526  wakeRegisteredWaiters(state, kWaitingE | kWaitingU);
527  }
528 
529  void unlock_upgrade_and_lock() {
530  // no waiting necessary, so waitMask is empty
531  WaitForever ctx;
532  (void)lockExclusiveImpl(0, ctx);
533  annotateReleased(annotate_rwlock_level::rdlock);
534  annotateAcquired(annotate_rwlock_level::wrlock);
535  }
536 
537  void unlock_upgrade_and_lock_shared() {
538  // No need to annotate for TSAN here because we model upgrade and shared
539  // locks as the same.
540  auto state = (state_ -= kHasU - kIncrHasS);
541  assert((state & (kWaitingNotS | kHasSolo)) == 0);
542  wakeRegisteredWaiters(state, kWaitingE | kWaitingU);
543  }
544 
545  void unlock_upgrade_and_lock_shared(Token& token) {
546  unlock_upgrade_and_lock_shared();
547  token.type_ = Token::Type::INLINE_SHARED;
548  }
549 
550  void unlock_and_lock_upgrade() {
551  annotateReleased(annotate_rwlock_level::wrlock);
552  annotateAcquired(annotate_rwlock_level::rdlock);
553  // We can't use state_ -=, because we need to clear 2 bits (1 of
554  // which has an uncertain initial state) and set 1 other. We might
555  // as well clear the relevant wake bits at the same time.
556  auto state = state_.load(std::memory_order_acquire);
557  while (true) {
558  assert(
559  (state & ~(kWaitingAny | kPrevDefer | kAnnotationCreated)) == kHasE);
560  auto after =
561  (state & ~(kWaitingNotS | kWaitingS | kPrevDefer | kHasE)) + kHasU;
562  if (state_.compare_exchange_strong(state, after)) {
563  if ((state & kWaitingS) != 0) {
564  futexWakeAll(kWaitingS);
565  }
566  return;
567  }
568  }
569  }
570 
571  private:
572  typedef folly::detail::Futex<Atom> Futex;
573 
574  // Internally we use four kinds of wait contexts. These are structs
575  // that provide a doWait method that returns true if a futex wake
576  // was issued that intersects with the waitMask, false if there was a
577  // timeout and no more waiting should be performed. Spinning occurs
578  // before the wait context is invoked.
579 
580  struct WaitForever {
581  bool canBlock() {
582  return true;
583  }
584  bool canTimeOut() {
585  return false;
586  }
587  bool shouldTimeOut() {
588  return false;
589  }
590 
591  bool doWait(Futex& futex, uint32_t expected, uint32_t waitMask) {
592  detail::futexWait(&futex, expected, waitMask);
593  return true;
594  }
595  };
596 
597  struct WaitNever {
598  bool canBlock() {
599  return false;
600  }
601  bool canTimeOut() {
602  return true;
603  }
604  bool shouldTimeOut() {
605  return true;
606  }
607 
608  bool doWait(
609  Futex& /* futex */,
610  uint32_t /* expected */,
611  uint32_t /* waitMask */) {
612  return false;
613  }
614  };
615 
616  template <class Rep, class Period>
617  struct WaitForDuration {
618  std::chrono::duration<Rep, Period> duration_;
619  bool deadlineComputed_;
620  std::chrono::steady_clock::time_point deadline_;
621 
622  explicit WaitForDuration(const std::chrono::duration<Rep, Period>& duration)
623  : duration_(duration), deadlineComputed_(false) {}
624 
625  std::chrono::steady_clock::time_point deadline() {
626  if (!deadlineComputed_) {
627  deadline_ = std::chrono::steady_clock::now() + duration_;
628  deadlineComputed_ = true;
629  }
630  return deadline_;
631  }
632 
633  bool canBlock() {
634  return duration_.count() > 0;
635  }
636  bool canTimeOut() {
637  return true;
638  }
639 
640  bool shouldTimeOut() {
641  return std::chrono::steady_clock::now() > deadline();
642  }
643 
644  bool doWait(Futex& futex, uint32_t expected, uint32_t waitMask) {
645  auto result =
646  detail::futexWaitUntil(&futex, expected, deadline(), waitMask);
647  return result != folly::detail::FutexResult::TIMEDOUT;
648  }
649  };
650 
651  template <class Clock, class Duration>
652  struct WaitUntilDeadline {
653  std::chrono::time_point<Clock, Duration> absDeadline_;
654 
655  bool canBlock() {
656  return true;
657  }
658  bool canTimeOut() {
659  return true;
660  }
661  bool shouldTimeOut() {
662  return Clock::now() > absDeadline_;
663  }
664 
665  bool doWait(Futex& futex, uint32_t expected, uint32_t waitMask) {
666  auto result =
667  detail::futexWaitUntil(&futex, expected, absDeadline_, waitMask);
668  return result != folly::detail::FutexResult::TIMEDOUT;
669  }
670  };
671 
672  void annotateLazyCreate() {
673  if (AnnotateForThreadSanitizer &&
674  (state_.load() & kAnnotationCreated) == 0) {
675  auto guard = detail::sharedMutexAnnotationGuard(this);
676  // check again
677  if ((state_.load() & kAnnotationCreated) == 0) {
678  state_.fetch_or(kAnnotationCreated);
679  annotate_benign_race_sized(
680  &state_, sizeof(state_), "init TSAN", __FILE__, __LINE__);
681  annotate_rwlock_create(this, __FILE__, __LINE__);
682  }
683  }
684  }
685 
686  void annotateDestroy() {
687  if (AnnotateForThreadSanitizer) {
688  annotateLazyCreate();
689  annotate_rwlock_destroy(this, __FILE__, __LINE__);
690  }
691  }
692 
693  void annotateAcquired(annotate_rwlock_level w) {
694  if (AnnotateForThreadSanitizer) {
695  annotateLazyCreate();
696  annotate_rwlock_acquired(this, w, __FILE__, __LINE__);
697  }
698  }
699 
700  void annotateTryAcquired(bool result, annotate_rwlock_level w) {
701  if (AnnotateForThreadSanitizer) {
702  annotateLazyCreate();
703  annotate_rwlock_try_acquired(this, w, result, __FILE__, __LINE__);
704  }
705  }
706 
707  void annotateReleased(annotate_rwlock_level w) {
708  if (AnnotateForThreadSanitizer) {
709  assert((state_.load() & kAnnotationCreated) != 0);
710  annotate_rwlock_released(this, w, __FILE__, __LINE__);
711  }
712  }
713 
714  // 32 bits of state
715  Futex state_{};
716 
717  // S count needs to be on the end, because we explicitly allow it to
718  // underflow. This can occur while we are in the middle of applying
719  // deferred locks (we remove them from deferredReaders[] before
720  // inlining them), or during token-less unlock_shared() if a racing
721  // lock_shared();unlock_shared() moves the deferredReaders slot while
722  // the first unlock_shared() is scanning. The former case is cleaned
723  // up before we finish applying the locks. The latter case can persist
724  // until destruction, when it is cleaned up.
725  static constexpr uint32_t kIncrHasS = 1 << 11;
726  static constexpr uint32_t kHasS = ~(kIncrHasS - 1);
727 
728  // Set if annotation has been completed for this instance. That annotation
729  // (and setting this bit afterward) must be guarded by one of the mutexes in
730  // annotationCreationGuards.
731  static constexpr uint32_t kAnnotationCreated = 1 << 10;
732 
733  // If false, then there are definitely no deferred read locks for this
734  // instance. Cleared after initialization and when exclusively locked.
735  static constexpr uint32_t kMayDefer = 1 << 9;
736 
737  // lock() clears kMayDefer as soon as it starts draining readers (so
738  // that it doesn't have to do a second CAS once drain completes), but
739  // unlock_shared() still needs to know whether to scan deferredReaders[]
740  // or not. We copy kMayDefer to kPrevDefer when setting kHasE or
741  // kBegunE, and clear it when clearing those bits.
742  static constexpr uint32_t kPrevDefer = 1 << 8;
743 
744  // Exclusive-locked blocks all read locks and write locks. This bit
745  // may be set before all readers have finished, but in that case the
746  // thread that sets it won't return to the caller until all read locks
747  // have been released.
748  static constexpr uint32_t kHasE = 1 << 7;
749 
750  // Exclusive-draining means that lock() is waiting for existing readers
751  // to leave, but that new readers may still acquire shared access.
752  // This is only used in reader priority mode. New readers during
753  // drain must be inline. The difference between this and kHasU is that
754  // kBegunE prevents kMayDefer from being set.
755  static constexpr uint32_t kBegunE = 1 << 6;
756 
757  // At most one thread may have either exclusive or upgrade lock
758  // ownership. Unlike exclusive mode, ownership of the lock in upgrade
759  // mode doesn't preclude other threads holding the lock in shared mode.
760  // boost's concept for this doesn't explicitly say whether new shared
761  // locks can be acquired once lock_upgrade has succeeded, but doesn't
762  // list that as disallowed. RWSpinLock disallows new read locks after
763  // lock_upgrade has been acquired, but the boost implementation doesn't.
764  // We choose the latter.
765  static constexpr uint32_t kHasU = 1 << 5;
766 
767  // There are three states that we consider to be "solo", in that they
768  // cannot coexist with other solo states. These are kHasE, kBegunE,
769  // and kHasU. Note that S doesn't conflict with any of these, because
770  // setting the kHasE is only one of the two steps needed to actually
771  // acquire the lock in exclusive mode (the other is draining the existing
772  // S holders).
773  static constexpr uint32_t kHasSolo = kHasE | kBegunE | kHasU;
774 
775  // Once a thread sets kHasE it needs to wait for the current readers
776  // to exit the lock. We give this a separate wait identity from the
777  // waiting to set kHasE so that we can perform partial wakeups (wake
778  // one instead of wake all).
779  static constexpr uint32_t kWaitingNotS = 1 << 4;
780 
781  // When waking writers we can either wake them all, in which case we
782  // can clear kWaitingE, or we can call futexWake(1). futexWake tells
783  // us if anybody woke up, but even if we detect that nobody woke up we
784  // can't clear the bit after the fact without issuing another wakeup.
785  // To avoid thundering herds when there are lots of pending lock()
786  // without needing to call futexWake twice when there is only one
787  // waiter, kWaitingE actually encodes if we have observed multiple
788  // concurrent waiters. Tricky: ABA issues on futexWait mean that when
789  // we see kWaitingESingle we can't assume that there is only one.
790  static constexpr uint32_t kWaitingESingle = 1 << 2;
791  static constexpr uint32_t kWaitingEMultiple = 1 << 3;
792  static constexpr uint32_t kWaitingE = kWaitingESingle | kWaitingEMultiple;
793 
794  // kWaitingU is essentially a 1 bit saturating counter. It always
795  // requires a wakeAll.
796  static constexpr uint32_t kWaitingU = 1 << 1;
797 
798  // All blocked lock_shared() should be awoken, so it is correct (not
799  // suboptimal) to wakeAll if there are any shared readers.
800  static constexpr uint32_t kWaitingS = 1 << 0;
801 
802  // kWaitingAny is a mask of all of the bits that record the state of
803  // threads, rather than the state of the lock. It is convenient to be
804  // able to mask them off during asserts.
805  static constexpr uint32_t kWaitingAny =
806  kWaitingNotS | kWaitingE | kWaitingU | kWaitingS;
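  // Putting the constants in this section together, the 32-bit state_ word
  // is laid out as follows (high bits to low bits):
  //   bits 31..11  shared (S) hold count, in units of kIncrHasS
  //   bit  10      kAnnotationCreated
  //   bit   9      kMayDefer
  //   bit   8      kPrevDefer
  //   bit   7      kHasE
  //   bit   6      kBegunE
  //   bit   5      kHasU
  //   bit   4      kWaitingNotS
  //   bits  3..2   kWaitingEMultiple and kWaitingESingle
  //   bit   1      kWaitingU
  //   bit   0      kWaitingS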
807 
808  // The reader count at which a reader will attempt to use the lock
809  // in deferred mode. If this value is 2, then the second concurrent
810  // reader will set kMayDefer and use deferredReaders[]. kMayDefer is
811  // cleared during exclusive access, so this threshold must be reached
812  // each time a lock is held in exclusive mode.
813  static constexpr uint32_t kNumSharedToStartDeferring = 2;
814 
815  // The typical number of spins that a thread will wait for a state
816  // transition. There is no bound on the number of threads that can wait
817  // for a writer, so we are pretty conservative here to limit the chance
818  // that we are starving the writer of CPU. Each spin is 6 or 7 nanos,
819  // almost all of which is in the pause instruction.
820  static constexpr uint32_t kMaxSpinCount = !BlockImmediately ? 1000 : 2;
821 
822  // The maximum number of soft yields before falling back to futex.
823  // If the preemption heuristic is activated we will fall back before
824  // this. A soft yield takes ~900 nanos (two sched_yield plus a call
825  // to getrusage, with checks of the goal at each step). Soft yields
826  // aren't compatible with deterministic execution under test (unlike
827  // futexWaitUntil, which has a capricious but deterministic back end).
828  static constexpr uint32_t kMaxSoftYieldCount = !BlockImmediately ? 1000 : 0;
829 
830  // If AccessSpreader assigns indexes from 0..k*n-1 on a system where some
831  // level of the memory hierarchy is symmetrically divided into k pieces
832  // (NUMA nodes, last-level caches, L1 caches, ...), then slot indexes
833  // that are the same after integer division by k share that resource.
834  // Our strategy for deferred readers is to probe up to numSlots/4 slots,
835  // using the full granularity of AccessSpreader for the start slot
836  // and then search outward. We can use AccessSpreader::current(n)
837  // without managing our own spreader if kMaxDeferredReaders <=
838  // AccessSpreader::kMaxCpus, which is currently 128.
839  //
840  // Our 2-socket E5-2660 machines have 8 L1 caches on each chip,
841  // with 64 byte cache lines. That means we need 64*16 bytes of
842  // deferredReaders[] to give each L1 its own playground. On x86_64
843  // each DeferredReaderSlot is 8 bytes, so we need kMaxDeferredReaders
844  // * kDeferredSeparationFactor >= 64 * 16 / 8 == 128. If
845  // kDeferredSearchDistance * kDeferredSeparationFactor <=
846  // 64 / 8 then we will search only within a single cache line, which
847  // guarantees we won't have inter-L1 contention. We give ourselves
848  // a factor of 2 on the core count, which should hold us for a couple
849  // processor generations. deferredReaders[] is 2048 bytes currently.
850  public:
851  static constexpr uint32_t kMaxDeferredReaders = 64;
852  static constexpr uint32_t kDeferredSearchDistance = 2;
853  static constexpr uint32_t kDeferredSeparationFactor = 4;
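  // As a check of the arithmetic above, with these values and 8-byte slots
  // (x86_64): deferredReaders[] occupies 64 * 4 * 8 == 2048 bytes, and each
  // search window covers 2 * 4 * 8 == 64 bytes, exactly one cache line.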
854 
855  private:
856  static_assert(
857  !(kMaxDeferredReaders & (kMaxDeferredReaders - 1)),
858  "kMaxDeferredReaders must be a power of 2");
859  static_assert(
860  !(kDeferredSearchDistance & (kDeferredSearchDistance - 1)),
861  "kDeferredSearchDistance must be a power of 2");
862 
863  // The number of deferred locks that can be simultaneously acquired
864  // by a thread via the token-less methods without performing any heap
865  // allocations. Each of these costs 3 pointers (24 bytes, probably)
866  // per thread. There's not much point in making this larger than
867  // kDeferredSearchDistance.
868  static constexpr uint32_t kTokenStackTLSCapacity = 2;
869 
870  // We need to make sure that if there is a lock_shared()
871  // and lock_shared(token) followed by unlock_shared() and
872  // unlock_shared(token), the token-less unlock doesn't null
873  // out deferredReaders[token.slot_]. If we allowed that, then
874  // unlock_shared(token) wouldn't be able to assume that its lock
875  // had been inlined by applyDeferredReaders when it finds that
876  // deferredReaders[token.slot_] no longer points to this. We accomplish
877  // this by stealing bit 0 from the pointer to record that the slot's
878  // element has no token, hence our use of uintptr_t in deferredReaders[].
879  static constexpr uintptr_t kTokenless = 0x1;
880 
881  // This is the starting location for Token-less unlock_shared().
882  static FOLLY_SHAREDMUTEX_TLS uint32_t tls_lastTokenlessSlot;
883 
884  // Last deferred reader slot used.
885  static FOLLY_SHAREDMUTEX_TLS uint32_t tls_lastDeferredReaderSlot;
886 
887  // Only indexes divisible by kDeferredSeparationFactor are used.
888  // If any of those elements points to a SharedMutexImpl, then it
889  // should be considered that there is a shared lock on that instance.
890  // See kTokenless.
891  public:
892  typedef Atom<uintptr_t> DeferredReaderSlot;
893 
894  private:
895  alignas(hardware_destructive_interference_size) static DeferredReaderSlot
896  deferredReaders[kMaxDeferredReaders * kDeferredSeparationFactor];
897 
898  // Performs an exclusive lock, waiting for state_ & waitMask to be
899  // zero first
900  template <class WaitContext>
901  bool lockExclusiveImpl(uint32_t preconditionGoalMask, WaitContext& ctx) {
902  uint32_t state = state_.load(std::memory_order_acquire);
903  if (LIKELY(
904  (state & (preconditionGoalMask | kMayDefer | kHasS)) == 0 &&
905  state_.compare_exchange_strong(state, (state | kHasE) & ~kHasU))) {
906  return true;
907  } else {
908  return lockExclusiveImpl(state, preconditionGoalMask, ctx);
909  }
910  }
911 
912  template <class WaitContext>
913  bool lockExclusiveImpl(
914  uint32_t& state,
915  uint32_t preconditionGoalMask,
916  WaitContext& ctx) {
917  while (true) {
918  if (UNLIKELY((state & preconditionGoalMask) != 0) &&
919  !waitForZeroBits(state, preconditionGoalMask, kWaitingE, ctx) &&
920  ctx.canTimeOut()) {
921  return false;
922  }
923 
924  uint32_t after = (state & kMayDefer) == 0 ? 0 : kPrevDefer;
925  if (!kReaderPriority || (state & (kMayDefer | kHasS)) == 0) {
926  // Block readers immediately, either because we are in write
927  // priority mode or because we can acquire the lock in one
928  // step. Note that if state has kHasU, then we are doing an
929  // unlock_upgrade_and_lock() and we should clear it (reader
930  // priority branch also does this).
931  after |= (state | kHasE) & ~(kHasU | kMayDefer);
932  } else {
933  after |= (state | kBegunE) & ~(kHasU | kMayDefer);
934  }
935  if (state_.compare_exchange_strong(state, after)) {
936  auto before = state;
937  state = after;
938 
939  // If we set kHasE (writer priority) then no new readers can
940  // arrive. If we set kBegunE then they can still enter, but
941  // they must be inline. Either way we need to either spin on
942  // deferredReaders[] slots, or inline them so that we can wait on
943  // kHasS to zero itself. deferredReaders[] is pointers, which on
944  // x86_64 are bigger than futex() can handle, so we inline the
945  // deferred locks instead of trying to futexWait on each slot.
946  // Readers are responsible for rechecking state_ after recording
947  // a deferred read to avoid atomicity problems between the state_
948  // CAS and applyDeferredReader's reads of deferredReaders[].
949  if (UNLIKELY((before & kMayDefer) != 0)) {
950  applyDeferredReaders(state, ctx);
951  }
952  while (true) {
953  assert((state & (kHasE | kBegunE)) != 0 && (state & kHasU) == 0);
954  if (UNLIKELY((state & kHasS) != 0) &&
955  !waitForZeroBits(state, kHasS, kWaitingNotS, ctx) &&
956  ctx.canTimeOut()) {
957  // Ugh. We blocked new readers and other writers for a while,
958  // but were unable to complete. Move on. On the plus side
959  // we can clear kWaitingNotS because nobody else can piggyback
960  // on it.
961  state = (state_ &= ~(kPrevDefer | kHasE | kBegunE | kWaitingNotS));
962  wakeRegisteredWaiters(state, kWaitingE | kWaitingU | kWaitingS);
963  return false;
964  }
965 
966  if (kReaderPriority && (state & kHasE) == 0) {
967  assert((state & kBegunE) != 0);
968  if (!state_.compare_exchange_strong(
969  state, (state & ~kBegunE) | kHasE)) {
970  continue;
971  }
972  }
973 
974  return true;
975  }
976  }
977  }
978  }
979 
980  template <class WaitContext>
981  bool waitForZeroBits(
982  uint32_t& state,
983  uint32_t goal,
984  uint32_t waitMask,
985  WaitContext& ctx) {
986  uint32_t spinCount = 0;
987  while (true) {
988  state = state_.load(std::memory_order_acquire);
989  if ((state & goal) == 0) {
990  return true;
991  }
992  asm_volatile_pause();
993  ++spinCount;
994  if (UNLIKELY(spinCount >= kMaxSpinCount)) {
995  return ctx.canBlock() &&
996  yieldWaitForZeroBits(state, goal, waitMask, ctx);
997  }
998  }
999  }
1000 
1001  template <class WaitContext>
1002  bool yieldWaitForZeroBits(
1003  uint32_t& state,
1004  uint32_t goal,
1005  uint32_t waitMask,
1006  WaitContext& ctx) {
1007 #ifdef RUSAGE_THREAD
1008  struct rusage usage;
1009  std::memset(&usage, 0, sizeof(usage));
1010  long before = -1;
1011 #endif
1012  for (uint32_t yieldCount = 0; yieldCount < kMaxSoftYieldCount;
1013  ++yieldCount) {
1014  for (int softState = 0; softState < 3; ++softState) {
1015  if (softState < 2) {
1016  std::this_thread::yield();
1017  } else {
1018 #ifdef RUSAGE_THREAD
1019  getrusage(RUSAGE_THREAD, &usage);
1020 #endif
1021  }
1022  if (((state = state_.load(std::memory_order_acquire)) & goal) == 0) {
1023  return true;
1024  }
1025  if (ctx.shouldTimeOut()) {
1026  return false;
1027  }
1028  }
1029 #ifdef RUSAGE_THREAD
1030  if (before >= 0 && usage.ru_nivcsw >= before + 2) {
1031  // One involuntary csw might just be occasional background work,
1032  // but if we get two in a row then we guess that there is someone
1033  // else who can profitably use this CPU. Fall back to futex
1034  break;
1035  }
1036  before = usage.ru_nivcsw;
1037 #endif
1038  }
1039  return futexWaitForZeroBits(state, goal, waitMask, ctx);
1040  }
1041 
1042  template <class WaitContext>
1043  bool futexWaitForZeroBits(
1044  uint32_t& state,
1045  uint32_t goal,
1046  uint32_t waitMask,
1047  WaitContext& ctx) {
1048  assert(
1049  waitMask == kWaitingNotS || waitMask == kWaitingE ||
1050  waitMask == kWaitingU || waitMask == kWaitingS);
1051 
1052  while (true) {
1053  state = state_.load(std::memory_order_acquire);
1054  if ((state & goal) == 0) {
1055  return true;
1056  }
1057 
1058  auto after = state;
1059  if (waitMask == kWaitingE) {
1060  if ((state & kWaitingESingle) != 0) {
1061  after |= kWaitingEMultiple;
1062  } else {
1063  after |= kWaitingESingle;
1064  }
1065  } else {
1066  after |= waitMask;
1067  }
1068 
1069  // CAS is better than atomic |= here, because it lets us avoid
1070  // setting the wait flag when the goal is concurrently achieved
1071  if (after != state && !state_.compare_exchange_strong(state, after)) {
1072  continue;
1073  }
1074 
1075  if (!ctx.doWait(state_, after, waitMask)) {
1076  // timed out
1077  return false;
1078  }
1079  }
1080  }
1081 
1082  // Wakes up waiters registered in state_ as appropriate, clearing the
1083  // awaiting bits for anybody that was awoken. Tries to perform direct
1084  // single wakeup of an exclusive waiter if appropriate
1085  void wakeRegisteredWaiters(uint32_t& state, uint32_t wakeMask) {
1086  if (UNLIKELY((state & wakeMask) != 0)) {
1087  wakeRegisteredWaitersImpl(state, wakeMask);
1088  }
1089  }
1090 
1091  void wakeRegisteredWaitersImpl(uint32_t& state, uint32_t wakeMask) {
1092  // If there are multiple lock() pending only one of them will actually
1093  // get to wake up, so issuing futexWakeAll will make a thundering herd.
1094  // There's nothing stopping us from issuing futexWake(1) instead,
1095  // so long as the wait bits are still an accurate reflection of
1096  // the waiters. If we notice (via futexWake's return value) that
1097  // nobody woke up then we can try again with the normal wake-all path.
1098  // Note that we can't just clear the bits at that point; we need to
1099  // clear the bits and then issue another wakeup.
1100  //
1101  // It is possible that we wake an E waiter but an outside S grabs the
1102  // lock instead, at which point we should wake pending U and S waiters.
1103  // Rather than tracking state to make the failing E regenerate the
1104  // wakeup, we just disable the optimization in the case that there
1105  // are waiting U or S that we are eligible to wake.
1106  if ((wakeMask & kWaitingE) == kWaitingE &&
1107  (state & wakeMask) == kWaitingE &&
1108  detail::futexWake(&state_, 1, kWaitingE) > 0) {
1109  // somebody woke up, so leave state_ as is and clear it later
1110  return;
1111  }
1112 
1113  if ((state & wakeMask) != 0) {
1114  auto prev = state_.fetch_and(~wakeMask);
1115  if ((prev & wakeMask) != 0) {
1116  futexWakeAll(wakeMask);
1117  }
1118  state = prev & ~wakeMask;
1119  }
1120  }
1121 
1122  void futexWakeAll(uint32_t wakeMask) {
1123  detail::futexWake(&state_, std::numeric_limits<int>::max(), wakeMask);
1124  }
1125 
1126  DeferredReaderSlot* deferredReader(uint32_t slot) {
1127  return &deferredReaders[slot * kDeferredSeparationFactor];
1128  }
1129 
1130  uintptr_t tokenfulSlotValue() {
1131  return reinterpret_cast<uintptr_t>(this);
1132  }
1133 
1134  uintptr_t tokenlessSlotValue() {
1135  return tokenfulSlotValue() | kTokenless;
1136  }
1137 
1138  bool slotValueIsThis(uintptr_t slotValue) {
1139  return (slotValue & ~kTokenless) == tokenfulSlotValue();
1140  }
1141 
1142  // Clears any deferredReaders[] that point to this, adjusting the inline
1143  // shared lock count to compensate. Does some spinning and yielding
1144  // to avoid the work. Always finishes the application, even if ctx
1145  // times out.
1146  template <class WaitContext>
1147  void applyDeferredReaders(uint32_t& state, WaitContext& ctx) {
1148  uint32_t slot = 0;
1149 
1150  uint32_t spinCount = 0;
1151  while (true) {
1152  while (!slotValueIsThis(
1153  deferredReader(slot)->load(std::memory_order_acquire))) {
1154  if (++slot == kMaxDeferredReaders) {
1155  return;
1156  }
1157  }
1158  asm_volatile_pause();
1159  if (UNLIKELY(++spinCount >= kMaxSpinCount)) {
1160  applyDeferredReaders(state, ctx, slot);
1161  return;
1162  }
1163  }
1164  }
1165 
1166  template <class WaitContext>
1167  void applyDeferredReaders(uint32_t& state, WaitContext& ctx, uint32_t slot) {
1168 #ifdef RUSAGE_THREAD
1169  struct rusage usage;
1170  std::memset(&usage, 0, sizeof(usage));
1171  long before = -1;
1172 #endif
1173  for (uint32_t yieldCount = 0; yieldCount < kMaxSoftYieldCount;
1174  ++yieldCount) {
1175  for (int softState = 0; softState < 3; ++softState) {
1176  if (softState < 2) {
1177  std::this_thread::yield();
1178  } else {
1179 #ifdef RUSAGE_THREAD
1180  getrusage(RUSAGE_THREAD, &usage);
1181 #endif
1182  }
1183  while (!slotValueIsThis(
1184  deferredReader(slot)->load(std::memory_order_acquire))) {
1185  if (++slot == kMaxDeferredReaders) {
1186  return;
1187  }
1188  }
1189  if (ctx.shouldTimeOut()) {
1190  // finish applying immediately on timeout
1191  break;
1192  }
1193  }
1194 #ifdef RUSAGE_THREAD
1195  if (before >= 0 && usage.ru_nivcsw >= before + 2) {
1196  // heuristic says run queue is not empty
1197  break;
1198  }
1199  before = usage.ru_nivcsw;
1200 #endif
1201  }
1202 
1203  uint32_t movedSlotCount = 0;
1204  for (; slot < kMaxDeferredReaders; ++slot) {
1205  auto slotPtr = deferredReader(slot);
1206  auto slotValue = slotPtr->load(std::memory_order_acquire);
1207  if (slotValueIsThis(slotValue) &&
1208  slotPtr->compare_exchange_strong(slotValue, 0)) {
1209  ++movedSlotCount;
1210  }
1211  }
1212 
1213  if (movedSlotCount > 0) {
1214  state = (state_ += movedSlotCount * kIncrHasS);
1215  }
1216  assert((state & (kHasE | kBegunE)) != 0);
1217 
1218  // if state + kIncrHasS overflows (off the end of state) then either
1219  // we have 2^(32-11) readers (almost certainly an application bug)
1220  // or we had an underflow (also a bug)
1221  assert(state < state + kIncrHasS);
1222  }
1223 
1224  // It is straightforward to make a token-less lock_shared() and
1225  // unlock_shared() either by making the token-less version always use
1226  // INLINE_SHARED mode or by removing the token version. Supporting
1227  // deferred operation for both types is trickier than it appears, because
1228  // the purpose of the token is so that unlock_shared doesn't have to
1229  // look in other slots for its deferred lock. Token-less unlock_shared
1230  // might place a deferred lock in one place and then release a different
1231  // slot that was originally used by the token-ful version. If this was
1232  // important we could solve the problem by differentiating the deferred
1233  // locks so that cross-variety release wouldn't occur. The best way
1234  // is probably to steal a bit from the pointer, making deferredLocks[]
1235  // an array of Atom<uintptr_t>.
1236 
1237  template <class WaitContext>
1238  bool lockSharedImpl(Token* token, WaitContext& ctx) {
1239  uint32_t state = state_.load(std::memory_order_relaxed);
1240  if ((state & (kHasS | kMayDefer | kHasE)) == 0 &&
1241  state_.compare_exchange_strong(state, state + kIncrHasS)) {
1242  if (token != nullptr) {
1243  token->type_ = Token::Type::INLINE_SHARED;
1244  }
1245  return true;
1246  }
1247  return lockSharedImpl(state, token, ctx);
1248  }
1249 
1250  template <class WaitContext>
1251  bool lockSharedImpl(uint32_t& state, Token* token, WaitContext& ctx);
1252 
1253  // Updates the state in/out argument as if the locks were made inline,
1254  // but does not update state_
1255  void cleanupTokenlessSharedDeferred(uint32_t& state) {
1256  for (uint32_t i = 0; i < kMaxDeferredReaders; ++i) {
1257  auto slotPtr = deferredReader(i);
1258  auto slotValue = slotPtr->load(std::memory_order_relaxed);
1259  if (slotValue == tokenlessSlotValue()) {
1260  slotPtr->store(0, std::memory_order_relaxed);
1261  state += kIncrHasS;
1262  if ((state & kHasS) == 0) {
1263  break;
1264  }
1265  }
1266  }
1267  }
1268 
1269  bool tryUnlockTokenlessSharedDeferred();
1270 
1271  bool tryUnlockSharedDeferred(uint32_t slot) {
1272  assert(slot < kMaxDeferredReaders);
1273  auto slotValue = tokenfulSlotValue();
1274  return deferredReader(slot)->compare_exchange_strong(slotValue, 0);
1275  }
1276 
1277  uint32_t unlockSharedInline() {
1278  uint32_t state = (state_ -= kIncrHasS);
1279  assert(
1280  (state & (kHasE | kBegunE | kMayDefer)) != 0 ||
1281  state < state + kIncrHasS);
1282  if ((state & kHasS) == 0) {
1283  // Only the second half of lock() can be blocked by a non-zero
1284  // reader count, so that's the only thing we need to wake
1285  wakeRegisteredWaiters(state, kWaitingNotS);
1286  }
1287  return state;
1288  }
1289 
1290  template <class WaitContext>
1291  bool lockUpgradeImpl(WaitContext& ctx) {
1292  uint32_t state;
1293  do {
1294  if (!waitForZeroBits(state, kHasSolo, kWaitingU, ctx)) {
1295  return false;
1296  }
1297  } while (!state_.compare_exchange_strong(state, state | kHasU));
1298  return true;
1299  }
1300 
1301  public:
1302  class ReadHolder {
1303  ReadHolder() : lock_(nullptr) {}
1304 
1305  public:
1306  explicit ReadHolder(const SharedMutexImpl* lock)
1307  : lock_(const_cast<SharedMutexImpl*>(lock)) {
1308  if (lock_) {
1309  lock_->lock_shared(token_);
1310  }
1311  }
1312 
1313  explicit ReadHolder(const SharedMutexImpl& lock)
1314  : lock_(const_cast<SharedMutexImpl*>(&lock)) {
1315  lock_->lock_shared(token_);
1316  }
1317 
1318  ReadHolder(ReadHolder&& rhs) noexcept
1319  : lock_(rhs.lock_), token_(rhs.token_) {
1320  rhs.lock_ = nullptr;
1321  }
1322 
1323  // Downgrade from upgrade mode
1324  explicit ReadHolder(UpgradeHolder&& upgraded) : lock_(upgraded.lock_) {
1325  assert(upgraded.lock_ != nullptr);
1326  upgraded.lock_ = nullptr;
1327  lock_->unlock_upgrade_and_lock_shared(token_);
1328  }
1329 
1330  // Downgrade from exclusive mode
1331  explicit ReadHolder(WriteHolder&& writer) : lock_(writer.lock_) {
1332  assert(writer.lock_ != nullptr);
1333  writer.lock_ = nullptr;
1334  lock_->unlock_and_lock_shared(token_);
1335  }
1336 
1337  ReadHolder& operator=(ReadHolder&& rhs) noexcept {
1338  std::swap(lock_, rhs.lock_);
1339  std::swap(token_, rhs.token_);
1340  return *this;
1341  }
1342 
1343  ReadHolder(const ReadHolder& rhs) = delete;
1344  ReadHolder& operator=(const ReadHolder& rhs) = delete;
1345 
1346  ~ReadHolder() {
1347  unlock();
1348  }
1349 
1350  void unlock() {
1351  if (lock_) {
1352  lock_->unlock_shared(token_);
1353  lock_ = nullptr;
1354  }
1355  }
1356 
1357  private:
1358  friend class UpgradeHolder;
1359  friend class WriteHolder;
1360  SharedMutexImpl* lock_;
1361  SharedMutexToken token_;
1362  };
1363 
1364  class UpgradeHolder {
1365  UpgradeHolder() : lock_(nullptr) {}
1366 
1367  public:
1368  explicit UpgradeHolder(SharedMutexImpl* lock) : lock_(lock) {
1369  if (lock_) {
1370  lock_->lock_upgrade();
1371  }
1372  }
1373 
1374  explicit UpgradeHolder(SharedMutexImpl& lock) : lock_(&lock) {
1375  lock_->lock_upgrade();
1376  }
1377 
1378  // Downgrade from exclusive mode
1379  explicit UpgradeHolder(WriteHolder&& writer) : lock_(writer.lock_) {
1380  assert(writer.lock_ != nullptr);
1381  writer.lock_ = nullptr;
1382  lock_->unlock_and_lock_upgrade();
1383  }
1384 
1385  UpgradeHolder(UpgradeHolder&& rhs) noexcept : lock_(rhs.lock_) {
1386  rhs.lock_ = nullptr;
1387  }
1388 
1389  UpgradeHolder& operator=(UpgradeHolder&& rhs) noexcept {
1390  std::swap(lock_, rhs.lock_);
1391  return *this;
1392  }
1393 
1394  UpgradeHolder(const UpgradeHolder& rhs) = delete;
1395  UpgradeHolder& operator=(const UpgradeHolder& rhs) = delete;
1396 
1397  ~UpgradeHolder() {
1398  unlock();
1399  }
1400 
1401  void unlock() {
1402  if (lock_) {
1403  lock_->unlock_upgrade();
1404  lock_ = nullptr;
1405  }
1406  }
1407 
1408  private:
1409  friend class WriteHolder;
1410  friend class ReadHolder;
1411  SharedMutexImpl* lock_;
1412  };
1413 
1414  class WriteHolder {
1415  WriteHolder() : lock_(nullptr) {}
1416 
1417  public:
1418  explicit WriteHolder(SharedMutexImpl* lock) : lock_(lock) {
1419  if (lock_) {
1420  lock_->lock();
1421  }
1422  }
1423 
1424  explicit WriteHolder(SharedMutexImpl& lock) : lock_(&lock) {
1425  lock_->lock();
1426  }
1427 
1428  // Promotion from upgrade mode
1429  explicit WriteHolder(UpgradeHolder&& upgrade) : lock_(upgrade.lock_) {
1430  assert(upgrade.lock_ != nullptr);
1431  upgrade.lock_ = nullptr;
1432  lock_->unlock_upgrade_and_lock();
1433  }
1434 
1435  // README:
1436  //
1437  // It is intended that WriteHolder(ReadHolder&& rhs) do not exist.
1438  //
1439  // Shared locks (read) can not safely upgrade to unique locks (write).
1440  // That upgrade path is a well-known recipe for deadlock, so we explicitly
1441  // disallow it.
1442  //
1443  // If you need to do a conditional mutation, you have a few options:
1444  // 1. Check the condition under a shared lock and release it.
1445  // Then maybe check the condition again under a unique lock and maybe do
1446  // the mutation.
1447  // 2. Check the condition once under an upgradeable lock.
1448  // Then maybe upgrade the lock to a unique lock and do the mutation.
1449  // 3. Check the condition and maybe perform the mutation under a unique
1450  // lock.
1451  //
1452  // Relevant upgradeable lock notes:
1453  // * At most one upgradeable lock can be held at a time for a given shared
1454  // mutex, just like a unique lock.
1455  // * An upgradeable lock may be held concurrently with any number of shared
1456  // locks.
1457  // * An upgradeable lock may be upgraded atomically to a unique lock.
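  //
  // Option 2 as a sketch (the predicate and mutation are placeholders,
  // given some folly::SharedMutex mu):
  //
  //   folly::SharedMutex::UpgradeHolder u(mu);
  //   if (mutationIsNeeded()) {
  //     folly::SharedMutex::WriteHolder w(std::move(u)); // atomic promotion
  //     // ... perform the mutation ...
  //   }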
1458 
1459  WriteHolder(WriteHolder&& rhs) noexcept : lock_(rhs.lock_) {
1460  rhs.lock_ = nullptr;
1461  }
1462 
1463  WriteHolder& operator=(WriteHolder&& rhs) noexcept {
1464  std::swap(lock_, rhs.lock_);
1465  return *this;
1466  }
1467 
1468  WriteHolder(const WriteHolder& rhs) = delete;
1469  WriteHolder& operator=(const WriteHolder& rhs) = delete;
1470 
1471  ~WriteHolder() {
1472  unlock();
1473  }
1474 
1475  void unlock() {
1476  if (lock_) {
1477  lock_->unlock();
1478  lock_ = nullptr;
1479  }
1480  }
1481 
1482  private:
1483  friend class ReadHolder;
1484  friend class UpgradeHolder;
1485  SharedMutexImpl* lock_;
1486  };
1487 
1488  // Adapters for Synchronized<>
1489  friend void acquireRead(SharedMutexImpl& lock) {
1490  lock.lock_shared();
1491  }
1492  friend void acquireReadWrite(SharedMutexImpl& lock) {
1493  lock.lock();
1494  }
1495  friend void releaseRead(SharedMutexImpl& lock) {
1496  lock.unlock_shared();
1497  }
1498  friend void releaseReadWrite(SharedMutexImpl& lock) {
1499  lock.unlock();
1500  }
1501  friend bool acquireRead(SharedMutexImpl& lock, unsigned int ms) {
1502  return lock.try_lock_shared_for(std::chrono::milliseconds(ms));
1503  }
1504  friend bool acquireReadWrite(SharedMutexImpl& lock, unsigned int ms) {
1505  return lock.try_lock_for(std::chrono::milliseconds(ms));
1506  }
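  // For example (assuming folly/Synchronized.h is available), these adapters
  // let folly::Synchronized use SharedMutex as its mutex type:
  //
  //   folly::Synchronized<std::vector<int>, folly::SharedMutex> data;
  //   data.withWLock([](std::vector<int>& v) { v.push_back(1); });
  //   data.withRLock([](const std::vector<int>& v) { /* read v */ });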
1507 };
1508 
1509 typedef SharedMutexImpl<true> SharedMutexReadPriority;
1510 typedef SharedMutexImpl<false> SharedMutexWritePriority;
1511 typedef SharedMutexWritePriority SharedMutex;
1514 
1515 // Prevent the compiler from instantiating these in other translation units.
1516 // They are instantiated once in SharedMutex.cpp
1517 extern template class SharedMutexImpl<true>;
1518 extern template class SharedMutexImpl<false>;
1519 
1520 template <
1521  bool ReaderPriority,
1522  typename Tag_,
1523  template <typename> class Atom,
1524  bool BlockImmediately,
1525  bool AnnotateForThreadSanitizer>
1526 typename SharedMutexImpl<
1527  ReaderPriority,
1528  Tag_,
1529  Atom,
1530  BlockImmediately,
1531  AnnotateForThreadSanitizer>::DeferredReaderSlot
1532  SharedMutexImpl<
1533  ReaderPriority,
1534  Tag_,
1535  Atom,
1536  BlockImmediately,
1537  AnnotateForThreadSanitizer>::deferredReaders
1538  [kMaxDeferredReaders * kDeferredSeparationFactor] = {};
1539 
1540 template <
1541  bool ReaderPriority,
1542  typename Tag_,
1543  template <typename> class Atom,
1544  bool BlockImmediately,
1545  bool AnnotateForThreadSanitizer>
1546 FOLLY_SHAREDMUTEX_TLS uint32_t SharedMutexImpl<
1547  ReaderPriority,
1548  Tag_,
1549  Atom,
1550  BlockImmediately,
1551  AnnotateForThreadSanitizer>::tls_lastTokenlessSlot = 0;
1552 
1553 template <
1554  bool ReaderPriority,
1555  typename Tag_,
1556  template <typename> class Atom,
1557  bool BlockImmediately,
1558  bool AnnotateForThreadSanitizer>
1559 FOLLY_SHAREDMUTEX_TLS uint32_t SharedMutexImpl<
1560  ReaderPriority,
1561  Tag_,
1562  Atom,
1563  BlockImmediately,
1564  AnnotateForThreadSanitizer>::tls_lastDeferredReaderSlot = 0;
1565 
1566 template <
1567  bool ReaderPriority,
1568  typename Tag_,
1569  template <typename> class Atom,
1570  bool BlockImmediately,
1571  bool AnnotateForThreadSanitizer>
1572 bool SharedMutexImpl<
1573  ReaderPriority,
1574  Tag_,
1575  Atom,
1576  BlockImmediately,
1577  AnnotateForThreadSanitizer>::tryUnlockTokenlessSharedDeferred() {
1578  auto bestSlot = tls_lastTokenlessSlot;
1579  for (uint32_t i = 0; i < kMaxDeferredReaders; ++i) {
1580  auto slotPtr = deferredReader(bestSlot ^ i);
1581  auto slotValue = slotPtr->load(std::memory_order_relaxed);
1582  if (slotValue == tokenlessSlotValue() &&
1583  slotPtr->compare_exchange_strong(slotValue, 0)) {
1584  tls_lastTokenlessSlot = bestSlot ^ i;
1585  return true;
1586  }
1587  }
1588  return false;
1589 }
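tryUnlockTokenlessSharedDeferred() walks the deferred-reader slots starting from the thread-local hint, using bestSlot ^ i as the index; with a power-of-two slot count that walk visits each index exactly once while trying the most recently used slot first. A standalone sketch of the same probe-and-CAS shape (hypothetical names and table, not the folly data structures; clearing a tokenless slot corresponds to expected = marker, desired = 0):

#include <array>
#include <atomic>
#include <cstdint>

// Hypothetical slot table; kSlots must be a power of two so that
// (hint ^ i) enumerates every index exactly once as i runs over [0, kSlots).
constexpr uint32_t kSlots = 64;
std::array<std::atomic<uintptr_t>, kSlots> slots{};

// Swap the first slot currently holding `expected` to `desired`, searching
// outward from `hint` so repeat callers tend to revisit their last slot.
bool swapSlot(uint32_t hint, uintptr_t expected, uintptr_t desired) {
  for (uint32_t i = 0; i < kSlots; ++i) {
    uint32_t idx = (hint ^ i) & (kSlots - 1);
    uintptr_t cur = slots[idx].load(std::memory_order_relaxed);
    if (cur == expected && slots[idx].compare_exchange_strong(cur, desired)) {
      return true;
    }
  }
  return false;
}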
1590 
1591 template <
1592  bool ReaderPriority,
1593  typename Tag_,
1594  template <typename> class Atom,
1595  bool BlockImmediately,
1596  bool AnnotateForThreadSanitizer>
1597 template <class WaitContext>
1598 bool SharedMutexImpl<
1599  ReaderPriority,
1600  Tag_,
1601  Atom,
1602  BlockImmediately,
1603  AnnotateForThreadSanitizer>::
1604  lockSharedImpl(uint32_t& state, Token* token, WaitContext& ctx) {
1605  while (true) {
1606  if (UNLIKELY((state & kHasE) != 0) &&
1607  !waitForZeroBits(state, kHasE, kWaitingS, ctx) && ctx.canTimeOut()) {
1608  return false;
1609  }
1610 
1611  uint32_t slot = tls_lastDeferredReaderSlot;
1612  uintptr_t slotValue = 1; // any non-zero value will do
1613 
1614  bool canAlreadyDefer = (state & kMayDefer) != 0;
1615  bool aboveDeferThreshold =
1616  (state & kHasS) >= (kNumSharedToStartDeferring - 1) * kIncrHasS;
1617  bool drainInProgress = ReaderPriority && (state & kBegunE) != 0;
1618  if (canAlreadyDefer || (aboveDeferThreshold && !drainInProgress)) {
1619  /* Try using the most recent slot first. */
1620  slotValue = deferredReader(slot)->load(std::memory_order_relaxed);
1621  if (slotValue != 0) {
1622  // starting point for our empty-slot search, can change after
1623  // calling waitForZeroBits
1624  uint32_t bestSlot =
1625  (uint32_t)folly::AccessSpreader<Atom>::current(kMaxDeferredReaders);
1626 
1627  // deferred readers are already enabled, or it is time to
1628  // enable them if we can find a slot
1629  for (uint32_t i = 0; i < kDeferredSearchDistance; ++i) {
1630  slot = bestSlot ^ i;
1631  assert(slot < kMaxDeferredReaders);
1632  slotValue = deferredReader(slot)->load(std::memory_order_relaxed);
1633  if (slotValue == 0) {
1634  // found empty slot
1635  tls_lastDeferredReaderSlot = slot;
1636  break;
1637  }
1638  }
1639  }
1640  }
1641 
1642  if (slotValue != 0) {
1643  // not yet deferred, or no empty slots
1644  if (state_.compare_exchange_strong(state, state + kIncrHasS)) {
1645  // successfully recorded the read lock inline
1646  if (token != nullptr) {
1647  token->type_ = Token::Type::INLINE_SHARED;
1648  }
1649  return true;
1650  }
1651  // state is updated, try again
1652  continue;
1653  }
1654 
1655  // record that deferred readers might be in use if necessary
1656  if ((state & kMayDefer) == 0) {
1657  if (!state_.compare_exchange_strong(state, state | kMayDefer)) {
1658  // keep going if CAS failed because somebody else set the bit
1659  // for us
1660  if ((state & (kHasE | kMayDefer)) != kMayDefer) {
1661  continue;
1662  }
1663  }
1664  // state = state | kMayDefer;
1665  }
1666 
1667  // try to use the slot
1668  bool gotSlot = deferredReader(slot)->compare_exchange_strong(
1669  slotValue,
1670  token == nullptr ? tokenlessSlotValue() : tokenfulSlotValue());
1671 
1672  // If we got the slot, we need to verify that an exclusive lock
1673  // didn't happen since we last checked. If we didn't get the slot we
1674  // need to recheck state_ anyway to make sure we don't waste too much
1675  // work. It is also possible that since we checked state_ someone
1676  // has acquired and released the write lock, clearing kMayDefer.
1677  // Both cases are covered by looking for the readers-possible bit,
1678  // because it is off when the exclusive lock bit is set.
1679  state = state_.load(std::memory_order_acquire);
1680 
1681  if (!gotSlot) {
1682  continue;
1683  }
1684 
1685  if (token == nullptr) {
1686  tls_lastTokenlessSlot = slot;
1687  }
1688 
1689  if ((state & kMayDefer) != 0) {
1690  assert((state & kHasE) == 0);
1691  // success
1692  if (token != nullptr) {
1693  token->type_ = Token::Type::DEFERRED_SHARED;
1694  token->slot_ = (uint16_t)slot;
1695  }
1696  return true;
1697  }
1698 
1699  // release the slot before retrying
1700  if (token == nullptr) {
1701  // We can't rely on slot. Token-less slot values can be freed by
1702  // any unlock_shared(), so we need to do the full deferredReader
1703  // search during unlock. Unlike unlock_shared(), we can't trust
1704  // kPrevDefer here. This deferred lock isn't visible to lock()
1705  // (that's the whole reason we're undoing it) so there might have
1706  // subsequently been an unlock() and lock() with no intervening
1707  // transition to deferred mode.
1708  if (!tryUnlockTokenlessSharedDeferred()) {
1709  unlockSharedInline();
1710  }
1711  } else {
1712  if (!tryUnlockSharedDeferred(slot)) {
1713  unlockSharedInline();
1714  }
1715  }
1716 
1717  // We got here not because the lock was unavailable, but because
1718  // we lost a compare-and-swap. Try-lock is typically allowed to
1719  // have spurious failures, but there is no lock efficiency gain
1720  // from exploiting that freedom here.
1721  }
1722 }
1723 
1724 } // namespace folly
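The heart of lockSharedImpl() above is a claim-then-revalidate pattern: publish a deferred-reader slot, then re-read state_ with acquire ordering and keep the slot only if kMayDefer is still set, since that bit is off whenever the exclusive-lock bit is set. A condensed sketch of that shape with a single hypothetical slot (not the folly implementation; it also ignores the tokenless-slot rollback subtleties the real code handles):

#include <atomic>
#include <cstdint>

constexpr uint32_t kMayDefer = 1u << 0;  // stand-in bit; off while exclusive

std::atomic<uint32_t> state{kMayDefer};  // stand-in for the central state word
std::atomic<uintptr_t> slot{0};          // stand-in for one deferred-reader slot

// Returns true if a deferred shared lock was successfully recorded in `slot`.
bool tryDeferredSharedLock(uintptr_t myMarker) {
  uintptr_t expected = 0;
  if (!slot.compare_exchange_strong(expected, myMarker)) {
    return false;  // slot busy; caller falls back to the inline reader count
  }
  // Revalidate after publishing: a writer that already cleared kMayDefer will
  // never scan for a slot published later, so the claim must be undone rather
  // than treated as a shared lock.
  if ((state.load(std::memory_order_acquire) & kMayDefer) != 0) {
    return true;  // the slot now stands for one shared (read) lock
  }
  slot.store(0, std::memory_order_release);  // roll back; caller retries
  return false;
}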