RWSpinLock.h
1 /*
2  * Copyright 2011-present Facebook, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 /*
17  * N.B. You most likely do _not_ want to use RWSpinLock or any other
18  * kind of spinlock. Use SharedMutex instead.
19  *
20  * In short, spinlocks in preemptive multi-tasking operating systems
21  * have serious problems and fast mutexes like SharedMutex are almost
22  * certainly the better choice, because letting the OS scheduler put a
23  * thread to sleep is better for system responsiveness and throughput
24  * than wasting a timeslice repeatedly querying a lock held by a
25  * thread that's blocked, and you can't prevent userspace
26  * programs from blocking.
27  *
28  * Spinlocks in an operating system kernel make much more sense than
29  * they do in userspace.
30  *
31  * -------------------------------------------------------------------
32  *
33  * Two Read-Write spin lock implementations.
34  *
35  * Ref: http://locklessinc.com/articles/locks
36  *
37  * Both locks here are faster than pthread_rwlock and have very low
38  * overhead (usually 20-30ns). They don't use any system mutexes and
39  * are very compact (4/8 bytes), so are suitable for per-instance
40  * based locking, particularly when contention is not expected.
41  *
42  * For a spinlock, RWSpinLock is a reasonable choice. (See the note
43  * above for why a spin lock is frequently a bad idea generally.)
44  * RWSpinLock has minimal overhead, and comparable contention
45  * performance when the number of competing threads is less than or
46  * equal to the number of logical CPUs. Even as the number of
47  * threads gets larger, RWSpinLock can still be very competitive in
48  * READ, although it is slower on WRITE, and also inherently unfair
49  * to writers.
50  *
51  * RWTicketSpinLock shows more balanced READ/WRITE performance. If
52  * your application really needs a lot more threads, and a
53  * higher-priority writer, prefer one of the RWTicketSpinLock locks.
54  *
55  * Caveats:
56  *
57  * RWTicketSpinLock locks can only be used with GCC on x86/x86-64
58  * based systems.
59  *
60  * RWTicketSpinLock<32> only allows up to 2^8 - 1 concurrent
61  * readers and writers.
62  *
63  * RWTicketSpinLock<64> only allows up to 2^16 - 1 concurrent
64  * readers and writers.
65  *
66  * RWTicketSpinLock<..., true> (kFavorWriter = true, that is, strict
67  * writer priority) is NOT reentrant, even for lock_shared().
68  *
69  * The lock will not grant any new shared (read) accesses while a thread
70  * attempting to acquire the lock in write mode is blocked. (That is,
71  * if the lock is held in shared mode by N threads, and a thread attempts
72  * to acquire it in write mode, no one else can acquire it in shared mode
73  * until these N threads release the lock and then the blocked thread
74  * acquires and releases the exclusive lock.) This also applies for
75  * attempts to reacquire the lock in shared mode by threads that already
76  * hold it in shared mode, making the lock non-reentrant.
77  *
78  * RWSpinLock handles 2^30 - 1 concurrent readers.
79  *
80  * @author Xin Liu <xliux@fb.com>
81  */
82 
83 #pragma once
84 
85 /*
86 ========================================================================
87 Benchmark on (Intel(R) Xeon(R) CPU L5630 @ 2.13GHz) 8 cores (16 HTs)
88 ========================================================================
89 
90 ------------------------------------------------------------------------------
91 1. Single thread benchmark (read/write lock + unlock overhead)
92 Benchmark Iters Total t t/iter iter/sec
93 -------------------------------------------------------------------------------
94 * BM_RWSpinLockRead 100000 1.786 ms 17.86 ns 53.4M
95 +30.5% BM_RWSpinLockWrite 100000 2.331 ms 23.31 ns 40.91M
96 +85.7% BM_RWTicketSpinLock32Read 100000 3.317 ms 33.17 ns 28.75M
97 +96.0% BM_RWTicketSpinLock32Write 100000 3.5 ms 35 ns 27.25M
98 +85.6% BM_RWTicketSpinLock64Read 100000 3.315 ms 33.15 ns 28.77M
99 +96.0% BM_RWTicketSpinLock64Write 100000 3.5 ms 35 ns 27.25M
100 +85.7% BM_RWTicketSpinLock32FavorWriterRead 100000 3.317 ms 33.17 ns 28.75M
101 +29.7% BM_RWTicketSpinLock32FavorWriterWrite 100000 2.316 ms 23.16 ns 41.18M
102 +85.3% BM_RWTicketSpinLock64FavorWriterRead 100000 3.309 ms 33.09 ns 28.82M
103 +30.2% BM_RWTicketSpinLock64FavorWriterWrite 100000 2.325 ms 23.25 ns 41.02M
104 + 175% BM_PThreadRWMutexRead 100000 4.917 ms 49.17 ns 19.4M
105 + 166% BM_PThreadRWMutexWrite 100000 4.757 ms 47.57 ns 20.05M
106 
107 ------------------------------------------------------------------------------
108 2. Contention Benchmark 90% read 10% write
109 Benchmark hits average min max sigma
110 ------------------------------------------------------------------------------
111 ---------- 8 threads ------------
112 RWSpinLock Write 142666 220ns 78ns 40.8us 269ns
113 RWSpinLock Read 1282297 222ns 80ns 37.7us 248ns
114 RWTicketSpinLock Write 85692 209ns 71ns 17.9us 252ns
115 RWTicketSpinLock Read 769571 215ns 78ns 33.4us 251ns
116 pthread_rwlock_t Write 84248 2.48us 99ns 269us 8.19us
117 pthread_rwlock_t Read 761646 933ns 101ns 374us 3.25us
118 
119 ---------- 16 threads ------------
120 RWSpinLock Write 124236 237ns 78ns 261us 801ns
121 RWSpinLock Read 1115807 236ns 78ns 2.27ms 2.17us
122 RWTicketSpinLock Write 81781 231ns 71ns 31.4us 351ns
123 RWTicketSpinLock Read 734518 238ns 78ns 73.6us 379ns
124 pthread_rwlock_t Write 83363 7.12us 99ns 785us 28.1us
125 pthread_rwlock_t Read 754978 2.18us 101ns 1.02ms 14.3us
126 
127 ---------- 50 threads ------------
128 RWSpinLock Write 131142 1.37us 82ns 7.53ms 68.2us
129 RWSpinLock Read 1181240 262ns 78ns 6.62ms 12.7us
130 RWTicketSpinLock Write 83045 397ns 73ns 7.01ms 31.5us
131 RWTicketSpinLock Read 744133 386ns 78ns 11ms 31.4us
132 pthread_rwlock_t Write 80849 112us 103ns 4.52ms 263us
133 pthread_rwlock_t Read 728698 24us 101ns 7.28ms 194us
134 
135 */
136 
137 #include <folly/Portability.h>
138 #include <folly/portability/Asm.h>
139 
140 #if defined(__GNUC__) && (defined(__i386) || FOLLY_X64 || defined(ARCH_K8))
141 #define RW_SPINLOCK_USE_X86_INTRINSIC_
142 #include <x86intrin.h>
143 #elif defined(_MSC_VER) && defined(FOLLY_X64)
144 #define RW_SPINLOCK_USE_X86_INTRINSIC_
145 #elif FOLLY_AARCH64
146 #define RW_SPINLOCK_USE_X86_INTRINSIC_
147 #else
148 #undef RW_SPINLOCK_USE_X86_INTRINSIC_
149 #endif
150 
151 // iOS doesn't define _mm_cvtsi64_si128 and friends
152 #if (FOLLY_SSE >= 2) && !FOLLY_MOBILE && FOLLY_X64
153 #define RW_SPINLOCK_USE_SSE_INSTRUCTIONS_
154 #else
155 #undef RW_SPINLOCK_USE_SSE_INSTRUCTIONS_
156 #endif
157 
158 #include <algorithm>
159 #include <atomic>
160 #include <thread>
161 
162 #include <folly/Likely.h>
163 
164 namespace folly {
165 
166 /*
167  * A simple, small (4 bytes), but unfair rwlock. Use it when you want
168  * a nice writer and don't expect a lot of write/read contention, or
169  * when you need small rwlocks since you are creating a large number
170  * of them.
171  *
172  * Note that the unfairness here is extreme: if the lock is
173  * continually accessed for read, writers will never get a chance. If
174  * the lock can be that highly contended this class is probably not an
175  * ideal choice anyway.
176  *
177  * It currently implements most of the Lockable, SharedLockable and
178  * UpgradeLockable concepts except the TimedLockable related locking/unlocking
179  * interfaces.
180  */
181 class RWSpinLock {
182  enum : int32_t { READER = 4, UPGRADED = 2, WRITER = 1 };
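 // Added note (not in the original source): bits_ packs the entire lock
 // state into a single int32_t. Bit 0 is WRITER, bit 1 is UPGRADED, and the
 // reader count occupies the remaining 30 bits in increments of READER (4),
 // which is why the header comment above quotes a limit of 2^30 - 1
 // concurrent readers.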
183 
184  public:
185  constexpr RWSpinLock() : bits_(0) {}
186 
187  RWSpinLock(RWSpinLock const&) = delete;
188  RWSpinLock& operator=(RWSpinLock const&) = delete;
189 
190  // Lockable Concept
191  void lock() {
192  uint_fast32_t count = 0;
193  while (!LIKELY(try_lock())) {
194  if (++count > 1000) {
195  std::this_thread::yield();
196  }
197  }
198  }
199 
200  // Writer is responsible for clearing up both the UPGRADED and WRITER bits.
201  void unlock() {
202  static_assert(READER > WRITER + UPGRADED, "wrong bits!");
203  bits_.fetch_and(~(WRITER | UPGRADED), std::memory_order_release);
204  }
205 
206  // SharedLockable Concept
207  void lock_shared() {
208  uint_fast32_t count = 0;
209  while (!LIKELY(try_lock_shared())) {
210  if (++count > 1000) {
211  std::this_thread::yield();
212  }
213  }
214  }
215 
216  void unlock_shared() {
217  bits_.fetch_add(-READER, std::memory_order_release);
218  }
219 
220  // Downgrade the lock from writer status to reader status.
221  void unlock_and_lock_shared() {
222  bits_.fetch_add(READER, std::memory_order_acquire);
223  unlock();
224  }
225 
226  // UpgradeLockable Concept
227  void lock_upgrade() {
228  uint_fast32_t count = 0;
229  while (!try_lock_upgrade()) {
230  if (++count > 1000) {
231  std::this_thread::yield();
232  }
233  }
234  }
235 
236  void unlock_upgrade() {
237  bits_.fetch_add(-UPGRADED, std::memory_order_acq_rel);
238  }
239 
240  // unlock upgrade and try to acquire write lock
241  void unlock_upgrade_and_lock() {
242  int64_t count = 0;
243  while (!try_unlock_upgrade_and_lock()) {
244  if (++count > 1000) {
245  std::this_thread::yield();
246  }
247  }
248  }
249 
250  // unlock upgrade and read lock atomically
251  void unlock_upgrade_and_lock_shared() {
252  bits_.fetch_add(READER - UPGRADED, std::memory_order_acq_rel);
253  }
254 
255  // write unlock and upgrade lock atomically
256  void unlock_and_lock_upgrade() {
257  // need to do it in two steps here -- as the UPGRADED bit might be OR-ed at
258  // the same time when other threads are trying to do try_lock_upgrade().
259  bits_.fetch_or(UPGRADED, std::memory_order_acquire);
260  bits_.fetch_add(-WRITER, std::memory_order_release);
261  }
262 
263  // Attempt to acquire writer permission. Return false if we didn't get it.
264  bool try_lock() {
265  int32_t expect = 0;
266  return bits_.compare_exchange_strong(
267  expect, WRITER, std::memory_order_acq_rel);
268  }
269 
270  // Try to get reader permission on the lock. This can fail if we
271  // find out someone is a writer or upgrader.
272  // Setting the UPGRADED bit would allow a writer-to-be to indicate
273  // its intention to write and block any new readers while waiting
274  // for existing readers to finish and release their read locks. This
275  // helps avoid starving writers (promoted from upgraders).
276  bool try_lock_shared() {
277  // fetch_add is considerably (100%) faster than compare_exchange,
278  // so here we are optimizing for the common (lock success) case.
279  int32_t value = bits_.fetch_add(READER, std::memory_order_acquire);
280  if (UNLIKELY(value & (WRITER | UPGRADED))) {
281  bits_.fetch_add(-READER, std::memory_order_release);
282  return false;
283  }
284  return true;
285  }
286 
287  // try to unlock upgrade and write lock atomically
288  bool try_unlock_upgrade_and_lock() {
289  int32_t expect = UPGRADED;
290  return bits_.compare_exchange_strong(
291  expect, WRITER, std::memory_order_acq_rel);
292  }
293 
294  // try to acquire an upgradable lock.
295  bool try_lock_upgrade() {
296  int32_t value = bits_.fetch_or(UPGRADED, std::memory_order_acquire);
297 
298  // Note: when failed, we cannot flip the UPGRADED bit back,
299  // as in this case there is either another upgrade lock or a write lock.
300  // If it's a write lock, the bit will get cleared up when that lock's done
301  // with unlock().
302  return ((value & (UPGRADED | WRITER)) == 0);
303  }
304 
305  // mainly for debugging purposes.
306  int32_t bits() const {
307  return bits_.load(std::memory_order_acquire);
308  }
309 
310  class ReadHolder;
311  class UpgradedHolder;
312  class WriteHolder;
313 
314  class ReadHolder {
315  public:
316  explicit ReadHolder(RWSpinLock* lock) : lock_(lock) {
317  if (lock_) {
318  lock_->lock_shared();
319  }
320  }
321 
322  explicit ReadHolder(RWSpinLock& lock) : lock_(&lock) {
323  lock_->lock_shared();
324  }
325 
326  ReadHolder(ReadHolder&& other) noexcept : lock_(other.lock_) {
327  other.lock_ = nullptr;
328  }
329 
330  // down-grade
331  explicit ReadHolder(UpgradedHolder&& upgraded) : lock_(upgraded.lock_) {
332  upgraded.lock_ = nullptr;
333  if (lock_) {
334  lock_->unlock_upgrade_and_lock_shared();
335  }
336  }
337 
338  explicit ReadHolder(WriteHolder&& writer) : lock_(writer.lock_) {
339  writer.lock_ = nullptr;
340  if (lock_) {
341  lock_->unlock_and_lock_shared();
342  }
343  }
344 
345  ReadHolder& operator=(ReadHolder&& other) {
346  using std::swap;
347  swap(lock_, other.lock_);
348  return *this;
349  }
350 
351  ReadHolder(const ReadHolder& other) = delete;
352  ReadHolder& operator=(const ReadHolder& other) = delete;
353 
354  ~ReadHolder() {
355  if (lock_) {
356  lock_->unlock_shared();
357  }
358  }
359 
360  void reset(RWSpinLock* lock = nullptr) {
361  if (lock == lock_) {
362  return;
363  }
364  if (lock_) {
365  lock_->unlock_shared();
366  }
367  lock_ = lock;
368  if (lock_) {
369  lock_->lock_shared();
370  }
371  }
372 
373  void swap(ReadHolder* other) {
374  std::swap(lock_, other->lock_);
375  }
376 
377  private:
378  friend class UpgradedHolder;
379  friend class WriteHolder;
380  RWSpinLock* lock_;
381  };
382 
383  class UpgradedHolder {
384  public:
385  explicit UpgradedHolder(RWSpinLock* lock) : lock_(lock) {
386  if (lock_) {
387  lock_->lock_upgrade();
388  }
389  }
390 
391  explicit UpgradedHolder(RWSpinLock& lock) : lock_(&lock) {
392  lock_->lock_upgrade();
393  }
394 
395  explicit UpgradedHolder(WriteHolder&& writer) {
396  lock_ = writer.lock_;
397  writer.lock_ = nullptr;
398  if (lock_) {
399  lock_->unlock_and_lock_upgrade();
400  }
401  }
402 
403  UpgradedHolder(UpgradedHolder&& other) noexcept : lock_(other.lock_) {
404  other.lock_ = nullptr;
405  }
406 
407  UpgradedHolder& operator=(UpgradedHolder&& other) {
408  using std::swap;
409  swap(lock_, other.lock_);
410  return *this;
411  }
412 
413  UpgradedHolder(const UpgradedHolder& other) = delete;
414  UpgradedHolder& operator=(const UpgradedHolder& other) = delete;
415 
416  ~UpgradedHolder() {
417  if (lock_) {
418  lock_->unlock_upgrade();
419  }
420  }
421 
422  void reset(RWSpinLock* lock = nullptr) {
423  if (lock == lock_) {
424  return;
425  }
426  if (lock_) {
427  lock_->unlock_upgrade();
428  }
429  lock_ = lock;
430  if (lock_) {
431  lock_->lock_upgrade();
432  }
433  }
434 
435  void swap(UpgradedHolder* other) {
436  using std::swap;
437  swap(lock_, other->lock_);
438  }
439 
440  private:
441  friend class WriteHolder;
442  friend class ReadHolder;
443  RWSpinLock* lock_;
444  };
445 
446  class WriteHolder {
447  public:
448  explicit WriteHolder(RWSpinLock* lock) : lock_(lock) {
449  if (lock_) {
450  lock_->lock();
451  }
452  }
453 
454  explicit WriteHolder(RWSpinLock& lock) : lock_(&lock) {
455  lock_->lock();
456  }
457 
458  // promoted from an upgrade lock holder
459  explicit WriteHolder(UpgradedHolder&& upgraded) {
460  lock_ = upgraded.lock_;
461  upgraded.lock_ = nullptr;
462  if (lock_) {
463  lock_->unlock_upgrade_and_lock();
464  }
465  }
466 
467  WriteHolder(WriteHolder&& other) noexcept : lock_(other.lock_) {
468  other.lock_ = nullptr;
469  }
470 
471  WriteHolder& operator=(WriteHolder&& other) {
472  using std::swap;
473  swap(lock_, other.lock_);
474  return *this;
475  }
476 
477  WriteHolder(const WriteHolder& other) = delete;
478  WriteHolder& operator=(const WriteHolder& other) = delete;
479 
480  ~WriteHolder() {
481  if (lock_) {
482  lock_->unlock();
483  }
484  }
485 
486  void reset(RWSpinLock* lock = nullptr) {
487  if (lock == lock_) {
488  return;
489  }
490  if (lock_) {
491  lock_->unlock();
492  }
493  lock_ = lock;
494  if (lock_) {
495  lock_->lock();
496  }
497  }
498 
499  void swap(WriteHolder* other) {
500  using std::swap;
501  swap(lock_, other->lock_);
502  }
503 
504  private:
505  friend class ReadHolder;
506  friend class UpgradedHolder;
507  RWSpinLock* lock_;
508  };
509 
510  private:
511  std::atomic<int32_t> bits_;
512 };
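/*
Usage sketch (added for illustration; not part of the original header). It
shows the typical way the RAII holders above pair with a per-instance
RWSpinLock, including the upgrade-then-write path described by the
UpgradeLockable notes. ConfigCache, its members, and the <map>/<string>
usage are hypothetical.

  class ConfigCache {
   public:
    std::string get(const std::string& key) const {
      folly::RWSpinLock::ReadHolder guard(&lock_); // shared access
      auto it = map_.find(key);
      return it == map_.end() ? std::string() : it->second;
    }

    void setIfMissing(const std::string& key, const std::string& value) {
      // The upgrade lock keeps new readers and writers out while existing
      // readers drain; constructing the WriteHolder from it then spins in
      // unlock_upgrade_and_lock() until the reader count reaches zero.
      folly::RWSpinLock::UpgradedHolder upgraded(&lock_);
      if (map_.count(key) == 0) {
        folly::RWSpinLock::WriteHolder writer(std::move(upgraded));
        map_[key] = value;
      }
    }

   private:
    mutable folly::RWSpinLock lock_;
    std::map<std::string, std::string> map_;
  };
*/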
513 
514 #ifdef RW_SPINLOCK_USE_X86_INTRINSIC_
515 // A more balanced Read-Write spin lock implemented based on GCC intrinsics.
516 
517 namespace detail {
518 template <size_t kBitWidth>
519 struct RWTicketIntTrait {
520  static_assert(
521  kBitWidth == 32 || kBitWidth == 64,
522  "bit width has to be either 32 or 64 ");
523 };
524 
525 template <>
526 struct RWTicketIntTrait<64> {
527  typedef uint64_t FullInt;
528  typedef uint32_t HalfInt;
529  typedef uint16_t QuarterInt;
530 
531 #ifdef RW_SPINLOCK_USE_SSE_INSTRUCTIONS_
532  static __m128i make128(const uint16_t v[4]) {
533  return _mm_set_epi16(
534  0, 0, 0, 0, short(v[3]), short(v[2]), short(v[1]), short(v[0]));
535  }
536  static inline __m128i fromInteger(uint64_t from) {
537  return _mm_cvtsi64_si128(int64_t(from));
538  }
539  static inline uint64_t toInteger(__m128i in) {
540  return uint64_t(_mm_cvtsi128_si64(in));
541  }
542  static inline uint64_t addParallel(__m128i in, __m128i kDelta) {
543  return toInteger(_mm_add_epi16(in, kDelta));
544  }
545 #endif
546 };
547 
548 template <>
549 struct RWTicketIntTrait<32> {
550  typedef uint32_t FullInt;
551  typedef uint16_t HalfInt;
552  typedef uint8_t QuarterInt;
553 
554 #ifdef RW_SPINLOCK_USE_SSE_INSTRUCTIONS_
555  static __m128i make128(const uint8_t v[4]) {
556  // clang-format off
557  return _mm_set_epi8(
558  0, 0, 0, 0,
559  0, 0, 0, 0,
560  0, 0, 0, 0,
561  char(v[3]), char(v[2]), char(v[1]), char(v[0]));
562  // clang-format on
563  }
564  static inline __m128i fromInteger(uint32_t from) {
565  return _mm_cvtsi32_si128(int32_t(from));
566  }
567  static inline uint32_t toInteger(__m128i in) {
568  return uint32_t(_mm_cvtsi128_si32(in));
569  }
570  static inline uint32_t addParallel(__m128i in, __m128i kDelta) {
571  return toInteger(_mm_add_epi8(in, kDelta));
572  }
573 #endif
574 };
575 } // namespace detail
576 
577 template <size_t kBitWidth, bool kFavorWriter = false>
578 class RWTicketSpinLockT {
579  typedef detail::RWTicketIntTrait<kBitWidth> IntTraitType;
580  typedef typename detail::RWTicketIntTrait<kBitWidth>::FullInt FullInt;
581  typedef typename detail::RWTicketIntTrait<kBitWidth>::HalfInt HalfInt;
582  typedef typename detail::RWTicketIntTrait<kBitWidth>::QuarterInt QuarterInt;
583 
584  union RWTicket {
585  constexpr RWTicket() : whole(0) {}
586  FullInt whole;
587  HalfInt readWrite;
588  __extension__ struct {
589  QuarterInt write;
590  QuarterInt read;
591  QuarterInt users;
592  };
593  } ticket;
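 // Added note (inferred from the operations below, not in the original
 // source): `users` is the next ticket number to hand out, while `write`
 // and `read` are the ticket numbers currently being served for writers
 // and readers. A writer takes a ticket from `users` and waits until
 // `write` reaches it; a reader is admitted only while no writer is
 // waiting or holding the lock (users == read), bumping `read` and `users`
 // together. unlock() advances both `read` and `write`; unlock_shared()
 // advances only `write`.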
594 
595  private: // Some x64-specific utilities for atomic access to ticket.
596  template <class T>
597  static T load_acquire(T* addr) {
598  T t = *addr; // acquire barrier
599  asm_volatile_memory();
600  return t;
601  }
602 
603  template <class T>
604  static void store_release(T* addr, T v) {
605  asm_volatile_memory();
606  *addr = v; // release barrier
607  }
608 
609  public:
610  constexpr RWTicketSpinLockT() {}
611 
612  RWTicketSpinLockT(RWTicketSpinLockT const&) = delete;
613  RWTicketSpinLockT& operator=(RWTicketSpinLockT const&) = delete;
614 
615  void lock() {
616  if (kFavorWriter) {
617  writeLockAggressive();
618  } else {
619  writeLockNice();
620  }
621  }
622 
623  /*
624  * Both try_lock and try_lock_shared diverge in our implementation from the
625  * lock algorithm described in the link above.
626  *
627  * In the read case, it is undesirable that the readers could wait
628  * for another reader (before increasing ticket.read in the other
629  * implementation). Our approach gives up on
630  * first-come-first-served, but our benchmarks showed improved
631  * performance for both readers and writers under heavily contended
632  * cases, particularly when the number of threads exceeds the number
633  * of logical CPUs.
634  *
635  * We have writeLockAggressive() using the original implementation
636  * for a writer, which gives some advantage to the writer over the
637  * readers---for that path it is guaranteed that the writer will
638  * acquire the lock after all the existing readers exit.
639  */
640  bool try_lock() {
641  RWTicket t;
642  FullInt old = t.whole = load_acquire(&ticket.whole);
643  if (t.users != t.write) {
644  return false;
645  }
646  ++t.users;
647  return __sync_bool_compare_and_swap(&ticket.whole, old, t.whole);
648  }
649 
650  /*
651  * Call this if you want to prioritize the writer to avoid starvation.
652  * Unlike writeLockNice, it acquires the write lock as soon as
653  * the existing readers (arriving before the writer) finish their
654  * turns.
655  */
656  void writeLockAggressive() {
657  // std::this_thread::yield() is needed here to avoid a pathology if the
658  // number of threads attempting concurrent writes is >= the number of real
659  // cores allocated to this process. This is less likely than the
660  // corresponding situation in lock_shared(), but we still want to
661  * avoid it.
662  uint_fast32_t count = 0;
663  QuarterInt val = __sync_fetch_and_add(&ticket.users, 1);
664  while (val != load_acquire(&ticket.write)) {
665  asm_volatile_pause();
666  if (UNLIKELY(++count > 1000)) {
667  std::this_thread::yield();
668  }
669  }
670  }
671 
672  // Call this when the writer should be nicer to the readers.
673  void writeLockNice() {
674  // Here it doesn't cpu-relax the writer.
675  //
676  // This is because usually we have many more readers than the
677  // writers, so the writer has less chance to get the lock when
678  // there are a lot of competing readers. The aggressive spinning
679  // can help to avoid starving writers.
680  //
681  // We don't worry about std::this_thread::yield() here because the caller
682  // has already explicitly abandoned fairness.
683  while (!try_lock()) {
684  }
685  }
686 
687  // Atomically unlock the write-lock from writer and acquire the read-lock.
688  void unlock_and_lock_shared() {
689  QuarterInt val = __sync_fetch_and_add(&ticket.read, 1);
690  }
691 
692  // Release writer permission on the lock.
693  void unlock() {
694  RWTicket t;
695  t.whole = load_acquire(&ticket.whole);
696 
697 #ifdef RW_SPINLOCK_USE_SSE_INSTRUCTIONS_
698  FullInt old = t.whole;
699  // SSE2 can reduce the lock and unlock overhead by 10%
700  static const QuarterInt kDeltaBuf[4] = {1, 1, 0, 0}; // write/read/user
701  static const __m128i kDelta = IntTraitType::make128(kDeltaBuf);
702  __m128i m = IntTraitType::fromInteger(old);
703  t.whole = IntTraitType::addParallel(m, kDelta);
704 #else
705  ++t.read;
706  ++t.write;
707 #endif
708  store_release(&ticket.readWrite, t.readWrite);
709  }
710 
711  void lock_shared() {
712  // std::this_thread::yield() is important here because we can't grab the
713  // shared lock if there is a pending writeLockAggressive, so we
714  // need to let threads that already have a shared lock complete
715  uint_fast32_t count = 0;
716  while (!LIKELY(try_lock_shared())) {
717  asm_volatile_pause();
718  if (UNLIKELY((++count & 1023) == 0)) {
719  std::this_thread::yield();
720  }
721  }
722  }
723 
724  bool try_lock_shared() {
725  RWTicket t, old;
726  old.whole = t.whole = load_acquire(&ticket.whole);
727  old.users = old.read;
728 #ifdef RW_SPINLOCK_USE_SSE_INSTRUCTIONS_
729  // SSE2 may reduce the total lock and unlock overhead by 10%
730  static const QuarterInt kDeltaBuf[4] = {0, 1, 1, 0}; // write/read/user
731  static const __m128i kDelta = IntTraitType::make128(kDeltaBuf);
732  __m128i m = IntTraitType::fromInteger(old.whole);
733  t.whole = IntTraitType::addParallel(m, kDelta);
734 #else
735  ++t.read;
736  ++t.users;
737 #endif
738  return __sync_bool_compare_and_swap(&ticket.whole, old.whole, t.whole);
739  }
740 
741  void unlock_shared() {
742  __sync_fetch_and_add(&ticket.write, 1);
743  }
744 
745  class WriteHolder;
746 
747  typedef RWTicketSpinLockT<kBitWidth, kFavorWriter> RWSpinLock;
748  class ReadHolder {
749  public:
750  ReadHolder(ReadHolder const&) = delete;
751  ReadHolder& operator=(ReadHolder const&) = delete;
752 
753  explicit ReadHolder(RWSpinLock* lock) : lock_(lock) {
754  if (lock_) {
755  lock_->lock_shared();
756  }
757  }
758 
759  explicit ReadHolder(RWSpinLock& lock) : lock_(&lock) {
760  if (lock_) {
761  lock_->lock_shared();
762  }
763  }
764 
765  // atomically unlock the write-lock from writer and acquire the read-lock
766  explicit ReadHolder(WriteHolder* writer) : lock_(nullptr) {
767  std::swap(this->lock_, writer->lock_);
768  if (lock_) {
769  lock_->unlock_and_lock_shared();
770  }
771  }
772 
773  ~ReadHolder() {
774  if (lock_) {
775  lock_->unlock_shared();
776  }
777  }
778 
779  void reset(RWSpinLock* lock = nullptr) {
780  if (lock_) {
781  lock_->unlock_shared();
782  }
783  lock_ = lock;
784  if (lock_) {
785  lock_->lock_shared();
786  }
787  }
788 
789  void swap(ReadHolder* other) {
790  std::swap(this->lock_, other->lock_);
791  }
792 
793  private:
794  RWSpinLock* lock_;
795  };
796 
797  class WriteHolder {
798  public:
799  WriteHolder(WriteHolder const&) = delete;
800  WriteHolder& operator=(WriteHolder const&) = delete;
801 
802  explicit WriteHolder(RWSpinLock* lock) : lock_(lock) {
803  if (lock_) {
804  lock_->lock();
805  }
806  }
807  explicit WriteHolder(RWSpinLock& lock) : lock_(&lock) {
808  if (lock_) {
809  lock_->lock();
810  }
811  }
812 
813  ~WriteHolder() {
814  if (lock_) {
815  lock_->unlock();
816  }
817  }
818 
819  void reset(RWSpinLock* lock = nullptr) {
820  if (lock == lock_) {
821  return;
822  }
823  if (lock_) {
824  lock_->unlock();
825  }
826  lock_ = lock;
827  if (lock_) {
828  lock_->lock();
829  }
830  }
831 
832  void swap(WriteHolder* other) {
833  std::swap(this->lock_, other->lock_);
834  }
835 
836  private:
837  friend class ReadHolder;
838  RWSpinLock* lock_;
839  };
840 };
841 
842 typedef RWTicketSpinLockT<32> RWTicketSpinLock32;
843 typedef RWTicketSpinLockT<64> RWTicketSpinLock64;
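/*
Usage sketch (added for illustration; not part of the original header). The
ticket locks expose the same lock()/unlock()/lock_shared()/unlock_shared()
surface plus nested ReadHolder/WriteHolder guards; the Counter type below is
hypothetical.

  struct Counter {
    uint64_t get() const {
      folly::RWTicketSpinLock64::ReadHolder guard(&lock_);
      return value_;
    }
    void add(uint64_t delta) {
      folly::RWTicketSpinLock64::WriteHolder guard(&lock_);
      value_ += delta;
    }
    mutable folly::RWTicketSpinLock64 lock_;
    uint64_t value_ = 0;
  };

Note the caveats at the top of this file: this variant is only compiled when
RW_SPINLOCK_USE_X86_INTRINSIC_ is defined, RWTicketSpinLock64 admits at most
2^16 - 1 concurrent readers and writers, and the kFavorWriter = true flavor
is not reentrant even for lock_shared().
*/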
844 
845 #endif // RW_SPINLOCK_USE_X86_INTRINSIC_
846 
847 } // namespace folly
848 
849 #ifdef RW_SPINLOCK_USE_X86_INTRINSIC_
850 #undef RW_SPINLOCK_USE_X86_INTRINSIC_
851 #endif