/*
 * Copyright 2017-present Facebook, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#pragma once

#include <folly/PackedSyncPtr.h>
#include <folly/concurrency/detail/AtomicSharedPtr-detail.h>
#include <folly/synchronization/AtomicStruct.h>
#include <folly/synchronization/detail/AtomicUtils.h>
#include <atomic>
#include <thread>

/*
 * This is an implementation of the std::atomic_shared_ptr TS
 * http://en.cppreference.com/w/cpp/experimental/atomic_shared_ptr
 * https://isocpp.org/files/papers/N4162.pdf
 *
 * AFAIK, the only other implementation is Anthony Williams', from the
 * Just::Thread library:
 *
 * https://bitbucket.org/anthonyw/atomic_shared_ptr
 *
 * Implementation details:
 *
 * Basically, three things need to be atomically exchanged to make this work:
 * * the local count
 * * the pointer to the control block
 * * the aliased pointer, if any.
 *
 * The Williams version does it with a DWCAS: 32 bits for the local count,
 * 64 bits for the control block ptr, and he changes the shared_ptr
 * implementation to also store the aliased pointers using a linked-list
 * like structure, and provides 32-bit index accessors to them (like the
 * IndexedMemPool trick).
 *
 * This version instead stores the 48 bits of address plus 16 bits of
 * local count in a single 8-byte pointer. This avoids 'lock cmpxchg16b',
 * which is much slower than 'lock xchg' in the normal 'store' case. In
 * the less-common aliased pointer scenario, we just allocate it in a new
 * block, and store a pointer to that instead.
 *
 * Note that even if we only wanted to use the 3 bits of pointer alignment,
 * this trick should still work: any more than 4 concurrent accesses
 * would have to go to an external map count instead (slower, but lots of
 * concurrent access will be slow anyway due to bouncing cachelines).
 *
 * As a perf optimization, we currently batch up the local count and only
 * move it to the global count every once in a while. This means load() is
 * usually only a single atomic operation, instead of 3. For this trick to
 * work, we probably need at least 8 bits to make batching worth it.
 */

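// To make the packing trick above concrete, here is a minimal sketch of the
// idea (illustrative only; this header actually uses folly::PackedSyncPtr):
//
//   #include <cstdint>
//
//   // On x86-64, user-space addresses fit in the low 48 bits, so the top
//   // 16 bits of a pointer-sized word can hold a small local count.
//   inline uint64_t pack(void* p, uint16_t count) {
//     return reinterpret_cast<uint64_t>(p) | (uint64_t(count) << 48);
//   }
//   inline void* unpackPtr(uint64_t v) {
//     return reinterpret_cast<void*>(v & ((uint64_t(1) << 48) - 1));
//   }
//   inline uint16_t unpackCount(uint64_t v) {
//     return uint16_t(v >> 48);
//   }
//
// The packed word fits in one std::atomic<uint64_t>, so pointer and count
// can be swapped together with a single 'lock xchg' or 'lock cmpxchg'.
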
// A note on noexcept: If the pointer is an aliased pointer,
// store() will allocate. Otherwise it is noexcept.
namespace folly {

template <
    typename T,
    template <typename> class Atom = std::atomic,
    typename CountedDetail = detail::shared_ptr_internals>
class atomic_shared_ptr {
  using SharedPtr = typename CountedDetail::template CountedPtr<T>;
  using BasePtr = typename CountedDetail::counted_base;
  using PackedPtr = folly::PackedSyncPtr<BasePtr>;

 public:
  atomic_shared_ptr() noexcept {
    init();
  }
  explicit atomic_shared_ptr(SharedPtr foo) /* noexcept */
      : atomic_shared_ptr() {
    store(std::move(foo));
  }
  atomic_shared_ptr(const atomic_shared_ptr<T>&) = delete;

  ~atomic_shared_ptr() {
    store(SharedPtr(nullptr));
  }
  void operator=(SharedPtr desired) /* noexcept */ {
    store(std::move(desired));
  }
  void operator=(const atomic_shared_ptr<T>&) = delete;

  bool is_lock_free() const noexcept {
    // lock free unless more than EXTERNAL_OFFSET threads are
    // contending and they all get unlucky and scheduled out during
    // load().
    //
    // TODO: Could use a lock-free external map to fix this
    // corner case.
    return true;
  }

  SharedPtr load(std::memory_order order = std::memory_order_seq_cst) const
      noexcept {
    auto local = takeOwnedBase(order);
    return get_shared_ptr(local, false);
  }

  /* implicit */ operator SharedPtr() const {
    return load();
  }

  void store(
      SharedPtr n,
      std::memory_order order = std::memory_order_seq_cst) /* noexcept */ {
    auto newptr = get_newptr(std::move(n));
    auto old = ptr_.exchange(newptr, order);
    release_external(old);
  }

  SharedPtr exchange(
      SharedPtr n,
      std::memory_order order = std::memory_order_seq_cst) /* noexcept */ {
    auto newptr = get_newptr(std::move(n));
    auto old = ptr_.exchange(newptr, order);

    SharedPtr old_ptr;

    if (old.get()) {
      old_ptr = get_shared_ptr(old);
      release_external(old);
    }

    return old_ptr;
  }

  bool compare_exchange_weak(
      SharedPtr& expected,
      const SharedPtr& n,
      std::memory_order mo = std::memory_order_seq_cst) noexcept {
    return compare_exchange_weak(
        expected, n, mo, detail::default_failure_memory_order(mo));
  }
  bool compare_exchange_weak(
      SharedPtr& expected,
      const SharedPtr& n,
      std::memory_order success,
      std::memory_order failure) /* noexcept */ {
    auto newptr = get_newptr(n);
    PackedPtr oldptr, expectedptr;

    oldptr = takeOwnedBase(success);
    if (!owners_eq(oldptr, CountedDetail::get_counted_base(expected))) {
      expected = get_shared_ptr(oldptr, false);
      release_external(newptr);
      return false;
    }
    expectedptr = oldptr; // Need oldptr to release if failed
    if (ptr_.compare_exchange_weak(expectedptr, newptr, success, failure)) {
      if (oldptr.get()) {
        release_external(oldptr, -1);
      }
      return true;
    } else {
      if (oldptr.get()) {
        expected = get_shared_ptr(oldptr, false);
      } else {
        expected = SharedPtr(nullptr);
      }
      release_external(newptr);
      return false;
    }
  }
  bool compare_exchange_weak(
      SharedPtr& expected,
      SharedPtr&& desired,
      std::memory_order mo = std::memory_order_seq_cst) noexcept {
    return compare_exchange_weak(
        expected, desired, mo, detail::default_failure_memory_order(mo));
  }
  bool compare_exchange_weak(
      SharedPtr& expected,
      SharedPtr&& desired,
      std::memory_order success,
      std::memory_order failure) /* noexcept */ {
    return compare_exchange_weak(expected, desired, success, failure);
  }
  bool compare_exchange_strong(
      SharedPtr& expected,
      const SharedPtr& n,
      std::memory_order mo = std::memory_order_seq_cst) noexcept {
    return compare_exchange_strong(
        expected, n, mo, detail::default_failure_memory_order(mo));
  }
  bool compare_exchange_strong(
      SharedPtr& expected,
      const SharedPtr& n,
      std::memory_order success,
      std::memory_order failure) /* noexcept */ {
    auto local_expected = expected;
    do {
      if (compare_exchange_weak(expected, n, success, failure)) {
        return true;
      }
      // A weak CAS can fail spuriously: retry as long as 'expected' still
      // compares equal to the value we last observed.
    } while (local_expected == expected);

    return false;
  }
  bool compare_exchange_strong(
      SharedPtr& expected,
      SharedPtr&& desired,
      std::memory_order mo = std::memory_order_seq_cst) noexcept {
    return compare_exchange_strong(
        expected, desired, mo, detail::default_failure_memory_order(mo));
  }
  bool compare_exchange_strong(
      SharedPtr& expected,
      SharedPtr&& desired,
      std::memory_order success,
      std::memory_order failure) /* noexcept */ {
    return compare_exchange_strong(expected, desired, success, failure);
  }

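  // Typical copy-on-write update loop against the API above (illustrative
  // sketch, not part of the original header; 'Config' and 'mutate' are
  // hypothetical):
  //
  //   folly::atomic_shared_ptr<Config> gConfig; // assume initialized
  //
  //   void updateConfig() {
  //     auto expected = gConfig.load();
  //     std::shared_ptr<Config> desired;
  //     do {
  //       desired = std::make_shared<Config>(*expected); // copy
  //       desired->mutate();
  //     } while (!gConfig.compare_exchange_weak(expected, desired));
  //   }
  //
  // On failure, compare_exchange_weak reloads 'expected' with the current
  // value, so the loop retries against fresh state.
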
 private:
  // Matches packed_sync_pointer. Must be > max number of local
  // counts. This is the max number of threads that can access this
  // atomic_shared_ptr at once before we start blocking.
  static constexpr unsigned EXTERNAL_OFFSET{0x2000};
  // Bit signifying aliased constructor
  static constexpr unsigned ALIASED_PTR{0x4000};

  mutable AtomicStruct<PackedPtr, Atom> ptr_;

  void add_external(BasePtr* res, int64_t c = 0) const {
    assert(res);
    CountedDetail::inc_shared_count(res, EXTERNAL_OFFSET + c);
  }
  void release_external(PackedPtr& res, int64_t c = 0) const {
    if (!res.get()) {
      return;
    }
    int64_t count = get_local_count(res) + c;
    int64_t diff = EXTERNAL_OFFSET - count;
    assert(diff >= 0);
    CountedDetail::template release_shared<T>(res.get(), diff);
  }
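  // Worked example of the accounting above (illustrative): storing a new
  // pointer calls add_external(), bumping the global shared count by
  // EXTERNAL_OFFSET (0x2000). Each load() borrows one reference by
  // incrementing only the packed local count. If the pointer is later
  // swapped out while the local count is 3, release_external() returns the
  // unused balance, 0x2000 - 3 = 0x1FFD references, so each of the 3
  // outstanding loads still owns exactly one reference.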
  PackedPtr get_newptr(const SharedPtr& n) const {
    BasePtr* newval;
    unsigned count = 0;
    if (!n) {
      newval = nullptr;
    } else {
      newval = CountedDetail::get_counted_base(n);
      if (n.get() != CountedDetail::template get_shared_ptr<T>(newval)) {
        // This is an aliased shared_ptr. Make an un-aliased one
        // by wrapping in *another* shared_ptr.
        auto data = CountedDetail::template make_ptr<SharedPtr>(n);
        newval = CountedDetail::get_counted_base(data);
        count = ALIASED_PTR;
        // (add_external must happen before data goes out of scope)
        add_external(newval);
      } else {
        add_external(newval);
      }
    }

    PackedPtr newptr;
    newptr.init(newval, count);

    return newptr;
  }
  PackedPtr get_newptr(SharedPtr&& n) const {
    BasePtr* newval;
    unsigned count = 0;
    if (!n) {
      newval = nullptr;
    } else {
      newval = CountedDetail::get_counted_base(n);
      if (n.get() != CountedDetail::template get_shared_ptr<T>(newval)) {
        // This is an aliased shared_ptr. Make an un-aliased one
        // by wrapping in *another* shared_ptr.
        auto data = CountedDetail::template make_ptr<SharedPtr>(std::move(n));
        newval = CountedDetail::get_counted_base(data);
        count = ALIASED_PTR;
        CountedDetail::release_ptr(data);
        add_external(newval, -1);
      } else {
        CountedDetail::release_ptr(n);
        add_external(newval, -1);
      }
    }

    PackedPtr newptr;
    newptr.init(newval, count);

    return newptr;
  }
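  // An aliased shared_ptr shares ownership of one object while pointing at
  // another, so its stored pointer cannot be recovered from the control
  // block alone. Illustrative example ('Node' is hypothetical):
  //
  //   struct Node { int value; };
  //   std::shared_ptr<Node> owner = std::make_shared<Node>();
  //   // 'alias' keeps the Node alive but points at its member:
  //   std::shared_ptr<int> alias(owner, &owner->value);
  //
  // get_newptr() detects this case above and wraps such pointers in an
  // extra shared_ptr block, flagged with ALIASED_PTR.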
  void init() {
    PackedPtr data;
    data.init();
    ptr_.store(data);
  }

  unsigned int get_local_count(const PackedPtr& p) const {
    return p.extra() & ~ALIASED_PTR;
  }

  // Check pointer equality considering wrapped aliased pointers.
  bool owners_eq(PackedPtr& p1, BasePtr* p2) {
    bool aliased1 = p1.extra() & ALIASED_PTR;
    if (aliased1) {
      auto p1a = CountedDetail::template get_shared_ptr_from_counted_base<T>(
          p1.get(), false);
      return CountedDetail::get_counted_base(p1a) == p2;
    }
    return p1.get() == p2;
  }

  SharedPtr get_shared_ptr(const PackedPtr& p, bool inc = true) const {
    bool aliased = p.extra() & ALIASED_PTR;

    auto res = CountedDetail::template get_shared_ptr_from_counted_base<T>(
        p.get(), inc);
    if (aliased) {
      auto aliasedp =
          CountedDetail::template get_shared_ptr_from_counted_base<SharedPtr>(
              p.get());
      res = *aliasedp;
    }
    return res;
  }

  /* Get a reference to the pointer, either from the local batch or
   * from the global count.
   *
   * Returns the base ptr and the previous local count, in case it is
   * needed for a compare-and-swap later.
   */
  PackedPtr takeOwnedBase(std::memory_order order) const noexcept {
    PackedPtr local, newlocal;
    local = ptr_.load(std::memory_order_acquire);
    while (true) {
      if (!local.get()) {
        return local;
      }
      newlocal = local;
      if (get_local_count(newlocal) + 1 > EXTERNAL_OFFSET) {
        // spinlock in the rare case we have more than
        // EXTERNAL_OFFSET threads trying to access at once.
        std::this_thread::yield();
        // Force DeterministicSchedule to choose a different thread
        local = ptr_.load(std::memory_order_acquire);
      } else {
        newlocal.setExtra(newlocal.extra() + 1);
        assert(get_local_count(newlocal) > 0);
        if (ptr_.compare_exchange_weak(local, newlocal, order)) {
          break;
        }
      }
    }

    // Check if we need to push a batch from local -> global
    auto batchcount = EXTERNAL_OFFSET / 2;
    if (get_local_count(newlocal) > batchcount) {
      CountedDetail::inc_shared_count(newlocal.get(), batchcount);
      putOwnedBase(newlocal.get(), batchcount, order);
    }

    return newlocal;
  }

  void putOwnedBase(BasePtr* p, unsigned int count, std::memory_order mo) const
      noexcept {
    PackedPtr local = ptr_.load(std::memory_order_acquire);
    while (true) {
      if (local.get() != p) {
        break;
      }
      auto newlocal = local;
      if (get_local_count(local) > count) {
        newlocal.setExtra(local.extra() - count);
      } else {
        // Otherwise it may be the same pointer, but someone else won
        // the compare_exchange below; the local count was already made
        // global. We decrement the global count directly instead of
        // the local one.
        break;
      }
      if (ptr_.compare_exchange_weak(local, newlocal, mo)) {
        return;
      }
    }

    CountedDetail::template release_shared<T>(p, count);
  }
};

} // namespace folly
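
// Example usage (illustrative sketch; the folly include path is an
// assumption):
//
//   #include <folly/concurrency/AtomicSharedPtr.h>
//
//   folly::atomic_shared_ptr<int> asp;
//   asp.store(std::make_shared<int>(42)); // single 'lock xchg'
//   std::shared_ptr<int> sp = asp.load(); // usually one atomic op
//   asp.compare_exchange_strong(sp, std::make_shared<int>(43));
//
// Note: storing an aliased shared_ptr (see get_newptr above) allocates a
// wrapper block, so store() is not noexcept in that case.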