ThreadLocalDetail.cpp
/*
 * Copyright 2015-present Facebook, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <folly/detail/ThreadLocalDetail.h>

#include <list>
#include <mutex>

constexpr auto kSmallGrowthFactor = 1.1;
constexpr auto kBigGrowthFactor = 1.7;

namespace folly {
namespace threadlocal_detail {

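// Each ElementWrapper embeds a ThreadEntryNode; for a given element id the
// nodes of all ThreadEntries form a doubly-linked list rooted at the meta's
// head_ entry. initIfZero() links this node in lazily on first use; `locked`
// tells it whether the meta lock is already held by the caller.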
void ThreadEntryNode::initIfZero(bool locked) {
  if (UNLIKELY(!next)) {
    if (LIKELY(locked)) {
      parent->meta->pushBackLocked(parent, id);
    } else {
      parent->meta->pushBackUnlocked(parent, id);
    }
  }
}

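// Link this node in just before `head` in the per-id list (i.e. at the tail,
// since the list is circular through the head entry). push_back() itself does
// no locking; see pushBackLocked()/pushBackUnlocked() below.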
void ThreadEntryNode::push_back(ThreadEntry* head) {
  // get the head prev and next nodes
  ThreadEntryNode* hnode = &head->elements[id].node;

  // update current
  next = head;
  prev = hnode->prev;

  // hprev
  ThreadEntryNode* hprev = &hnode->prev->elements[id].node;
  hprev->next = parent;
  hnode->prev = parent;
}

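// Unlink this node from the per-id list and null out its pointers. A node
// whose prev is already nullptr is not linked, so the call is a no-op then.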
void ThreadEntryNode::eraseZero() {
  if (LIKELY(prev != nullptr)) {
    // get the prev and next nodes
    ThreadEntryNode* nprev = &prev->elements[id].node;
    ThreadEntryNode* nnext = &next->elements[id].node;

    // update the prev and next
    nnext->prev = prev;
    nprev->next = next;

    // set the prev and next to nullptr
    next = prev = nullptr;
  }
}

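// The constructor registers onThreadExit() as the destructor of the pthread
// key, so per-thread elements get disposed of when a thread that used this
// meta exits.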
StaticMetaBase::StaticMetaBase(ThreadEntry* (*threadEntry)(), bool strict)
    : nextId_(1), threadEntry_(threadEntry), strict_(strict) {
  head_.next = head_.prev = &head_;
  int ret = pthread_key_create(&pthreadKey_, &onThreadExit);
  checkPosixError(ret, "pthread_key_create failed");
  PthreadKeyUnregister::registerKey(pthreadKey_);
}

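// Returns the calling thread's ThreadEntryList. With FOLLY_TLD_USE_FOLLY_TLS
// this is a plain thread-local singleton; otherwise the list is stored under a
// dedicated pthread key created on first use.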
ThreadEntryList* StaticMetaBase::getThreadEntryList() {
#ifdef FOLLY_TLD_USE_FOLLY_TLS
  static FOLLY_TLS ThreadEntryList threadEntryListSingleton;
  return &threadEntryListSingleton;
#else
  class PthreadKey {
   public:
    PthreadKey() {
      int ret = pthread_key_create(&pthreadKey_, nullptr);
      checkPosixError(ret, "pthread_key_create failed");
      PthreadKeyUnregister::registerKey(pthreadKey_);
    }

    FOLLY_ALWAYS_INLINE pthread_key_t get() const {
      return pthreadKey_;
    }

   private:
    pthread_key_t pthreadKey_;
  };

  static auto instance = detail::createGlobal<PthreadKey, void>();

  ThreadEntryList* threadEntryList =
      static_cast<ThreadEntryList*>(pthread_getspecific(instance->get()));

  if (UNLIKELY(!threadEntryList)) {
    threadEntryList = new ThreadEntryList();
    int ret = pthread_setspecific(instance->get(), threadEntryList);
    checkPosixError(ret, "pthread_setspecific failed");
  }

  return threadEntryList;
#endif
}

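// pthread key destructor installed by the StaticMetaBase constructor. It
// disposes of the exiting thread's elements (possibly over several rounds,
// since element destructors may themselves touch ThreadLocals) and, once the
// last ThreadEntry in the thread's list is gone, frees the element arrays.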
void StaticMetaBase::onThreadExit(void* ptr) {
  auto threadEntry = static_cast<ThreadEntry*>(ptr);

  {
    auto& meta = *threadEntry->meta;

    // Make sure this ThreadEntry is available if ThreadLocal A is accessed in
    // ThreadLocal B destructor.
    pthread_setspecific(meta.pthreadKey_, threadEntry);
    SharedMutex::ReadHolder rlock(nullptr);
    if (meta.strict_) {
      rlock = SharedMutex::ReadHolder(meta.accessAllThreadsLock_);
    }
    {
      std::lock_guard<std::mutex> g(meta.lock_);
      // mark it as removed
      threadEntry->removed_ = true;
      meta.erase(&(*threadEntry));
      auto elementsCapacity = threadEntry->getElementsCapacity();
      for (size_t i = 0u; i < elementsCapacity; ++i) {
        threadEntry->elements[i].node.eraseZero();
      }
      // No need to hold the lock any longer; the ThreadEntry is private to this
      // thread now that it's been removed from meta.
    }
    // NOTE: User-provided deleter / object dtor itself may be using ThreadLocal
    // with the same Tag, so dispose() calls below may (re)create some of the
    // elements or even increase elementsCapacity, thus multiple cleanup rounds
    // may be required.
    for (bool shouldRun = true; shouldRun;) {
      shouldRun = false;
      auto elementsCapacity = threadEntry->getElementsCapacity();
      FOR_EACH_RANGE (i, 0, elementsCapacity) {
        if (threadEntry->elements[i].dispose(TLPDestructionMode::THIS_THREAD)) {
          threadEntry->elements[i].cleanup();
          shouldRun = true;
        }
      }
    }
    pthread_setspecific(meta.pthreadKey_, nullptr);
  }

  auto threadEntryList = threadEntry->list;
  DCHECK_GT(threadEntryList->count, 0u);

  --threadEntryList->count;

  if (threadEntryList->count) {
    return;
  }

  // dispose all the elements
  for (bool shouldRunOuter = true; shouldRunOuter;) {
    shouldRunOuter = false;
    auto tmp = threadEntryList->head;
    while (tmp) {
      auto& meta = *tmp->meta;
      pthread_setspecific(meta.pthreadKey_, tmp);
      SharedMutex::ReadHolder rlock(nullptr);
      if (meta.strict_) {
        rlock = SharedMutex::ReadHolder(meta.accessAllThreadsLock_);
      }
      for (bool shouldRunInner = true; shouldRunInner;) {
        shouldRunInner = false;
        auto elementsCapacity = tmp->getElementsCapacity();
        FOR_EACH_RANGE (i, 0, elementsCapacity) {
          if (tmp->elements[i].dispose(TLPDestructionMode::THIS_THREAD)) {
            tmp->elements[i].cleanup();
            shouldRunInner = true;
            shouldRunOuter = true;
          }
        }
      }
      pthread_setspecific(meta.pthreadKey_, nullptr);
      tmp = tmp->listNext;
    }
  }

  // free the entry list
  auto head = threadEntryList->head;
  threadEntryList->head = nullptr;
  while (head) {
    auto tmp = head;
    head = head->listNext;
    if (tmp->elements) {
      free(tmp->elements);
      tmp->elements = nullptr;
      tmp->setElementsCapacity(0);
    }

#ifndef FOLLY_TLD_USE_FOLLY_TLS
    delete tmp;
#endif
  }

#ifndef FOLLY_TLD_USE_FOLLY_TLS
  delete threadEntryList;
#endif
}

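// Number of element slots currently allocated for the calling thread, or 0 if
// the thread has no ThreadEntry yet.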
uint32_t StaticMetaBase::elementsCapacity() const {
  ThreadEntry* threadEntry = (*threadEntry_)();

  return FOLLY_LIKELY(!!threadEntry) ? threadEntry->getElementsCapacity() : 0;
}

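// Assign an element id to `ent` under the meta lock, preferring a previously
// freed id over a brand new one, and make sure head_ has room for that slot.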
uint32_t StaticMetaBase::allocate(EntryID* ent) {
  uint32_t id;
  auto& meta = *this;
  std::lock_guard<std::mutex> g(meta.lock_);

  id = ent->value.load();
  if (id != kEntryIDInvalid) {
    return id;
  }

  if (!meta.freeIds_.empty()) {
    id = meta.freeIds_.back();
    meta.freeIds_.pop_back();
  } else {
    id = meta.nextId_++;
  }

  uint32_t old_id = ent->value.exchange(id);
  DCHECK_EQ(old_id, kEntryIDInvalid);

  reserveHeadUnlocked(id);

  return id;
}

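// Called when a ThreadLocal<> is destroyed: retire its element id, unlink
// every thread's node for that id, steal the per-thread elements while the
// locks are held, and dispose of them afterwards so user-supplied deleters
// never run under the locks.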
void StaticMetaBase::destroy(EntryID* ent) {
  try {
    auto& meta = *this;

    // Elements in other threads that use this id.
    std::vector<ElementWrapper> elements;

    {
      SharedMutex::WriteHolder wlock(nullptr);
      if (meta.strict_) {
        /*
         * In strict mode, the logic guarantees per-thread instances are
         * destroyed by the moment ThreadLocal<> dtor returns.
         * In order to achieve that, we should wait until concurrent
         * onThreadExit() calls (that might acquire ownership over per-thread
         * instances in order to destroy them) are finished.
         */
        wlock = SharedMutex::WriteHolder(meta.accessAllThreadsLock_);
      }

      {
        std::lock_guard<std::mutex> g(meta.lock_);
        uint32_t id = ent->value.exchange(kEntryIDInvalid);
        if (id == kEntryIDInvalid) {
          return;
        }

        auto& node = meta.head_.elements[id].node;
        while (!node.empty()) {
          auto* next = node.getNext();
          next->eraseZero();

          ThreadEntry* e = next->parent;
          auto elementsCapacity = e->getElementsCapacity();
          if (id < elementsCapacity && e->elements[id].ptr) {
            elements.push_back(e->elements[id]);

            /*
             * Writing another thread's ThreadEntry from here is fine;
             * the only other potential reader is the owning thread --
             * from onThreadExit (which grabs the lock, so is properly
             * synchronized with us) or from get(), which also grabs
             * the lock if it needs to resize the elements vector.
             *
             * We can't conflict with reads for a get(id), because
             * it's illegal to call get on a thread local that's
             * destructing.
             */
            e->elements[id].ptr = nullptr;
            e->elements[id].deleter1 = nullptr;
            e->elements[id].ownsDeleter = false;
          }
        }
        meta.freeIds_.push_back(id);
      }
    }
    // Delete elements outside the locks.
    for (ElementWrapper& elem : elements) {
      if (elem.dispose(TLPDestructionMode::ALL_THREADS)) {
        elem.cleanup();
      }
    }
  } catch (...) { // Just in case we get a lock error or something anyway...
    LOG(WARNING) << "Destructor discarding an exception that was thrown.";
  }
}

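// Grow a ThreadEntry's elements array so that slot `idval` fits. Returns a
// freshly allocated, zeroed array the caller must install (and later free), or
// nullptr if jemalloc managed to expand the existing allocation in place;
// newCapacity is set to the capacity actually obtained.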
ElementWrapper* StaticMetaBase::reallocate(
    ThreadEntry* threadEntry,
    uint32_t idval,
    size_t& newCapacity) {
  size_t prevCapacity = threadEntry->getElementsCapacity();

  // Growth factor < 2, see folly/docs/FBVector.md; + 5 to prevent
  // very slow start.
  auto smallCapacity = static_cast<size_t>((idval + 5) * kSmallGrowthFactor);
  auto bigCapacity = static_cast<size_t>((idval + 5) * kBigGrowthFactor);

  newCapacity =
      (threadEntry->meta &&
       (bigCapacity <= threadEntry->meta->head_.getElementsCapacity()))
      ? bigCapacity
      : smallCapacity;
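  // For example, with idval == 0 the small path requests about
  // (0 + 5) * 1.1 ~ 5 slots and the big path about (0 + 5) * 1.7 ~ 8; the
  // bigger growth factor is only used when the shared head_ array is already
  // at least that large.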

  assert(newCapacity > prevCapacity);
  ElementWrapper* reallocated = nullptr;

  // Need to grow. Note that we can't call realloc, as elements is
  // still linked in meta, so another thread might access invalid memory
  // after realloc succeeds. We'll copy by hand and update our ThreadEntry
  // under the lock.
  if (usingJEMalloc()) {
    bool success = false;
    size_t newByteSize = nallocx(newCapacity * sizeof(ElementWrapper), 0);

    // Try to grow in place.
    //
    // Note that xallocx(MALLOCX_ZERO) will only zero newly allocated memory,
    // even if a previous allocation allocated more than we requested.
    // This is fine; we always use MALLOCX_ZERO with jemalloc and we
    // always expand our allocation to the real size.
    if (prevCapacity * sizeof(ElementWrapper) >= jemallocMinInPlaceExpandable) {
      success =
          (xallocx(threadEntry->elements, newByteSize, 0, MALLOCX_ZERO) ==
           newByteSize);
    }

    // In-place growth failed.
    if (!success) {
      success =
          ((reallocated = static_cast<ElementWrapper*>(
                mallocx(newByteSize, MALLOCX_ZERO))) != nullptr);
    }

    if (success) {
      // Expand to real size
      assert(newByteSize / sizeof(ElementWrapper) >= newCapacity);
      newCapacity = newByteSize / sizeof(ElementWrapper);
    } else {
      throw std::bad_alloc();
    }
  } else { // no jemalloc
    // calloc() is simpler than malloc() followed by memset(), and
    // potentially faster when dealing with a lot of memory, as it can get
    // already-zeroed pages from the kernel.
    reallocated = static_cast<ElementWrapper*>(
        calloc(newCapacity, sizeof(ElementWrapper)));
    if (!reallocated) {
      throw std::bad_alloc();
    }
  }

  return reallocated;
}

/**
 * Reserve enough space in the ThreadEntry::elements for the item
 * @id to fit in.
 */
void StaticMetaBase::reserve(EntryID* id) {
  auto& meta = *this;
  ThreadEntry* threadEntry = (*threadEntry_)();
  size_t prevCapacity = threadEntry->getElementsCapacity();

  uint32_t idval = id->getOrAllocate(meta);
  if (prevCapacity > idval) {
    return;
  }

  size_t newCapacity;
  ElementWrapper* reallocated = reallocate(threadEntry, idval, newCapacity);

  // Success, update the entry
  {
    std::lock_guard<std::mutex> g(meta.lock_);

    if (prevCapacity == 0) {
      meta.push_back(threadEntry);
    }

    if (reallocated) {
      /*
       * Note: we need to hold the meta lock when copying data out of
       * the old vector, because some other thread might be
       * destructing a ThreadLocal and writing to the elements vector
       * of this thread.
       */
      if (prevCapacity != 0) {
        memcpy(
            reallocated,
            threadEntry->elements,
            sizeof(*reallocated) * prevCapacity);
      }
      std::swap(reallocated, threadEntry->elements);
    }

    for (size_t i = prevCapacity; i < newCapacity; i++) {
      threadEntry->elements[i].node.initZero(threadEntry, i);
    }

    threadEntry->setElementsCapacity(newCapacity);
  }

  free(reallocated);
}

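// Same growth logic applied to the meta's own head_ entry. As the name
// suggests, this takes no lock itself; callers such as allocate() already hold
// the meta lock when invoking it.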
void StaticMetaBase::reserveHeadUnlocked(uint32_t id) {
  if (head_.getElementsCapacity() <= id) {
    size_t prevCapacity = head_.getElementsCapacity();
    size_t newCapacity;
    ElementWrapper* reallocated = reallocate(&head_, id, newCapacity);

    if (reallocated) {
      if (prevCapacity != 0) {
        memcpy(
            reallocated, head_.elements, sizeof(*reallocated) * prevCapacity);
      }
      std::swap(reallocated, head_.elements);
    }

    for (size_t i = prevCapacity; i < newCapacity; i++) {
      head_.elements[i].node.init(&head_, i);
    }

    head_.setElementsCapacity(newCapacity);
    free(reallocated);
  }
}

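// Append a ThreadEntry's node for element `id` to the per-id list rooted at
// head_. The two variants differ only in whether the meta lock is acquired
// here or is expected to be managed by the caller.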
void StaticMetaBase::pushBackLocked(ThreadEntry* t, uint32_t id) {
  if (LIKELY(!t->removed_)) {
    std::lock_guard<std::mutex> g(lock_);
    auto* node = &t->elements[id].node;
    node->push_back(&head_);
  }
}

void StaticMetaBase::pushBackUnlocked(ThreadEntry* t, uint32_t id) {
  if (LIKELY(!t->removed_)) {
    auto* node = &t->elements[id].node;
    node->push_back(&head_);
  }
}

FOLLY_STATIC_CTOR_PRIORITY_MAX
PthreadKeyUnregister PthreadKeyUnregister::instance_;
} // namespace threadlocal_detail
} // namespace folly