MemoryMapping.cpp
/*
 * Copyright 2013-present Facebook, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <folly/system/MemoryMapping.h>

#include <algorithm>
#include <functional>
#include <utility>

#include <folly/Format.h>
#include <folly/portability/GFlags.h>
#include <folly/portability/SysMman.h>

#ifdef __linux__
#include <folly/experimental/io/HugePages.h>
#endif

#include <fcntl.h>
#include <sys/types.h>
#include <system_error>

static constexpr ssize_t kDefaultMlockChunkSize =
#ifndef _MSC_VER
    // Linux implementations of unmap/mlock/munlock take a kernel
    // semaphore and block other threads from doing other memory
    // operations. Split the operations in chunks.
    (1 << 20) // 1MB
#else // _MSC_VER
    // MSVC doesn't have this problem, and calling munmap many times
    // with the same address is a bad idea with the windows implementation.
    (-1)
#endif // _MSC_VER
    ;

DEFINE_int64(
    mlock_chunk_size,
    kDefaultMlockChunkSize,
    "Maximum bytes to mlock/munlock/munmap at once "
    "(will be rounded up to PAGESIZE). Ignored if negative.");

#ifndef MAP_POPULATE
#define MAP_POPULATE 0
#endif

namespace folly {

MemoryMapping::MemoryMapping(MemoryMapping&& other) noexcept {
  swap(other);
}

MemoryMapping::MemoryMapping(
    File file,
    off_t offset,
    off_t length,
    Options options)
    : file_(std::move(file)), options_(std::move(options)) {
  CHECK(file_);
  init(offset, length);
}

MemoryMapping::MemoryMapping(
    const char* name,
    off_t offset,
    off_t length,
    Options options)
    : MemoryMapping(
          File(name, options.writable ? O_RDWR : O_RDONLY),
          offset,
          length,
          options) {}

MemoryMapping::MemoryMapping(
    int fd,
    off_t offset,
    off_t length,
    Options options)
    : MemoryMapping(File(fd), offset, length, options) {}

MemoryMapping::MemoryMapping(AnonymousType, off_t length, Options options)
    : options_(std::move(options)) {
  init(0, length);
}
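
// Example usage (illustrative sketch, not part of the original file; assumes
// the default Options and the kAnonymous tag declared in MemoryMapping.h):
//
//   folly::MemoryMapping ro("/tmp/data.bin");  // whole file, read-only
//   folly::MemoryMapping rw("/tmp/data.bin", 0, -1,
//                           folly::MemoryMapping::writable());
//   folly::MemoryMapping anon(folly::MemoryMapping::kAnonymous, 1 << 20);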

namespace {

#ifdef __linux__
// Use the huge-page size (and hugetlbfs auto-extend semantics) of the
// backing device when the caller didn't request an explicit page size.
void getDeviceOptions(dev_t device, off_t& pageSize, bool& autoExtend) {
  auto ps = getHugePageSizeForDevice(device);
  if (ps) {
    pageSize = ps->size;
    autoExtend = true;
  }
}
#else
inline void getDeviceOptions(dev_t, off_t&, bool&) {}
#endif

} // namespace

void MemoryMapping::init(off_t offset, off_t length) {
  const bool grow = options_.grow;
  const bool anon = !file_;
  CHECK(!(grow && anon));

  off_t& pageSize = options_.pageSize;

  struct stat st;

  // On Linux, hugetlbfs file systems don't require ftruncate() to grow the
  // file, and (on kernels before 2.6.24) don't even allow it. Also, the file
  // size is always a multiple of the page size.
  bool autoExtend = false;

  if (!anon) {
    // Stat the file
    CHECK_ERR(fstat(file_.fd(), &st));

    if (pageSize == 0) {
      getDeviceOptions(st.st_dev, pageSize, autoExtend);
    }
  } else {
    DCHECK(!file_);
    DCHECK_EQ(offset, 0);
    CHECK_EQ(pageSize, 0);
    CHECK_GE(length, 0);
  }

  if (pageSize == 0) {
    pageSize = off_t(sysconf(_SC_PAGESIZE));
  }

  CHECK_GT(pageSize, 0);
  CHECK_EQ(pageSize & (pageSize - 1), 0); // power of two
  CHECK_GE(offset, 0);

  // Round down the start of the mapped region
  off_t skipStart = offset % pageSize;
  offset -= skipStart;

  mapLength_ = length;
  if (mapLength_ != -1) {
    mapLength_ += skipStart;

    // Round up the end of the mapped region
    mapLength_ = (mapLength_ + pageSize - 1) / pageSize * pageSize;
  }
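
  // Worked example of the rounding above (illustrative): with pageSize = 4096,
  // offset = 5000 and length = 10000, skipStart = 5000 % 4096 = 904, so the
  // mapping starts at file offset 4096; mapLength_ = 10000 + 904 = 10904,
  // rounded up to three pages = 12288. data_ is later set 904 bytes into the
  // mapping, so the caller still sees exactly bytes [5000, 15000) of the file.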

  off_t remaining = anon ? length : st.st_size - offset;

  if (mapLength_ == -1) {
    length = mapLength_ = remaining;
  } else {
    if (length > remaining) {
      if (grow) {
        if (!autoExtend) {
          PCHECK(0 == ftruncate(file_.fd(), offset + length))
              << "ftruncate() failed, couldn't grow file to "
              << offset + length;
          remaining = length;
        } else {
          // Extend mapping to multiple of page size, don't use ftruncate
          remaining = mapLength_;
        }
      } else {
        length = remaining;
      }
    }
    if (mapLength_ > remaining) {
      mapLength_ = remaining;
    }
  }

  if (length == 0) {
    mapLength_ = 0;
    mapStart_ = nullptr;
  } else {
    int flags = options_.shared ? MAP_SHARED : MAP_PRIVATE;
    if (anon) {
      flags |= MAP_ANONYMOUS;
    }
    if (options_.prefault) {
      flags |= MAP_POPULATE;
    }

    // The standard doesn't actually require PROT_NONE to be zero...
    int prot = PROT_NONE;
    if (options_.readable || options_.writable) {
      prot =
          ((options_.readable ? PROT_READ : 0) |
           (options_.writable ? PROT_WRITE : 0));
    }

    unsigned char* start = static_cast<unsigned char*>(mmap(
        options_.address, size_t(mapLength_), prot, flags, file_.fd(), offset));
    PCHECK(start != MAP_FAILED)
        << " offset=" << offset << " length=" << mapLength_;
    mapStart_ = start;
    data_.reset(start + skipStart, size_t(length));
  }
}
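
// Example (illustrative): with a writable, growable mapping (e.g. the
// MemoryMapping::writable() options), mapping past EOF makes init() above
// ftruncate() the file out to offset + length first:
//
//   folly::MemoryMapping m(
//       folly::File("/tmp/grow.bin", O_RDWR | O_CREAT, 0644),
//       0, 1 << 20, folly::MemoryMapping::writable());
//   // grow.bin is now at least 1 MiB and m.writableRange() spans it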

namespace {

off_t memOpChunkSize(off_t length, off_t pageSize) {
  off_t chunkSize = length;
  if (FLAGS_mlock_chunk_size <= 0) {
    return chunkSize;
  }

  chunkSize = off_t(FLAGS_mlock_chunk_size);
  off_t r = chunkSize % pageSize;
  if (r) {
    chunkSize += (pageSize - r);
  }
  return chunkSize;
}
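
// Worked example (illustrative): with FLAGS_mlock_chunk_size = 1 MB and a
// 2 MB huge page, r = (1 << 20) % (2 << 20) = 1 MB, so chunkSize is rounded
// up to 2 MB; with 4 KB pages, 1 MB is already a page multiple and is
// returned unchanged.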

/**
 * Run a memory operation (mlock/munlock/munmap) over [mem, mem + bufSize)
 * in page-aligned chunks rather than in one syscall, so a huge buffer
 * doesn't hold the kernel's locks for seconds at a time. Returns true on
 * success; on failure, amountSucceeded reports how many bytes were
 * processed before the failing chunk.
 */
bool memOpInChunks(
    std::function<int(void*, size_t)> op,
    void* mem,
    size_t bufSize,
    off_t pageSize,
    size_t& amountSucceeded) {
  // Linux' unmap/mlock/munlock take a kernel semaphore and block other threads
  // from doing other memory operations. If the size of the buffer is big the
  // semaphore can be down for seconds (for benchmarks see
  // http://kostja-osipov.livejournal.com/42963.html). Doing the operations in
  // chunks breaks the locking into intervals and lets other threads do memory
  // operations of their own.

  size_t chunkSize = size_t(memOpChunkSize(off_t(bufSize), pageSize));

  char* addr = static_cast<char*>(mem);
  amountSucceeded = 0;

  while (amountSucceeded < bufSize) {
    size_t size = std::min(chunkSize, bufSize - amountSucceeded);
    if (op(addr + amountSucceeded, size) != 0) {
      return false;
    }
    amountSucceeded += size;
  }

  return true;
}
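
// For instance (illustrative): the callers below pass ::mlock, ::munlock, or
// ::munmap as `op`, e.g. memOpInChunks(::mlock, p, n, pageSize, done), so one
// oversized operation becomes a sequence of bounded, page-aligned syscalls.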

} // namespace

bool MemoryMapping::mlock(LockMode lock) {
  size_t amountSucceeded = 0;
  locked_ = memOpInChunks(
      ::mlock,
      mapStart_,
      size_t(mapLength_),
      options_.pageSize,
      amountSucceeded);
  if (locked_) {
    return true;
  }

  auto msg =
      folly::format("mlock({}) failed at {}", mapLength_, amountSucceeded);
  if (lock == LockMode::TRY_LOCK && errno == EPERM) {
    PLOG(WARNING) << msg;
  } else if (lock == LockMode::TRY_LOCK && errno == ENOMEM) {
    VLOG(1) << msg;
  } else {
    PLOG(FATAL) << msg;
  }

  // Only part of the buffer was mlocked; unlock what did succeed.
  if (!memOpInChunks(
          ::munlock,
          mapStart_,
          amountSucceeded,
          options_.pageSize,
          amountSucceeded)) {
    PLOG(WARNING) << "munlock()";
  }

  return false;
}
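
// Example (illustrative): callers that can tolerate failure use TRY_LOCK and
// check the result; MUST_LOCK turns a failed mlock into PLOG(FATAL):
//
//   folly::MemoryMapping m("/tmp/data.bin");
//   if (!m.mlock(folly::MemoryMapping::LockMode::TRY_LOCK)) {
//     // proceed unlocked, e.g. under RLIMIT_MEMLOCK (EPERM) or ENOMEM
//   }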

void MemoryMapping::munlock(bool dontneed) {
  if (!locked_) {
    return;
  }

  size_t amountSucceeded = 0;
  if (!memOpInChunks(
          ::munlock,
          mapStart_,
          size_t(mapLength_),
          options_.pageSize,
          amountSucceeded)) {
    PLOG(WARNING) << "munlock()";
  }
  if (mapLength_ && dontneed &&
      ::madvise(mapStart_, size_t(mapLength_), MADV_DONTNEED)) {
    PLOG(WARNING) << "madvise()";
  }
  locked_ = false;
}

void MemoryMapping::hintLinearScan() {
  advise(MADV_SEQUENTIAL);
}

MemoryMapping::~MemoryMapping() {
  if (mapLength_) {
    size_t amountSucceeded = 0;
    if (!memOpInChunks(
            ::munmap,
            mapStart_,
            size_t(mapLength_),
            options_.pageSize,
            amountSucceeded)) {
      PLOG(FATAL) << folly::format(
          "munmap({}) failed at {}", mapLength_, amountSucceeded);
    }
  }
}

void MemoryMapping::advise(int advice) const {
  advise(advice, 0, size_t(mapLength_));
}

void MemoryMapping::advise(int advice, size_t offset, size_t length) const {
  CHECK_LE(offset + length, size_t(mapLength_))
      << " offset: " << offset << " length: " << length
      << " mapLength_: " << mapLength_;

  // Include the entire start page: round down to page boundary.
  const auto offMisalign = offset % options_.pageSize;
  offset -= offMisalign;
  length += offMisalign;

  // Round the last page down to page boundary.
  if (offset + length != size_t(mapLength_)) {
    length -= length % options_.pageSize;
  }

  if (length == 0) {
    return;
  }

  char* mapStart = static_cast<char*>(mapStart_) + offset;
  PLOG_IF(WARNING, ::madvise(mapStart, length, advice)) << "madvise";
}
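
// Example (illustrative): advising a subrange touches whole pages only; the
// start is rounded down so its entire page is included, and a partial final
// page is dropped unless the range reaches the end of the mapping:
//
//   m.advise(MADV_DONTNEED, 100, 1 << 20);  // m is a MemoryMapping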

MemoryMapping& MemoryMapping::operator=(MemoryMapping other) {
  swap(other);
  return *this;
}

void MemoryMapping::swap(MemoryMapping& other) noexcept {
  using std::swap;
  swap(this->file_, other.file_);
  swap(this->mapStart_, other.mapStart_);
  swap(this->mapLength_, other.mapLength_);
  swap(this->options_, other.options_);
  swap(this->locked_, other.locked_);
  swap(this->data_, other.data_);
}

void swap(MemoryMapping& a, MemoryMapping& b) noexcept {
  a.swap(b);
}

void alignedForwardMemcpy(void* dst, const void* src, size_t size) {
  assert(reinterpret_cast<uintptr_t>(src) % alignof(unsigned long) == 0);
  assert(reinterpret_cast<uintptr_t>(dst) % alignof(unsigned long) == 0);

  // Copy word-at-a-time while at least one full word remains...
  auto srcl = static_cast<const unsigned long*>(src);
  auto dstl = static_cast<unsigned long*>(dst);

  while (size >= sizeof(unsigned long)) {
    *dstl++ = *srcl++;
    size -= sizeof(unsigned long);
  }

  // ...then finish the tail byte-by-byte.
  auto srcc = reinterpret_cast<const unsigned char*>(srcl);
  auto dstc = reinterpret_cast<unsigned char*>(dstl);

  while (size != 0) {
    *dstc++ = *srcc++;
    --size;
  }
}

void mmapFileCopy(const char* src, const char* dest, mode_t mode) {
  MemoryMapping srcMap(src);
  srcMap.hintLinearScan();

  MemoryMapping destMap(
      File(dest, O_RDWR | O_CREAT | O_TRUNC, mode),
      0,
      off_t(srcMap.range().size()),
      MemoryMapping::writable());

  alignedForwardMemcpy(
      destMap.writableRange().data(),
      srcMap.range().data(),
      srcMap.range().size());
}
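
// Example usage (illustrative):
//
//   folly::mmapFileCopy("/tmp/src.bin", "/tmp/dst.bin", 0644);
//
// This maps the source read-only with a sequential-scan hint, creates and
// grows the destination to the same size, and copies word-at-a-time via
// alignedForwardMemcpy.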

} // namespace folly