proxygen
BitVectorCoding.h
Go to the documentation of this file.
1 /*
2  * Copyright 2015-present Facebook, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
#include <cstdlib>
#include <cstring>
#include <limits>
#include <type_traits>

#include <folly/Likely.h>
#include <folly/Portability.h>
#include <folly/Range.h>
#include <folly/compression/CodingDetail.h>
#include <folly/compression/Instructions.h>
#include <folly/compression/Select64.h>
#include <folly/lang/Bits.h>
#include <glog/logging.h>
32 
33 #if !FOLLY_X64
34 #error BitVectorCoding.h requires x86_64
35 #endif
36 
37 namespace folly {
38 namespace compression {
39 
40 static_assert(kIsLittleEndian, "BitVectorCoding.h requires little endianness");
41 
42 template <class Pointer>
44  BitVectorCompressedListBase() = default;
45 
46  template <class OtherPointer>
49  : size(other.size),
50  upperBound(other.upperBound),
51  data(other.data),
52  bits(reinterpret_cast<Pointer>(other.bits)),
53  skipPointers(reinterpret_cast<Pointer>(other.skipPointers)),
54  forwardPointers(reinterpret_cast<Pointer>(other.forwardPointers)) {}
55 
56  template <class T = Pointer>
57  auto free() -> decltype(::free(T(nullptr))) {
59  }
60 
61  size_t size = 0;
62  size_t upperBound = 0;
63 
65 
66  Pointer bits = nullptr;
67  Pointer skipPointers = nullptr;
68  Pointer forwardPointers = nullptr;
69 };
70 
73 
74 template <
75  class Value,
76  class SkipValue,
77  size_t kSkipQuantum = 0,
78  size_t kForwardQuantum = 0>
80  static_assert(
82  "Value should be unsigned integral");
83 
84  typedef BitVectorCompressedList CompressedList;
85  typedef MutableBitVectorCompressedList MutableCompressedList;
86 
87  typedef Value ValueType;
88  typedef SkipValue SkipValueType;
89  struct Layout;
90 
91  static constexpr size_t skipQuantum = kSkipQuantum;
92  static constexpr size_t forwardQuantum = kForwardQuantum;
93 
94  template <class RandomAccessIterator>
95  static MutableCompressedList encode(
96  RandomAccessIterator begin,
97  RandomAccessIterator end) {
98  if (begin == end) {
99  return MutableCompressedList();
100  }
101  BitVectorEncoder encoder(size_t(end - begin), *(end - 1));
102  for (; begin != end; ++begin) {
103  encoder.add(*begin);
104  }
105  return encoder.finish();
106  }
107 
108  explicit BitVectorEncoder(const MutableCompressedList& result)
109  : bits_(result.bits),
110  skipPointers_(result.skipPointers),
111  forwardPointers_(result.forwardPointers),
112  result_(result) {
113  memset(result.data.data(), 0, result.data.size());
114  }
115 
116  BitVectorEncoder(size_t size, ValueType upperBound)
118  Layout::fromUpperBoundAndSize(upperBound, size).allocList()) {}
119 
120  void add(ValueType value) {
121  CHECK_LT(value, std::numeric_limits<ValueType>::max());
122  // Also works when lastValue_ == -1.
123  CHECK_GT(value + 1, lastValue_ + 1)
124  << "BitVectorCoding only supports stricly monotone lists";
125 
126  auto block = bits_ + (value / 64) * sizeof(uint64_t);
127  size_t inner = value % 64;
129  reinterpret_cast<folly::Unaligned<uint64_t>*>(block), inner);
130 
131  if (skipQuantum != 0) {
132  size_t nextSkipPointerSize = value / skipQuantum;
133  while (skipPointersSize_ < nextSkipPointerSize) {
134  auto pos = skipPointersSize_++;
135  folly::storeUnaligned<SkipValueType>(
136  skipPointers_ + pos * sizeof(SkipValueType), size_);
137  }
138  }
139 
140  if (forwardQuantum != 0) {
141  if (size_ != 0 && (size_ % forwardQuantum == 0)) {
142  const auto pos = size_ / forwardQuantum - 1;
143  folly::storeUnaligned<SkipValueType>(
144  forwardPointers_ + pos * sizeof(SkipValueType), value);
145  }
146  }
147 
148  lastValue_ = value;
149  ++size_;
150  }
151 
152  const MutableCompressedList& finish() const {
153  CHECK_EQ(size_, result_.size);
154  // TODO(ott): Relax this assumption.
155  CHECK_EQ(result_.upperBound, lastValue_);
156  return result_;
157  }
158 
159  private:
160  uint8_t* const bits_ = nullptr;
161  uint8_t* const skipPointers_ = nullptr;
162  uint8_t* const forwardPointers_ = nullptr;
163 
164  ValueType lastValue_ = -1;
165  size_t size_ = 0;
166  size_t skipPointersSize_ = 0;
167 
168  MutableCompressedList result_;
169 };
170 
171 template <
172  class Value,
173  class SkipValue,
174  size_t kSkipQuantum,
175  size_t kForwardQuantum>
176 struct BitVectorEncoder<Value, SkipValue, kSkipQuantum, kForwardQuantum>::
177  Layout {
178  static Layout fromUpperBoundAndSize(size_t upperBound, size_t size) {
179  Layout layout;
180  layout.size = size;
181  layout.upperBound = upperBound;
182 
183  size_t bitVectorSizeInBytes = (upperBound / 8) + 1;
184  layout.bits = bitVectorSizeInBytes;
185 
186  if (skipQuantum != 0) {
187  size_t numSkipPointers = upperBound / skipQuantum;
188  layout.skipPointers = numSkipPointers * sizeof(SkipValueType);
189  }
190  if (forwardQuantum != 0) {
191  size_t numForwardPointers = size / forwardQuantum;
192  layout.forwardPointers = numForwardPointers * sizeof(SkipValueType);
193  }
194 
195  CHECK_LT(size, std::numeric_limits<SkipValueType>::max());
196 
197  return layout;
198  }
199 
200  size_t bytes() const {
201  return bits + skipPointers + forwardPointers;
202  }
203 
204  template <class Range>
206  Range& buf) const {
208  result.size = size;
209  result.upperBound = upperBound;
210  result.data = buf.subpiece(0, bytes());
211  auto advance = [&](size_t n) {
212  auto begin = buf.data();
213  buf.advance(n);
214  return begin;
215  };
216 
217  result.bits = advance(bits);
220  CHECK_EQ(buf.data() - result.data.data(), bytes());
221 
222  return result;
223  }
224 
226  uint8_t* buf = nullptr;
227  if (size > 0) {
228  buf = static_cast<uint8_t*>(malloc(bytes() + 7));
229  }
230  folly::MutableByteRange bufRange(buf, bytes());
231  return openList(bufRange);
232  }
233 
234  size_t size = 0;
235  size_t upperBound = 0;
236 
237  // Sizes in bytes.
238  size_t bits = 0;
239  size_t skipPointers = 0;
240  size_t forwardPointers = 0;
241 };
242 
243 template <
244  class Encoder,
245  class Instructions = instructions::Default,
246  bool kUnchecked = false>
247 class BitVectorReader : detail::ForwardPointers<Encoder::forwardQuantum>,
248  detail::SkipPointers<Encoder::skipQuantum> {
249  public:
251  typedef typename Encoder::ValueType ValueType;
252  // A bitvector can only be as large as its largest value.
253  typedef typename Encoder::ValueType SizeType;
254  typedef typename Encoder::SkipValueType SkipValueType;
255 
256  explicit BitVectorReader(const typename Encoder::CompressedList& list)
257  : detail::ForwardPointers<Encoder::forwardQuantum>(list.forwardPointers),
258  detail::SkipPointers<Encoder::skipQuantum>(list.skipPointers),
259  bits_(list.bits),
260  size_(list.size),
261  upperBound_(
262  (kUnchecked || UNLIKELY(list.size == 0)) ? 0 : list.upperBound) {
263  reset();
264  }
265 
266  void reset() {
267  block_ = (bits_ != nullptr) ? folly::loadUnaligned<uint64_t>(bits_) : 0;
268  outer_ = 0;
269  position_ = -1;
270  value_ = kInvalidValue;
271  }
272 
273  bool next() {
274  if (!kUnchecked && UNLIKELY(position() + 1 >= size_)) {
275  return setDone();
276  }
277 
278  while (block_ == 0) {
279  outer_ += sizeof(uint64_t);
280  block_ = folly::loadUnaligned<uint64_t>(bits_ + outer_);
281  }
282 
283  ++position_;
284  auto inner = Instructions::ctz(block_);
285  block_ = Instructions::blsr(block_);
286 
287  return setValue(inner);
288  }
289 
290  bool skip(SizeType n) {
291  CHECK_GT(n, 0);
292 
293  if (!kUnchecked && position() + n >= size_) {
294  return setDone();
295  }
296  // Small skip optimization.
297  if (LIKELY(n < kLinearScanThreshold)) {
298  for (size_t i = 0; i < n; ++i) {
299  next();
300  }
301  return true;
302  }
303 
304  position_ += n;
305 
306  // Use forward pointer.
307  if (Encoder::forwardQuantum > 0 && n > Encoder::forwardQuantum) {
308  const size_t steps = position_ / Encoder::forwardQuantum;
309  const size_t dest = folly::loadUnaligned<SkipValueType>(
310  this->forwardPointers_ + (steps - 1) * sizeof(SkipValueType));
311 
312  reposition(dest);
313  n = position_ + 1 - steps * Encoder::forwardQuantum;
314  }
315 
316  size_t cnt;
317  // Find necessary block.
318  while ((cnt = Instructions::popcount(block_)) < n) {
319  n -= cnt;
320  outer_ += sizeof(uint64_t);
321  block_ = folly::loadUnaligned<uint64_t>(bits_ + outer_);
322  }
323 
324  // Skip to the n-th one in the block.
325  DCHECK_GT(n, 0);
326  auto inner = select64<Instructions>(block_, n - 1);
327  block_ &= (uint64_t(-1) << inner) << 1;
328 
329  return setValue(inner);
330  }
331 
332  bool skipTo(ValueType v) {
333  // Also works when value_ == kInvalidValue.
334  if (v != kInvalidValue) {
335  DCHECK_GE(v + 1, value_ + 1);
336  }
337 
338  if (!kUnchecked && v > upperBound_) {
339  return setDone();
340  } else if (v == value_) {
341  return true;
342  }
343 
344  // Small skip optimization.
345  if (v - value_ < kLinearScanThreshold) {
346  do {
347  next();
348  } while (value() < v);
349 
350  return true;
351  }
352 
353  if (Encoder::skipQuantum > 0 && v - value_ > Encoder::skipQuantum) {
354  size_t q = v / Encoder::skipQuantum;
355  auto skipPointer = folly::loadUnaligned<SkipValueType>(
356  this->skipPointers_ + (q - 1) * sizeof(SkipValueType));
357  position_ = static_cast<SizeType>(skipPointer) - 1;
358 
359  reposition(q * Encoder::skipQuantum);
360  }
361 
362  // Find the value.
363  size_t outer = v / 64 * 8;
364 
365  while (outer_ < outer) {
366  position_ += Instructions::popcount(block_);
367  outer_ += sizeof(uint64_t);
368  block_ = folly::loadUnaligned<uint64_t>(bits_ + outer_);
369  }
370 
371  DCHECK_EQ(outer_, outer);
372  uint64_t mask = ~((uint64_t(1) << (v % 64)) - 1);
373  position_ += Instructions::popcount(block_ & ~mask) + 1;
374  block_ &= mask;
375 
376  while (block_ == 0) {
377  outer_ += sizeof(uint64_t);
378  block_ = folly::loadUnaligned<uint64_t>(bits_ + outer_);
379  }
380 
381  auto inner = Instructions::ctz(block_);
382  block_ = Instructions::blsr(block_);
383 
384  setValue(inner);
385  return true;
386  }
387 
388  SizeType size() const {
389  return size_;
390  }
391 
392  bool valid() const {
393  return position() < size(); // Also checks that position() != -1.
394  }
395 
396  SizeType position() const {
397  return position_;
398  }
399  ValueType value() const {
400  DCHECK(valid());
401  return value_;
402  }
403 
404  bool jump(SizeType n) {
405  reset();
406  return skip(n + 1);
407  }
408 
409  bool jumpTo(ValueType v) {
410  reset();
411  return skipTo(v);
412  }
413 
414  bool setDone() {
415  value_ = kInvalidValue;
416  position_ = size_;
417  return false;
418  }
419 
420  private:
421  constexpr static ValueType kInvalidValue =
422  std::numeric_limits<ValueType>::max(); // Must hold kInvalidValue + 1 ==
423  // 0.
424 
425  bool setValue(size_t inner) {
426  value_ = static_cast<ValueType>(8 * outer_ + inner);
427  return true;
428  }
429 
430  void reposition(size_t dest) {
431  outer_ = dest / 64 * 8;
432  // We maintain the invariant that outer_ is divisible by 8.
433  block_ = folly::loadUnaligned<uint64_t>(bits_ + outer_);
434  block_ &= ~((uint64_t(1) << (dest % 64)) - 1);
435  }
436 
437  constexpr static size_t kLinearScanThreshold = 4;
438 
439  const uint8_t* const bits_;
441  SizeType outer_;
442  SizeType position_;
443  ValueType value_;
444 
445  const SizeType size_;
446  const ValueType upperBound_;
447 };
448 
449 } // namespace compression
450 } // namespace folly
constexpr unsigned int popcount(T const v)
Definition: Bits.h:130
MutableBitVectorCompressedList MutableCompressedList
BitVectorCompressedListBase(const BitVectorCompressedListBase< OtherPointer > &other)
BitVectorEncoder< uint32_t, uint32_t, 128, 128 > Encoder
LogLevel max
Definition: LogLevel.cpp:31
const MutableCompressedList & finish() const
Encoder::SkipValueType SkipValueType
BitVectorCompressedListBase< const uint8_t * > BitVectorCompressedList
#define LIKELY(x)
Definition: Likely.h:47
void advance(size_type n)
Definition: Range.h:672
MutableCompressedList allocList() const
dest
Definition: upload.py:394
BitVectorCompressedListBase< uint8_t * > MutableBitVectorCompressedList
constexpr size_type size() const
Definition: Range.h:431
auto begin(TestAdlIterable &instance)
Definition: ForeachTest.cpp:56
folly::std T
constexpr auto kIsLittleEndian
Definition: Portability.h:278
—— Concurrent Priority Queue Implementation ——
Definition: AtomicBitSet.h:29
detail::Skip skip(size_t count)
Definition: Base-inl.h:2598
BitVectorEncoder(const MutableCompressedList &result)
BitVectorEncoder(size_t size, ValueType upperBound)
static MutableCompressedList encode(RandomAccessIterator begin, RandomAccessIterator end)
bool Value(const T &value, M matcher)
auto end(TestAdlIterable &instance)
Definition: ForeachTest.cpp:62
constexpr Iter data() const
Definition: Range.h:446
Range subpiece(size_type first, size_type length=npos) const
Definition: Range.h:686
Encoder::MutableCompressedList list
BitVectorCompressedList CompressedList
BitVectorReader(const typename Encoder::CompressedList &list)
static const char *const value
Definition: Conv.cpp:50
void free()
static Layout fromUpperBoundAndSize(size_t upperBound, size_t size)
auto free() -> decltype(::free(T(nullptr)))
uint64_t value(const typename LockFreeRingBuffer< T, Atom >::Cursor &rbcursor)
BitVectorCompressedListBase< typename Range::iterator > openList(Range &buf) const
#define UNLIKELY(x)
Definition: Likely.h:48
def next(obj)
Definition: ast.py:58