proxygen
GroupVarint.h
Go to the documentation of this file.
1 /*
2  * Copyright 2012-present Facebook, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include <cstdint>
20 #include <limits>
21 
22 #include <glog/logging.h>
23 
24 #if !defined(__GNUC__) && !defined(_MSC_VER)
25 #error GroupVarint.h requires GCC or MSVC
26 #endif
27 
28 #include <folly/Portability.h>
29 
30 #if FOLLY_X64 || defined(__i386__) || FOLLY_PPC64 || FOLLY_AARCH64
31 #define HAVE_GROUP_VARINT 1
32 
33 #include <folly/Range.h>
35 #include <folly/lang/Bits.h>
37 
38 #if FOLLY_SSE >= 3
39 #include <nmmintrin.h>
40 namespace folly {
41 namespace detail {
42 extern const std::array<std::array<std::uint32_t, 4>, 256> groupVarintSSEMasks;
43 } // namespace detail
44 } // namespace folly
45 #endif
46 
47 namespace folly {
48 namespace detail {
49 extern const std::array<std::uint8_t, 256> groupVarintLengths;
50 } // namespace detail
51 } // namespace folly
52 
53 namespace folly {
54 
55 template <typename T>
56 class GroupVarint;
57 
68 template <>
69 class GroupVarint<uint32_t> : public detail::GroupVarintBase<uint32_t> {
70  public:
74  static size_t size(uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
75  return kHeaderSize + kGroupSize + key(a) + key(b) + key(c) + key(d);
76  }
77 
82  static size_t size(const uint32_t* p) {
83  return size(p[0], p[1], p[2], p[3]);
84  }
85 
92  static size_t partialSize(const type* p, size_t count) {
93  DCHECK_LE(count, kGroupSize);
94  size_t s = kHeaderSize + count;
95  for (; count; --count, ++p) {
96  s += key(*p);
97  }
98  return s;
99  }
100 
105  static size_t partialCount(const char* p, size_t size) {
106  uint8_t v = uint8_t(*p);
107  size_t s = kHeaderSize;
108  s += 1 + b0key(v);
109  if (s > size) {
110  return 0;
111  }
112  s += 1 + b1key(v);
113  if (s > size) {
114  return 1;
115  }
116  s += 1 + b2key(v);
117  if (s > size) {
118  return 2;
119  }
120  s += 1 + b3key(v);
121  if (s > size) {
122  return 3;
123  }
124  return 4;
125  }
126 
131  static size_t encodedSize(const char* p) {
132  return kHeaderSize + kGroupSize + b0key(uint8_t(*p)) + b1key(uint8_t(*p)) +
133  b2key(uint8_t(*p)) + b3key(uint8_t(*p));
134  }
135 
141  static char* encode(char* p, uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
142  uint8_t b0key = key(a);
143  uint8_t b1key = key(b);
144  uint8_t b2key = key(c);
145  uint8_t b3key = key(d);
146  *p++ = (b3key << 6) | (b2key << 4) | (b1key << 2) | b0key;
147  storeUnaligned(p, a);
148  p += b0key + 1;
149  storeUnaligned(p, b);
150  p += b1key + 1;
151  storeUnaligned(p, c);
152  p += b2key + 1;
153  storeUnaligned(p, d);
154  p += b3key + 1;
155  return p;
156  }
157 
162  static char* encode(char* p, const uint32_t* src) {
163  return encode(p, src[0], src[1], src[2], src[3]);
164  }
165 
172  static const char* decode_simple(
173  const char* p,
174  uint32_t* a,
175  uint32_t* b,
176  uint32_t* c,
177  uint32_t* d) {
178  size_t k = loadUnaligned<uint8_t>(p);
179  const char* end = p + detail::groupVarintLengths[k];
180  ++p;
181  size_t k0 = b0key(k);
182  *a = loadUnaligned<uint32_t>(p) & kMask[k0];
183  p += k0 + 1;
184  size_t k1 = b1key(k);
185  *b = loadUnaligned<uint32_t>(p) & kMask[k1];
186  p += k1 + 1;
187  size_t k2 = b2key(k);
188  *c = loadUnaligned<uint32_t>(p) & kMask[k2];
189  p += k2 + 1;
190  size_t k3 = b3key(k);
191  *d = loadUnaligned<uint32_t>(p) & kMask[k3];
192  // p += k3+1;
193  return end;
194  }
195 
200  static const char* decode_simple(const char* p, uint32_t* dest) {
201  return decode_simple(p, dest, dest + 1, dest + 2, dest + 3);
202  }
203 
204 #if FOLLY_SSE >= 3
205 
209  static const char* decode(const char* p, uint32_t* dest) {
210  uint8_t key = uint8_t(p[0]);
211  __m128i val = _mm_loadu_si128((const __m128i*)(p + 1));
212  __m128i mask =
213  _mm_load_si128((const __m128i*)detail::groupVarintSSEMasks[key].data());
214  __m128i r = _mm_shuffle_epi8(val, mask);
215  _mm_storeu_si128((__m128i*)dest, r);
216  return p + detail::groupVarintLengths[key];
217  }
218 
223  static const char*
224  decode(const char* p, uint32_t* a, uint32_t* b, uint32_t* c, uint32_t* d) {
225  uint8_t key = uint8_t(p[0]);
226  __m128i val = _mm_loadu_si128((const __m128i*)(p + 1));
227  __m128i mask =
228  _mm_load_si128((const __m128i*)detail::groupVarintSSEMasks[key].data());
229  __m128i r = _mm_shuffle_epi8(val, mask);
230 
231  // Extracting 32 bits at a time out of an XMM register is a SSE4 feature
232 #if FOLLY_SSE >= 4
233  *a = uint32_t(_mm_extract_epi32(r, 0));
234  *b = uint32_t(_mm_extract_epi32(r, 1));
235  *c = uint32_t(_mm_extract_epi32(r, 2));
236  *d = uint32_t(_mm_extract_epi32(r, 3));
237 #else /* !__SSE4__ */
238  *a = _mm_extract_epi16(r, 0) + (_mm_extract_epi16(r, 1) << 16);
239  *b = _mm_extract_epi16(r, 2) + (_mm_extract_epi16(r, 3) << 16);
240  *c = _mm_extract_epi16(r, 4) + (_mm_extract_epi16(r, 5) << 16);
241  *d = _mm_extract_epi16(r, 6) + (_mm_extract_epi16(r, 7) << 16);
242 #endif /* __SSE4__ */
243 
244  return p + detail::groupVarintLengths[key];
245  }
246 
247 #else /* !__SSSE3__ */
248  static const char*
249  decode(const char* p, uint32_t* a, uint32_t* b, uint32_t* c, uint32_t* d) {
250  return decode_simple(p, a, b, c, d);
251  }
252 
253  static const char* decode(const char* p, uint32_t* dest) {
254  return decode_simple(p, dest);
255  }
256 #endif /* __SSSE3__ */
257 
258  private:
259  static uint8_t key(uint32_t x) {
260  // __builtin_clz is undefined for the x==0 case
261  return uint8_t(3 - (__builtin_clz(x | 1) / 8));
262  }
263  static size_t b0key(size_t x) {
264  return x & 3;
265  }
266  static size_t b1key(size_t x) {
267  return (x >> 2) & 3;
268  }
269  static size_t b2key(size_t x) {
270  return (x >> 4) & 3;
271  }
272  static size_t b3key(size_t x) {
273  return (x >> 6) & 3;
274  }
275 
276  static const uint32_t kMask[];
277 };
278 
289 template <>
290 class GroupVarint<uint64_t> : public detail::GroupVarintBase<uint64_t> {
291  public:
295  static size_t
297  return kHeaderSize + kGroupSize + key(a) + key(b) + key(c) + key(d) +
298  key(e);
299  }
300 
305  static size_t size(const uint64_t* p) {
306  return size(p[0], p[1], p[2], p[3], p[4]);
307  }
308 
315  static size_t partialSize(const type* p, size_t count) {
316  DCHECK_LE(count, kGroupSize);
317  size_t s = kHeaderSize + count;
318  for (; count; --count, ++p) {
319  s += key(*p);
320  }
321  return s;
322  }
323 
328  static size_t partialCount(const char* p, size_t size) {
329  uint16_t v = loadUnaligned<uint16_t>(p);
330  size_t s = kHeaderSize;
331  s += 1 + b0key(v);
332  if (s > size) {
333  return 0;
334  }
335  s += 1 + b1key(v);
336  if (s > size) {
337  return 1;
338  }
339  s += 1 + b2key(v);
340  if (s > size) {
341  return 2;
342  }
343  s += 1 + b3key(v);
344  if (s > size) {
345  return 3;
346  }
347  s += 1 + b4key(v);
348  if (s > size) {
349  return 4;
350  }
351  return 5;
352  }
353 
358  static size_t encodedSize(const char* p) {
359  uint16_t n = loadUnaligned<uint16_t>(p);
360  return kHeaderSize + kGroupSize + b0key(n) + b1key(n) + b2key(n) +
361  b3key(n) + b4key(n);
362  }
363 
369  static char*
370  encode(char* p, uint64_t a, uint64_t b, uint64_t c, uint64_t d, uint64_t e) {
371  uint16_t b0key = key(a);
372  uint16_t b1key = key(b);
373  uint16_t b2key = key(c);
374  uint16_t b3key = key(d);
375  uint16_t b4key = key(e);
376  storeUnaligned<uint16_t>(
377  p,
378  uint16_t(
379  (b4key << 12) | (b3key << 9) | (b2key << 6) | (b1key << 3) |
380  b0key));
381  p += 2;
382  storeUnaligned(p, a);
383  p += b0key + 1;
384  storeUnaligned(p, b);
385  p += b1key + 1;
386  storeUnaligned(p, c);
387  p += b2key + 1;
388  storeUnaligned(p, d);
389  p += b3key + 1;
390  storeUnaligned(p, e);
391  p += b4key + 1;
392  return p;
393  }
394 
399  static char* encode(char* p, const uint64_t* src) {
400  return encode(p, src[0], src[1], src[2], src[3], src[4]);
401  }
402 
409  static const char* decode(
410  const char* p,
411  uint64_t* a,
412  uint64_t* b,
413  uint64_t* c,
414  uint64_t* d,
415  uint64_t* e) {
416  uint16_t k = loadUnaligned<uint16_t>(p);
417  p += 2;
418  uint8_t k0 = b0key(k);
419  *a = loadUnaligned<uint64_t>(p) & kMask[k0];
420  p += k0 + 1;
421  uint8_t k1 = b1key(k);
422  *b = loadUnaligned<uint64_t>(p) & kMask[k1];
423  p += k1 + 1;
424  uint8_t k2 = b2key(k);
425  *c = loadUnaligned<uint64_t>(p) & kMask[k2];
426  p += k2 + 1;
427  uint8_t k3 = b3key(k);
428  *d = loadUnaligned<uint64_t>(p) & kMask[k3];
429  p += k3 + 1;
430  uint8_t k4 = b4key(k);
431  *e = loadUnaligned<uint64_t>(p) & kMask[k4];
432  p += k4 + 1;
433  return p;
434  }
435 
440  static const char* decode(const char* p, uint64_t* dest) {
441  return decode(p, dest, dest + 1, dest + 2, dest + 3, dest + 4);
442  }
443 
444  private:
445  enum { kHeaderBytes = 2 };
446 
447  static uint8_t key(uint64_t x) {
448  // __builtin_clzll is undefined for the x==0 case
449  return uint8_t(7 - (__builtin_clzll(x | 1) / 8));
450  }
451 
452  static uint8_t b0key(uint16_t x) {
453  return x & 7u;
454  }
455  static uint8_t b1key(uint16_t x) {
456  return (x >> 3) & 7u;
457  }
458  static uint8_t b2key(uint16_t x) {
459  return (x >> 6) & 7u;
460  }
461  static uint8_t b3key(uint16_t x) {
462  return (x >> 9) & 7u;
463  }
464  static uint8_t b4key(uint16_t x) {
465  return (x >> 12) & 7u;
466  }
467 
468  static const uint64_t kMask[];
469 };
470 
471 typedef GroupVarint<uint32_t> GroupVarint32;
472 typedef GroupVarint<uint64_t> GroupVarint64;
473 
482 template <class T, class Output>
483 class GroupVarintEncoder {
484  public:
485  typedef GroupVarint<T> Base;
486  typedef T type;
487 
488  explicit GroupVarintEncoder(Output out) : out_(out), count_(0) {}
489 
490  ~GroupVarintEncoder() {
491  finish();
492  }
493 
497  void add(type val) {
498  buf_[count_++] = val;
499  if (count_ == Base::kGroupSize) {
500  char* p = Base::encode(tmp_, buf_);
501  out_(StringPiece(tmp_, p));
502  count_ = 0;
503  }
504  }
505 
511  void finish() {
512  if (count_) {
513  // This is not strictly necessary, but it makes testing easy;
514  // uninitialized bytes are guaranteed to be recorded as taking one byte
515  // (not more).
516  for (size_t i = count_; i < Base::kGroupSize; i++) {
517  buf_[i] = 0;
518  }
519  Base::encode(tmp_, buf_);
520  out_(StringPiece(tmp_, Base::partialSize(buf_, count_)));
521  count_ = 0;
522  }
523  }
524 
528  Output& output() {
529  return out_;
530  }
531  const Output& output() const {
532  return out_;
533  }
534 
539  void clear() {
540  count_ = 0;
541  }
542 
543  private:
544  Output out_;
545  char tmp_[Base::kMaxSize];
546  type buf_[Base::kGroupSize];
547  size_t count_;
548 };
549 
555 template <typename T>
556 class GroupVarintDecoder {
557  public:
558  typedef GroupVarint<T> Base;
559  typedef T type;
560 
561  GroupVarintDecoder() = default;
562 
563  explicit GroupVarintDecoder(StringPiece data, size_t maxCount = (size_t)-1)
564  : rrest_(data.end()),
565  p_(data.data()),
566  end_(data.end()),
567  limit_(end_),
568  pos_(0),
569  count_(0),
570  remaining_(maxCount) {}
571 
572  void reset(StringPiece data, size_t maxCount = (size_t)-1) {
573  rrest_ = data.end();
574  p_ = data.data();
575  end_ = data.end();
576  limit_ = end_;
577  pos_ = 0;
578  count_ = 0;
579  remaining_ = maxCount;
580  }
581 
585  bool next(type* val) {
586  if (pos_ == count_) {
587  // refill
588  size_t rem = size_t(end_ - p_);
589  if (rem == 0 || remaining_ == 0) {
590  return false;
591  }
592  // next() attempts to read one full group at a time, and so we must have
593  // at least enough bytes readable after its end to handle the case if the
594  // last group is full.
595  //
596  // The best way to ensure this is to ensure that data has at least
597  // Base::kMaxSize - 1 bytes readable *after* the end, otherwise we'll copy
598  // into a temporary buffer.
599  if (limit_ - p_ < Base::kMaxSize) {
600  memcpy(tmp_, p_, rem);
601  p_ = tmp_;
602  end_ = p_ + rem;
603  limit_ = tmp_ + sizeof(tmp_);
604  }
605  pos_ = 0;
606  const char* n = Base::decode(p_, buf_);
607  if (n <= end_) {
608  // Full group could be decoded
609  if (remaining_ >= Base::kGroupSize) {
610  remaining_ -= Base::kGroupSize;
611  count_ = Base::kGroupSize;
612  p_ = n;
613  } else {
614  count_ = remaining_;
615  remaining_ = 0;
616  p_ += Base::partialSize(buf_, count_);
617  }
618  } else {
619  // Can't decode a full group
620  count_ = Base::partialCount(p_, size_t(end_ - p_));
621  if (remaining_ >= count_) {
622  remaining_ -= count_;
623  p_ = end_;
624  } else {
625  count_ = remaining_;
626  remaining_ = 0;
627  p_ += Base::partialSize(buf_, count_);
628  }
629  if (count_ == 0) {
630  return false;
631  }
632  }
633  }
634  *val = buf_[pos_++];
635  return true;
636  }
637 
638  StringPiece rest() const {
639  // This is only valid after next() returned false
640  CHECK(pos_ == count_ && (p_ == end_ || remaining_ == 0));
641  // p_ may point to the internal buffer (tmp_), but we want
642  // to return subpiece of the original data
643  size_t size = size_t(end_ - p_);
644  return StringPiece(rrest_ - size, rrest_);
645  }
646 
647  private:
648  const char* rrest_;
649  const char* p_;
650  const char* end_;
651  const char* limit_;
652  char tmp_[2 * Base::kMaxSize];
653  type buf_[Base::kGroupSize];
654  size_t pos_;
655  size_t count_;
656  size_t remaining_;
657 };
658 
659 typedef GroupVarintDecoder<uint32_t> GroupVarint32Decoder;
660 typedef GroupVarintDecoder<uint64_t> GroupVarint64Decoder;
661 
662 } // namespace folly
663 
664 #endif /* FOLLY_X64 || defined(__i386__) || FOLLY_PPC64 */
Definition: InvokeTest.cpp:58
unique_ptr< IOBuf > encode(vector< HPACKHeader > &headers, HPACKEncoder &encoder)
char b
auto add
Definition: BaseTest.cpp:70
PskType type
TokenBindingMessage decode(folly::io::Cursor &cursor)
Definition: Types.cpp:132
dest
Definition: upload.py:394
double val
Definition: String.cpp:273
folly::std T
—— Concurrent Priority Queue Implementation ——
Definition: AtomicBitSet.h:29
constexpr auto size(C const &c) -> decltype(c.size())
Definition: Access.h:45
auto end(TestAdlIterable &instance)
Definition: ForeachTest.cpp:62
constexpr auto data(C &c) -> decltype(c.data())
Definition: Access.h:71
AtomicCounter< T, DeterministicAtomic > Base
char a
void storeUnaligned(void *p, T value)
Definition: Bits.h:352
std::string & out_
Definition: json.cpp:185
int * count
static set< string > s
Range< const char * > StringPiece
char c
KeyT k
uintptr_t end_
def next(obj)
Definition: ast.py:58