22 #include <glog/logging.h> 24 #if !defined(__GNUC__) && !defined(_MSC_VER) 25 #error GroupVarint.h requires GCC or MSVC 30 #if FOLLY_X64 || defined(__i386__) || FOLLY_PPC64 || FOLLY_AARCH64 31 #define HAVE_GROUP_VARINT 1 39 #include <nmmintrin.h> 42 extern const std::array<std::array<std::uint32_t, 4>, 256> groupVarintSSEMasks;
49 extern const std::array<std::uint8_t, 256> groupVarintLengths;
69 class GroupVarint<
uint32_t> :
public detail::GroupVarintBase<uint32_t> {
75 return kHeaderSize + kGroupSize + key(a) + key(b) + key(c) + key(d);
83 return size(p[0], p[1], p[2], p[3]);
92 static size_t partialSize(
const type* p,
size_t count) {
93 DCHECK_LE(count, kGroupSize);
94 size_t s = kHeaderSize +
count;
105 static size_t partialCount(
const char* p,
size_t size) {
107 size_t s = kHeaderSize;
131 static size_t encodedSize(
const char* p) {
132 return kHeaderSize + kGroupSize + b0key(
uint8_t(*p)) + b1key(
uint8_t(*p)) +
146 *p++ = (b3key << 6) | (b2key << 4) | (b1key << 2) | b0key;
163 return encode(p, src[0], src[1], src[2], src[3]);
172 static const char* decode_simple(
178 size_t k = loadUnaligned<uint8_t>(p);
179 const char*
end = p + detail::groupVarintLengths[
k];
181 size_t k0 = b0key(k);
182 *a = loadUnaligned<uint32_t>(p) & kMask[k0];
184 size_t k1 = b1key(k);
185 *b = loadUnaligned<uint32_t>(p) & kMask[k1];
187 size_t k2 = b2key(k);
188 *c = loadUnaligned<uint32_t>(p) & kMask[k2];
190 size_t k3 = b3key(k);
191 *d = loadUnaligned<uint32_t>(p) & kMask[k3];
200 static const char* decode_simple(
const char* p,
uint32_t*
dest) {
201 return decode_simple(p, dest, dest + 1, dest + 2, dest + 3);
211 __m128i
val = _mm_loadu_si128((
const __m128i*)(p + 1));
213 _mm_load_si128((
const __m128i*)detail::groupVarintSSEMasks[key].
data());
214 __m128i r = _mm_shuffle_epi8(val, mask);
215 _mm_storeu_si128((__m128i*)dest, r);
216 return p + detail::groupVarintLengths[key];
226 __m128i val = _mm_loadu_si128((
const __m128i*)(p + 1));
228 _mm_load_si128((
const __m128i*)detail::groupVarintSSEMasks[key].
data());
229 __m128i r = _mm_shuffle_epi8(val, mask);
233 *a =
uint32_t(_mm_extract_epi32(r, 0));
234 *b =
uint32_t(_mm_extract_epi32(r, 1));
235 *c =
uint32_t(_mm_extract_epi32(r, 2));
236 *d =
uint32_t(_mm_extract_epi32(r, 3));
238 *a = _mm_extract_epi16(r, 0) + (_mm_extract_epi16(r, 1) << 16);
239 *b = _mm_extract_epi16(r, 2) + (_mm_extract_epi16(r, 3) << 16);
240 *c = _mm_extract_epi16(r, 4) + (_mm_extract_epi16(r, 5) << 16);
241 *d = _mm_extract_epi16(r, 6) + (_mm_extract_epi16(r, 7) << 16);
244 return p + detail::groupVarintLengths[key];
250 return decode_simple(p, a, b, c, d);
254 return decode_simple(p, dest);
261 return uint8_t(3 - (__builtin_clz(x | 1) / 8));
263 static size_t b0key(
size_t x) {
266 static size_t b1key(
size_t x) {
269 static size_t b2key(
size_t x) {
272 static size_t b3key(
size_t x) {
290 class GroupVarint<
uint64_t> :
public detail::GroupVarintBase<uint64_t> {
297 return kHeaderSize + kGroupSize + key(a) + key(b) + key(c) + key(d) +
306 return size(p[0], p[1], p[2], p[3], p[4]);
315 static size_t partialSize(
const type* p,
size_t count) {
316 DCHECK_LE(count, kGroupSize);
317 size_t s = kHeaderSize +
count;
328 static size_t partialCount(
const char* p,
size_t size) {
329 uint16_t v = loadUnaligned<uint16_t>(p);
330 size_t s = kHeaderSize;
358 static size_t encodedSize(
const char* p) {
359 uint16_t n = loadUnaligned<uint16_t>(p);
360 return kHeaderSize + kGroupSize + b0key(n) + b1key(n) + b2key(n) +
376 storeUnaligned<uint16_t>(
379 (b4key << 12) | (b3key << 9) | (b2key << 6) | (b1key << 3) |
400 return encode(p, src[0], src[1], src[2], src[3], src[4]);
409 static const char*
decode(
419 *a = loadUnaligned<uint64_t>(p) & kMask[k0];
422 *b = loadUnaligned<uint64_t>(p) & kMask[k1];
425 *c = loadUnaligned<uint64_t>(p) & kMask[k2];
428 *d = loadUnaligned<uint64_t>(p) & kMask[k3];
431 *e = loadUnaligned<uint64_t>(p) & kMask[k4];
441 return decode(p, dest, dest + 1, dest + 2, dest + 3, dest + 4);
// Named constant with value 2. The visible 64-bit code loads and stores its
// group header as a uint16_t, so this is presumably the header width in
// bytes -- NOTE(review): confirm against the uses of kHeaderBytes.
445 enum { kHeaderBytes = 2 };
449 return uint8_t(7 - (__builtin_clzll(x | 1) / 8));
456 return (x >> 3) & 7u;
459 return (x >> 6) & 7u;
462 return (x >> 9) & 7u;
465 return (x >> 12) & 7u;
// Short aliases for the two GroupVarint specializations: one over uint32_t
// values and one over uint64_t values.
471 typedef GroupVarint<uint32_t> GroupVarint32;
472 typedef GroupVarint<uint64_t> GroupVarint64;
482 template <
class T,
class Output>
483 class GroupVarintEncoder {
485 typedef GroupVarint<T>
Base;
// Construct an encoder around the given output object: `out` is copied into
// out_, and count_ (used as the index of the next slot in buf_) starts at 0.
488 explicit GroupVarintEncoder(Output out) :
out_(out), count_(0) {}
490 ~GroupVarintEncoder() {
498 buf_[count_++] =
val;
499 if (count_ == Base::kGroupSize) {
516 for (
size_t i = count_;
i < Base::kGroupSize;
i++) {
531 const Output&
output()
const {
545 char tmp_[Base::kMaxSize];
546 type buf_[Base::kGroupSize];
555 template <
typename T>
556 class GroupVarintDecoder {
558 typedef GroupVarint<T>
Base;
// Compiler-generated default constructor. A reset(StringPiece, size_t)
// member is also declared for supplying the input after construction.
561 GroupVarintDecoder() =
default;
563 explicit GroupVarintDecoder(
StringPiece data,
size_t maxCount = (
size_t)-1)
564 : rrest_(data.
end()),
570 remaining_(maxCount) {}
572 void reset(
StringPiece data,
size_t maxCount = (
size_t)-1) {
579 remaining_ = maxCount;
585 bool next(type* val) {
586 if (pos_ == count_) {
588 size_t rem = size_t(
end_ - p_);
589 if (rem == 0 || remaining_ == 0) {
599 if (limit_ - p_ < Base::kMaxSize) {
600 memcpy(tmp_, p_, rem);
603 limit_ = tmp_ +
sizeof(tmp_);
609 if (remaining_ >= Base::kGroupSize) {
610 remaining_ -= Base::kGroupSize;
611 count_ = Base::kGroupSize;
616 p_ += Base::partialSize(buf_, count_);
620 count_ = Base::partialCount(p_,
size_t(
end_ - p_));
621 if (remaining_ >= count_) {
622 remaining_ -= count_;
627 p_ += Base::partialSize(buf_, count_);
640 CHECK(pos_ == count_ && (p_ ==
end_ || remaining_ == 0));
643 size_t size = size_t(
end_ - p_);
652 char tmp_[2 * Base::kMaxSize];
653 type buf_[Base::kGroupSize];
// Short aliases for GroupVarintDecoder instantiated for uint32_t and
// uint64_t values respectively.
659 typedef GroupVarintDecoder<uint32_t> GroupVarint32Decoder;
660 typedef GroupVarintDecoder<uint64_t> GroupVarint64Decoder;
unique_ptr< IOBuf > encode(vector< HPACKHeader > &headers, HPACKEncoder &encoder)
TokenBindingMessage decode(folly::io::Cursor &cursor)
—— Concurrent Priority Queue Implementation ——
constexpr auto size(C const &c) -> decltype(c.size())
auto end(TestAdlIterable &instance)
constexpr auto data(C &c) -> decltype(c.data())
AtomicCounter< T, DeterministicAtomic > Base
void storeUnaligned(void *p, T value)
Range< const char * > StringPiece