38 #include <boost/preprocessor/arithmetic/add.hpp> 39 #include <boost/preprocessor/arithmetic/sub.hpp> 40 #include <boost/preprocessor/repetition/repeat_from_to.hpp> 45 #if defined(FOLLY_X64) && FOLLY_SSE_PREREQ(4, 2) 47 namespace crc32_detail {
49 #define CRCtriplet(crc, buf, offset) \ 50 crc##0 = _mm_crc32_u64(crc##0, *(buf##0 + offset)); \ 51 crc##1 = _mm_crc32_u64(crc##1, *(buf##1 + offset)); \ 52 crc##2 = _mm_crc32_u64(crc##2, *(buf##2 + offset)); \ 55 #define CRCduplet(crc, buf, offset) \ 56 crc##0 = _mm_crc32_u64(crc##0, *(buf##0 + offset)); \ 57 crc##1 = _mm_crc32_u64(crc##1, *(buf##1 + offset)); 59 #define CRCsinglet(crc, buf, offset) \ 60 crc = _mm_crc32_u64(crc, *(uint64_t*)(buf + offset)); \ 63 #define CASEREPEAT_TRIPLET(unused, count, total) \ 64 case BOOST_PP_ADD(1, BOOST_PP_SUB(total, count)): \ 65 CRCtriplet(crc, next, -BOOST_PP_ADD(1, BOOST_PP_SUB(total, count))); 67 #define CASEREPEAT_SINGLET(unused, count, total) \ 68 case BOOST_PP_SUB(total, count): \ 69 CRCsinglet(crc0, next, -BOOST_PP_SUB(total, count) * 8); 74 0x14cd00bd6, 0x105ec76f0, 0x0ba4fc28e, 0x14cd00bd6,
75 0x1d82c63da, 0x0f20c0dfe, 0x09e4addf8, 0x0ba4fc28e,
76 0x039d3b296, 0x1384aa63a, 0x102f9b8a2, 0x1d82c63da,
77 0x14237f5e6, 0x01c291d04, 0x00d3b6092, 0x09e4addf8,
78 0x0c96cfdc0, 0x0740eef02, 0x18266e456, 0x039d3b296,
79 0x0daece73e, 0x0083a6eec, 0x0ab7aff2a, 0x102f9b8a2,
80 0x1248ea574, 0x1c1733996, 0x083348832, 0x14237f5e6,
81 0x12c743124, 0x02ad91c30, 0x0b9e02b86, 0x00d3b6092,
82 0x018b33a4e, 0x06992cea2, 0x1b331e26a, 0x0c96cfdc0,
83 0x17d35ba46, 0x07e908048, 0x1bf2e8b8a, 0x18266e456,
84 0x1a3e0968a, 0x11ed1f9d8, 0x0ce7f39f4, 0x0daece73e,
85 0x061d82e56, 0x0f1d0f55e, 0x0d270f1a2, 0x0ab7aff2a,
86 0x1c3f5f66c, 0x0a87ab8a8, 0x12ed0daac, 0x1248ea574,
87 0x065863b64, 0x08462d800, 0x11eef4f8e, 0x083348832,
88 0x1ee54f54c, 0x071d111a8, 0x0b3e32c28, 0x12c743124,
89 0x0064f7f26, 0x0ffd852c6, 0x0dd7e3b0c, 0x0b9e02b86,
90 0x0f285651c, 0x0dcb17aa4, 0x010746f3c, 0x018b33a4e,
91 0x1c24afea4, 0x0f37c5aee, 0x0271d9844, 0x1b331e26a,
92 0x08e766a0c, 0x06051d5a2, 0x093a5f730, 0x17d35ba46,
93 0x06cb08e5c, 0x11d5ca20e, 0x06b749fb2, 0x1bf2e8b8a,
94 0x1167f94f2, 0x021f3d99c, 0x0cec3662e, 0x1a3e0968a,
95 0x19329634a, 0x08f158014, 0x0e6fc4e6a, 0x0ce7f39f4,
96 0x08227bb8a, 0x1a5e82106, 0x0b0cd4768, 0x061d82e56,
97 0x13c2b89c4, 0x188815ab2, 0x0d7a4825c, 0x0d270f1a2,
98 0x10f5ff2ba, 0x105405f3e, 0x00167d312, 0x1c3f5f66c,
99 0x0f6076544, 0x0e9adf796, 0x026f6a60a, 0x12ed0daac,
100 0x1a2adb74e, 0x096638b34, 0x19d34af3a, 0x065863b64,
101 0x049c3cc9c, 0x1e50585a0, 0x068bce87a, 0x11eef4f8e,
102 0x1524fa6c6, 0x19f1c69dc, 0x16cba8aca, 0x1ee54f54c,
103 0x042d98888, 0x12913343e, 0x1329d9f7e, 0x0b3e32c28,
104 0x1b1c69528, 0x088f25a3a, 0x02178513a, 0x0064f7f26,
105 0x0e0ac139e, 0x04e36f0b0, 0x0170076fa, 0x0dd7e3b0c,
106 0x141a1a2e2, 0x0bd6f81f8, 0x16ad828b4, 0x0f285651c,
107 0x041d17b64, 0x19425cbba, 0x1fae1cc66, 0x010746f3c,
108 0x1a75b4b00, 0x18db37e8a, 0x0f872e54c, 0x1c24afea4,
109 0x01e41e9fc, 0x04c144932, 0x086d8e4d2, 0x0271d9844,
110 0x160f7af7a, 0x052148f02, 0x05bb8f1bc, 0x08e766a0c,
111 0x0a90fd27a, 0x0a3c6f37a, 0x0b3af077a, 0x093a5f730,
112 0x04984d782, 0x1d22c238e, 0x0ca6ef3ac, 0x06cb08e5c,
113 0x0234e0b26, 0x063ded06a, 0x1d88abd4a, 0x06b749fb2,
114 0x04597456a, 0x04d56973c, 0x0e9e28eb4, 0x1167f94f2,
115 0x07b3ff57a, 0x19385bf2e, 0x0c9c8b782, 0x0cec3662e,
116 0x13a9cba9e, 0x0e417f38a, 0x093e106a4, 0x19329634a,
117 0x167001a9c, 0x14e727980, 0x1ddffc5d4, 0x0e6fc4e6a,
118 0x00df04680, 0x0d104b8fc, 0x02342001e, 0x08227bb8a,
119 0x00a2a8d7e, 0x05b397730, 0x168763fa6, 0x0b0cd4768,
120 0x1ed5a407a, 0x0e78eb416, 0x0d2c3ed1a, 0x13c2b89c4,
121 0x0995a5724, 0x1641378f0, 0x19b1afbc4, 0x0d7a4825c,
122 0x109ffedc0, 0x08d96551c, 0x0f2271e60, 0x10f5ff2ba,
123 0x00b0bf8ca, 0x00bf80dd2, 0x123888b7a, 0x00167d312,
124 0x1e888f7dc, 0x18dcddd1c, 0x002ee03b2, 0x0f6076544,
125 0x183e8d8fe, 0x06a45d2b2, 0x133d7a042, 0x026f6a60a,
126 0x116b0f50c, 0x1dd3e10e8, 0x05fabe670, 0x1a2adb74e,
127 0x130004488, 0x0de87806c, 0x000bcf5f6, 0x19d34af3a,
128 0x18f0c7078, 0x014338754, 0x017f27698, 0x049c3cc9c,
129 0x058ca5f00, 0x15e3e77ee, 0x1af900c24, 0x068bce87a,
130 0x0b5cfca28, 0x0dd07448e, 0x0ded288f8, 0x1524fa6c6,
131 0x059f229bc, 0x1d8048348, 0x06d390dec, 0x16cba8aca,
132 0x037170390, 0x0a3e3e02c, 0x06353c1cc, 0x042d98888,
133 0x0c4584f5c, 0x0d73c7bea, 0x1f16a3418, 0x1329d9f7e,
134 0x0531377e2, 0x185137662, 0x1d8d9ca7c, 0x1b1c69528,
135 0x0b25b29f2, 0x18a08b5bc, 0x19fb2a8b0, 0x02178513a,
136 0x1a08fe6ac, 0x1da758ae0, 0x045cddf4e, 0x0e0ac139e,
137 0x1a91647f2, 0x169cf9eb0, 0x1a0f717c4, 0x0170076fa,
151 const auto multiplier =
152 *(
reinterpret_cast<const __m128i*
>(clmul_constants) + block_size - 1);
153 const auto crc0_xmm = _mm_set_epi64x(0, crc0);
154 const auto res0 = _mm_clmulepi64_si128(crc0_xmm, multiplier, 0x00);
155 const auto crc1_xmm = _mm_set_epi64x(0, crc1);
156 const auto res1 = _mm_clmulepi64_si128(crc1_xmm, multiplier, 0x10);
157 const auto res = _mm_xor_si128(res0, res1);
158 crc0 = _mm_cvtsi128_si64(res);
159 crc0 = crc0 ^ *((
uint64_t*)next2 - 1);
160 crc2 = _mm_crc32_u64(crc2, crc0);
169 const unsigned char*&
next) {
172 crc32bit = _mm_crc32_u32(crc32bit, *(
uint32_t*)next);
176 crc32bit = _mm_crc32_u16(crc32bit, *(
uint16_t*)next);
180 crc32bit = _mm_crc32_u8(crc32bit, *(next));
191 const unsigned char*& next,
196 const uint64_t* next1 = next0 + block_size;
197 const uint64_t* next2 = next1 + block_size;
202 switch (block_size) {
206 CRCtriplet(crc, next, -128);
214 BOOST_PP_REPEAT_FROM_TO(0, 64, CASEREPEAT_TRIPLET, 126);
215 BOOST_PP_REPEAT_FROM_TO(0, 62, CASEREPEAT_TRIPLET, 62);
220 CRCduplet(crc, next, -1);
221 crc0 = CombineCRC(block_size, crc0, crc1, crc2, next2);
235 next = (
const unsigned char*)next2;
243 const unsigned char* next = (
const unsigned char*)buf;
251 size_t align = (8 - (uintptr_t)next) & 7;
252 crc32_detail::align_to_8(align, crc0, next);
257 size_t n = count >> 7;
258 size_t block_size = count & 127;
259 if (block_size == 0) {
267 crc32_detail::triplet_loop(block_size, crc0, next, n);
270 size_t count2 = len >> 3;
272 next += (count2 * 8);
279 BOOST_PP_REPEAT_FROM_TO(0, 27, CASEREPEAT_SINGLET, 27);
285 crc32_detail::align_to_8(len, crc0, next);
293 throw std::runtime_error(
"crc32_hw is not implemented on this platform");
#define FOLLY_ALWAYS_INLINE
—— Concurrent Priority Queue Implementation ——
uint32_t crc32c_hw(const uint8_t *data, size_t nbytes, uint32_t startingChecksum=~0U)
#define FOLLY_FALLTHROUGH
#define FOLLY_TARGET_ATTRIBUTE(target)