proxygen
Crc32CombineDetail.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2018-present Facebook, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
18 
19 #include <array>
20 
21 #include <folly/Bits.h>
22 #include <folly/ConstexprMath.h>
23 
24 namespace folly {
25 
26 // Standard galois-field multiply. The only modification is that a,
27 // b, m, and p are all bit-reflected.
28 //
29 // https://en.wikipedia.org/wiki/Finite_field_arithmetic
30 static constexpr uint32_t
32  // clang-format off
33  return i == 32 ? p : gf_multiply_sw_1(
34  /* i = */ i + 1,
35  /* p = */ p ^ (-((b >> 31) & 1) & a),
36  /* a = */ (a >> 1) ^ (-(a & 1) & m),
37  /* b = */ b << 1,
38  /* m = */ m);
39  // clang-format on
40 }
42  return gf_multiply_sw_1(/* i = */ 0, /* p = */ 0, a, b, m);
43 }
44 
45 static constexpr uint32_t gf_square_sw(uint32_t a, uint32_t m) {
46  return gf_multiply_sw(a, a, m);
47 }
48 
49 namespace {
50 
51 template <size_t i, uint32_t m>
52 struct gf_powers_memo {
53  static constexpr uint32_t value =
55 };
56 template <uint32_t m>
57 struct gf_powers_memo<0, m> {
58  static constexpr uint32_t value = m;
59 };
60 
61 template <uint32_t m>
62 struct gf_powers_make {
63  template <size_t... i>
64  constexpr auto operator()(index_sequence<i...>) const {
65  return std::array<uint32_t, sizeof...(i)>{{gf_powers_memo<i, m>::value...}};
66  }
67 };
68 
69 } // namespace
70 
71 #if FOLLY_SSE_PREREQ(4, 2)
72 
73 // Reduction taken from
74 // https://www.nicst.de/crc.pdf
75 //
76 // This is an intrinsics-based implementation of listing 3.
78  const auto crc1_xmm = _mm_set_epi64x(0, crc1);
79  const auto crc2_xmm = _mm_set_epi64x(0, crc2);
80  const auto count = _mm_set_epi64x(0, 1);
81  const auto res0 = _mm_clmulepi64_si128(crc2_xmm, crc1_xmm, 0x00);
82  const auto res1 = _mm_sll_epi64(res0, count);
83 
84  // Use hardware crc32c to do reduction from 64 -> 32 bytes
85  const auto res2 = _mm_cvtsi128_si64(res1);
86  const auto res3 = _mm_crc32_u32(0, res2);
87  const auto res4 = _mm_extract_epi32(res1, 1);
88  return res3 ^ res4;
89 }
90 
92  const auto crc1_xmm = _mm_set_epi64x(0, crc1);
93  const auto crc2_xmm = _mm_set_epi64x(0, crc2);
94  const auto count = _mm_set_epi64x(0, 1);
95  const auto res0 = _mm_clmulepi64_si128(crc2_xmm, crc1_xmm, 0x00);
96  const auto res1 = _mm_sll_epi64(res0, count);
97 
98  // Do barrett reduction of 64 -> 32 bytes
99  const auto mask32 = _mm_set_epi32(0, 0, 0, 0xFFFFFFFF);
100  const auto barrett_reduction_constants =
101  _mm_set_epi32(0x1, 0xDB710641, 0x1, 0xF7011641);
102  const auto res2 = _mm_clmulepi64_si128(
103  _mm_and_si128(res1, mask32), barrett_reduction_constants, 0x00);
104  const auto res3 = _mm_clmulepi64_si128(
105  _mm_and_si128(res2, mask32), barrett_reduction_constants, 0x10);
106  return _mm_cvtsi128_si32(_mm_srli_si128(_mm_xor_si128(res3, res1), 4));
107 }
108 
109 #else
110 
112  return 0;
113 }
115  return 0;
116 }
117 
118 #endif
119 
120 static constexpr uint32_t crc32c_m = 0x82f63b78;
121 static constexpr uint32_t crc32_m = 0xedb88320;
122 
123 /*
124  * Pre-calculated powers tables for crc32c and crc32.
125  */
126 static constexpr std::array<uint32_t, 62> const crc32c_powers =
127  gf_powers_make<crc32c_m>{}(make_index_sequence<62>{});
128 static constexpr std::array<uint32_t, 62> const crc32_powers =
129  gf_powers_make<crc32_m>{}(make_index_sequence<62>{});
130 
131 template <typename F>
133  F mult,
134  uint32_t crc,
135  size_t len,
136  uint32_t polynomial,
137  std::array<uint32_t, 62> const& powers_array) {
138  auto powers = powers_array.data();
139 
140  // Append by multiplying by consecutive powers of two of the zeroes
141  // array
142  len >>= 2;
143 
144  while (len) {
145  // Advance directly to next bit set.
146  auto r = findFirstSet(len) - 1;
147  len >>= r;
148  powers += r;
149 
150  crc = mult(crc, *powers, polynomial);
151 
152  len >>= 1;
153  powers++;
154  }
155 
156  return crc;
157 }
158 
159 namespace detail {
160 
161 uint32_t crc32_combine_sw(uint32_t crc1, uint32_t crc2, size_t crc2len) {
162  return crc2 ^
163  crc32_append_zeroes(gf_multiply_sw, crc1, crc2len, crc32_m, crc32_powers);
164 }
165 
166 uint32_t crc32_combine_hw(uint32_t crc1, uint32_t crc2, size_t crc2len) {
167  return crc2 ^
169  gf_multiply_crc32_hw, crc1, crc2len, crc32_m, crc32_powers);
170 }
171 
172 uint32_t crc32c_combine_sw(uint32_t crc1, uint32_t crc2, size_t crc2len) {
173  return crc2 ^
175  gf_multiply_sw, crc1, crc2len, crc32c_m, crc32c_powers);
176 }
177 
178 uint32_t crc32c_combine_hw(uint32_t crc1, uint32_t crc2, size_t crc2len) {
179  return crc2 ^
181  gf_multiply_crc32c_hw, crc1, crc2len, crc32c_m, crc32c_powers);
182 }
183 
184 } // namespace detail
185 
186 } // namespace folly
static constexpr uint32_t gf_multiply_sw_1(size_t i, uint32_t p, uint32_t a, uint32_t b, uint32_t m)
char b
BitIterator< BaseIter > findFirstSet(BitIterator< BaseIter >, BitIterator< BaseIter >)
Definition: BitIterator.h:170
static constexpr uint32_t crc32_m
uint32_t crc32_combine_hw(uint32_t crc1, uint32_t crc2, size_t crc2len)
—— Concurrent Priority Queue Implementation ——
Definition: AtomicBitSet.h:29
static constexpr std::array< uint32_t, 62 > const crc32_powers
uint32_t crc32c_combine_hw(uint32_t crc1, uint32_t crc2, size_t crc2len)
uint32_t crc32_combine_sw(uint32_t crc1, uint32_t crc2, size_t crc2len)
uint32_t crc32c_combine_sw(uint32_t crc1, uint32_t crc2, size_t crc2len)
make_integer_sequence< std::size_t, Size > make_index_sequence
Definition: Utility.h:209
static map< string, int > m
char a
static uint32_t crc32_append_zeroes(F mult, uint32_t crc, size_t len, uint32_t polynomial, std::array< uint32_t, 62 > const &powers_array)
static constexpr uint32_t crc32c_m
static uint32_t gf_multiply_crc32c_hw(uint64_t, uint64_t, uint32_t)
int * count
uint64_t value(const typename LockFreeRingBuffer< T, Atom >::Cursor &rbcursor)
static constexpr uint32_t gf_multiply_sw(uint32_t a, uint32_t b, uint32_t m)
static constexpr uint32_t value
static constexpr uint32_t gf_square_sw(uint32_t a, uint32_t m)
static constexpr std::array< uint32_t, 62 > const crc32c_powers
static uint32_t gf_multiply_crc32_hw(uint64_t, uint64_t, uint32_t)