proxygen: proxygen/folly/folly/hash/detail/Crc32CombineDetail.cpp Source File

Go to the documentation of this file.
 /*
  * Copyright 2018-present Facebook, Inc.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
  *   http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 
 #include <folly/hash/detail/ChecksumDetail.h>
 
 #include <array>
 
 #include <folly/Bits.h>
 #include <folly/ConstexprMath.h>
 
 namespace folly {
 
 // Standard galois-field multiply.  The only modification is that a,
 // b, m, and p are all bit-reflected.
 //
 // https://en.wikipedia.org/wiki/Finite_field_arithmetic
 static constexpr uint32_t
 gf_multiply_sw_1(size_t i, uint32_t p, uint32_t a, uint32_t b, uint32_t m) {
   // clang-format off
   return i == 32 ? p : gf_multiply_sw_1(
       /* i = */ i + 1,
       /* p = */ p ^ (-((b >> 31) & 1) & a),
       /* a = */ (a >> 1) ^ (-(a & 1) & m),
       /* b = */ b << 1,
       /* m = */ m);
   // clang-format on
 }
 static constexpr uint32_t gf_multiply_sw(uint32_t a, uint32_t b, uint32_t m) {
   return gf_multiply_sw_1(/* i = */ 0, /* p = */ 0, a, b, m);
 }
 
 static constexpr uint32_t gf_square_sw(uint32_t a, uint32_t m) {
   return gf_multiply_sw(a, a, m);
 }
 
 namespace {
 
 template <size_t i, uint32_t m>
 struct gf_powers_memo {
   static constexpr uint32_t value =
       gf_square_sw(gf_powers_memo<i - 1, m>::value, m);
 };
 template <uint32_t m>
 struct gf_powers_memo<0, m> {
   static constexpr uint32_t value = m;
 };
 
 template <uint32_t m>
 struct gf_powers_make {
   template <size_t... i>
   constexpr auto operator()(index_sequence<i...>) const {
     return std::array<uint32_t, sizeof...(i)>{{gf_powers_memo<i, m>::value...}};
   }
 };
 
 } // namespace
 
 #if FOLLY_SSE_PREREQ(4, 2)
 
 // Reduction taken from
 // https://www.nicst.de/crc.pdf
 //
 // This is an intrinsics-based implementation of listing 3.
 static uint32_t gf_multiply_crc32c_hw(uint64_t crc1, uint64_t crc2, uint32_t) {
   const auto crc1_xmm = _mm_set_epi64x(0, crc1);
   const auto crc2_xmm = _mm_set_epi64x(0, crc2);
   const auto count = _mm_set_epi64x(0, 1);
   const auto res0 = _mm_clmulepi64_si128(crc2_xmm, crc1_xmm, 0x00);
   const auto res1 = _mm_sll_epi64(res0, count);
 
   // Use hardware crc32c to do reduction from 64 -> 32 bytes
   const auto res2 = _mm_cvtsi128_si64(res1);
   const auto res3 = _mm_crc32_u32(0, res2);
   const auto res4 = _mm_extract_epi32(res1, 1);
   return res3 ^ res4;
 }
 
 static uint32_t gf_multiply_crc32_hw(uint64_t crc1, uint64_t crc2, uint32_t) {
   const auto crc1_xmm = _mm_set_epi64x(0, crc1);
   const auto crc2_xmm = _mm_set_epi64x(0, crc2);
   const auto count = _mm_set_epi64x(0, 1);
   const auto res0 = _mm_clmulepi64_si128(crc2_xmm, crc1_xmm, 0x00);
   const auto res1 = _mm_sll_epi64(res0, count);
 
   // Do barrett reduction of 64 -> 32 bytes
   const auto mask32 = _mm_set_epi32(0, 0, 0, 0xFFFFFFFF);
   const auto barrett_reduction_constants =
       _mm_set_epi32(0x1, 0xDB710641, 0x1, 0xF7011641);
   const auto res2 = _mm_clmulepi64_si128(
       _mm_and_si128(res1, mask32), barrett_reduction_constants, 0x00);
   const auto res3 = _mm_clmulepi64_si128(
       _mm_and_si128(res2, mask32), barrett_reduction_constants, 0x10);
   return _mm_cvtsi128_si32(_mm_srli_si128(_mm_xor_si128(res3, res1), 4));
 }
 
 #else
 
 static uint32_t gf_multiply_crc32c_hw(uint64_t, uint64_t, uint32_t) {
   return 0;
 }
 static uint32_t gf_multiply_crc32_hw(uint64_t, uint64_t, uint32_t) {
   return 0;
 }
 
 #endif
 
 static constexpr uint32_t crc32c_m = 0x82f63b78;
 static constexpr uint32_t crc32_m = 0xedb88320;
 
 /*
  * Pre-calculated powers tables for crc32c and crc32.
  */
 static constexpr std::array<uint32_t, 62> const crc32c_powers =
     gf_powers_make<crc32c_m>{}(make_index_sequence<62>{});
 static constexpr std::array<uint32_t, 62> const crc32_powers =
     gf_powers_make<crc32_m>{}(make_index_sequence<62>{});
 
 template <typename F>
 static uint32_t crc32_append_zeroes(
     F mult,
     uint32_t crc,
     size_t len,
     uint32_t polynomial,
     std::array<uint32_t, 62> const& powers_array) {
   auto powers = powers_array.data();
 
   // Append by multiplying by consecutive powers of two of the zeroes
   // array
   len >>= 2;
 
   while (len) {
     // Advance directly to next bit set.
     auto r = findFirstSet(len) - 1;
     len >>= r;
     powers += r;
 
     crc = mult(crc, *powers, polynomial);
 
     len >>= 1;
     powers++;
   }
 
   return crc;
 }
 
 namespace detail {
 
 uint32_t crc32_combine_sw(uint32_t crc1, uint32_t crc2, size_t crc2len) {
   return crc2 ^
       crc32_append_zeroes(gf_multiply_sw, crc1, crc2len, crc32_m, crc32_powers);
 }
 
 uint32_t crc32_combine_hw(uint32_t crc1, uint32_t crc2, size_t crc2len) {
   return crc2 ^
       crc32_append_zeroes(
              gf_multiply_crc32_hw, crc1, crc2len, crc32_m, crc32_powers);
 }
 
 uint32_t crc32c_combine_sw(uint32_t crc1, uint32_t crc2, size_t crc2len) {
   return crc2 ^
       crc32_append_zeroes(
              gf_multiply_sw, crc1, crc2len, crc32c_m, crc32c_powers);
 }
 
 uint32_t crc32c_combine_hw(uint32_t crc1, uint32_t crc2, size_t crc2len) {
   return crc2 ^
       crc32_append_zeroes(
              gf_multiply_crc32c_hw, crc1, crc2len, crc32c_m, crc32c_powers);
 }
 
 } // namespace detail
 
 } // namespace folly