proxygen
Instructions.h
Go to the documentation of this file.
1 /*
2  * Copyright 2015-present Facebook, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include <glog/logging.h>
20 
21 #ifdef _MSC_VER
22 #include <immintrin.h>
23 #endif
24 
25 #include <folly/CpuId.h>
26 #include <folly/Portability.h>
27 #include <folly/lang/Assume.h>
29 
30 namespace folly {
31 namespace compression {
32 namespace instructions {
33 
34 // NOTE: It's recommended to compile EF coding with -msse4.2, starting
35 // with Nehalem, Intel CPUs support POPCNT instruction and gcc will emit
36 // it for __builtin_popcountll intrinsic.
37 // But we provide an alternative way for the client code: it can switch to
38 // the appropriate version of EliasFanoReader<> at runtime (client should
39 // implement this switching logic itself) by specifying instruction set to
40 // use explicitly.
41 
42 struct Default {
43  static bool supported(const folly::CpuId& /* cpuId */ = {}) {
44  return true;
45  }
47  return uint64_t(__builtin_popcountll(value));
48  }
50  DCHECK_GT(value, 0u);
51  return __builtin_ctzll(value);
52  }
54  DCHECK_GT(value, 0u);
55  return __builtin_clzll(value);
56  }
58  return value & (value - 1);
59  }
60 
61  // Extract `length` bits starting from `start` from value. Only bits [0:63]
62  // will be extracted. All higher order bits in the
63  // result will be zeroed. If no bits are extracted, return 0.
66  if (start > 63) {
67  return 0ULL;
68  }
69  if (start + length > 64) {
70  length = 64 - start;
71  }
72 
73  return (value >> start) &
74  ((length == 64) ? (~0ULL) : ((1ULL << length) - 1ULL));
75  }
76 
77  // Clear high bits starting at position index.
79  if (index > 63) {
80  return 0;
81  }
82  return value & ((uint64_t(1) << index) - 1);
83  }
84 };
85 
86 struct Nehalem : public Default {
87  static bool supported(const folly::CpuId& cpuId = {}) {
88  return cpuId.popcnt();
89  }
90 
92 // POPCNT is supported starting with Intel Nehalem, AMD K10.
93 #if defined(__GNUC__) || defined(__clang__)
94  // GCC and Clang won't inline the intrinsics.
95  uint64_t result;
96  asm("popcntq %1, %0" : "=r"(result) : "r"(value));
97  return result;
98 #else
99  return uint64_t(_mm_popcnt_u64(value));
100 #endif
101  }
102 };
103 
104 struct Haswell : public Nehalem {
105  static bool supported(const folly::CpuId& cpuId = {}) {
106  return Nehalem::supported(cpuId) && cpuId.bmi1() && cpuId.bmi2();
107  }
108 
110 // BMI1 is supported starting with Intel Haswell, AMD Piledriver.
111 // BLSR combines two instructions into one and reduces register pressure.
112 #if defined(__GNUC__) || defined(__clang__)
113  // GCC and Clang won't inline the intrinsics.
114  uint64_t result;
115  asm("blsrq %1, %0" : "=r"(result) : "r"(value));
116  return result;
117 #else
118  return _blsr_u64(value);
119 #endif
120  }
121 
124 #if defined(__GNUC__) || defined(__clang__)
125  // GCC and Clang won't inline the intrinsics.
126  // Encode parameters in `pattern` where `pattern[0:7]` is `start` and
127  // `pattern[8:15]` is `length`.
128  // Ref: Intel Advanced Vector Extensions Programming Reference
129  uint64_t pattern = start & 0xFF;
130  pattern = pattern | ((length & 0xFF) << 8);
131  uint64_t result;
132  asm("bextrq %2, %1, %0" : "=r"(result) : "r"(value), "r"(pattern));
133  return result;
134 #else
135  return _bextr_u64(value, start, length);
136 #endif
137  }
138 
140 #if defined(__GNUC__) || defined(__clang__)
141  // GCC and Clang won't inline the intrinsics.
142  const uint64_t index64 = index;
143  uint64_t result;
144  asm("bzhiq %2, %1, %0" : "=r"(result) : "r"(value), "r"(index64));
145  return result;
146 #else
147  return _bzhi_u64(value, index);
148 #endif
149  }
150 };
151 
152 enum class Type {
153  DEFAULT,
154  NEHALEM,
155  HASWELL,
156 };
157 
158 inline Type detect() {
159  const static Type type = [] {
161  VLOG(2) << "Will use folly::compression::instructions::Haswell";
162  return Type::HASWELL;
163  } else if (instructions::Nehalem::supported()) {
164  VLOG(2) << "Will use folly::compression::instructions::Nehalem";
165  return Type::NEHALEM;
166  } else {
167  VLOG(2) << "Will use folly::compression::instructions::Default";
168  return Type::DEFAULT;
169  }
170  }();
171  return type;
172 }
173 
174 template <class F>
175 auto dispatch(Type type, F&& f) -> decltype(f(std::declval<Default>())) {
176  switch (type) {
177  case Type::HASWELL:
178  return f(Haswell());
179  case Type::NEHALEM:
180  return f(Nehalem());
181  case Type::DEFAULT:
182  return f(Default());
183  }
184 
186 }
187 
188 template <class F>
189 auto dispatch(F&& f) -> decltype(f(std::declval<Default>())) {
190  return dispatch(detect(), std::forward<F>(f));
191 }
192 
193 } // namespace instructions
194 } // namespace compression
195 } // namespace folly
auto f
#define FOLLY_ALWAYS_INLINE
Definition: CPortability.h:151
PskType type
static FOLLY_ALWAYS_INLINE uint64_t popcount(uint64_t value)
Definition: Instructions.h:91
static FOLLY_ALWAYS_INLINE uint64_t blsr(uint64_t value)
Definition: Instructions.h:57
—— Concurrent Priority Queue Implementation ——
Definition: AtomicBitSet.h:29
static FOLLY_ALWAYS_INLINE uint64_t bextr(uint64_t value, uint32_t start, uint32_t length)
Definition: Instructions.h:65
static FOLLY_ALWAYS_INLINE int clz(uint64_t value)
Definition: Instructions.h:53
static FOLLY_ALWAYS_INLINE int ctz(uint64_t value)
Definition: Instructions.h:49
FOLLY_ALWAYS_INLINE void assume_unreachable()
Definition: Assume.h:59
static bool supported(const folly::CpuId &cpuId={})
Definition: Instructions.h:105
auto start
static FOLLY_ALWAYS_INLINE uint64_t bzhi(uint64_t value, uint32_t index)
Definition: Instructions.h:78
static FOLLY_ALWAYS_INLINE uint64_t bzhi(uint64_t value, uint32_t index)
Definition: Instructions.h:139
static bool supported(const folly::CpuId &cpuId={})
Definition: Instructions.h:87
auto dispatch(Type type, F &&f) -> decltype(f(std::declval< Default >()))
Definition: Instructions.h:175
uint64_t value(const typename LockFreeRingBuffer< T, Atom >::Cursor &rbcursor)
static FOLLY_ALWAYS_INLINE uint64_t bextr(uint64_t value, uint32_t start, uint32_t length)
Definition: Instructions.h:123
static FOLLY_ALWAYS_INLINE uint64_t blsr(uint64_t value)
Definition: Instructions.h:109
static bool supported(const folly::CpuId &={})
Definition: Instructions.h:43
static FOLLY_ALWAYS_INLINE uint64_t popcount(uint64_t value)
Definition: Instructions.h:46